From 84e478c6f1eb9c4bfa1fff2f8108e9a061b46428 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 5 Feb 2010 21:47:05 -0500
Subject: nmi_watchdog: Config option to enable new nmi_watchdog

These are the bits that enable the new nmi_watchdog and safely
isolate the old nmi_watchdog.  Only one or the other can run,
not both at the same time.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: gorcunov@gmail.com
Cc: aris@redhat.com
Cc: peterz@infradead.org
LKML-Reference: <1265424425-31562-4-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/nmi.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index b752e807add..a42ff0bef70 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -47,4 +47,8 @@ static inline bool trigger_all_cpu_backtrace(void)
 }
 #endif
 
+#ifdef CONFIG_NMI_WATCHDOG
+int hw_nmi_is_cpu_stuck(struct pt_regs *);
+#endif
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From 504d7cf10ee42bb76b9556859f23d4121dee0a77 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 12 Feb 2010 17:19:19 -0500
Subject: nmi_watchdog: Compile and portability fixes

The original patch was x86_64 centric.  Changed the code to make
it less so.

Tested by building and running on a powerpc.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: gorcunov@gmail.com
Cc: aris@redhat.com
LKML-Reference: <1266013161-31197-2-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/nmi.h    |  2 ++
 arch/x86/kernel/apic/hw_nmi.c | 21 ++++++++++++-----
 include/linux/nmi.h           |  9 ++++++++
 kernel/nmi_watchdog.c         | 52 ++++++++++++++++++++++++++++++++++---------
 kernel/sysctl.c               | 15 ++++++++++++-
 5 files changed, 82 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 93da9c3f334..5b41b0feb6d 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -17,7 +17,9 @@ int do_nmi_callback(struct pt_regs *regs, int cpu);
 
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
 extern int check_nmi_watchdog(void);
+#if !defined(CONFIG_NMI_WATCHDOG)
 extern int nmi_watchdog_enabled;
+#endif
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
 extern void release_perfctr_nmi(unsigned int);
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 8c0e6a410d0..312d772c5c3 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -32,8 +32,13 @@ static DEFINE_PER_CPU(unsigned, last_irq_sum);
  */
 static inline unsigned int get_timer_irqs(int cpu)
 {
-        return per_cpu(irq_stat, cpu).apic_timer_irqs +
-                per_cpu(irq_stat, cpu).irq0_irqs;
+	unsigned int irqs = per_cpu(irq_stat, cpu).irq0_irqs;
+
+#if defined(CONFIG_X86_LOCAL_APIC)
+	irqs += per_cpu(irq_stat, cpu).apic_timer_irqs;
+#endif
+
+        return irqs;
 }
 
 static inline int mce_in_progress(void)
@@ -82,6 +87,11 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *regs)
 	}
 }
 
+u64 hw_nmi_get_sample_period(void)
+{
+        return cpu_khz * 1000;
+}
+
 void arch_trigger_all_cpu_backtrace(void)
 {
 	int i;
@@ -100,15 +110,16 @@ void arch_trigger_all_cpu_backtrace(void)
 }
 
 /* STUB calls to mimic old nmi_watchdog behaviour */
+#if defined(CONFIG_X86_LOCAL_APIC)
 unsigned int nmi_watchdog = NMI_NONE;
 EXPORT_SYMBOL(nmi_watchdog);
+void acpi_nmi_enable(void) { return; }
+void acpi_nmi_disable(void) { return; }
+#endif
 atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
 EXPORT_SYMBOL(nmi_active);
-int nmi_watchdog_enabled;
 int unknown_nmi_panic;
 void cpu_nmi_set_wd_enabled(void) { return; }
-void acpi_nmi_enable(void) { return; }
-void acpi_nmi_disable(void) { return; }
 void stop_apic_nmi_watchdog(void *unused) { return; }
 void setup_apic_nmi_watchdog(void *unused) { return; }
 int __init check_nmi_watchdog(void) { return 0; }
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index a42ff0bef70..794e7354c5b 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -20,10 +20,14 @@ extern void touch_nmi_watchdog(void);
 extern void acpi_nmi_disable(void);
 extern void acpi_nmi_enable(void);
 #else
+#ifndef CONFIG_NMI_WATCHDOG
 static inline void touch_nmi_watchdog(void)
 {
 	touch_softlockup_watchdog();
 }
+#else
+extern void touch_nmi_watchdog(void);
+#endif
 static inline void acpi_nmi_disable(void) { }
 static inline void acpi_nmi_enable(void) { }
 #endif
@@ -49,6 +53,11 @@ static inline bool trigger_all_cpu_backtrace(void)
 
 #ifdef CONFIG_NMI_WATCHDOG
 int hw_nmi_is_cpu_stuck(struct pt_regs *);
+u64 hw_nmi_get_sample_period(void);
+extern int nmi_watchdog_enabled;
+struct ctl_table;
+extern int proc_nmi_enabled(struct ctl_table *, int ,
+                        void __user *, size_t *, loff_t *);
 #endif
 
 #endif
diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
index 36817b214d6..73c1954a97b 100644
--- a/kernel/nmi_watchdog.c
+++ b/kernel/nmi_watchdog.c
@@ -30,6 +30,8 @@ static DEFINE_PER_CPU(struct perf_event *, nmi_watchdog_ev);
 static DEFINE_PER_CPU(int, nmi_watchdog_touch);
 static DEFINE_PER_CPU(long, alert_counter);
 
+static int panic_on_timeout;
+
 void touch_nmi_watchdog(void)
 {
 	__raw_get_cpu_var(nmi_watchdog_touch) = 1;
@@ -46,19 +48,49 @@ void touch_all_nmi_watchdog(void)
 	touch_softlockup_watchdog();
 }
 
+static int __init setup_nmi_watchdog(char *str)
+{
+        if (!strncmp(str, "panic", 5)) {
+                panic_on_timeout = 1;
+                str = strchr(str, ',');
+                if (!str)
+                        return 1;
+                ++str;
+        }
+        return 1;
+}
+__setup("nmi_watchdog=", setup_nmi_watchdog);
+
 #ifdef CONFIG_SYSCTL
 /*
  * proc handler for /proc/sys/kernel/nmi_watchdog
  */
+int nmi_watchdog_enabled;
+
 int proc_nmi_enabled(struct ctl_table *table, int write,
 		     void __user *buffer, size_t *length, loff_t *ppos)
 {
 	int cpu;
 
-	if (per_cpu(nmi_watchdog_ev, smp_processor_id()) == NULL)
+	if (!write) {
+		struct perf_event *event;
+		for_each_online_cpu(cpu) {
+			event = per_cpu(nmi_watchdog_ev, cpu);
+			if (event->state > PERF_EVENT_STATE_OFF) {
+				nmi_watchdog_enabled = 1;
+				break;
+			}
+		}
+		proc_dointvec(table, write, buffer, length, ppos);
+		return 0;
+	}
+
+	if (per_cpu(nmi_watchdog_ev, smp_processor_id()) == NULL) {
 		nmi_watchdog_enabled = 0;
-	else
-		nmi_watchdog_enabled = 1;
+		proc_dointvec(table, write, buffer, length, ppos);
+		printk("NMI watchdog failed configuration, can not be enabled\n");
+		return 0;
+	}
 
 	touch_all_nmi_watchdog();
 	proc_dointvec(table, write, buffer, length, ppos);
@@ -81,8 +113,6 @@ struct perf_event_attr wd_attr = {
 	.disabled = 1,
 };
 
-static int panic_on_timeout;
-
 void wd_overflow(struct perf_event *event, int nmi,
 		 struct perf_sample_data *data,
 		 struct pt_regs *regs)
@@ -103,11 +133,11 @@ void wd_overflow(struct perf_event *event, int nmi,
 		 */
 		per_cpu(alert_counter,cpu) += 1;
 		if (per_cpu(alert_counter,cpu) == 5) {
-			/*
-			 * die_nmi will return ONLY if NOTIFY_STOP happens..
-			 */
-			die_nmi("BUG: NMI Watchdog detected LOCKUP",
-				regs, panic_on_timeout);
+			if (panic_on_timeout) {
+				panic("NMI Watchdog detected LOCKUP on cpu %d", cpu);
+			} else {
+				WARN(1, "NMI Watchdog detected LOCKUP on cpu %d", cpu);
+			}
 		}
 	} else {
 		per_cpu(alert_counter,cpu) = 0;
@@ -133,7 +163,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		/* originally wanted the below chunk to be in CPU_UP_PREPARE, but caps is unpriv for non-CPU0 */
-		wd_attr.sample_period = cpu_khz * 1000;
+		wd_attr.sample_period = hw_nmi_get_sample_period();
 		event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
 		if (IS_ERR(event)) {
 			printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a68b244846..ac72c9e6bd9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -60,6 +60,10 @@
 #include <asm/io.h>
 #endif
 
+#ifdef CONFIG_NMI_WATCHDOG
+#include <linux/nmi.h>
+#endif
+
 
 #if defined(CONFIG_SYSCTL)
 
@@ -692,7 +696,16 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
 	},
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+#if defined(CONFIG_NMI_WATCHDOG)
+	{
+		.procname       = "nmi_watchdog",
+		.data           = &nmi_watchdog_enabled,
+		.maxlen         = sizeof (int),
+		.mode           = 0644,
+		.proc_handler   = proc_nmi_enabled,
+	},
+#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_NMI_WATCHDOG)
 	{
 		.procname       = "unknown_nmi_panic",
 		.data           = &unknown_nmi_panic,
-- 
cgit v1.2.3-70-g09d2


From 47195d57636604ff6048b0d7aa3e4ed9643f6073 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Mon, 22 Feb 2010 18:09:03 -0500
Subject: nmi_watchdog: Clean up various small details

Mostly copy/paste whitespace damage with a couple of nitpicks by
the checkpatch script. Fix the struct definition as requested by Ingo too.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: gorcunov@gmail.com
Cc: aris@redhat.com
LKML-Reference: <1266880143-24943-1-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
--
 arch/x86/kernel/apic/hw_nmi.c |   14 +++++------
 arch/x86/kernel/traps.c       |    6 ++--
 include/linux/nmi.h           |    2 -
 kernel/nmi_watchdog.c         |   51 ++++++++++++++++++++----------------------
 4 files changed, 36 insertions(+), 37 deletions(-)
---
 arch/x86/kernel/apic/hw_nmi.c | 14 ++++++------
 arch/x86/kernel/traps.c       |  6 ++---
 include/linux/nmi.h           |  2 +-
 kernel/nmi_watchdog.c         | 51 +++++++++++++++++++++----------------------
 4 files changed, 36 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 0b4d205a6b8..e8b78a0be5d 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -38,15 +38,15 @@ static inline unsigned int get_timer_irqs(int cpu)
 	irqs += per_cpu(irq_stat, cpu).apic_timer_irqs;
 #endif
 
-        return irqs;
+	return irqs;
 }
 
 static inline int mce_in_progress(void)
 {
 #if defined(CONFIG_X86_MCE)
-        return atomic_read(&mce_entry) > 0;
+	return atomic_read(&mce_entry) > 0;
 #endif
-        return 0;
+	return 0;
 }
 
 int hw_nmi_is_cpu_stuck(struct pt_regs *regs)
@@ -69,9 +69,9 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *regs)
 	}
 
 	/* if we are doing an mce, just assume the cpu is not stuck */
-        /* Could check oops_in_progress here too, but it's safer not to */
-        if (mce_in_progress())
-                return 0;
+	/* Could check oops_in_progress here too, but it's safer not to */
+	if (mce_in_progress())
+		return 0;
 
 	/* We determine if the cpu is stuck by checking whether any
 	 * interrupts have happened since we last checked.  Of course
@@ -89,7 +89,7 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *regs)
 
 u64 hw_nmi_get_sample_period(void)
 {
-        return cpu_khz * 1000;
+	return cpu_khz * 1000;
 }
 
 #ifdef ARCH_HAS_NMI_WATCHDOG
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 973cbc4f044..bdc7fab3ef3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -402,9 +402,9 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 			return;
 
 #ifdef CONFIG_X86_LOCAL_APIC
-	        if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
-	        			                == NOTIFY_STOP)
-	                return;
+		if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+							== NOTIFY_STOP)
+			return;
 
 #ifndef CONFIG_NMI_WATCHDOG
 		/*
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 794e7354c5b..22cc7960b64 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -57,7 +57,7 @@ u64 hw_nmi_get_sample_period(void);
 extern int nmi_watchdog_enabled;
 struct ctl_table;
 extern int proc_nmi_enabled(struct ctl_table *, int ,
-                        void __user *, size_t *, loff_t *);
+			void __user *, size_t *, loff_t *);
 #endif
 
 #endif
diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
index 3c75cbf3acb..0a6f57f537a 100644
--- a/kernel/nmi_watchdog.c
+++ b/kernel/nmi_watchdog.c
@@ -50,31 +50,31 @@ void touch_all_nmi_watchdog(void)
 
 static int __init setup_nmi_watchdog(char *str)
 {
-        if (!strncmp(str, "panic", 5)) {
-                panic_on_timeout = 1;
-                str = strchr(str, ',');
-                if (!str)
-                        return 1;
-                ++str;
-        }
-        return 1;
+	if (!strncmp(str, "panic", 5)) {
+		panic_on_timeout = 1;
+		str = strchr(str, ',');
+		if (!str)
+			return 1;
+		++str;
+	}
+	return 1;
 }
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 
 struct perf_event_attr wd_hw_attr = {
-	.type = PERF_TYPE_HARDWARE,
-	.config = PERF_COUNT_HW_CPU_CYCLES,
-	.size = sizeof(struct perf_event_attr),
-	.pinned = 1,
-	.disabled = 1,
+	.type		= PERF_TYPE_HARDWARE,
+	.config		= PERF_COUNT_HW_CPU_CYCLES,
+	.size		= sizeof(struct perf_event_attr),
+	.pinned		= 1,
+	.disabled	= 1,
 };
 
 struct perf_event_attr wd_sw_attr = {
-	.type = PERF_TYPE_SOFTWARE,
-	.config = PERF_COUNT_SW_CPU_CLOCK,
-	.size = sizeof(struct perf_event_attr),
-	.pinned = 1,
-	.disabled = 1,
+	.type		= PERF_TYPE_SOFTWARE,
+	.config		= PERF_COUNT_SW_CPU_CLOCK,
+	.size		= sizeof(struct perf_event_attr),
+	.pinned		= 1,
+	.disabled	= 1,
 };
 
 void wd_overflow(struct perf_event *event, int nmi,
@@ -95,16 +95,15 @@ void wd_overflow(struct perf_event *event, int nmi,
 		 * Ayiee, looks like this CPU is stuck ...
 		 * wait a few IRQs (5 seconds) before doing the oops ...
 		 */
-		per_cpu(alert_counter,cpu) += 1;
-		if (per_cpu(alert_counter,cpu) == 5) {
-			if (panic_on_timeout) {
+		per_cpu(alert_counter, cpu) += 1;
+		if (per_cpu(alert_counter, cpu) == 5) {
+			if (panic_on_timeout)
 				panic("NMI Watchdog detected LOCKUP on cpu %d", cpu);
-			} else {
+			else
 				WARN(1, "NMI Watchdog detected LOCKUP on cpu %d", cpu);
-			}
 		}
 	} else {
-		per_cpu(alert_counter,cpu) = 0;
+		per_cpu(alert_counter, cpu) = 0;
 	}
 
 	return;
@@ -126,7 +125,7 @@ static int enable_nmi_watchdog(int cpu)
 		event = perf_event_create_kernel_counter(wd_attr, cpu, -1, wd_overflow);
 		if (IS_ERR(event)) {
 			/* hardware doesn't exist or not supported, fallback to software events */
-			printk("nmi_watchdog: hardware not available, trying software events\n");
+			printk(KERN_INFO "nmi_watchdog: hardware not available, trying software events\n");
 			wd_attr = &wd_sw_attr;
 			wd_attr->sample_period = NSEC_PER_SEC;
 			event = perf_event_create_kernel_counter(wd_attr, cpu, -1, wd_overflow);
@@ -182,7 +181,7 @@ int proc_nmi_enabled(struct ctl_table *table, int write,
 	if (nmi_watchdog_enabled) {
 		for_each_online_cpu(cpu)
 			if (enable_nmi_watchdog(cpu)) {
-				printk("NMI watchdog failed configuration, "
+				printk(KERN_ERR "NMI watchdog failed configuration, "
 					" can not be enabled\n");
 			}
 	} else {
-- 
cgit v1.2.3-70-g09d2


From 4e639fdf0d0d745648aa62228ab8a0d9c03a563f Mon Sep 17 00:00:00 2001
From: Peter Jones <pjones@redhat.com>
Date: Thu, 25 Feb 2010 15:37:17 -0500
Subject: ibft: Update iBFT handling for v1.03 of the spec.

- Use struct acpi_table_ibft instead of struct ibft_table_header
- Don't do reserve_ibft_region() on UEFI machines (section 1.4.3.1)
- If ibft_addr isn't initialized when ibft_init() is called, check for
  ACPI-based tables.
- Fix compiler error when CONFIG_ACPI is not defined.

Signed-off-by: Konrad Rzeszutek Wilk <konrad@kernel.org>
Signed-off-by: Peter Jones <pjones@redhat.com>
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
---
 drivers/firmware/iscsi_ibft.c      | 30 ++++++++++++++++++------------
 drivers/firmware/iscsi_ibft_find.c | 35 ++++++++++++++++++++++++++++++-----
 include/linux/iscsi_ibft.h         | 12 ++----------
 3 files changed, 50 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c
index ed2801c378d..b3ab24f9d78 100644
--- a/drivers/firmware/iscsi_ibft.c
+++ b/drivers/firmware/iscsi_ibft.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright 2007 Red Hat, Inc.
+ *  Copyright 2007-2010 Red Hat, Inc.
  *  by Peter Jones <pjones@redhat.com>
  *  Copyright 2008 IBM, Inc.
  *  by Konrad Rzeszutek <konradr@linux.vnet.ibm.com>
@@ -19,6 +19,9 @@
  *
  * Changelog:
  *
+ *  06 Jan 2010 - Peter Jones <pjones@redhat.com>
+ *    New changelog entries are in the git log from now on.  Not here.
+ *
  *  14 Mar 2008 - Konrad Rzeszutek <ketuzsezr@darnok.org>
  *    Updated comments and copyrights. (v0.4.9)
  *
@@ -78,9 +81,10 @@
 #include <linux/stat.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/acpi.h>
 
-#define IBFT_ISCSI_VERSION "0.4.9"
-#define IBFT_ISCSI_DATE "2008-Mar-14"
+#define IBFT_ISCSI_VERSION "0.5.0"
+#define IBFT_ISCSI_DATE "2010-Feb-25"
 
 MODULE_AUTHOR("Peter Jones <pjones@redhat.com> and \
 Konrad Rzeszutek <ketuzsezr@darnok.org>");
@@ -238,7 +242,7 @@ static const char *ibft_initiator_properties[] =
  */
 
 struct ibft_kobject {
-	struct ibft_table_header *header;
+	struct acpi_table_ibft *header;
 	union {
 		struct ibft_initiator *initiator;
 		struct ibft_nic *nic;
@@ -536,12 +540,13 @@ static int __init ibft_check_device(void)
 	u8 *pos;
 	u8 csum = 0;
 
-	len = ibft_addr->length;
+	len = ibft_addr->header.length;
 
 	/* Sanity checking of iBFT. */
-	if (ibft_addr->revision != 1) {
+	if (ibft_addr->header.revision != 1) {
 		printk(KERN_ERR "iBFT module supports only revision 1, " \
-				"while this is %d.\n", ibft_addr->revision);
+				"while this is %d.\n",
+				ibft_addr->header.revision);
 		return -ENOENT;
 	}
 	for (pos = (u8 *)ibft_addr; pos < (u8 *)ibft_addr + len; pos++)
@@ -558,7 +563,7 @@ static int __init ibft_check_device(void)
 /*
  * Helper function for ibft_register_kobjects.
  */
-static int __init ibft_create_kobject(struct ibft_table_header *header,
+static int __init ibft_create_kobject(struct acpi_table_ibft *header,
 				       struct ibft_hdr *hdr,
 				       struct list_head *list)
 {
@@ -596,7 +601,7 @@ static int __init ibft_create_kobject(struct ibft_table_header *header,
 	default:
 		printk(KERN_ERR "iBFT has unknown structure type (%d). " \
 				"Report this bug to %.6s!\n", hdr->id,
-				header->oem_id);
+				header->header.oem_id);
 		rc = 1;
 		break;
 	}
@@ -649,7 +654,7 @@ out_invalid_struct:
  * found add them on the passed-in list. We do not support the other
  * fields at this point, so they are skipped.
  */
-static int __init ibft_register_kobjects(struct ibft_table_header *header,
+static int __init ibft_register_kobjects(struct acpi_table_ibft *header,
 					  struct list_head *list)
 {
 	struct ibft_control *control = NULL;
@@ -660,7 +665,7 @@ static int __init ibft_register_kobjects(struct ibft_table_header *header,
 
 	control = (void *)header + sizeof(*header);
 	end = (void *)control + control->hdr.length;
-	eot_offset = (void *)header + header->length - (void *)control;
+	eot_offset = (void *)header + header->header.length - (void *)control;
 	rc = ibft_verify_hdr("control", (struct ibft_hdr *)control, id_control,
 			     sizeof(*control));
 
@@ -672,7 +677,8 @@ static int __init ibft_register_kobjects(struct ibft_table_header *header,
 	}
 	for (ptr = &control->initiator_off; ptr < end; ptr += sizeof(u16)) {
 		offset = *(u16 *)ptr;
-		if (offset && offset < header->length && offset < eot_offset) {
+		if (offset && offset < header->header.length &&
+						offset < eot_offset) {
 			rc = ibft_create_kobject(header,
 						 (void *)header + offset,
 						 list);
diff --git a/drivers/firmware/iscsi_ibft_find.c b/drivers/firmware/iscsi_ibft_find.c
index d6470ef36e4..dd85555d329 100644
--- a/drivers/firmware/iscsi_ibft_find.c
+++ b/drivers/firmware/iscsi_ibft_find.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright 2007 Red Hat, Inc.
+ *  Copyright 2007-2010 Red Hat, Inc.
  *  by Peter Jones <pjones@redhat.com>
  *  Copyright 2007 IBM, Inc.
  *  by Konrad Rzeszutek <konradr@linux.vnet.ibm.com>
@@ -22,6 +22,7 @@
 #include <linux/blkdev.h>
 #include <linux/ctype.h>
 #include <linux/device.h>
+#include <linux/efi.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/limits.h>
@@ -30,13 +31,15 @@
 #include <linux/stat.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/acpi.h>
+#include <linux/iscsi_ibft.h>
 
 #include <asm/mmzone.h>
 
 /*
  * Physical location of iSCSI Boot Format Table.
  */
-struct ibft_table_header *ibft_addr;
+struct acpi_table_ibft *ibft_addr;
 EXPORT_SYMBOL_GPL(ibft_addr);
 
 #define IBFT_SIGN "iBFT"
@@ -46,6 +49,13 @@ EXPORT_SYMBOL_GPL(ibft_addr);
 #define VGA_MEM 0xA0000 /* VGA buffer */
 #define VGA_SIZE 0x20000 /* 128kB */
 
+#ifdef CONFIG_ACPI
+static int __init acpi_find_ibft(struct acpi_table_header *header)
+{
+	ibft_addr = (struct acpi_table_ibft *)header;
+	return 0;
+}
+#endif /* CONFIG_ACPI */
 
 /*
  * Routine used to find the iSCSI Boot Format Table. The logical
@@ -59,6 +69,11 @@ unsigned long __init find_ibft_region(unsigned long *sizep)
 
 	ibft_addr = NULL;
 
+	/* iBFT 1.03 section 1.4.3.1 mandates that UEFI machines will
+	 * only use ACPI for this */
+	if (efi_enabled)
+		return 0;
+
 	for (pos = IBFT_START; pos < IBFT_END; pos += 16) {
 		/* The table can't be inside the VGA BIOS reserved space,
 		 * so skip that area */
@@ -72,14 +87,24 @@ unsigned long __init find_ibft_region(unsigned long *sizep)
 			/* if the length of the table extends past 1M,
 			 * the table cannot be valid. */
 			if (pos + len <= (IBFT_END-1)) {
-				ibft_addr = (struct ibft_table_header *)virt;
+				ibft_addr = (struct acpi_table_ibft *)virt;
 				break;
 			}
 		}
 	}
+#ifdef CONFIG_ACPI
+	/*
+	 * One spec says "IBFT", the other says "iBFT". We have to check
+	 * for both.
+	 */
+	if (!ibft_addr)
+		acpi_table_parse(ACPI_SIG_IBFT, acpi_find_ibft);
+	if (!ibft_addr)
+		acpi_table_parse("iBFT", acpi_find_ibft);
+#endif /* CONFIG_ACPI */
 	if (ibft_addr) {
-		*sizep = PAGE_ALIGN(len);
-		return pos;
+		*sizep = PAGE_ALIGN(ibft_addr->header.length);
+		return (u64)isa_virt_to_bus(ibft_addr);
 	}
 
 	*sizep = 0;
diff --git a/include/linux/iscsi_ibft.h b/include/linux/iscsi_ibft.h
index d2e4042f8f5..8ba7e5b9d62 100644
--- a/include/linux/iscsi_ibft.h
+++ b/include/linux/iscsi_ibft.h
@@ -21,21 +21,13 @@
 #ifndef ISCSI_IBFT_H
 #define ISCSI_IBFT_H
 
-struct ibft_table_header {
-	char signature[4];
-	u32 length;
-	u8 revision;
-	u8 checksum;
-	char oem_id[6];
-	char oem_table_id[8];
-	char reserved[24];
-} __attribute__((__packed__));
+#include <acpi/acpi.h>
 
 /*
  * Logical location of iSCSI Boot Format Table.
  * If the value is NULL there is no iBFT on the machine.
  */
-extern struct ibft_table_header *ibft_addr;
+extern struct acpi_table_ibft *ibft_addr;
 
 /*
  * Routine used to find and reserve the iSCSI Boot Format Table. The
-- 
cgit v1.2.3-70-g09d2


From ba4ee30c6c797de148dcc7254cf6d531aba71d9b Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Mon, 12 Apr 2010 18:06:17 +0000
Subject: ibft: separate ibft parsing from sysfs interface

Not all iscsi drivers support ibft. For drivers like be2iscsi
that do not but are bootable through a vendor firmware specific
format/process this patch moves the sysfs interface from the ibft code
to a lib module. This then allows userspace tools to search for iscsi boot
info in a common place and in a common format.

ibft iscsi boot info is exported in the same place as it was
before: /sys/firmware/ibft.

vendor/fw boot info gets exported in /sys/firmware/iscsi_bootX, where X is the
scsi host number of the HBA. Underneath these parent dirs, the
target, ethernet, and initiator dirs are the same as they were before.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: Konrad Rzeszutek Wilk <konrad@kernel.org>
Signed-off-by: Peter Jones <pjones@redhat.com>
---
 drivers/firmware/Kconfig            |   8 +
 drivers/firmware/Makefile           |   1 +
 drivers/firmware/iscsi_boot_sysfs.c | 481 ++++++++++++++++++++++++++++++++++++
 include/linux/iscsi_boot_sysfs.h    | 123 +++++++++
 4 files changed, 613 insertions(+)
 create mode 100644 drivers/firmware/iscsi_boot_sysfs.c
 create mode 100644 include/linux/iscsi_boot_sysfs.h

(limited to 'include')

diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 1b03ba1d083..571d2182613 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -122,6 +122,14 @@ config ISCSI_IBFT_FIND
 	  is necessary for iSCSI Boot Firmware Table Attributes module to work
 	  properly.
 
+config ISCSI_BOOT_SYSFS
+	tristate "iSCSI Boot Sysfs Interface"
+	default	n
+	help
+	  This option enables support for exposing iSCSI boot information
+	  via sysfs to userspace. If you wish to export this information,
+	  say Y. Otherwise, say N.
+
 config ISCSI_IBFT
 	tristate "iSCSI Boot Firmware Table Attributes module"
 	depends on ISCSI_IBFT_FIND
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 1c3c17343db..5fe7e166292 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -10,4 +10,5 @@ obj-$(CONFIG_DCDBAS)		+= dcdbas.o
 obj-$(CONFIG_DMIID)		+= dmi-id.o
 obj-$(CONFIG_ISCSI_IBFT_FIND)	+= iscsi_ibft_find.o
 obj-$(CONFIG_ISCSI_IBFT)	+= iscsi_ibft.o
+obj-$(CONFIG_ISCSI_BOOT_SYSFS)	+= iscsi_boot_sysfs.o
 obj-$(CONFIG_FIRMWARE_MEMMAP)	+= memmap.o
diff --git a/drivers/firmware/iscsi_boot_sysfs.c b/drivers/firmware/iscsi_boot_sysfs.c
new file mode 100644
index 00000000000..df6bff7366c
--- /dev/null
+++ b/drivers/firmware/iscsi_boot_sysfs.c
@@ -0,0 +1,481 @@
+/*
+ * Export the iSCSI boot info to userland via sysfs.
+ *
+ * Copyright (C) 2010 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2010 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License v2.0 as published by
+ * the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/capability.h>
+#include <linux/iscsi_boot_sysfs.h>
+
+
+MODULE_AUTHOR("Mike Christie <michaelc@cs.wisc.edu>");
+MODULE_DESCRIPTION("sysfs interface and helpers to export iSCSI boot information");
+MODULE_LICENSE("GPL");
+/*
+ * The kobject and attribute structures.
+ */
+struct iscsi_boot_attr {
+	struct attribute attr;
+	int type;
+	ssize_t (*show) (void *data, int type, char *buf);
+};
+
+/*
+ * The routine called for all sysfs attributes.
+ */
+static ssize_t iscsi_boot_show_attribute(struct kobject *kobj,
+					 struct attribute *attr, char *buf)
+{
+	struct iscsi_boot_kobj *boot_kobj =
+			container_of(kobj, struct iscsi_boot_kobj, kobj);
+	struct iscsi_boot_attr *boot_attr =
+			container_of(attr, struct iscsi_boot_attr, attr);
+	ssize_t ret = -EIO;
+	char *str = buf;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (boot_kobj->show)
+		ret = boot_kobj->show(boot_kobj->data, boot_attr->type, str);
+	return ret;
+}
+
+static const struct sysfs_ops iscsi_boot_attr_ops = {
+	.show = iscsi_boot_show_attribute,
+};
+
+static void iscsi_boot_kobj_release(struct kobject *kobj)
+{
+	struct iscsi_boot_kobj *boot_kobj =
+			container_of(kobj, struct iscsi_boot_kobj, kobj);
+
+	kfree(boot_kobj->data);
+	kfree(boot_kobj);
+}
+
+static struct kobj_type iscsi_boot_ktype = {
+	.release = iscsi_boot_kobj_release,
+	.sysfs_ops = &iscsi_boot_attr_ops,
+};
+
+#define iscsi_boot_rd_attr(fnname, sysfs_name, attr_type)		\
+static struct iscsi_boot_attr iscsi_boot_attr_##fnname = {	\
+	.attr	= { .name = __stringify(sysfs_name), .mode = 0444 },	\
+	.type	= attr_type,						\
+}
+
+/* Target attrs */
+iscsi_boot_rd_attr(tgt_index, index, ISCSI_BOOT_TGT_INDEX);
+iscsi_boot_rd_attr(tgt_flags, flags, ISCSI_BOOT_TGT_FLAGS);
+iscsi_boot_rd_attr(tgt_ip, ip-addr, ISCSI_BOOT_TGT_IP_ADDR);
+iscsi_boot_rd_attr(tgt_port, port, ISCSI_BOOT_TGT_PORT);
+iscsi_boot_rd_attr(tgt_lun, lun, ISCSI_BOOT_TGT_LUN);
+iscsi_boot_rd_attr(tgt_chap, chap-type, ISCSI_BOOT_TGT_CHAP_TYPE);
+iscsi_boot_rd_attr(tgt_nic, nic-assoc, ISCSI_BOOT_TGT_NIC_ASSOC);
+iscsi_boot_rd_attr(tgt_name, target-name, ISCSI_BOOT_TGT_NAME);
+iscsi_boot_rd_attr(tgt_chap_name, chap-name, ISCSI_BOOT_TGT_CHAP_NAME);
+iscsi_boot_rd_attr(tgt_chap_secret, chap-secret, ISCSI_BOOT_TGT_CHAP_SECRET);
+iscsi_boot_rd_attr(tgt_chap_rev_name, rev-chap-name,
+		   ISCSI_BOOT_TGT_REV_CHAP_NAME);
+iscsi_boot_rd_attr(tgt_chap_rev_secret, rev-chap-name-secret,
+		   ISCSI_BOOT_TGT_REV_CHAP_SECRET);
+
+static struct attribute *target_attrs[] = {
+	&iscsi_boot_attr_tgt_index.attr,
+	&iscsi_boot_attr_tgt_flags.attr,
+	&iscsi_boot_attr_tgt_ip.attr,
+	&iscsi_boot_attr_tgt_port.attr,
+	&iscsi_boot_attr_tgt_lun.attr,
+	&iscsi_boot_attr_tgt_chap.attr,
+	&iscsi_boot_attr_tgt_nic.attr,
+	&iscsi_boot_attr_tgt_name.attr,
+	&iscsi_boot_attr_tgt_chap_name.attr,
+	&iscsi_boot_attr_tgt_chap_secret.attr,
+	&iscsi_boot_attr_tgt_chap_rev_name.attr,
+	&iscsi_boot_attr_tgt_chap_rev_secret.attr,
+	NULL
+};
+
+static mode_t iscsi_boot_tgt_attr_is_visible(struct kobject *kobj,
+					     struct attribute *attr, int i)
+{
+	struct iscsi_boot_kobj *boot_kobj =
+			container_of(kobj, struct iscsi_boot_kobj, kobj);
+
+	if (attr ==  &iscsi_boot_attr_tgt_index.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_TGT_INDEX);
+	else if (attr == &iscsi_boot_attr_tgt_flags.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_TGT_FLAGS);
+	else if (attr == &iscsi_boot_attr_tgt_ip.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					      ISCSI_BOOT_TGT_IP_ADDR);
+	else if (attr == &iscsi_boot_attr_tgt_port.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					      ISCSI_BOOT_TGT_PORT);
+	else if (attr == &iscsi_boot_attr_tgt_lun.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					      ISCSI_BOOT_TGT_LUN);
+	else if (attr == &iscsi_boot_attr_tgt_chap.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_TGT_CHAP_TYPE);
+	else if (attr == &iscsi_boot_attr_tgt_nic.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_TGT_NIC_ASSOC);
+	else if (attr == &iscsi_boot_attr_tgt_name.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_TGT_NAME);
+	else if (attr == &iscsi_boot_attr_tgt_chap_name.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_TGT_CHAP_NAME);
+	else if (attr == &iscsi_boot_attr_tgt_chap_secret.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_TGT_CHAP_SECRET);
+	else if (attr == &iscsi_boot_attr_tgt_chap_rev_name.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_TGT_REV_CHAP_NAME);
+	else if (attr == &iscsi_boot_attr_tgt_chap_rev_secret.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_TGT_REV_CHAP_SECRET);
+	return 0;
+}
+
+static struct attribute_group iscsi_boot_target_attr_group = {
+	.attrs = target_attrs,
+	.is_visible = iscsi_boot_tgt_attr_is_visible,
+};
+
+/* Ethernet attrs */
+iscsi_boot_rd_attr(eth_index, index, ISCSI_BOOT_ETH_INDEX);
+iscsi_boot_rd_attr(eth_flags, flags, ISCSI_BOOT_ETH_FLAGS);
+iscsi_boot_rd_attr(eth_ip, ip-addr, ISCSI_BOOT_ETH_IP_ADDR);
+iscsi_boot_rd_attr(eth_subnet, subnet-mask, ISCSI_BOOT_ETH_SUBNET_MASK);
+iscsi_boot_rd_attr(eth_origin, origin, ISCSI_BOOT_ETH_ORIGIN);
+iscsi_boot_rd_attr(eth_gateway, gateway, ISCSI_BOOT_ETH_GATEWAY);
+iscsi_boot_rd_attr(eth_primary_dns, primary-dns, ISCSI_BOOT_ETH_PRIMARY_DNS);
+iscsi_boot_rd_attr(eth_secondary_dns, secondary-dns,
+		   ISCSI_BOOT_ETH_SECONDARY_DNS);
+iscsi_boot_rd_attr(eth_dhcp, dhcp, ISCSI_BOOT_ETH_DHCP);
+iscsi_boot_rd_attr(eth_vlan, vlan, ISCSI_BOOT_ETH_VLAN);
+iscsi_boot_rd_attr(eth_mac, mac, ISCSI_BOOT_ETH_MAC);
+iscsi_boot_rd_attr(eth_hostname, hostname, ISCSI_BOOT_ETH_HOSTNAME);
+
+static struct attribute *ethernet_attrs[] = {
+	&iscsi_boot_attr_eth_index.attr,
+	&iscsi_boot_attr_eth_flags.attr,
+	&iscsi_boot_attr_eth_ip.attr,
+	&iscsi_boot_attr_eth_subnet.attr,
+	&iscsi_boot_attr_eth_origin.attr,
+	&iscsi_boot_attr_eth_gateway.attr,
+	&iscsi_boot_attr_eth_primary_dns.attr,
+	&iscsi_boot_attr_eth_secondary_dns.attr,
+	&iscsi_boot_attr_eth_dhcp.attr,
+	&iscsi_boot_attr_eth_vlan.attr,
+	&iscsi_boot_attr_eth_mac.attr,
+	&iscsi_boot_attr_eth_hostname.attr,
+	NULL
+};
+
+static mode_t iscsi_boot_eth_attr_is_visible(struct kobject *kobj,
+					     struct attribute *attr, int i)
+{
+	struct iscsi_boot_kobj *boot_kobj =
+			container_of(kobj, struct iscsi_boot_kobj, kobj);
+
+	if (attr ==  &iscsi_boot_attr_eth_index.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_ETH_INDEX);
+	else if (attr ==  &iscsi_boot_attr_eth_flags.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_ETH_FLAGS);
+	else if (attr ==  &iscsi_boot_attr_eth_ip.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_ETH_IP_ADDR);
+	else if (attr ==  &iscsi_boot_attr_eth_subnet.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_ETH_SUBNET_MASK);
+	else if (attr ==  &iscsi_boot_attr_eth_origin.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_ETH_ORIGIN);
+	else if (attr ==  &iscsi_boot_attr_eth_gateway.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_ETH_GATEWAY);
+	else if (attr ==  &iscsi_boot_attr_eth_primary_dns.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_ETH_PRIMARY_DNS);
+	else if (attr ==  &iscsi_boot_attr_eth_secondary_dns.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_ETH_SECONDARY_DNS);
+	else if (attr ==  &iscsi_boot_attr_eth_dhcp.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_ETH_DHCP);
+	else if (attr ==  &iscsi_boot_attr_eth_vlan.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_ETH_VLAN);
+	else if (attr ==  &iscsi_boot_attr_eth_mac.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_ETH_MAC);
+	else if (attr ==  &iscsi_boot_attr_eth_hostname.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_ETH_HOSTNAME);
+	return 0;
+}
+
+static struct attribute_group iscsi_boot_ethernet_attr_group = {
+	.attrs = ethernet_attrs,
+	.is_visible = iscsi_boot_eth_attr_is_visible,
+};
+
+/* Initiator attrs */
+iscsi_boot_rd_attr(ini_index, index, ISCSI_BOOT_INI_INDEX);
+iscsi_boot_rd_attr(ini_flags, flags, ISCSI_BOOT_INI_FLAGS);
+iscsi_boot_rd_attr(ini_isns, isns-server, ISCSI_BOOT_INI_ISNS_SERVER);
+iscsi_boot_rd_attr(ini_slp, slp-server, ISCSI_BOOT_INI_SLP_SERVER);
+iscsi_boot_rd_attr(ini_primary_radius, pri-radius-server,
+		   ISCSI_BOOT_INI_PRI_RADIUS_SERVER);
+iscsi_boot_rd_attr(ini_secondary_radius, sec-radius-server,
+		   ISCSI_BOOT_INI_SEC_RADIUS_SERVER);
+iscsi_boot_rd_attr(ini_name, initiator-name, ISCSI_BOOT_INI_INITIATOR_NAME);
+
+static struct attribute *initiator_attrs[] = {
+	&iscsi_boot_attr_ini_index.attr,
+	&iscsi_boot_attr_ini_flags.attr,
+	&iscsi_boot_attr_ini_isns.attr,
+	&iscsi_boot_attr_ini_slp.attr,
+	&iscsi_boot_attr_ini_primary_radius.attr,
+	&iscsi_boot_attr_ini_secondary_radius.attr,
+	&iscsi_boot_attr_ini_name.attr,
+	NULL
+};
+
+static mode_t iscsi_boot_ini_attr_is_visible(struct kobject *kobj,
+					     struct attribute *attr, int i)
+{
+	struct iscsi_boot_kobj *boot_kobj =
+			container_of(kobj, struct iscsi_boot_kobj, kobj);
+
+	if (attr ==  &iscsi_boot_attr_ini_index.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_INI_INDEX);
+	if (attr ==  &iscsi_boot_attr_ini_flags.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_INI_FLAGS);
+	if (attr ==  &iscsi_boot_attr_ini_isns.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_INI_ISNS_SERVER);
+	if (attr ==  &iscsi_boot_attr_ini_slp.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_INI_SLP_SERVER);
+	if (attr ==  &iscsi_boot_attr_ini_primary_radius.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_INI_PRI_RADIUS_SERVER);
+	if (attr ==  &iscsi_boot_attr_ini_secondary_radius.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_INI_SEC_RADIUS_SERVER);
+	if (attr ==  &iscsi_boot_attr_ini_name.attr)
+		return boot_kobj->is_visible(boot_kobj->data,
+					     ISCSI_BOOT_INI_INITIATOR_NAME);
+
+	return 0;
+}
+
+static struct attribute_group iscsi_boot_initiator_attr_group = {
+	.attrs = initiator_attrs,
+	.is_visible = iscsi_boot_ini_attr_is_visible,
+};
+
+/* Create and register one boot kobject in boot_kset; NULL on failure. */
+static struct iscsi_boot_kobj *
+iscsi_boot_create_kobj(struct iscsi_boot_kset *boot_kset,
+		       struct attribute_group *attr_group,
+		       const char *name, int index, void *data,
+		       ssize_t (*show) (void *data, int type, char *buf),
+		       mode_t (*is_visible) (void *data, int type))
+{
+	struct iscsi_boot_kobj *boot_kobj;
+
+	boot_kobj = kzalloc(sizeof(*boot_kobj), GFP_KERNEL);
+	if (!boot_kobj)
+		return NULL;
+	INIT_LIST_HEAD(&boot_kobj->list);
+
+	boot_kobj->kobj.kset = boot_kset->kset;
+	if (kobject_init_and_add(&boot_kobj->kobj, &iscsi_boot_ktype,
+				 NULL, name, index)) {
+		/* Drop the ref instead of kfree; data is still NULL here. */
+		kobject_put(&boot_kobj->kobj);
+		return NULL;
+	}
+	boot_kobj->data = data;
+	boot_kobj->show = show;
+	boot_kobj->is_visible = is_visible;
+
+	if (sysfs_create_group(&boot_kobj->kobj, attr_group)) {
+		/*
+		 * Clear data so the release function does not free it:
+		 * the caller assumes a failed create left data untouched.
+		 */
+		boot_kobj->data = NULL;
+		kobject_put(&boot_kobj->kobj);
+		return NULL;
+	}
+	boot_kobj->attr_group = attr_group;
+
+	kobject_uevent(&boot_kobj->kobj, KOBJ_ADD);
+	/* Nothing broke so let's add it to the list. */
+	list_add_tail(&boot_kobj->list, &boot_kset->kobj_list);
+	return boot_kobj;
+}
+
+static void iscsi_boot_remove_kobj(struct iscsi_boot_kobj *boot_kobj)
+{
+	list_del(&boot_kobj->list);
+	sysfs_remove_group(&boot_kobj->kobj, boot_kobj->attr_group);
+	kobject_put(&boot_kobj->kobj);
+}
+
+/**
+ * iscsi_boot_create_target() - create boot target sysfs dir
+ * @boot_kset: boot kset
+ * @index: the target id
+ * @data: driver specific data for target
+ * @show: attr show function
+ * @is_visible: attr visibility function
+ *
+ * Note: The boot sysfs lib will free the data passed in for the caller
+ * when all refs to the target kobject have been released.
+ */
+struct iscsi_boot_kobj *
+iscsi_boot_create_target(struct iscsi_boot_kset *boot_kset, int index,
+			 void *data,
+			 ssize_t (*show) (void *data, int type, char *buf),
+			 mode_t (*is_visible) (void *data, int type))
+{
+	return iscsi_boot_create_kobj(boot_kset, &iscsi_boot_target_attr_group,
+				      "target%d", index, data, show, is_visible);
+}
+EXPORT_SYMBOL_GPL(iscsi_boot_create_target);
+
+/**
+ * iscsi_boot_create_initiator() - create boot initiator sysfs dir
+ * @boot_kset: boot kset
+ * @index: the initiator id (unused: the dir is always named "initiator")
+ * @data: driver specific data
+ * @show: attr show function
+ * @is_visible: attr visibility function
+ *
+ * Note: The boot sysfs lib will free the data passed in for the caller
+ * when all refs to the initiator kobject have been released.
+ */
+struct iscsi_boot_kobj *
+iscsi_boot_create_initiator(struct iscsi_boot_kset *boot_kset, int index,
+			    void *data,
+			    ssize_t (*show) (void *data, int type, char *buf),
+			    mode_t (*is_visible) (void *data, int type))
+{
+	return iscsi_boot_create_kobj(boot_kset,
+				      &iscsi_boot_initiator_attr_group,
+				      "initiator", index, data, show,
+				      is_visible);
+}
+EXPORT_SYMBOL_GPL(iscsi_boot_create_initiator);
+
+/**
+ * iscsi_boot_create_ethernet() - create boot ethernet sysfs dir
+ * @boot_kset: boot kset
+ * @index: the ethernet device id
+ * @data: driver specific data
+ * @show: attr show function
+ * @is_visible: attr visibility function
+ *
+ * Note: The boot sysfs lib will free the data passed in for the caller
+ * when all refs to the ethernet kobject have been released.
+ */
+struct iscsi_boot_kobj *
+iscsi_boot_create_ethernet(struct iscsi_boot_kset *boot_kset, int index,
+			   void *data,
+			   ssize_t (*show) (void *data, int type, char *buf),
+			   mode_t (*is_visible) (void *data, int type))
+{
+	return iscsi_boot_create_kobj(boot_kset,
+				      &iscsi_boot_ethernet_attr_group,
+				      "ethernet%d", index, data, show,
+				      is_visible);
+}
+EXPORT_SYMBOL_GPL(iscsi_boot_create_ethernet);
+
+/**
+ * iscsi_boot_create_kset() - creates root sysfs tree
+ * @set_name: name of root dir
+ */
+struct iscsi_boot_kset *iscsi_boot_create_kset(const char *set_name)
+{
+	struct iscsi_boot_kset *boot_kset;
+
+	boot_kset = kzalloc(sizeof(*boot_kset), GFP_KERNEL);
+	if (!boot_kset)
+		return NULL;
+
+	boot_kset->kset = kset_create_and_add(set_name, NULL, firmware_kobj);
+	if (!boot_kset->kset) {
+		kfree(boot_kset);
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&boot_kset->kobj_list);
+	return boot_kset;
+}
+EXPORT_SYMBOL_GPL(iscsi_boot_create_kset);
+
+/**
+ * iscsi_boot_create_host_kset() - creates root sysfs tree for a scsi host
+ * @hostno: host number of scsi host
+ */
+struct iscsi_boot_kset *iscsi_boot_create_host_kset(unsigned int hostno)
+{
+	struct iscsi_boot_kset *boot_kset;
+	char *set_name;
+
+	set_name = kasprintf(GFP_KERNEL, "iscsi_boot%u", hostno);
+	if (!set_name)
+		return NULL;
+
+	boot_kset = iscsi_boot_create_kset(set_name);
+	kfree(set_name);
+	return boot_kset;
+}
+EXPORT_SYMBOL_GPL(iscsi_boot_create_host_kset);
+
+/**
+ * iscsi_boot_destroy_kset() - destroy kset and kobjects under it
+ * @boot_kset: boot kset; freed by this call, do not use it afterwards
+ *
+ * This will remove the kset and kobjects and attrs under it.
+ */
+void iscsi_boot_destroy_kset(struct iscsi_boot_kset *boot_kset)
+{
+	struct iscsi_boot_kobj *boot_kobj, *tmp_kobj;
+
+	list_for_each_entry_safe(boot_kobj, tmp_kobj,
+				 &boot_kset->kobj_list, list)
+		iscsi_boot_remove_kobj(boot_kobj);
+	kset_unregister(boot_kset->kset);
+	kfree(boot_kset);
+}
+EXPORT_SYMBOL_GPL(iscsi_boot_destroy_kset);
diff --git a/include/linux/iscsi_boot_sysfs.h b/include/linux/iscsi_boot_sysfs.h
new file mode 100644
index 00000000000..f1e6c184f14
--- /dev/null
+++ b/include/linux/iscsi_boot_sysfs.h
@@ -0,0 +1,123 @@
+/*
+ * Export the iSCSI boot info to userland via sysfs.
+ *
+ * Copyright (C) 2010 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2010 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License v2.0 as published by
+ * the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef _ISCSI_BOOT_SYSFS_
+#define _ISCSI_BOOT_SYSFS_
+
+/*
+ * The text attribute names for each of the kobjects.
+ */
+enum iscsi_boot_eth_properties_enum {
+	ISCSI_BOOT_ETH_INDEX,
+	ISCSI_BOOT_ETH_FLAGS,
+	ISCSI_BOOT_ETH_IP_ADDR,
+	ISCSI_BOOT_ETH_SUBNET_MASK,
+	ISCSI_BOOT_ETH_ORIGIN,
+	ISCSI_BOOT_ETH_GATEWAY,
+	ISCSI_BOOT_ETH_PRIMARY_DNS,
+	ISCSI_BOOT_ETH_SECONDARY_DNS,
+	ISCSI_BOOT_ETH_DHCP,
+	ISCSI_BOOT_ETH_VLAN,
+	ISCSI_BOOT_ETH_MAC,
+	/* eth_pci_bdf - this is replaced by link to the device itself. */
+	ISCSI_BOOT_ETH_HOSTNAME,
+	ISCSI_BOOT_ETH_END_MARKER,
+};
+
+enum iscsi_boot_tgt_properties_enum {
+	ISCSI_BOOT_TGT_INDEX,
+	ISCSI_BOOT_TGT_FLAGS,
+	ISCSI_BOOT_TGT_IP_ADDR,
+	ISCSI_BOOT_TGT_PORT,
+	ISCSI_BOOT_TGT_LUN,
+	ISCSI_BOOT_TGT_CHAP_TYPE,
+	ISCSI_BOOT_TGT_NIC_ASSOC,
+	ISCSI_BOOT_TGT_NAME,
+	ISCSI_BOOT_TGT_CHAP_NAME,
+	ISCSI_BOOT_TGT_CHAP_SECRET,
+	ISCSI_BOOT_TGT_REV_CHAP_NAME,
+	ISCSI_BOOT_TGT_REV_CHAP_SECRET,
+	ISCSI_BOOT_TGT_END_MARKER,
+};
+
+enum iscsi_boot_initiator_properties_enum {
+	ISCSI_BOOT_INI_INDEX,
+	ISCSI_BOOT_INI_FLAGS,
+	ISCSI_BOOT_INI_ISNS_SERVER,
+	ISCSI_BOOT_INI_SLP_SERVER,
+	ISCSI_BOOT_INI_PRI_RADIUS_SERVER,
+	ISCSI_BOOT_INI_SEC_RADIUS_SERVER,
+	ISCSI_BOOT_INI_INITIATOR_NAME,
+	ISCSI_BOOT_INI_END_MARKER,
+};
+
+struct attribute_group;
+
+struct iscsi_boot_kobj {
+	struct kobject kobj;
+	struct attribute_group *attr_group;
+	struct list_head list;
+
+	/*
+	 * Pointer to store driver specific info. If set this will
+	 * be freed for the LLD when the kobj release function is called.
+	 */
+	void *data;
+	/*
+	 * Driver specific show function.
+	 *
+	 * @type is the enum of the type. This can be any value of the
+	 * above properties.
+	 */
+	ssize_t (*show) (void *data, int type, char *buf);
+
+	/*
+	 * Driver specific visibility function.
+	 * The function should return whether the attr should be readable,
+	 * writable or should not be shown.
+	 *
+	 * @type is the enum of the type. This can be any value of the
+	 * above properties.
+	 */
+	mode_t (*is_visible) (void *data, int type);
+};
+
+struct iscsi_boot_kset {
+	struct list_head kobj_list;
+	struct kset *kset;
+};
+
+struct iscsi_boot_kobj *
+iscsi_boot_create_initiator(struct iscsi_boot_kset *boot_kset, int index,
+			    void *data,
+			    ssize_t (*show) (void *data, int type, char *buf),
+			    mode_t (*is_visible) (void *data, int type));
+
+struct iscsi_boot_kobj *
+iscsi_boot_create_ethernet(struct iscsi_boot_kset *boot_kset, int index,
+			   void *data,
+			   ssize_t (*show) (void *data, int type, char *buf),
+			   mode_t (*is_visible) (void *data, int type));
+struct iscsi_boot_kobj *
+iscsi_boot_create_target(struct iscsi_boot_kset *boot_kset, int index,
+			 void *data,
+			 ssize_t (*show) (void *data, int type, char *buf),
+			 mode_t (*is_visible) (void *data, int type));
+
+struct iscsi_boot_kset *iscsi_boot_create_kset(const char *set_name);
+struct iscsi_boot_kset *iscsi_boot_create_host_kset(unsigned int hostno);
+void iscsi_boot_destroy_kset(struct iscsi_boot_kset *boot_kset);
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From 58687acba59266735adb8ccd9b5b9aa2c7cd205b Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 7 May 2010 17:11:44 -0400
Subject: lockup_detector: Combine nmi_watchdog and softlockup detector

The new nmi_watchdog (which uses the perf event subsystem) is very
similar in structure to the softlockup detector.  Using Ingo's
suggestion, I combined the two functionalities into one file:
kernel/watchdog.c.

Now both the nmi_watchdog (or hardlockup detector) and softlockup
detector sit on top of the perf event subsystem, which is run every
60 seconds or so to see if there are any lockups.

To detect hardlockups, cpus not responding to interrupts, I
implemented an hrtimer that runs 5 times for every perf event
overflow event.  If that stops counting on a cpu, then the cpu is
most likely in trouble.

To detect softlockups, tasks not yielding to the scheduler, I used the
previous kthread idea that now gets kicked every time the hrtimer fires.
If the kthread isn't being scheduled neither is anyone else and the
warning is printed to the console.

I tested this on x86_64 and both the softlockup and hardlockup paths
work.

V2:
- cleaned up the Kconfig and softlockup combination
- surrounded hardlockup cases with #ifdef CONFIG_PERF_EVENTS_NMI
- separated out the softlockup case from perf event subsystem
- re-arranged the enabling/disabling nmi watchdog from proc space
- added cpumasks for hardlockup failure cases
- removed fallback to soft events if no PMU exists for hard events

V3:
- comment cleanups
- drop support for older softlockup code
- per_cpu cleanups
- completely remove software clock base hardlockup detector
- use per_cpu masking on hard/soft lockup detection
- #ifdef cleanups
- rename config option NMI_WATCHDOG to LOCKUP_DETECTOR
- documentation additions

V4:
- documentation fixes
- convert per_cpu to __get_cpu_var
- powerpc compile fixes

V5:
- split apart warn flags for hard and soft lockups

TODO:
- figure out how to make an arch-agnostic clock2cycles call
  (if possible) to feed into perf events as a sample period

[fweisbec: merged conflict patch]

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Eric Paris <eparis@redhat.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
LKML-Reference: <1273266711-18706-2-git-send-email-dzickus@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 Documentation/kernel-parameters.txt |   2 +
 arch/x86/include/asm/nmi.h          |   2 +-
 arch/x86/kernel/apic/Makefile       |   4 +-
 arch/x86/kernel/apic/hw_nmi.c       |   2 +-
 arch/x86/kernel/traps.c             |   4 +-
 include/linux/nmi.h                 |   8 +-
 include/linux/sched.h               |   6 +
 init/Kconfig                        |   5 +-
 kernel/Makefile                     |   3 +-
 kernel/sysctl.c                     |  21 +-
 kernel/watchdog.c                   | 592 ++++++++++++++++++++++++++++++++++++
 lib/Kconfig.debug                   |  30 +-
 12 files changed, 650 insertions(+), 29 deletions(-)
 create mode 100644 kernel/watchdog.c

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 839b21b0699..dfe8d1c226c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1777,6 +1777,8 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	nousb		[USB] Disable the USB subsystem
 
+	nowatchdog	[KNL] Disable the lockup detector.
+
 	nowb		[ARM]
 
 	nox2apic	[X86-64,APIC] Do not enable x2APIC mode.
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 5b41b0feb6d..932f0f86b4b 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -17,7 +17,7 @@ int do_nmi_callback(struct pt_regs *regs, int cpu);
 
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
 extern int check_nmi_watchdog(void);
-#if !defined(CONFIG_NMI_WATCHDOG)
+#if !defined(CONFIG_LOCKUP_DETECTOR)
 extern int nmi_watchdog_enabled;
 #endif
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 1a4512e48d2..52f32e0ea19 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,10 @@
 #
 
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o probe_$(BITS).o ipi.o
-ifneq ($(CONFIG_NMI_WATCHDOG),y)
+ifneq ($(CONFIG_LOCKUP_DETECTOR),y)
 obj-$(CONFIG_X86_LOCAL_APIC)	+= nmi.o
 endif
-obj-$(CONFIG_NMI_WATCHDOG)	+= hw_nmi.o
+obj-$(CONFIG_LOCKUP_DETECTOR)	+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_SMP)		+= ipi.o
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index e8b78a0be5d..79425f96fce 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -89,7 +89,7 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *regs)
 
 u64 hw_nmi_get_sample_period(void)
 {
-	return cpu_khz * 1000;
+	return (u64)(cpu_khz) * 1000 * 60;
 }
 
 #ifdef ARCH_HAS_NMI_WATCHDOG
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index bdc7fab3ef3..bd347c2b34d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -406,7 +406,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 							== NOTIFY_STOP)
 			return;
 
-#ifndef CONFIG_NMI_WATCHDOG
+#ifndef CONFIG_LOCKUP_DETECTOR
 		/*
 		 * Ok, so this is none of the documented NMI sources,
 		 * so it must be the NMI watchdog.
@@ -414,7 +414,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 		if (nmi_watchdog_tick(regs, reason))
 			return;
 		if (!do_nmi_callback(regs, cpu))
-#endif /* !CONFIG_NMI_WATCHDOG */
+#endif /* !CONFIG_LOCKUP_DETECTOR */
 			unknown_nmi_error(reason, regs);
 #else
 		unknown_nmi_error(reason, regs);
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 22cc7960b64..abd48aacaf7 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -20,7 +20,7 @@ extern void touch_nmi_watchdog(void);
 extern void acpi_nmi_disable(void);
 extern void acpi_nmi_enable(void);
 #else
-#ifndef CONFIG_NMI_WATCHDOG
+#ifndef CONFIG_LOCKUP_DETECTOR
 static inline void touch_nmi_watchdog(void)
 {
 	touch_softlockup_watchdog();
@@ -51,12 +51,12 @@ static inline bool trigger_all_cpu_backtrace(void)
 }
 #endif
 
-#ifdef CONFIG_NMI_WATCHDOG
+#ifdef CONFIG_LOCKUP_DETECTOR
 int hw_nmi_is_cpu_stuck(struct pt_regs *);
 u64 hw_nmi_get_sample_period(void);
-extern int nmi_watchdog_enabled;
+extern int watchdog_enabled;
 struct ctl_table;
-extern int proc_nmi_enabled(struct ctl_table *, int ,
+extern int proc_dowatchdog_enabled(struct ctl_table *, int ,
 			void __user *, size_t *, loff_t *);
 #endif
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dad7f668ebf..37efe8fa530 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -346,6 +346,12 @@ extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
 					 size_t *lenp, loff_t *ppos);
 #endif
 
+#ifdef CONFIG_LOCKUP_DETECTOR
+extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
+				  void __user *buffer,
+				  size_t *lenp, loff_t *ppos);
+#endif
+
 /* Attach to any functions which should be ignored in wchan output. */
 #define __sched		__attribute__((__section__(".sched.text")))
 
diff --git a/init/Kconfig b/init/Kconfig
index c6c8903cb53..e44e25422f2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -944,8 +944,11 @@ config PERF_USE_VMALLOC
 
 config PERF_EVENTS_NMI
 	bool
+	depends on PERF_EVENTS
 	help
-	  Arch has support for nmi_watchdog
+	  System hardware can generate an NMI using the perf event
+	  subsystem.  Also has support for calculating CPU cycle events
+	  to determine how many clock cycles in a given period.
 
 menu "Kernel Performance Events And Counters"
 
diff --git a/kernel/Makefile b/kernel/Makefile
index d5c30060ac1..6adeafc3e25 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -76,9 +76,8 @@ obj-$(CONFIG_GCOV_KERNEL) += gcov/
 obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += kgdb.o
-obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
-obj-$(CONFIG_NMI_WATCHDOG) += nmi_watchdog.o
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
+obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a38af430f0d..0f9adda85f9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -74,7 +74,7 @@
 #include <scsi/sg.h>
 #endif
 
-#ifdef CONFIG_NMI_WATCHDOG
+#ifdef CONFIG_LOCKUP_DETECTOR
 #include <linux/nmi.h>
 #endif
 
@@ -686,16 +686,25 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
 	},
-#if defined(CONFIG_NMI_WATCHDOG)
+#if defined(CONFIG_LOCKUP_DETECTOR)
 	{
-		.procname       = "nmi_watchdog",
-		.data           = &nmi_watchdog_enabled,
+		.procname       = "watchdog",
+		.data           = &watchdog_enabled,
 		.maxlen         = sizeof (int),
 		.mode           = 0644,
-		.proc_handler   = proc_nmi_enabled,
+		.proc_handler   = proc_dowatchdog_enabled,
+	},
+	{
+		.procname	= "watchdog_thresh",
+		.data		= &softlockup_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dowatchdog_thresh,
+		.extra1		= &neg_one,
+		.extra2		= &sixty,
 	},
 #endif
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_NMI_WATCHDOG)
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
 	{
 		.procname       = "unknown_nmi_panic",
 		.data           = &unknown_nmi_panic,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
new file mode 100644
index 00000000000..6b7fad8497a
--- /dev/null
+++ b/kernel/watchdog.c
@@ -0,0 +1,592 @@
+/*
+ * Detect hard and soft lockups on a system
+ *
+ * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
+ *
+ * this code detects hard lockups: incidents where, on a CPU,
+ * the kernel does not respond to anything except NMI.
+ *
+ * Note: Most of this code is borrowed heavily from softlockup.c,
+ * so thanks to Ingo for the initial implementation.
+ * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
+ * to those contributors as well.
+ */
+
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/lockdep.h>
+#include <linux/notifier.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+
+#include <asm/irq_regs.h>
+#include <linux/perf_event.h>
+
+int watchdog_enabled;
+int __read_mostly softlockup_thresh = 60;
+
+static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
+static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
+static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
+static DEFINE_PER_CPU(bool, softlockup_touch_sync);
+static DEFINE_PER_CPU(bool, hard_watchdog_warn);
+static DEFINE_PER_CPU(bool, soft_watchdog_warn);
+#ifdef CONFIG_PERF_EVENTS_NMI
+static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
+static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
+static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
+#endif
+
+static int __read_mostly did_panic;
+static int __initdata no_watchdog;
+
+
+/* boot commands */
+/*
+ * Should we panic when a soft-lockup or hard-lockup occurs:
+ */
+#ifdef CONFIG_PERF_EVENTS_NMI
+static int hardlockup_panic;
+
+static int __init hardlockup_panic_setup(char *str)
+{
+	if (!strncmp(str, "panic", 5))
+		hardlockup_panic = 1;
+	return 1;
+}
+__setup("nmi_watchdog=", hardlockup_panic_setup);
+#endif
+
+unsigned int __read_mostly softlockup_panic =
+			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
+
+static int __init softlockup_panic_setup(char *str)
+{
+	softlockup_panic = simple_strtoul(str, NULL, 0);
+
+	return 1;
+}
+__setup("softlockup_panic=", softlockup_panic_setup);
+
+static int __init nowatchdog_setup(char *str)
+{
+	no_watchdog = 1;
+	return 1;
+}
+__setup("nowatchdog", nowatchdog_setup);
+
+/* deprecated */
+static int __init nosoftlockup_setup(char *str)
+{
+	no_watchdog = 1;
+	return 1;
+}
+__setup("nosoftlockup", nosoftlockup_setup);
+/*  */
+
+
+/*
+ * Returns seconds, approximately.  We don't need nanosecond
+ * resolution, and we don't need to waste time with a big divide when
+ * 2^30ns == 1.074s.
+ */
+static unsigned long get_timestamp(int this_cpu)
+{
+	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
+}
+
+static u64 get_sample_period(void)
+{
+	/*
+	 * convert softlockup_thresh from seconds to ns; dividing by 5
+	 * gives the hrtimer 5 chances to increment before the hardlockup
+	 * detector warns.  Scale NSEC_PER_SEC first, in 64 bits, so small
+	 * thresholds do not truncate to 0 and 32-bit longs do not overflow
+	 */
+	return softlockup_thresh * ((u64)NSEC_PER_SEC / 5);
+}
+
+/* Commands for resetting the watchdog */
+static void __touch_watchdog(void)
+{
+	int this_cpu = raw_smp_processor_id();
+
+	__get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
+}
+
+void touch_watchdog(void)
+{
+	__get_cpu_var(watchdog_touch_ts) = 0;
+}
+EXPORT_SYMBOL(touch_watchdog);
+
+void touch_all_watchdog(void)
+{
+	int cpu;
+
+	/*
+	 * this is done lockless
+	 * do we care if a 0 races with a timestamp?
+	 * all it means is the softlock check starts one cycle later
+	 */
+	for_each_online_cpu(cpu)
+		per_cpu(watchdog_touch_ts, cpu) = 0;
+}
+
+void touch_nmi_watchdog(void)
+{
+	touch_watchdog();
+}
+EXPORT_SYMBOL(touch_nmi_watchdog);
+
+void touch_all_nmi_watchdog(void)
+{
+	touch_all_watchdog();
+}
+
+void touch_softlockup_watchdog(void)
+{
+	touch_watchdog();
+}
+
+void touch_all_softlockup_watchdogs(void)
+{
+	touch_all_watchdog();
+}
+
+void touch_softlockup_watchdog_sync(void)
+{
+	__raw_get_cpu_var(softlockup_touch_sync) = true;
+	__raw_get_cpu_var(watchdog_touch_ts) = 0;
+}
+
+void softlockup_tick(void)
+{
+}
+
+#ifdef CONFIG_PERF_EVENTS_NMI
+/* watchdog detector functions */
+static int is_hardlockup(int cpu)
+{
+	unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
+
+	if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
+		return 1;
+
+	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
+	return 0;
+}
+#endif
+
+static int is_softlockup(unsigned long touch_ts, int cpu)
+{
+	unsigned long now = get_timestamp(cpu);
+
+	/* Warn about unreasonable delays: */
+	if (time_after(now, touch_ts + softlockup_thresh))
+		return now - touch_ts;
+
+	return 0;
+}
+
+static int
+watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	did_panic = 1;
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block panic_block = {
+	.notifier_call = watchdog_panic,
+};
+
+#ifdef CONFIG_PERF_EVENTS_NMI
+static struct perf_event_attr wd_hw_attr = {
+	.type		= PERF_TYPE_HARDWARE,
+	.config		= PERF_COUNT_HW_CPU_CYCLES,
+	.size		= sizeof(struct perf_event_attr),
+	.pinned		= 1,
+	.disabled	= 1,
+};
+
+/* Callback function for perf event subsystem */
+void watchdog_overflow_callback(struct perf_event *event, int nmi,
+		 struct perf_sample_data *data,
+		 struct pt_regs *regs)
+{
+	int this_cpu = smp_processor_id();
+	unsigned long touch_ts = per_cpu(watchdog_touch_ts, this_cpu);
+
+	if (touch_ts == 0) {
+		__touch_watchdog();
+		return;
+	}
+
+	/* check for a hardlockup
+	 * This is done by making sure our timer interrupt
+	 * is incrementing.  The timer interrupt should have
+	 * fired multiple times before we overflow'd.  If it hasn't
+	 * then this is a good indication the cpu is stuck
+	 */
+	if (is_hardlockup(this_cpu)) {
+		/* only print hardlockups once */
+		if (__get_cpu_var(hard_watchdog_warn) == true)
+			return;
+
+		if (hardlockup_panic)
+			panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+		else
+			WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+
+		__get_cpu_var(hard_watchdog_warn) = true;
+		return;
+	}
+
+	__get_cpu_var(hard_watchdog_warn) = false;
+	return;
+}
+static void watchdog_interrupt_count(void)
+{
+	__get_cpu_var(hrtimer_interrupts)++;
+}
+#else
+static inline void watchdog_interrupt_count(void) { return; }
+#endif /* CONFIG_PERF_EVENTS_NMI */
+
+/* watchdog kicker functions */
+static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
+{
+	int this_cpu = smp_processor_id();
+	unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
+	struct pt_regs *regs = get_irq_regs();
+	int duration;
+
+	/* kick the hardlockup detector */
+	watchdog_interrupt_count();
+
+	/* kick the softlockup detector */
+	wake_up_process(__get_cpu_var(softlockup_watchdog));
+
+	/* .. and repeat */
+	hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
+
+	if (touch_ts == 0) {
+		if (unlikely(per_cpu(softlockup_touch_sync, this_cpu))) {
+			/*
+			 * If the time stamp was touched atomically
+			 * make sure the scheduler tick is up to date.
+			 */
+			per_cpu(softlockup_touch_sync, this_cpu) = false;
+			sched_clock_tick();
+		}
+		__touch_watchdog();
+		return HRTIMER_RESTART;
+	}
+
+	/* check for a softlockup
+	 * This is done by making sure a high priority task is
+	 * being scheduled.  The task touches the watchdog to
+	 * indicate it is getting cpu time.  If it hasn't then
+	 * this is a good indication some task is hogging the cpu
+	 */
+	duration = is_softlockup(touch_ts, this_cpu);
+	if (unlikely(duration)) {
+		/* only warn once */
+		if (__get_cpu_var(soft_watchdog_warn) == true)
+			return HRTIMER_RESTART;
+
+		printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
+			this_cpu, duration,
+			current->comm, task_pid_nr(current));
+		print_modules();
+		print_irqtrace_events(current);
+		if (regs)
+			show_regs(regs);
+		else
+			dump_stack();
+
+		if (softlockup_panic)
+			panic("softlockup: hung tasks");
+		__get_cpu_var(soft_watchdog_warn) = true;
+	} else
+		__get_cpu_var(soft_watchdog_warn) = false;
+
+	return HRTIMER_RESTART;
+}
+
+
+/*
+ * The watchdog thread - runs at SCHED_FIFO priority and touches the timestamp.
+ */
+static int watchdog(void *__bind_cpu)
+{
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, (unsigned long)__bind_cpu);
+
+	sched_setscheduler(current, SCHED_FIFO, &param);
+
+	/* initialize timestamp */
+	__touch_watchdog();
+
+	/* kick off the timer for the hardlockup detector */
+	/* done here because hrtimer_start can only pin to smp_processor_id() */
+	hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
+		      HRTIMER_MODE_REL_PINNED);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	/*
+	 * Run briefly on every wakeup from watchdog_timer_fn() to reset the
+	 * softlockup timestamp.  If this gets delayed for more than
+	 * softlockup_thresh seconds, watchdog_timer_fn() prints the warning.
+	 */
+	while (!kthread_should_stop()) {
+		__touch_watchdog();
+		schedule();
+
+		if (kthread_should_stop())
+			break;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
+
+#ifdef CONFIG_PERF_EVENTS_NMI
+static int watchdog_nmi_enable(int cpu)
+{
+	struct perf_event_attr *wd_attr;
+	struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+	/* is it already setup and enabled? */
+	if (event && event->state > PERF_EVENT_STATE_OFF)
+		goto out;
+
+	/* it is setup but not enabled */
+	if (event != NULL)
+		goto out_enable;
+
+	/* Try to register using hardware perf events */
+	wd_attr = &wd_hw_attr;
+	wd_attr->sample_period = hw_nmi_get_sample_period();
+	event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback);
+	if (!IS_ERR(event)) {
+		printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
+		goto out_save;
+	}
+
+	printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
+	return -1;
+
+	/* success path */
+out_save:
+	per_cpu(watchdog_ev, cpu) = event;
+out_enable:
+	perf_event_enable(per_cpu(watchdog_ev, cpu));
+out:
+	return 0;
+}
+
+static void watchdog_nmi_disable(int cpu)
+{
+	struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+	if (event) {
+		perf_event_disable(event);
+		per_cpu(watchdog_ev, cpu) = NULL;
+
+		/* should be in cleanup, but blocks oprofile */
+		perf_event_release_kernel(event);
+	}
+	return;
+}
+#else
+static int watchdog_nmi_enable(int cpu) { return 0; }
+static void watchdog_nmi_disable(int cpu) { return; }
+#endif /* CONFIG_PERF_EVENTS_NMI */
+
+/* prepare/enable/disable routines */
+static int watchdog_prepare_cpu(int cpu)
+{
+	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
+
+	WARN_ON(per_cpu(softlockup_watchdog, cpu));
+	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer->function = watchdog_timer_fn;
+
+	return 0;
+}
+
+static int watchdog_enable(int cpu)
+{
+	struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
+
+	/* enable the perf event; give up on this cpu if that fails */
+	if (watchdog_nmi_enable(cpu) != 0)
+		return -1;
+
+	/* create the watchdog thread unless this cpu already has one */
+	if (!p) {
+		p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
+		if (IS_ERR(p)) {
+			printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
+			return -1;
+		}
+		kthread_bind(p, cpu);
+		per_cpu(watchdog_touch_ts, cpu) = 0;
+		per_cpu(softlockup_watchdog, cpu) = p;
+		wake_up_process(p);
+	}
+
+	/* if any cpu succeeds, watchdog is considered enabled for the system */
+	watchdog_enabled = 1;
+
+	return 0;
+}
+
+static void watchdog_disable(int cpu)
+{
+	struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
+	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
+
+	/*
+	 * cancel the timer first to stop incrementing the stats
+	 * and waking up the kthread
+	 */
+	hrtimer_cancel(hrtimer);
+
+	/* disable the perf event */
+	watchdog_nmi_disable(cpu);
+
+	/* stop the watchdog thread; watchdog_enabled is left to the callers */
+	if (p) {
+		per_cpu(softlockup_watchdog, cpu) = NULL;
+		kthread_stop(p);
+	}
+}
+
+/* enable the watchdog on every online cpu, logging once if any cpu failed */
+static void watchdog_enable_all_cpus(void)
+{
+	int cpu;
+	int result = 0;	/* accumulates the -1 returned for each failing cpu */
+
+	for_each_online_cpu(cpu)
+		result += watchdog_enable(cpu);
+
+	if (result)
+		printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");
+}
+
+static void watchdog_disable_all_cpus(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		watchdog_disable(cpu);
+
+	/* if all watchdogs are disabled, then they are disabled for the system */
+	watchdog_enabled = 0;
+}
+
+
+/* sysctl functions */
+#ifdef CONFIG_SYSCTL
+/* proc handler for /proc/sys/kernel/watchdog: a successful write starts or
+ * stops the watchdog on every online cpu; reads and failed writes do neither */
+int proc_dowatchdog_enabled(struct ctl_table *table, int write,
+		     void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int ret = proc_dointvec(table, write, buffer, length, ppos);
+
+	if (ret || !write)
+		return ret;
+	if (watchdog_enabled)
+		watchdog_enable_all_cpus();
+	else
+		watchdog_disable_all_cpus();
+	return 0;
+}
+
+int proc_dowatchdog_thresh(struct ctl_table *table, int write,
+			     void __user *buffer,
+			     size_t *lenp, loff_t *ppos)
+{
+	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+
+/* stub functions */
+int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
+			     void __user *buffer,
+			     size_t *lenp, loff_t *ppos)
+{
+	return proc_dowatchdog_thresh(table, write, buffer, lenp, ppos);
+}
+/* end of stub functions */
+#endif /* CONFIG_SYSCTL */
+
+
+/*
+ * Create/destroy watchdog threads as CPUs come and go:
+ */
+static int __cpuinit
+cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	int hotcpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		if (watchdog_prepare_cpu(hotcpu))
+			return NOTIFY_BAD;
+		break;
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		if (watchdog_enable(hotcpu))
+			return NOTIFY_BAD;
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+		watchdog_disable(hotcpu);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		watchdog_disable(hotcpu);
+		break;
+#endif /* CONFIG_HOTPLUG_CPU */
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata cpu_nfb = {
+	.notifier_call = cpu_callback
+};
+
+static int __init spawn_watchdog_task(void)
+{
+	void *cpu = (void *)(long)smp_processor_id();
+	int err;
+
+	if (no_watchdog)
+		return 0;
+
+	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
+	WARN_ON(err == NOTIFY_BAD);
+
+	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
+	register_cpu_notifier(&cpu_nfb);
+
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
+
+	return 0;
+}
+early_initcall(spawn_watchdog_task);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 220ae6063b6..49e285dcaf5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -153,7 +153,7 @@ config DEBUG_SHIRQ
 	  points; some don't and need to be caught.
 
 config DETECT_SOFTLOCKUP
-	bool "Detect Soft Lockups"
+	bool
 	depends on DEBUG_KERNEL && !S390
 	default y
 	help
@@ -171,17 +171,27 @@ config DETECT_SOFTLOCKUP
 	   can be detected via the NMI-watchdog, on platforms that
 	   support it.)
 
-config NMI_WATCHDOG
-	bool "Detect Hard Lockups with an NMI Watchdog"
-	depends on DEBUG_KERNEL && PERF_EVENTS && PERF_EVENTS_NMI
+config LOCKUP_DETECTOR
+	bool "Detect Hard and Soft Lockups"
+	depends on DEBUG_KERNEL
+	default DETECT_SOFTLOCKUP
 	help
-	  Say Y here to enable the kernel to use the NMI as a watchdog
-	  to detect hard lockups.  This is useful when a cpu hangs for no
-	  reason but can still respond to NMIs.  A backtrace is displayed
-	  for reviewing and reporting.
+	  Say Y here to enable the kernel to act as a watchdog to detect
+	  hard and soft lockups.
+
+	  Softlockups are bugs that cause the kernel to loop in kernel
+	  mode for more than 60 seconds, without giving other tasks a
+	  chance to run.  The current stack trace is displayed upon
+	  detection and the system will stay locked up.
+
+	  Hardlockups are bugs that cause the CPU to loop in kernel mode
+	  for more than 60 seconds, without letting other interrupts have a
+	  chance to run.  The current stack trace is displayed upon detection
+	  and the system will stay locked up.
 
-	  The overhead should be minimal, just an extra NMI every few
-	  seconds.
+	  The overhead should be minimal.  A periodic hrtimer runs to
+	  generate interrupts and kick the watchdog task every 10-12 seconds.
+	  An NMI is generated every 60 seconds or so to check for hardlockups.
 
 config BOOTPARAM_SOFTLOCKUP_PANIC
 	bool "Panic (Reboot) On Soft Lockups"
-- 
cgit v1.2.3-70-g09d2


From 332fbdbca3f7716c5620970755ae054d213bcc4e Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 7 May 2010 17:11:45 -0400
Subject: lockup_detector: Touch_softlockup cleanups and softlockup_tick
 removal

Just some code cleanup to make touch_softlockup clearer and remove the
softlockup_tick function as it is no longer needed.

Also remove the /proc softlockup_thresh call as it has been changed to
watchdog_thresh.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Eric Paris <eparis@redhat.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
LKML-Reference: <1273266711-18706-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/sched.h | 16 +++-------------
 kernel/sysctl.c       |  9 ---------
 kernel/timer.c        |  1 -
 kernel/watchdog.c     | 35 +++--------------------------------
 4 files changed, 6 insertions(+), 55 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 37efe8fa530..33f9b2ad0bb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -312,19 +312,15 @@ extern void scheduler_tick(void);
 extern void sched_show_task(struct task_struct *p);
 
 #ifdef CONFIG_DETECT_SOFTLOCKUP
-extern void softlockup_tick(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
-extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
-				    void __user *buffer,
-				    size_t *lenp, loff_t *ppos);
+extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
+				  void __user *buffer,
+				  size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
 extern int softlockup_thresh;
 #else
-static inline void softlockup_tick(void)
-{
-}
 static inline void touch_softlockup_watchdog(void)
 {
 }
@@ -346,12 +342,6 @@ extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
 					 size_t *lenp, loff_t *ppos);
 #endif
 
-#ifdef CONFIG_LOCKUP_DETECTOR
-extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
-				  void __user *buffer,
-				  size_t *lenp, loff_t *ppos);
-#endif
-
 /* Attach to any functions which should be ignored in wchan output. */
 #define __sched		__attribute__((__section__(".sched.text")))
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0f9adda85f9..999bc3fccf4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -817,15 +817,6 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-	{
-		.procname	= "softlockup_thresh",
-		.data		= &softlockup_thresh,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dosoftlockup_thresh,
-		.extra1		= &neg_one,
-		.extra2		= &sixty,
-	},
 #endif
 #ifdef CONFIG_DETECT_HUNG_TASK
 	{
diff --git a/kernel/timer.c b/kernel/timer.c
index aeb6a54f277..e8de5eb07a0 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1225,7 +1225,6 @@ void run_local_timers(void)
 {
 	hrtimer_run_queues();
 	raise_softirq(TIMER_SOFTIRQ);
-	softlockup_tick();
 }
 
 /*
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6b7fad8497a..f1541b7e324 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -119,13 +119,12 @@ static void __touch_watchdog(void)
 	__get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
 }
 
-void touch_watchdog(void)
+void touch_softlockup_watchdog(void)
 {
 	__get_cpu_var(watchdog_touch_ts) = 0;
 }
-EXPORT_SYMBOL(touch_watchdog);
 
-void touch_all_watchdog(void)
+void touch_all_softlockup_watchdogs(void)
 {
 	int cpu;
 
@@ -140,35 +139,16 @@ void touch_all_watchdog(void)
 
 void touch_nmi_watchdog(void)
 {
-	touch_watchdog();
+	touch_softlockup_watchdog();
 }
 EXPORT_SYMBOL(touch_nmi_watchdog);
 
-void touch_all_nmi_watchdog(void)
-{
-	touch_all_watchdog();
-}
-
-void touch_softlockup_watchdog(void)
-{
-	touch_watchdog();
-}
-
-void touch_all_softlockup_watchdogs(void)
-{
-	touch_all_watchdog();
-}
-
 void touch_softlockup_watchdog_sync(void)
 {
 	__raw_get_cpu_var(softlockup_touch_sync) = true;
 	__raw_get_cpu_var(watchdog_touch_ts) = 0;
 }
 
-void softlockup_tick(void)
-{
-}
-
 #ifdef CONFIG_PERF_EVENTS_NMI
 /* watchdog detector functions */
 static int is_hardlockup(int cpu)
@@ -522,15 +502,6 @@ int proc_dowatchdog_thresh(struct ctl_table *table, int write,
 {
 	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 }
-
-/* stub functions */
-int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
-			     void __user *buffer,
-			     size_t *lenp, loff_t *ppos)
-{
-	return proc_dowatchdog_thresh(table, write, buffer, lenp, ppos);
-}
-/* end of stub functions */
 #endif /* CONFIG_SYSCTL */
 
 
-- 
cgit v1.2.3-70-g09d2


From 19cc36c0f0457e5c6629ec24036fbbe8255c88ec Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 13 May 2010 02:30:49 +0200
Subject: lockup_detector: Fix forgotten config conversion

Fix forgotten CONFIG_DETECT_SOFTLOCKUP -> CONFIG_LOCKUP_DETECTOR
in sched.h

Fixes:
	arch/x86/built-in.o: In function `touch_nmi_watchdog':
	(.text+0x1bd59): undefined reference to `touch_softlockup_watchdog'
	kernel/built-in.o: In function `show_state_filter':
	(.text+0x10d01): undefined reference to `touch_all_softlockup_watchdogs'
	kernel/built-in.o: In function `sched_clock_idle_wakeup_event':
	(.text+0x362f9): undefined reference to `touch_softlockup_watchdog'
	kernel/built-in.o: In function `timekeeping_resume':
	timekeeping.c:(.text+0x38757): undefined reference to `touch_softlockup_watchdog'
	kernel/built-in.o: In function `tick_nohz_handler':
	tick-sched.c:(.text+0x3e5b9): undefined reference to `touch_softlockup_watchdog'
	kernel/built-in.o: In function `tick_sched_timer':
	tick-sched.c:(.text+0x3e671): undefined reference to `touch_softlockup_watchdog'
	kernel/built-in.o: In function `tick_check_idle':
	(.text+0x3e90b): undefined reference to `touch_softlockup_watchdog'

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Eric Paris <eparis@redhat.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 33f9b2ad0bb..3958e0cd24f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -311,7 +311,7 @@ extern void scheduler_tick(void);
 
 extern void sched_show_task(struct task_struct *p);
 
-#ifdef CONFIG_DETECT_SOFTLOCKUP
+#ifdef CONFIG_LOCKUP_DETECTOR
 extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
-- 
cgit v1.2.3-70-g09d2


From cafcd80d216bc2136b8edbb794327e495792c666 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 14 May 2010 11:11:21 -0400
Subject: lockup_detector: Cross arch compile fixes

Combining the softlockup and hardlockup code causes watchdog.c
to build even without the hardlockup detection support.

So if an arch, that has the previous and the new nmi watchdog
implementations cohabiting, wants to know if the generic one
is in use, CONFIG_LOCKUP_DETECTOR is not a reliable check.
We need to use CONFIG_HARDLOCKUP_DETECTOR instead.

Fixes:
	kernel/built-in.o: In function `touch_nmi_watchdog':
	(.text+0x449bc): multiple definition of `touch_nmi_watchdog'
	arch/sparc/kernel/built-in.o:(.text+0x11b28): first defined here

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
LKML-Reference: <20100514151121.GR15159@redhat.com>
[ use CONFIG_HARDLOCKUP_DETECTOR instead of CONFIG_PERF_EVENTS_NMI]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 arch/x86/kernel/apic/Makefile | 4 ++--
 include/linux/nmi.h           | 2 +-
 kernel/watchdog.c             | 7 +++++--
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 52f32e0ea19..910f20b457c 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,10 @@
 #
 
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o probe_$(BITS).o ipi.o
-ifneq ($(CONFIG_LOCKUP_DETECTOR),y)
+ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
 obj-$(CONFIG_X86_LOCAL_APIC)	+= nmi.o
 endif
-obj-$(CONFIG_LOCKUP_DETECTOR)	+= hw_nmi.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR)	+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_SMP)		+= ipi.o
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index abd48aacaf7..06aab5eee13 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -20,7 +20,7 @@ extern void touch_nmi_watchdog(void);
 extern void acpi_nmi_disable(void);
 extern void acpi_nmi_enable(void);
 #else
-#ifndef CONFIG_LOCKUP_DETECTOR
+#ifndef CONFIG_HARDLOCKUP_DETECTOR
 static inline void touch_nmi_watchdog(void)
 {
 	touch_softlockup_watchdog();
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 83fb63155cb..e53622c1465 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -31,13 +31,13 @@ int watchdog_enabled;
 int __read_mostly softlockup_thresh = 60;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
-static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
-static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
+static DEFINE_PER_CPU(bool, hard_watchdog_warn);
+static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
@@ -139,6 +139,7 @@ void touch_all_softlockup_watchdogs(void)
 		per_cpu(watchdog_touch_ts, cpu) = 0;
 }
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 void touch_nmi_watchdog(void)
 {
 	__get_cpu_var(watchdog_nmi_touch) = true;
@@ -146,6 +147,8 @@ void touch_nmi_watchdog(void)
 }
 EXPORT_SYMBOL(touch_nmi_watchdog);
 
+#endif
+
 void touch_softlockup_watchdog_sync(void)
 {
 	__raw_get_cpu_var(softlockup_touch_sync) = true;
-- 
cgit v1.2.3-70-g09d2


From b6f4bb383d69cac46f17e2305720f9a3d426c5ed Mon Sep 17 00:00:00 2001
From: "apatard@mandriva.com" <apatard@mandriva.com>
Date: Sat, 15 May 2010 17:30:01 +0200
Subject: ASoC: Add SOC_DOUBLE_R_SX_TLV control

This patch is adding a new control which has the following capabilities:
- tlv
- variable data size (for instance, 7 or 8 bit)
- double mixer
- data range centered around 0

Signed-off-by: Arnaud Patard <apatard@mandriva.com>
Acked-by: Liam Girdwood <lrg@opensource.wolfsonmicro.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h  | 21 ++++++++++++
 sound/soc/soc-core.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 116 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 697e7ffe39d..65e9d03ed4f 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -170,6 +170,21 @@
 	.get = xhandler_get, .put = xhandler_put, \
 	.private_value = (unsigned long)&xenum }
 
+#define SOC_DOUBLE_R_SX_TLV(xname, xreg_left, xreg_right, xshift,\
+		xmin, xmax, tlv_array) \
+{	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname), \
+	.access = SNDRV_CTL_ELEM_ACCESS_TLV_READ | \
+		  SNDRV_CTL_ELEM_ACCESS_READWRITE, \
+	.tlv.p = (tlv_array), \
+	.info = snd_soc_info_volsw_2r_sx, \
+	.get = snd_soc_get_volsw_2r_sx, \
+	.put = snd_soc_put_volsw_2r_sx, \
+	.private_value = (unsigned long)&(struct soc_mixer_control) \
+		{.reg = xreg_left, \
+		 .rreg = xreg_right, .shift = xshift, \
+		 .min = xmin, .max = xmax} }
+
+
 /*
  * Simplified versions of above macros, declaring a struct and calculating
  * ARRAY_SIZE internally
@@ -329,6 +344,12 @@ int snd_soc_put_volsw_s8(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol);
 int snd_soc_limit_volume(struct snd_soc_codec *codec,
 	const char *name, int max);
+int snd_soc_info_volsw_2r_sx(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_info *uinfo);
+int snd_soc_get_volsw_2r_sx(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol);
+int snd_soc_put_volsw_2r_sx(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol);
 
 /**
  * struct snd_soc_jack_pin - Describes a pin to update based on jack detection
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index e1043f64473..6220bc1ee42 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2351,6 +2351,101 @@ int snd_soc_limit_volume(struct snd_soc_codec *codec,
 }
 EXPORT_SYMBOL_GPL(snd_soc_limit_volume);
 
+/**
+ * snd_soc_info_volsw_2r_sx - double with tlv and variable data size
+ *  mixer info callback
+ * @kcontrol: mixer control
+ * @uinfo: control element information
+ *
+ * Returns 0 for success.
+ */
+int snd_soc_info_volsw_2r_sx(struct snd_kcontrol *kcontrol,
+			struct snd_ctl_elem_info *uinfo)
+{
+	struct soc_mixer_control *mc =
+		(struct soc_mixer_control *)kcontrol->private_value;
+	int max = mc->max;
+	int min = mc->min;
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = 2;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = max-min;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(snd_soc_info_volsw_2r_sx);
+
+/**
+ * snd_soc_get_volsw_2r_sx - double with tlv and variable data size
+ *  mixer get callback
+ * @kcontrol: mixer control
+ * @ucontrol: control element information
+ *
+ * Returns 0 for success.
+ */
+int snd_soc_get_volsw_2r_sx(struct snd_kcontrol *kcontrol,
+			struct snd_ctl_elem_value *ucontrol)
+{
+	struct soc_mixer_control *mc =
+		(struct soc_mixer_control *)kcontrol->private_value;
+	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
+	unsigned int mask = (1<<mc->shift)-1;	/* keeps the low mc->shift bits */
+	int min = mc->min;
+	int val = snd_soc_read(codec, mc->reg) & mask;
+	int valr = snd_soc_read(codec, mc->rreg) & mask;
+
+	ucontrol->value.integer.value[0] = ((val & 0xff)-min);
+	ucontrol->value.integer.value[1] = ((valr & 0xff)-min);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(snd_soc_get_volsw_2r_sx);
+
+/**
+ * snd_soc_put_volsw_2r_sx - double with tlv and variable data size
+ *  mixer put callback
+ * @kcontrol: mixer control
+ * @ucontrol: control element information
+ *
+ * Returns 1 if a register was changed, 0 if not, negative errno on failure.
+ */
+int snd_soc_put_volsw_2r_sx(struct snd_kcontrol *kcontrol,
+			struct snd_ctl_elem_value *ucontrol)
+{
+	struct soc_mixer_control *mc =
+		(struct soc_mixer_control *)kcontrol->private_value;
+	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
+	unsigned int mask = (1<<mc->shift)-1;	/* keeps the low mc->shift bits */
+	int min = mc->min;
+	int ret;
+	unsigned int val, valr, oval, ovalr;
+
+	val = ((ucontrol->value.integer.value[0]+min) & 0xff);
+	val &= mask;
+	valr = ((ucontrol->value.integer.value[1]+min) & 0xff);
+	valr &= mask;
+
+	oval = snd_soc_read(codec, mc->reg) & mask;
+	ovalr = snd_soc_read(codec, mc->rreg) & mask;
+
+	ret = 0;	/* becomes 1 once either register is rewritten */
+	if (oval != val) {
+		ret = snd_soc_write(codec, mc->reg, val);
+		if (ret < 0)
+			return ret;
+		ret = 1;
+	}
+	if (ovalr != valr) {
+		ret = snd_soc_write(codec, mc->rreg, valr);
+		if (ret < 0)
+			return ret;
+		ret = 1;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(snd_soc_put_volsw_2r_sx);
+
 /**
  * snd_soc_dai_set_sysclk - configure DAI system or master clock.
  * @dai: DAI
-- 
cgit v1.2.3-70-g09d2


From 15c0cee6c809a137e0fc7f1d2b0867cc03473c0c Mon Sep 17 00:00:00 2001
From: Ben Collins <bcollins@bluecherry.net>
Date: Fri, 28 May 2010 11:43:45 -0400
Subject: ALSA: pcm: Define G723 3-bit and 5-bit formats

This defines the 24bps and 40bps (8khz sample rate) G.723 codec
formats. They are going to be used once I submit the driver for
an mpeg4/g723 compression card.

I've updated the signed value to -1 as per Takashi's comments
since these are non-linear formats.

Signed-off-by: Ben Collins <bcollins@bluecherry.net>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/asound.h |  6 +++++-
 include/sound/pcm.h    |  4 ++++
 sound/core/pcm_misc.c  | 16 ++++++++++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/asound.h b/include/sound/asound.h
index 9f1eecf99e6..a1803ecea34 100644
--- a/include/sound/asound.h
+++ b/include/sound/asound.h
@@ -212,7 +212,11 @@ typedef int __bitwise snd_pcm_format_t;
 #define	SNDRV_PCM_FORMAT_S18_3BE	((__force snd_pcm_format_t) 41)	/* in three bytes */
 #define	SNDRV_PCM_FORMAT_U18_3LE	((__force snd_pcm_format_t) 42)	/* in three bytes */
 #define	SNDRV_PCM_FORMAT_U18_3BE	((__force snd_pcm_format_t) 43)	/* in three bytes */
-#define	SNDRV_PCM_FORMAT_LAST		SNDRV_PCM_FORMAT_U18_3BE
+#define	SNDRV_PCM_FORMAT_G723_24	((__force snd_pcm_format_t) 44) /* 8 samples in 3 bytes */
+#define	SNDRV_PCM_FORMAT_G723_24_1B	((__force snd_pcm_format_t) 45) /* 1 sample in 1 byte */
+#define	SNDRV_PCM_FORMAT_G723_40	((__force snd_pcm_format_t) 46) /* 8 Samples in 5 bytes */
+#define	SNDRV_PCM_FORMAT_G723_40_1B	((__force snd_pcm_format_t) 47) /* 1 sample in 1 byte */
+#define	SNDRV_PCM_FORMAT_LAST		SNDRV_PCM_FORMAT_G723_40_1B
 
 #ifdef SNDRV_LITTLE_ENDIAN
 #define	SNDRV_PCM_FORMAT_S16		SNDRV_PCM_FORMAT_S16_LE
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index dd76cdede64..07fd630db88 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -174,6 +174,10 @@ struct snd_pcm_ops {
 #define SNDRV_PCM_FMTBIT_U18_3LE	(1ULL << SNDRV_PCM_FORMAT_U18_3LE)
 #define SNDRV_PCM_FMTBIT_S18_3BE	(1ULL << SNDRV_PCM_FORMAT_S18_3BE)
 #define SNDRV_PCM_FMTBIT_U18_3BE	(1ULL << SNDRV_PCM_FORMAT_U18_3BE)
+#define SNDRV_PCM_FMTBIT_G723_24	(1ULL << SNDRV_PCM_FORMAT_G723_24)
+#define SNDRV_PCM_FMTBIT_G723_24_1B	(1ULL << SNDRV_PCM_FORMAT_G723_24_1B)
+#define SNDRV_PCM_FMTBIT_G723_40	(1ULL << SNDRV_PCM_FORMAT_G723_40)
+#define SNDRV_PCM_FMTBIT_G723_40_1B	(1ULL << SNDRV_PCM_FORMAT_G723_40_1B)
 
 #ifdef SNDRV_LITTLE_ENDIAN
 #define SNDRV_PCM_FMTBIT_S16		SNDRV_PCM_FMTBIT_S16_LE
diff --git a/sound/core/pcm_misc.c b/sound/core/pcm_misc.c
index ea2bf82c937..434af3c56d5 100644
--- a/sound/core/pcm_misc.c
+++ b/sound/core/pcm_misc.c
@@ -128,6 +128,14 @@ static struct pcm_format_data pcm_formats[SNDRV_PCM_FORMAT_LAST+1] = {
 		.width = 4, .phys = 4, .le = -1, .signd = -1,
 		.silence = {},
 	},
+	[SNDRV_PCM_FORMAT_G723_24] = {
+		.width = 3, .phys = 3, .le = -1, .signd = -1,
+		.silence = {},
+	},
+	[SNDRV_PCM_FORMAT_G723_40] = {
+		.width = 5, .phys = 5, .le = -1, .signd = -1,
+		.silence = {},
+	},
 	/* FIXME: the following three formats are not defined properly yet */
 	[SNDRV_PCM_FORMAT_MPEG] = {
 		.le = -1, .signd = -1,
@@ -186,6 +194,14 @@ static struct pcm_format_data pcm_formats[SNDRV_PCM_FORMAT_LAST+1] = {
 		.width = 18, .phys = 24, .le = 0, .signd = 0,
 		.silence = { 0x02, 0x00, 0x00 },
 	},
+	[SNDRV_PCM_FORMAT_G723_24_1B] = {
+		.width = 3, .phys = 8, .le = -1, .signd = -1,
+		.silence = {},
+	},
+	[SNDRV_PCM_FORMAT_G723_40_1B] = {
+		.width = 5, .phys = 8, .le = -1, .signd = -1,
+		.silence = {},
+	},
 };
 
 
-- 
cgit v1.2.3-70-g09d2


From 06c4648d46d1b757d6b9591a86810be79818b60c Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Wed, 26 May 2010 00:09:42 +0000
Subject: arp_notify: allow drivers to explicitly request a notification event.

Currently such notifications are only generated when the device comes up or the
address changes. However one use case for these notifications is to enable
faster network recovery after a virtual machine migration (by causing switches
to relearn their MAC tables). A migration appears to the network stack as a
temporary loss of carrier and therefore does not trigger either of the current
conditions. Rather than adding carrier up as a trigger (which can cause issues
when interfaces are flapping) simply add an interface which the driver can use
to explicitly trigger the notification.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Stephen Hemminger <shemminger@linux-foundation.org>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Cc: stable@kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 include/linux/notifier.h  |  1 +
 net/ipv4/devinet.c        |  1 +
 net/sched/sch_generic.c   | 18 ++++++++++++++++++
 4 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 40291f37502..a24916156f4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1772,6 +1772,8 @@ extern void netif_carrier_on(struct net_device *dev);
 
 extern void netif_carrier_off(struct net_device *dev);
 
+extern void netif_notify_peers(struct net_device *dev);
+
 /**
  *	netif_dormant_on - mark device as dormant.
  *	@dev: network device
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 540703b555c..22c2abb6197 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -210,6 +210,7 @@ static inline int notifier_to_errno(int ret)
 #define NETDEV_POST_INIT	0x0010
 #define NETDEV_UNREGISTER_BATCH 0x0011
 #define NETDEV_BONDING_DESLAVE  0x0012
+#define NETDEV_NOTIFY_PEERS	0x0012
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 382bc768ed5..da14c49284f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1081,6 +1081,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 		}
 		ip_mc_up(in_dev);
 		/* fall through */
+	case NETDEV_NOTIFY_PEERS:
 	case NETDEV_CHANGEADDR:
 		/* Send gratuitous ARP to notify of link change */
 		if (IN_DEV_ARP_NOTIFY(in_dev)) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a63029ef3ed..bd1892fe4b2 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -327,6 +327,24 @@ void netif_carrier_off(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_carrier_off);
 
+/**
+ *	netif_notify_peers - notify network peers about existence of @dev
+ *	@dev: network device
+ *
+ * Raise a NETDEV_NOTIFY_PEERS event on the netdevice notifier chain so
+ * that listeners can generate traffic (such as a gratuitous ARP) making
+ * interested network peers aware of @dev. This may be used after some
+ * sort of reconfiguration such as a failover event or virtual machine
+ * migration. Takes and releases rtnl_lock() around the notifier call.
+ */
+void netif_notify_peers(struct net_device *dev)
+{
+	rtnl_lock();
+	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL(netif_notify_peers);
+
 /* "NOOP" scheduler: the best scheduler, recommended for all interfaces
    under all circumstances. It is difficult to invent anything faster or
    cheaper.
-- 
cgit v1.2.3-70-g09d2


From 38117d1495e587fbb10d6e55733139a27893cef5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 31 May 2010 00:28:35 -0700
Subject: net: Fix NETDEV_NOTIFY_PEERS to not conflict with
 NETDEV_BONDING_DESLAVE.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/notifier.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 22c2abb6197..b2f1a4d8355 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -210,7 +210,7 @@ static inline int notifier_to_errno(int ret)
 #define NETDEV_POST_INIT	0x0010
 #define NETDEV_UNREGISTER_BATCH 0x0011
 #define NETDEV_BONDING_DESLAVE  0x0012
-#define NETDEV_NOTIFY_PEERS	0x0012
+#define NETDEV_NOTIFY_PEERS	0x0013
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN
-- 
cgit v1.2.3-70-g09d2


From ea762b047e13ba1cba4d58323b5c00a566610198 Mon Sep 17 00:00:00 2001
From: "apatard@mandriva.com" <apatard@mandriva.com>
Date: Thu, 27 May 2010 14:57:40 +0200
Subject: ASoC: Add SND_SOC_DAPM_PRE_POST_PMD event

Some codecs need to configure some registers both before and after
powering down some of their parts. As a convenience, add a macro for that.

Signed-off-by: Arnaud Patard <apatard@mandriva.com>
Acked-by: Liam Girdwood <lrg@slimlogic.co.uk>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc-dapm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 66ff4c124db..c5d9987bc89 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -273,6 +273,8 @@
 #define SND_SOC_DAPM_POST_PMD	0x8		/* after widget power down */
 #define SND_SOC_DAPM_PRE_REG	0x10	/* before audio path setup */
 #define SND_SOC_DAPM_POST_REG	0x20	/* after audio path setup */
+#define SND_SOC_DAPM_PRE_POST_PMD \
+				(SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD)
 
 /* convenience event type detection */
 #define SND_SOC_DAPM_EVENT_ON(e)	\
-- 
cgit v1.2.3-70-g09d2


From 01d73a6967f12fe6c4bbde1834a9fe662264a2eb Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Thu, 27 May 2010 13:40:24 -0600
Subject: drm: Remove drm_resource wrappers

Remove the drm_resource wrappers and directly use the
actual PCI and/or platform functions in their place.

[airlied: fixup nouveau properly to build]

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_bufs.c                | 13 -------------
 drivers/gpu/drm/i915/i915_dma.c           |  6 +++---
 drivers/gpu/drm/mga/mga_dma.c             |  4 ++--
 drivers/gpu/drm/nouveau/nouveau_bo.c      |  2 +-
 drivers/gpu/drm/nouveau/nouveau_channel.c |  3 ++-
 drivers/gpu/drm/nouveau/nouveau_mem.c     | 16 +++++++++-------
 drivers/gpu/drm/nouveau/nv20_graph.c      |  4 ++--
 drivers/gpu/drm/nouveau/nv40_graph.c      |  2 +-
 drivers/gpu/drm/nouveau/nv50_instmem.c    |  2 +-
 drivers/gpu/drm/radeon/evergreen.c        |  4 ++--
 drivers/gpu/drm/radeon/r100.c             |  4 ++--
 drivers/gpu/drm/radeon/r600.c             |  4 ++--
 drivers/gpu/drm/radeon/radeon_bios.c      |  2 +-
 drivers/gpu/drm/radeon/radeon_cp.c        |  8 ++++----
 drivers/gpu/drm/radeon/radeon_device.c    |  4 ++--
 drivers/gpu/drm/radeon/rs600.c            |  4 ++--
 drivers/gpu/drm/radeon/rs690.c            |  4 ++--
 drivers/gpu/drm/radeon/rv770.c            |  4 ++--
 drivers/gpu/drm/savage/savage_bci.c       | 24 +++++++++++++-----------
 include/drm/drmP.h                        |  4 ----
 20 files changed, 53 insertions(+), 65 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index f7ba82ebf65..7783035871e 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -39,19 +39,6 @@
 #include <asm/shmparam.h>
 #include "drmP.h"
 
-resource_size_t drm_get_resource_start(struct drm_device *dev, unsigned int resource)
-{
-	return pci_resource_start(dev->pdev, resource);
-}
-EXPORT_SYMBOL(drm_get_resource_start);
-
-resource_size_t drm_get_resource_len(struct drm_device *dev, unsigned int resource)
-{
-	return pci_resource_len(dev->pdev, resource);
-}
-
-EXPORT_SYMBOL(drm_get_resource_len);
-
 static struct drm_map_list *drm_find_matching_map(struct drm_device *dev,
 						  struct drm_local_map *map)
 {
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 2a6b5de5ae5..9fe2d08d9e9 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1429,7 +1429,7 @@ static int i915_load_modeset_init(struct drm_device *dev,
 	int fb_bar = IS_I9XX(dev) ? 2 : 0;
 	int ret = 0;
 
-	dev->mode_config.fb_base = drm_get_resource_start(dev, fb_bar) &
+	dev->mode_config.fb_base = pci_resource_start(dev->pdev, fb_bar) &
 		0xff000000;
 
 	/* Basic memrange allocator for stolen space (aka vram) */
@@ -1612,8 +1612,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 
 	/* Add register map (needed for suspend/resume) */
 	mmio_bar = IS_I9XX(dev) ? 0 : 1;
-	base = drm_get_resource_start(dev, mmio_bar);
-	size = drm_get_resource_len(dev, mmio_bar);
+	base = pci_resource_start(dev->pdev, mmio_bar);
+	size = pci_resource_len(dev->pdev, mmio_bar);
 
 	if (i915_get_bridge_dev(dev)) {
 		ret = -EIO;
diff --git a/drivers/gpu/drm/mga/mga_dma.c b/drivers/gpu/drm/mga/mga_dma.c
index 3c917fb3a60..ccc129c328a 100644
--- a/drivers/gpu/drm/mga/mga_dma.c
+++ b/drivers/gpu/drm/mga/mga_dma.c
@@ -405,8 +405,8 @@ int mga_driver_load(struct drm_device * dev, unsigned long flags)
 	dev_priv->usec_timeout = MGA_DEFAULT_USEC_TIMEOUT;
 	dev_priv->chipset = flags;
 
-	dev_priv->mmio_base = drm_get_resource_start(dev, 1);
-	dev_priv->mmio_size = drm_get_resource_len(dev, 1);
+	dev_priv->mmio_base = pci_resource_start(dev->pdev, 1);
+	dev_priv->mmio_size = pci_resource_len(dev->pdev, 1);
 
 	dev->counters += 3;
 	dev->types[6] = _DRM_STAT_IRQ;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 6f3c1952237..9f5ab467775 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -783,7 +783,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 		break;
 	case TTM_PL_VRAM:
 		mem->bus.offset = mem->mm_node->start << PAGE_SHIFT;
-		mem->bus.base = drm_get_resource_start(dev, 1);
+		mem->bus.base = pci_resource_start(dev->pdev, 1);
 		mem->bus.is_iomem = true;
 		break;
 	default:
diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
index 1fc57ef5829..06555c7cde5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
@@ -62,7 +62,8 @@ nouveau_channel_pushbuf_ctxdma_init(struct nouveau_channel *chan)
 		 * VRAM.
 		 */
 		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
-					     drm_get_resource_start(dev, 1),
+					     pci_resource_start(dev->pdev,
+					     1),
 					     dev_priv->fb_available_size,
 					     NV_DMA_ACCESS_RO,
 					     NV_DMA_TARGET_PCI, &pushbuf);
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 775a7017af6..37c7bf8e829 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -471,8 +471,9 @@ void nouveau_mem_close(struct drm_device *dev)
 	}
 
 	if (dev_priv->fb_mtrr) {
-		drm_mtrr_del(dev_priv->fb_mtrr, drm_get_resource_start(dev, 1),
-			     drm_get_resource_len(dev, 1), DRM_MTRR_WC);
+		drm_mtrr_del(dev_priv->fb_mtrr,
+			     pci_resource_start(dev->pdev, 1),
+			     pci_resource_len(dev->pdev, 1), DRM_MTRR_WC);
 		dev_priv->fb_mtrr = 0;
 	}
 }
@@ -632,7 +633,7 @@ nouveau_mem_init(struct drm_device *dev)
 	struct ttm_bo_device *bdev = &dev_priv->ttm.bdev;
 	int ret, dma_bits = 32;
 
-	dev_priv->fb_phys = drm_get_resource_start(dev, 1);
+	dev_priv->fb_phys = pci_resource_start(dev->pdev, 1);
 	dev_priv->gart_info.type = NOUVEAU_GART_NONE;
 
 	if (dev_priv->card_type >= NV_50 &&
@@ -664,8 +665,9 @@ nouveau_mem_init(struct drm_device *dev)
 
 	dev_priv->fb_available_size = dev_priv->vram_size;
 	dev_priv->fb_mappable_pages = dev_priv->fb_available_size;
-	if (dev_priv->fb_mappable_pages > drm_get_resource_len(dev, 1))
-		dev_priv->fb_mappable_pages = drm_get_resource_len(dev, 1);
+	if (dev_priv->fb_mappable_pages > pci_resource_len(dev->pdev, 1))
+		dev_priv->fb_mappable_pages =
+			pci_resource_len(dev->pdev, 1);
 	dev_priv->fb_mappable_pages >>= PAGE_SHIFT;
 
 	/* remove reserved space at end of vram from available amount */
@@ -717,8 +719,8 @@ nouveau_mem_init(struct drm_device *dev)
 		return ret;
 	}
 
-	dev_priv->fb_mtrr = drm_mtrr_add(drm_get_resource_start(dev, 1),
-					 drm_get_resource_len(dev, 1),
+	dev_priv->fb_mtrr = drm_mtrr_add(pci_resource_start(dev->pdev, 1),
+					 pci_resource_len(dev->pdev, 1),
 					 DRM_MTRR_WC);
 
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c b/drivers/gpu/drm/nouveau/nv20_graph.c
index d6fc0a82f03..fe2349b115f 100644
--- a/drivers/gpu/drm/nouveau/nv20_graph.c
+++ b/drivers/gpu/drm/nouveau/nv20_graph.c
@@ -616,7 +616,7 @@ nv20_graph_init(struct drm_device *dev)
 	nv_wr32(dev, NV10_PGRAPH_SURFACE, tmp);
 
 	/* begin RAM config */
-	vramsz = drm_get_resource_len(dev, 0) - 1;
+	vramsz = pci_resource_len(dev->pdev, 0) - 1;
 	nv_wr32(dev, 0x4009A4, nv_rd32(dev, NV04_PFB_CFG0));
 	nv_wr32(dev, 0x4009A8, nv_rd32(dev, NV04_PFB_CFG1));
 	nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0000);
@@ -717,7 +717,7 @@ nv30_graph_init(struct drm_device *dev)
 	nv_wr32(dev, 0x0040075c             , 0x00000001);
 
 	/* begin RAM config */
-	/* vramsz = drm_get_resource_len(dev, 0) - 1; */
+	/* vramsz = pci_resource_len(dev->pdev, 0) - 1; */
 	nv_wr32(dev, 0x4009A4, nv_rd32(dev, NV04_PFB_CFG0));
 	nv_wr32(dev, 0x4009A8, nv_rd32(dev, NV04_PFB_CFG1));
 	if (dev_priv->chipset != 0x34) {
diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c
index 704a25d04ac..65b13b54c5a 100644
--- a/drivers/gpu/drm/nouveau/nv40_graph.c
+++ b/drivers/gpu/drm/nouveau/nv40_graph.c
@@ -367,7 +367,7 @@ nv40_graph_init(struct drm_device *dev)
 		nv40_graph_set_region_tiling(dev, i, 0, 0, 0);
 
 	/* begin RAM config */
-	vramsz = drm_get_resource_len(dev, 0) - 1;
+	vramsz = pci_resource_len(dev->pdev, 0) - 1;
 	switch (dev_priv->chipset) {
 	case 0x40:
 		nv_wr32(dev, 0x4009A4, nv_rd32(dev, NV04_PFB_CFG0));
diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
index 5f21df31f3a..71c01b6e573 100644
--- a/drivers/gpu/drm/nouveau/nv50_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
@@ -241,7 +241,7 @@ nv50_instmem_init(struct drm_device *dev)
 		return ret;
 	BAR0_WI32(priv->fb_bar->gpuobj, 0x00, 0x7fc00000);
 	BAR0_WI32(priv->fb_bar->gpuobj, 0x04, 0x40000000 +
-					      drm_get_resource_len(dev, 1) - 1);
+					      pci_resource_len(dev->pdev, 1) - 1);
 	BAR0_WI32(priv->fb_bar->gpuobj, 0x08, 0x40000000);
 	BAR0_WI32(priv->fb_bar->gpuobj, 0x0c, 0x00000000);
 	BAR0_WI32(priv->fb_bar->gpuobj, 0x10, 0x00000000);
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 8c8e4d3cbaa..a4745e49ecf 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1300,8 +1300,8 @@ int evergreen_mc_init(struct radeon_device *rdev)
 	}
 	rdev->mc.vram_width = numchan * chansize;
 	/* Could aper size report 0 ? */
-	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
-	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
+	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
 	/* Setup GPU memory space */
 	/* size in MB on evergreen */
 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index cc004b05d63..c485c2cec4d 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -2284,8 +2284,8 @@ void r100_vram_init_sizes(struct radeon_device *rdev)
 	u64 config_aper_size;
 
 	/* work out accessible VRAM */
-	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
-	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
+	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
 	rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev);
 	/* FIXME we don't use the second aperture yet when we could use it */
 	if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 44e96a2ae25..4959619f885 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1118,8 +1118,8 @@ int r600_mc_init(struct radeon_device *rdev)
 	}
 	rdev->mc.vram_width = numchan * chansize;
 	/* Could aper size report 0 ? */
-	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
-	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
+	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
 	/* Setup GPU memory space */
 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c
index fbba938f804..91f5b5a29a9 100644
--- a/drivers/gpu/drm/radeon/radeon_bios.c
+++ b/drivers/gpu/drm/radeon/radeon_bios.c
@@ -49,7 +49,7 @@ static bool igp_read_bios_from_vram(struct radeon_device *rdev)
 	resource_size_t size = 256 * 1024; /* ??? */
 
 	rdev->bios = NULL;
-	vram_base = drm_get_resource_start(rdev->ddev, 0);
+	vram_base = pci_resource_start(rdev->pdev, 0);
 	bios = ioremap(vram_base, size);
 	if (!bios) {
 		return false;
diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index 2f042a3c0e6..eb6b9eed734 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -2120,8 +2120,8 @@ int radeon_driver_load(struct drm_device *dev, unsigned long flags)
 	else
 		dev_priv->flags |= RADEON_IS_PCI;
 
-	ret = drm_addmap(dev, drm_get_resource_start(dev, 2),
-			 drm_get_resource_len(dev, 2), _DRM_REGISTERS,
+	ret = drm_addmap(dev, pci_resource_start(dev->pdev, 2),
+			 pci_resource_len(dev->pdev, 2), _DRM_REGISTERS,
 			 _DRM_READ_ONLY | _DRM_DRIVER, &dev_priv->mmio);
 	if (ret != 0)
 		return ret;
@@ -2194,9 +2194,9 @@ int radeon_driver_firstopen(struct drm_device *dev)
 
 	dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
 
-	dev_priv->fb_aper_offset = drm_get_resource_start(dev, 0);
+	dev_priv->fb_aper_offset = pci_resource_start(dev->pdev, 0);
 	ret = drm_addmap(dev, dev_priv->fb_aper_offset,
-			 drm_get_resource_len(dev, 0), _DRM_FRAME_BUFFER,
+			 pci_resource_len(dev->pdev, 0), _DRM_FRAME_BUFFER,
 			 _DRM_WRITE_COMBINING, &map);
 	if (ret != 0)
 		return ret;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index fdc3fdf78ac..2a897a7ca26 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -648,8 +648,8 @@ int radeon_device_init(struct radeon_device *rdev,
 
 	/* Registers mapping */
 	/* TODO: block userspace mapping of io register */
-	rdev->rmmio_base = drm_get_resource_start(rdev->ddev, 2);
-	rdev->rmmio_size = drm_get_resource_len(rdev->ddev, 2);
+	rdev->rmmio_base = pci_resource_start(rdev->pdev, 2);
+	rdev->rmmio_size = pci_resource_len(rdev->pdev, 2);
 	rdev->rmmio = ioremap(rdev->rmmio_base, rdev->rmmio_size);
 	if (rdev->rmmio == NULL) {
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 79887cac5b5..340c7611f2a 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -685,8 +685,8 @@ void rs600_mc_init(struct radeon_device *rdev)
 {
 	u64 base;
 
-	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
-	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
+	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
 	rdev->mc.vram_is_ddr = true;
 	rdev->mc.vram_width = 128;
 	rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index bcc33195ebc..a18ba98885f 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -151,8 +151,8 @@ void rs690_mc_init(struct radeon_device *rdev)
 	rdev->mc.vram_width = 128;
 	rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
 	rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
-	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
-	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
+	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
 	base = RREG32_MC(R_000100_MCCFG_FB_LOCATION);
 	base = G_000100_MC_FB_START(base) << 16;
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 253f24aec03..5c7f0b97c6a 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -908,8 +908,8 @@ int rv770_mc_init(struct radeon_device *rdev)
 	}
 	rdev->mc.vram_width = numchan * chansize;
 	/* Could aper size report 0 ? */
-	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
-	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
+	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
 	/* Setup GPU memory space */
 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
diff --git a/drivers/gpu/drm/savage/savage_bci.c b/drivers/gpu/drm/savage/savage_bci.c
index 2d0c9ca484c..f576232846c 100644
--- a/drivers/gpu/drm/savage/savage_bci.c
+++ b/drivers/gpu/drm/savage/savage_bci.c
@@ -573,13 +573,13 @@ int savage_driver_firstopen(struct drm_device *dev)
 	dev_priv->mtrr[2].handle = -1;
 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
 		fb_rsrc = 0;
-		fb_base = drm_get_resource_start(dev, 0);
+		fb_base = pci_resource_start(dev->pdev, 0);
 		fb_size = SAVAGE_FB_SIZE_S3;
 		mmio_base = fb_base + SAVAGE_FB_SIZE_S3;
 		aper_rsrc = 0;
 		aperture_base = fb_base + SAVAGE_APERTURE_OFFSET;
 		/* this should always be true */
-		if (drm_get_resource_len(dev, 0) == 0x08000000) {
+		if (pci_resource_len(dev->pdev, 0) == 0x08000000) {
 			/* Don't make MMIO write-cobining! We need 3
 			 * MTRRs. */
 			dev_priv->mtrr[0].base = fb_base;
@@ -599,18 +599,19 @@ int savage_driver_firstopen(struct drm_device *dev)
 					 dev_priv->mtrr[2].size, DRM_MTRR_WC);
 		} else {
 			DRM_ERROR("strange pci_resource_len %08llx\n",
-				  (unsigned long long)drm_get_resource_len(dev, 0));
+				  (unsigned long long)
+				  pci_resource_len(dev->pdev, 0));
 		}
 	} else if (dev_priv->chipset != S3_SUPERSAVAGE &&
 		   dev_priv->chipset != S3_SAVAGE2000) {
-		mmio_base = drm_get_resource_start(dev, 0);
+		mmio_base = pci_resource_start(dev->pdev, 0);
 		fb_rsrc = 1;
-		fb_base = drm_get_resource_start(dev, 1);
+		fb_base = pci_resource_start(dev->pdev, 1);
 		fb_size = SAVAGE_FB_SIZE_S4;
 		aper_rsrc = 1;
 		aperture_base = fb_base + SAVAGE_APERTURE_OFFSET;
 		/* this should always be true */
-		if (drm_get_resource_len(dev, 1) == 0x08000000) {
+		if (pci_resource_len(dev->pdev, 1) == 0x08000000) {
 			/* Can use one MTRR to cover both fb and
 			 * aperture. */
 			dev_priv->mtrr[0].base = fb_base;
@@ -620,15 +621,16 @@ int savage_driver_firstopen(struct drm_device *dev)
 					 dev_priv->mtrr[0].size, DRM_MTRR_WC);
 		} else {
 			DRM_ERROR("strange pci_resource_len %08llx\n",
-				  (unsigned long long)drm_get_resource_len(dev, 1));
+				  (unsigned long long)
+				  pci_resource_len(dev->pdev, 1));
 		}
 	} else {
-		mmio_base = drm_get_resource_start(dev, 0);
+		mmio_base = pci_resource_start(dev->pdev, 0);
 		fb_rsrc = 1;
-		fb_base = drm_get_resource_start(dev, 1);
-		fb_size = drm_get_resource_len(dev, 1);
+		fb_base = pci_resource_start(dev->pdev, 1);
+		fb_size = pci_resource_len(dev->pdev, 1);
 		aper_rsrc = 2;
-		aperture_base = drm_get_resource_start(dev, 2);
+		aperture_base = pci_resource_start(dev->pdev, 2);
 		/* Automatic MTRR setup will do the right thing. */
 	}
 
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index c1b987158df..8f7f5cb4a86 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1273,10 +1273,6 @@ extern int drm_freebufs(struct drm_device *dev, void *data,
 extern int drm_mapbufs(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv);
 extern int drm_order(unsigned long size);
-extern resource_size_t drm_get_resource_start(struct drm_device *dev,
-					      unsigned int resource);
-extern resource_size_t drm_get_resource_len(struct drm_device *dev,
-					    unsigned int resource);
 
 				/* DMA support (drm_dma.h) */
 extern int drm_dma_setup(struct drm_device *dev);
-- 
cgit v1.2.3-70-g09d2


From dcdb167402cbdca1d021bdfa5f63995ee0a79317 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Thu, 27 May 2010 13:40:25 -0600
Subject: drm: Add support for platform devices to register as DRM devices

Allow platform devices without PCI resources to be DRM devices.

[airlied: fixup warnings with dev pointers]

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/Kconfig               |   4 +-
 drivers/gpu/drm/Makefile              |   2 +-
 drivers/gpu/drm/drm_drv.c             |  37 ++-------
 drivers/gpu/drm/drm_edid.c            |   4 +-
 drivers/gpu/drm/drm_info.c            |  23 ++++--
 drivers/gpu/drm/drm_ioctl.c           |  71 +++++++++++------
 drivers/gpu/drm/drm_irq.c             |  15 ++--
 drivers/gpu/drm/drm_pci.c             | 143 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/drm_platform.c        | 122 +++++++++++++++++++++++++++++
 drivers/gpu/drm/drm_stub.c            |  89 +--------------------
 drivers/gpu/drm/drm_sysfs.c           |   3 +-
 drivers/gpu/drm/i915/i915_dma.c       |   1 +
 drivers/gpu/drm/i915/i915_drv.c       |   2 +-
 drivers/gpu/drm/nouveau/nouveau_drv.c |   2 +-
 drivers/gpu/drm/radeon/radeon_drv.c   |   2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c   |   2 +-
 include/drm/drmP.h                    |  52 +++++++++++--
 17 files changed, 402 insertions(+), 172 deletions(-)
 create mode 100644 drivers/gpu/drm/drm_platform.c

(limited to 'include')

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 88910e5a2c7..520ab23d8a3 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -6,7 +6,7 @@
 #
 menuconfig DRM
 	tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)"
-	depends on (AGP || AGP=n) && PCI && !EMULATED_CMPXCHG && MMU
+	depends on (AGP || AGP=n) && !EMULATED_CMPXCHG && MMU
 	select I2C
 	select I2C_ALGOBIT
 	select SLOW_WORK
@@ -17,7 +17,7 @@ menuconfig DRM
 	  These modules provide support for synchronization, security, and
 	  DMA transfers. Please see <http://dri.sourceforge.net/> for more
 	  details.  You should also select and configure AGP
-	  (/dev/agpgart) support.
+	  (/dev/agpgart) support if it is available for your platform.
 
 config DRM_KMS_HELPER
 	tristate
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index abe3f446ca4..b4b2b480d0c 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -9,7 +9,7 @@ drm-y       :=	drm_auth.o drm_buffer.o drm_bufs.o drm_cache.o \
 		drm_drv.o drm_fops.o drm_gem.o drm_ioctl.o drm_irq.o \
 		drm_lock.o drm_memory.o drm_proc.o drm_stub.o drm_vm.o \
 		drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \
-		drm_sysfs.o drm_hashtab.o drm_sman.o drm_mm.o \
+		drm_platform.o drm_sysfs.o drm_hashtab.o drm_sman.o drm_mm.o \
 		drm_crtc.o drm_modes.o drm_edid.o \
 		drm_info.o drm_debugfs.o drm_encoder_slave.o
 
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 4a66201edae..510bc87d98f 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -243,47 +243,20 @@ int drm_lastclose(struct drm_device * dev)
  *
  * Initializes an array of drm_device structures, and attempts to
  * initialize all available devices, using consecutive minors, registering the
- * stubs and initializing the AGP device.
+ * stubs and initializing the device.
  *
  * Expands the \c DRIVER_PREINIT and \c DRIVER_POST_INIT macros before and
  * after the initialization for driver customization.
  */
 int drm_init(struct drm_driver *driver)
 {
-	struct pci_dev *pdev = NULL;
-	const struct pci_device_id *pid;
-	int i;
-
 	DRM_DEBUG("\n");
-
 	INIT_LIST_HEAD(&driver->device_list);
 
-	if (driver->driver_features & DRIVER_MODESET)
-		return pci_register_driver(&driver->pci_driver);
-
-	/* If not using KMS, fall back to stealth mode manual scanning. */
-	for (i = 0; driver->pci_driver.id_table[i].vendor != 0; i++) {
-		pid = &driver->pci_driver.id_table[i];
-
-		/* Loop around setting up a DRM device for each PCI device
-		 * matching our ID and device class.  If we had the internal
-		 * function that pci_get_subsys and pci_get_class used, we'd
-		 * be able to just pass pid in instead of doing a two-stage
-		 * thing.
-		 */
-		pdev = NULL;
-		while ((pdev =
-			pci_get_subsys(pid->vendor, pid->device, pid->subvendor,
-				       pid->subdevice, pdev)) != NULL) {
-			if ((pdev->class & pid->class_mask) != pid->class)
-				continue;
-
-			/* stealth mode requires a manual probe */
-			pci_dev_get(pdev);
-			drm_get_dev(pdev, pid, driver);
-		}
-	}
-	return 0;
+	if (driver->driver_features & DRIVER_USE_PLATFORM_DEVICE)
+		return drm_platform_init(driver);
+	else
+		return drm_pci_init(driver);
 }
 
 EXPORT_SYMBOL(drm_init);
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index c1981861bbb..83d8072066c 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -282,7 +282,7 @@ drm_do_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter)
 	return block;
 
 carp:
-	dev_warn(&connector->dev->pdev->dev, "%s: EDID block %d invalid.\n",
+	dev_warn(connector->dev->dev, "%s: EDID block %d invalid.\n",
 		 drm_get_connector_name(connector), j);
 
 out:
@@ -1626,7 +1626,7 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
 		return 0;
 	}
 	if (!drm_edid_is_valid(edid)) {
-		dev_warn(&connector->dev->pdev->dev, "%s: EDID invalid.\n",
+		dev_warn(connector->dev->dev, "%s: EDID invalid.\n",
 			 drm_get_connector_name(connector));
 		return 0;
 	}
diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c
index f0f6c6b93f3..2ef2c782724 100644
--- a/drivers/gpu/drm/drm_info.c
+++ b/drivers/gpu/drm/drm_info.c
@@ -51,13 +51,24 @@ int drm_name_info(struct seq_file *m, void *data)
 	if (!master)
 		return 0;
 
-	if (master->unique) {
-		seq_printf(m, "%s %s %s\n",
-			   dev->driver->pci_driver.name,
-			   pci_name(dev->pdev), master->unique);
+	if (drm_core_check_feature(dev, DRIVER_USE_PLATFORM_DEVICE)) {
+		if (master->unique) {
+			seq_printf(m, "%s %s %s\n",
+					dev->driver->platform_device->name,
+					dev_name(dev->dev), master->unique);
+		} else {
+			seq_printf(m, "%s\n",
+				dev->driver->platform_device->name);
+		}
 	} else {
-		seq_printf(m, "%s %s\n", dev->driver->pci_driver.name,
-			   pci_name(dev->pdev));
+		if (master->unique) {
+			seq_printf(m, "%s %s %s\n",
+				dev->driver->pci_driver.name,
+				dev_name(dev->dev), master->unique);
+		} else {
+			seq_printf(m, "%s %s\n", dev->driver->pci_driver.name,
+				dev_name(dev->dev));
+		}
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 9b9ff46c237..76d3d18056d 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -132,32 +132,57 @@ static int drm_set_busid(struct drm_device *dev, struct drm_file *file_priv)
 	struct drm_master *master = file_priv->master;
 	int len;
 
-	if (master->unique != NULL)
-		return -EBUSY;
-
-	master->unique_len = 40;
-	master->unique_size = master->unique_len;
-	master->unique = kmalloc(master->unique_size, GFP_KERNEL);
-	if (master->unique == NULL)
-		return -ENOMEM;
+	if (drm_core_check_feature(dev, DRIVER_USE_PLATFORM_DEVICE)) {
+		master->unique_len = 10 + strlen(dev->platformdev->name);
+		master->unique = kmalloc(master->unique_len + 1, GFP_KERNEL);
+
+		if (master->unique == NULL)
+			return -ENOMEM;
+
+		len = snprintf(master->unique, master->unique_len,
+			"platform:%s", dev->platformdev->name);
+
+		if (len > master->unique_len)
+			DRM_ERROR("Unique buffer overflowed\n");
+
+		dev->devname =
+			kmalloc(strlen(dev->platformdev->name) +
+				master->unique_len + 2, GFP_KERNEL);
+
+		if (dev->devname == NULL)
+			return -ENOMEM;
+
+		sprintf(dev->devname, "%s@%s", dev->platformdev->name,
+			master->unique);
+
+	} else {
+		master->unique_len = 40;
+		master->unique_size = master->unique_len;
+		master->unique = kmalloc(master->unique_size, GFP_KERNEL);
+		if (master->unique == NULL)
+			return -ENOMEM;
+
+		len = snprintf(master->unique, master->unique_len,
+			"pci:%04x:%02x:%02x.%d",
+			drm_get_pci_domain(dev),
+			dev->pdev->bus->number,
+			PCI_SLOT(dev->pdev->devfn),
+			PCI_FUNC(dev->pdev->devfn));
+		if (len >= master->unique_len)
+			DRM_ERROR("buffer overflow");
+		else
+			master->unique_len = len;
 
-	len = snprintf(master->unique, master->unique_len, "pci:%04x:%02x:%02x.%d",
-		       drm_get_pci_domain(dev),
-		       dev->pdev->bus->number,
-		       PCI_SLOT(dev->pdev->devfn),
-		       PCI_FUNC(dev->pdev->devfn));
-	if (len >= master->unique_len)
-		DRM_ERROR("buffer overflow");
-	else
-		master->unique_len = len;
+		dev->devname =
+			kmalloc(strlen(dev->driver->pci_driver.name) +
+				master->unique_len + 2, GFP_KERNEL);
 
-	dev->devname = kmalloc(strlen(dev->driver->pci_driver.name) +
-			       master->unique_len + 2, GFP_KERNEL);
-	if (dev->devname == NULL)
-		return -ENOMEM;
+		if (dev->devname == NULL)
+			return -ENOMEM;
 
-	sprintf(dev->devname, "%s@%s", dev->driver->pci_driver.name,
-		master->unique);
+		sprintf(dev->devname, "%s@%s", dev->driver->pci_driver.name,
+			master->unique);
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index a263b7070fc..6353b625e09 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -57,6 +57,9 @@ int drm_irq_by_busid(struct drm_device *dev, void *data,
 {
 	struct drm_irq_busid *p = data;
 
+	if (drm_core_check_feature(dev, DRIVER_USE_PLATFORM_DEVICE))
+		return -EINVAL;
+
 	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
 		return -EINVAL;
 
@@ -211,7 +214,7 @@ int drm_irq_install(struct drm_device *dev)
 	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
 		return -EINVAL;
 
-	if (dev->pdev->irq == 0)
+	if (drm_dev_to_irq(dev) == 0)
 		return -EINVAL;
 
 	mutex_lock(&dev->struct_mutex);
@@ -229,7 +232,7 @@ int drm_irq_install(struct drm_device *dev)
 	dev->irq_enabled = 1;
 	mutex_unlock(&dev->struct_mutex);
 
-	DRM_DEBUG("irq=%d\n", dev->pdev->irq);
+	DRM_DEBUG("irq=%d\n", drm_dev_to_irq(dev));
 
 	/* Before installing handler */
 	dev->driver->irq_preinstall(dev);
@@ -302,14 +305,14 @@ int drm_irq_uninstall(struct drm_device * dev)
 	if (!irq_enabled)
 		return -EINVAL;
 
-	DRM_DEBUG("irq=%d\n", dev->pdev->irq);
+	DRM_DEBUG("irq=%d\n", drm_dev_to_irq(dev));
 
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		vga_client_register(dev->pdev, NULL, NULL, NULL);
 
 	dev->driver->irq_uninstall(dev);
 
-	free_irq(dev->pdev->irq, dev);
+	free_irq(drm_dev_to_irq(dev), dev);
 
 	return 0;
 }
@@ -341,7 +344,7 @@ int drm_control(struct drm_device *dev, void *data,
 		if (drm_core_check_feature(dev, DRIVER_MODESET))
 			return 0;
 		if (dev->if_version < DRM_IF_VERSION(1, 2) &&
-		    ctl->irq != dev->pdev->irq)
+		    ctl->irq != drm_dev_to_irq(dev))
 			return -EINVAL;
 		return drm_irq_install(dev);
 	case DRM_UNINST_HANDLER:
@@ -651,7 +654,7 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
 	int ret = 0;
 	unsigned int flags, seq, crtc;
 
-	if ((!dev->pdev->irq) || (!dev->irq_enabled))
+	if ((!drm_dev_to_irq(dev)) || (!dev->irq_enabled))
 		return -EINVAL;
 
 	if (vblwait->request.type & _DRM_VBLANK_SIGNAL)
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 2ea9ad4a8d6..e20f78b542a 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -124,4 +124,147 @@ void drm_pci_free(struct drm_device * dev, drm_dma_handle_t * dmah)
 
 EXPORT_SYMBOL(drm_pci_free);
 
+#ifdef CONFIG_PCI
+/**
+ * Register.
+ *
+ * \param pdev - PCI device structure
+ * \param ent entry from the PCI ID table with device type flags
+ * \return zero on success or a negative number on failure.
+ *
+ * Attempt to gets inter module "drm" information. If we are first
+ * then register the character device and inter module information.
+ * Try and register, if we fail to register, backout previous work.
+ */
+int drm_get_pci_dev(struct pci_dev *pdev, const struct pci_device_id *ent,
+		    struct drm_driver *driver)
+{
+	struct drm_device *dev;
+	int ret;
+
+	DRM_DEBUG("\n");
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	ret = pci_enable_device(pdev);
+	if (ret)
+		goto err_g1;
+
+	pci_set_master(pdev);
+
+	dev->pdev = pdev;
+	dev->dev = &pdev->dev;
+
+	dev->pci_device = pdev->device;
+	dev->pci_vendor = pdev->vendor;
+
+#ifdef __alpha__
+	dev->hose = pdev->sysdata;
+#endif
+
+	if ((ret = drm_fill_in_dev(dev, ent, driver))) {
+		printk(KERN_ERR "DRM: Fill_in_dev failed.\n");
+		goto err_g2;
+	}
+
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		pci_set_drvdata(pdev, dev);
+		ret = drm_get_minor(dev, &dev->control, DRM_MINOR_CONTROL);
+		if (ret)
+			goto err_g2;
+	}
+
+	if ((ret = drm_get_minor(dev, &dev->primary, DRM_MINOR_LEGACY)))
+		goto err_g3;
+
+	if (dev->driver->load) {
+		ret = dev->driver->load(dev, ent->driver_data);
+		if (ret)
+			goto err_g4;
+	}
+
+	/* setup the grouping for the legacy output */
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		ret = drm_mode_group_init_legacy_group(dev,
+						&dev->primary->mode_group);
+		if (ret)
+			goto err_g4;
+	}
+
+	list_add_tail(&dev->driver_item, &driver->device_list);
+
+	DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n",
+		 driver->name, driver->major, driver->minor, driver->patchlevel,
+		 driver->date, pci_name(pdev), dev->primary->index);
+
+	return 0;
+
+err_g4:
+	drm_put_minor(&dev->primary);
+err_g3:
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		drm_put_minor(&dev->control);
+err_g2:
+	pci_disable_device(pdev);
+err_g1:
+	kfree(dev);
+	return ret;
+}
+EXPORT_SYMBOL(drm_get_pci_dev);
+
+/**
+ * PCI device initialization. Called via drm_init at module load time,
+ *
+ * \return zero on success or a negative number on failure.
+ *
+ * Initializes a drm_device structures,registering the
+ * stubs and initializing the AGP device.
+ *
+ * Expands the \c DRIVER_PREINIT and \c DRIVER_POST_INIT macros before and
+ * after the initialization for driver customization.
+ */
+int drm_pci_init(struct drm_driver *driver)
+{
+	struct pci_dev *pdev = NULL;
+	const struct pci_device_id *pid;
+	int i;
+
+	if (driver->driver_features & DRIVER_MODESET)
+		return pci_register_driver(&driver->pci_driver);
+
+	/* If not using KMS, fall back to stealth mode manual scanning. */
+	for (i = 0; driver->pci_driver.id_table[i].vendor != 0; i++) {
+		pid = &driver->pci_driver.id_table[i];
+
+		/* Loop around setting up a DRM device for each PCI device
+		 * matching our ID and device class.  If we had the internal
+		 * function that pci_get_subsys and pci_get_class used, we'd
+		 * be able to just pass pid in instead of doing a two-stage
+		 * thing.
+		 */
+		pdev = NULL;
+		while ((pdev =
+			pci_get_subsys(pid->vendor, pid->device, pid->subvendor,
+				       pid->subdevice, pdev)) != NULL) {
+			if ((pdev->class & pid->class_mask) != pid->class)
+				continue;
+
+			/* stealth mode requires a manual probe */
+			pci_dev_get(pdev);
+			drm_get_pci_dev(pdev, pid, driver);
+		}
+	}
+	return 0;
+}
+
+#else
+
+int drm_pci_init(struct drm_driver *driver)
+{
+	return -1;
+}
+
+#endif
 /*@}*/
diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c
new file mode 100644
index 00000000000..460e9a3afa8
--- /dev/null
+++ b/drivers/gpu/drm/drm_platform.c
@@ -0,0 +1,122 @@
+/*
+ * Derived from drm_pci.c
+ *
+ * Copyright 2003 José Fonseca.
+ * Copyright 2003 Leif Delgass.
+ * Copyright (c) 2009, Code Aurora Forum.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+
+/**
+ * Register.
+ *
+ * \param platdev - Platform device structure
+ * \return zero on success or a negative number on failure.
+ *
+ * Attempt to gets inter module "drm" information. If we are first
+ * then register the character device and inter module information.
+ * Try and register, if we fail to register, backout previous work.
+ */
+
+int drm_get_platform_dev(struct platform_device *platdev,
+			 struct drm_driver *driver)
+{
+	struct drm_device *dev;
+	int ret;
+
+	DRM_DEBUG("\n");
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	dev->platformdev = platdev;
+	dev->dev = &platdev->dev;
+
+	ret = drm_fill_in_dev(dev, NULL, driver);
+
+	if (ret) {
+		printk(KERN_ERR "DRM: Fill_in_dev failed.\n");
+		goto err_g1;
+	}
+
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		dev_set_drvdata(&platdev->dev, dev);
+		ret = drm_get_minor(dev, &dev->control, DRM_MINOR_CONTROL);
+		if (ret)
+			goto err_g1;
+	}
+
+	ret = drm_get_minor(dev, &dev->primary, DRM_MINOR_LEGACY);
+	if (ret)
+		goto err_g2;
+
+	if (dev->driver->load) {
+		ret = dev->driver->load(dev, 0);
+		if (ret)
+			goto err_g3;
+	}
+
+	/* setup the grouping for the legacy output */
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		ret = drm_mode_group_init_legacy_group(dev,
+				&dev->primary->mode_group);
+		if (ret)
+			goto err_g3;
+	}
+
+	list_add_tail(&dev->driver_item, &driver->device_list);
+
+	DRM_INFO("Initialized %s %d.%d.%d %s on minor %d\n",
+		 driver->name, driver->major, driver->minor, driver->patchlevel,
+		 driver->date, dev->primary->index);
+
+	return 0;
+
+err_g3:
+	drm_put_minor(&dev->primary);
+err_g2:
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		drm_put_minor(&dev->control);
+err_g1:
+	kfree(dev);
+	return ret;
+}
+EXPORT_SYMBOL(drm_get_platform_dev);
+
+/**
+ * Platform device initialization. Called via drm_init at module load time,
+ *
+ * \return zero on success or a negative number on failure.
+ *
+ * Initializes a drm_device structures,registering the
+ * stubs
+ *
+ * Expands the \c DRIVER_PREINIT and \c DRIVER_POST_INIT macros before and
+ * after the initialization for driver customization.
+ */
+
+int drm_platform_init(struct drm_driver *driver)
+{
+	return drm_get_platform_dev(driver->platform_device, driver);
+}
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index a0c365f2e52..63575e2fa88 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -224,7 +224,7 @@ int drm_dropmaster_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static int drm_fill_in_dev(struct drm_device * dev, struct pci_dev *pdev,
+int drm_fill_in_dev(struct drm_device *dev,
 			   const struct pci_device_id *ent,
 			   struct drm_driver *driver)
 {
@@ -245,14 +245,6 @@ static int drm_fill_in_dev(struct drm_device * dev, struct pci_dev *pdev,
 
 	idr_init(&dev->drw_idr);
 
-	dev->pdev = pdev;
-	dev->pci_device = pdev->device;
-	dev->pci_vendor = pdev->vendor;
-
-#ifdef __alpha__
-	dev->hose = pdev->sysdata;
-#endif
-
 	if (drm_ht_create(&dev->map_hash, 12)) {
 		return -ENOMEM;
 	}
@@ -321,7 +313,7 @@ static int drm_fill_in_dev(struct drm_device * dev, struct pci_dev *pdev,
  * create the proc init entry via proc_init(). This routines assigns
  * minor numbers to secondary heads of multi-headed cards
  */
-static int drm_get_minor(struct drm_device *dev, struct drm_minor **minor, int type)
+int drm_get_minor(struct drm_device *dev, struct drm_minor **minor, int type)
 {
 	struct drm_minor *new_minor;
 	int ret;
@@ -387,83 +379,6 @@ err_idr:
 	return ret;
 }
 
-/**
- * Register.
- *
- * \param pdev - PCI device structure
- * \param ent entry from the PCI ID table with device type flags
- * \return zero on success or a negative number on failure.
- *
- * Attempt to gets inter module "drm" information. If we are first
- * then register the character device and inter module information.
- * Try and register, if we fail to register, backout previous work.
- */
-int drm_get_dev(struct pci_dev *pdev, const struct pci_device_id *ent,
-		struct drm_driver *driver)
-{
-	struct drm_device *dev;
-	int ret;
-
-	DRM_DEBUG("\n");
-
-	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-	if (!dev)
-		return -ENOMEM;
-
-	ret = pci_enable_device(pdev);
-	if (ret)
-		goto err_g1;
-
-	pci_set_master(pdev);
-	if ((ret = drm_fill_in_dev(dev, pdev, ent, driver))) {
-		printk(KERN_ERR "DRM: Fill_in_dev failed.\n");
-		goto err_g2;
-	}
-
-	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
-		pci_set_drvdata(pdev, dev);
-		ret = drm_get_minor(dev, &dev->control, DRM_MINOR_CONTROL);
-		if (ret)
-			goto err_g2;
-	}
-
-	if ((ret = drm_get_minor(dev, &dev->primary, DRM_MINOR_LEGACY)))
-		goto err_g3;
-
-	if (dev->driver->load) {
-		ret = dev->driver->load(dev, ent->driver_data);
-		if (ret)
-			goto err_g4;
-	}
-
-        /* setup the grouping for the legacy output */
-	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
-		ret = drm_mode_group_init_legacy_group(dev, &dev->primary->mode_group);
-		if (ret)
-			goto err_g4;
-	}
-
-	list_add_tail(&dev->driver_item, &driver->device_list);
-
-	DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n",
-		 driver->name, driver->major, driver->minor, driver->patchlevel,
-		 driver->date, pci_name(pdev), dev->primary->index);
-
-	return 0;
-
-err_g4:
-	drm_put_minor(&dev->primary);
-err_g3:
-	if (drm_core_check_feature(dev, DRIVER_MODESET))
-		drm_put_minor(&dev->control);
-err_g2:
-	pci_disable_device(pdev);
-err_g1:
-	kfree(dev);
-	return ret;
-}
-EXPORT_SYMBOL(drm_get_dev);
-
 /**
  * Put a secondary minor number.
  *
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 3a3a451d0bf..14d9d829ef2 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -488,7 +488,8 @@ int drm_sysfs_device_add(struct drm_minor *minor)
 	int err;
 	char *minor_str;
 
-	minor->kdev.parent = &minor->dev->pdev->dev;
+	minor->kdev.parent = minor->dev->dev;
+
 	minor->kdev.class = drm_class;
 	minor->kdev.release = drm_sysfs_device_release;
 	minor->kdev.devt = minor->device;
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 9fe2d08d9e9..9bed5617e0e 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -34,6 +34,7 @@
 #include "i915_drm.h"
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include <linux/pci.h>
 #include <linux/vgaarb.h>
 #include <linux/acpi.h>
 #include <linux/pnp.h>
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5c51e45ab68..b7aecf5ea1f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -435,7 +435,7 @@ int i965_reset(struct drm_device *dev, u8 flags)
 static int __devinit
 i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	return drm_get_dev(pdev, ent, &driver);
+	return drm_get_pci_dev(pdev, ent, &driver);
 }
 
 static void
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
index c6079e36669..f60a2b2ae44 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
@@ -132,7 +132,7 @@ static struct drm_driver driver;
 static int __devinit
 nouveau_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	return drm_get_dev(pdev, ent, &driver);
+	return drm_get_pci_dev(pdev, ent, &driver);
 }
 
 static void
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 4afba1eca2a..683e281b409 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -236,7 +236,7 @@ static struct drm_driver kms_driver;
 static int __devinit
 radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	return drm_get_dev(pdev, ent, &kms_driver);
+	return drm_get_pci_dev(pdev, ent, &kms_driver);
 }
 
 static void
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 0c9c0811f42..f7f248dbff5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -758,7 +758,7 @@ static struct drm_driver driver = {
 
 static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	return drm_get_dev(pdev, ent, &driver);
+	return drm_get_pci_dev(pdev, ent, &driver);
 }
 
 static int __init vmwgfx_init(void)
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 8f7f5cb4a86..6235169d595 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -9,6 +9,7 @@
 /*
  * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
  * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
+ * Copyright (c) 2009-2010, Code Aurora Forum.
  * All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -48,6 +49,7 @@
 #include <linux/proc_fs.h>
 #include <linux/init.h>
 #include <linux/file.h>
+#include <linux/platform_device.h>
 #include <linux/pci.h>
 #include <linux/jiffies.h>
 #include <linux/smp_lock.h>	/* For (un)lock_kernel */
@@ -144,6 +146,7 @@ extern void drm_ut_debug_printk(unsigned int request_level,
 #define DRIVER_IRQ_VBL2    0x800
 #define DRIVER_GEM         0x1000
 #define DRIVER_MODESET     0x2000
+#define DRIVER_USE_PLATFORM_DEVICE  0x4000
 
 /***********************************************************************/
 /** \name Begin the DRM... */
@@ -823,6 +826,7 @@ struct drm_driver {
 	int num_ioctls;
 	struct file_operations fops;
 	struct pci_driver pci_driver;
+	struct platform_device *platform_device;
 	/* List of devices hanging off this driver */
 	struct list_head device_list;
 };
@@ -1015,12 +1019,16 @@ struct drm_device {
 
 	struct drm_agp_head *agp;	/**< AGP data */
 
+	struct device *dev;             /**< Device structure */
 	struct pci_dev *pdev;		/**< PCI device structure */
 	int pci_vendor;			/**< PCI vendor id */
 	int pci_device;			/**< PCI device id */
 #ifdef __alpha__
 	struct pci_controller *hose;
 #endif
+
+	struct platform_device *platformdev; /**< Platform device struture */
+
 	struct drm_sg_mem *sg;	/**< Scatter gather memory */
 	int num_crtcs;                  /**< Number of CRTCs on this device */
 	void *dev_private;		/**< device private data */
@@ -1060,17 +1068,21 @@ struct drm_device {
 
 };
 
-static inline int drm_dev_to_irq(struct drm_device *dev)
-{
-	return dev->pdev->irq;
-}
-
 static __inline__ int drm_core_check_feature(struct drm_device *dev,
 					     int feature)
 {
 	return ((dev->driver->driver_features & feature) ? 1 : 0);
 }
 
+
+static inline int drm_dev_to_irq(struct drm_device *dev)
+{
+	if (drm_core_check_feature(dev, DRIVER_USE_PLATFORM_DEVICE))
+		return platform_get_irq(dev->platformdev, 0);
+	else
+		return dev->pdev->irq;
+}
+
 #ifdef __alpha__
 #define drm_get_pci_domain(dev) dev->hose->index
 #else
@@ -1347,8 +1359,11 @@ extern int drm_dropmaster_ioctl(struct drm_device *dev, void *data,
 struct drm_master *drm_master_create(struct drm_minor *minor);
 extern struct drm_master *drm_master_get(struct drm_master *master);
 extern void drm_master_put(struct drm_master **master);
-extern int drm_get_dev(struct pci_dev *pdev, const struct pci_device_id *ent,
-		       struct drm_driver *driver);
+extern int drm_get_pci_dev(struct pci_dev *pdev,
+			   const struct pci_device_id *ent,
+			   struct drm_driver *driver);
+extern int drm_get_platform_dev(struct platform_device *pdev,
+				struct drm_driver *driver);
 extern void drm_put_dev(struct drm_device *dev);
 extern int drm_put_minor(struct drm_minor **minor);
 extern unsigned int drm_debug;
@@ -1525,6 +1540,9 @@ static __inline__ struct drm_local_map *drm_core_findmap(struct drm_device *dev,
 
 static __inline__ int drm_device_is_agp(struct drm_device *dev)
 {
+	if (drm_core_check_feature(dev, DRIVER_USE_PLATFORM_DEVICE))
+		return 0;
+
 	if (dev->driver->device_is_agp != NULL) {
 		int err = (*dev->driver->device_is_agp) (dev);
 
@@ -1538,7 +1556,10 @@ static __inline__ int drm_device_is_agp(struct drm_device *dev)
 
 static __inline__ int drm_device_is_pcie(struct drm_device *dev)
 {
-	return pci_find_capability(dev->pdev, PCI_CAP_ID_EXP);
+	if (drm_core_check_feature(dev, DRIVER_USE_PLATFORM_DEVICE))
+		return 0;
+	else
+		return pci_find_capability(dev->pdev, PCI_CAP_ID_EXP);
 }
 
 static __inline__ void drm_core_dropmap(struct drm_local_map *map)
@@ -1546,6 +1567,21 @@ static __inline__ void drm_core_dropmap(struct drm_local_map *map)
 }
 
 #include "drm_mem_util.h"
+
+static inline void *drm_get_device(struct drm_device *dev)
+{
+	if (drm_core_check_feature(dev, DRIVER_USE_PLATFORM_DEVICE))
+		return dev->platformdev;
+	else
+		return dev->pdev;
+}
+
+extern int drm_platform_init(struct drm_driver *driver);
+extern int drm_pci_init(struct drm_driver *driver);
+extern int drm_fill_in_dev(struct drm_device *dev,
+			   const struct pci_device_id *ent,
+			   struct drm_driver *driver);
+int drm_get_minor(struct drm_device *dev, struct drm_minor **minor, int type);
 /*@}*/
 
 #endif				/* __KERNEL__ */
-- 
cgit v1.2.3-70-g09d2


From bc135b23d01acf7ee926aaf75b0020c86d3869f9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 2 Jun 2010 03:23:51 -0700
Subject: net: Define accessors to manipulate QDISC_STATE_RUNNING

Define three helpers to manipulate the QDISC_STATE_RUNNING flag, which a
second patch will move to another location.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h   |  2 +-
 include/net/sch_generic.h | 15 +++++++++++++++
 net/core/dev.c            |  4 ++--
 net/sched/sch_generic.c   |  4 ++--
 4 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 9d4d87cc970..d9549af6929 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -95,7 +95,7 @@ extern void __qdisc_run(struct Qdisc *q);
 
 static inline void qdisc_run(struct Qdisc *q)
 {
-	if (!test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
+	if (qdisc_run_begin(q))
 		__qdisc_run(q);
 }
 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 03ca5d82675..9707daed761 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -76,6 +76,21 @@ struct Qdisc {
 	struct rcu_head     rcu_head;
 };
 
+static inline bool qdisc_is_running(struct Qdisc *qdisc)
+{
+	return test_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+}
+
+static inline bool qdisc_run_begin(struct Qdisc *qdisc)
+{
+	return !test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+}
+
+static inline void qdisc_run_end(struct Qdisc *qdisc)
+{
+	clear_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+}
+
 struct Qdisc_class_ops {
 	/* Child qdisc manipulation */
 	struct netdev_queue *	(*select_queue)(struct Qdisc *, struct tcmsg *);
diff --git a/net/core/dev.c b/net/core/dev.c
index 983a3c1d65c..2733226d90b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2047,7 +2047,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		kfree_skb(skb);
 		rc = NET_XMIT_DROP;
 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
-		   !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
+		   qdisc_run_begin(q)) {
 		/*
 		 * This is a work-conserving queue; there are no old skbs
 		 * waiting to be sent out; and the qdisc is not running -
@@ -2059,7 +2059,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
 			__qdisc_run(q);
 		else
-			clear_bit(__QDISC_STATE_RUNNING, &q->state);
+			qdisc_run_end(q);
 
 		rc = NET_XMIT_SUCCESS;
 	} else {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index bd1892fe4b2..37b86eab677 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -205,7 +205,7 @@ void __qdisc_run(struct Qdisc *q)
 		}
 	}
 
-	clear_bit(__QDISC_STATE_RUNNING, &q->state);
+	qdisc_run_end(q);
 }
 
 unsigned long dev_trans_start(struct net_device *dev)
@@ -797,7 +797,7 @@ static bool some_qdisc_is_busy(struct net_device *dev)
 
 		spin_lock_bh(root_lock);
 
-		val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
+		val = (qdisc_is_running(q) ||
 		       test_bit(__QDISC_STATE_SCHED, &q->state));
 
 		spin_unlock_bh(root_lock);
-- 
cgit v1.2.3-70-g09d2


From 371121057607e3127e19b3fa094330181b5b031e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 2 Jun 2010 03:24:13 -0700
Subject: net: QDISC_STATE_RUNNING dont need atomic bit ops

__QDISC_STATE_RUNNING is always changed while qdisc lock is held.

We can avoid two atomic operations in the xmit path if we move this bit
into a new __state container.

Location of this __state container is carefully chosen so that fast path
only dirties one qdisc cache line.

THROTTLED bit could later be moved into this __state location too, to
avoid dirtying first qdisc cache line.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 9707daed761..b3591e4a514 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -23,11 +23,17 @@ struct qdisc_rate_table {
 };
 
 enum qdisc_state_t {
-	__QDISC_STATE_RUNNING,
 	__QDISC_STATE_SCHED,
 	__QDISC_STATE_DEACTIVATED,
 };
 
+/*
+ * following bits are only changed while qdisc lock is held
+ */
+enum qdisc___state_t {
+	__QDISC___STATE_RUNNING,
+};
+
 struct qdisc_size_table {
 	struct list_head	list;
 	struct tc_sizespec	szopts;
@@ -72,23 +78,24 @@ struct Qdisc {
 	unsigned long		state;
 	struct sk_buff_head	q;
 	struct gnet_stats_basic_packed bstats;
+	unsigned long		__state;
 	struct gnet_stats_queue	qstats;
 	struct rcu_head     rcu_head;
 };
 
 static inline bool qdisc_is_running(struct Qdisc *qdisc)
 {
-	return test_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+	return test_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
 }
 
 static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 {
-	return !test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+	return !__test_and_set_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
 }
 
 static inline void qdisc_run_end(struct Qdisc *qdisc)
 {
-	clear_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+	__clear_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
 }
 
 struct Qdisc_class_ops {
-- 
cgit v1.2.3-70-g09d2


From 79640a4ca6955e3ebdb7038508fa7a0cd7fa5527 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 2 Jun 2010 05:09:29 -0700
Subject: net: add additional lock to qdisc to increase throughput

When many cpus compete for sending frames on a given qdisc, the qdisc
spinlock suffers from very high contention.

The cpu owning the __QDISC_STATE_RUNNING bit has the same priority as any
other cpu to acquire the lock, and cannot dequeue packets fast enough,
since it must wait for this lock for each dequeued packet.

One solution to this problem is to force all cpus to spin on a second
lock before trying to get the main lock, when/if they see
__QDISC_STATE_RUNNING already set.

The owning cpu then competes with at most one other cpu for the main
lock, allowing for a higher dequeueing rate.

Based on a previous patch from Alexander Duyck. I added the heuristic to
avoid the atomic in fast path, and put the new lock far away from the
cache line used by the dequeue worker. Also try to release the busylock
as late as possible.

Tests with following script gave a boost from ~50.000 pps to ~600.000
pps on a dual quad core machine (E5450 @3.00GHz), tg3 driver.
(A single netperf flow can reach ~800.000 pps on this platform)

for j in `seq 0 3`; do
  for i in `seq 0 7`; do
    netperf -H 192.168.0.1 -t UDP_STREAM -l 60 -N -T $i -- -m 6 &
  done
done

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  3 ++-
 net/core/dev.c            | 29 +++++++++++++++++++++++++----
 net/sched/sch_generic.c   |  1 +
 3 files changed, 28 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b3591e4a514..b35301b0c7b 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -80,7 +80,8 @@ struct Qdisc {
 	struct gnet_stats_basic_packed bstats;
 	unsigned long		__state;
 	struct gnet_stats_queue	qstats;
-	struct rcu_head     rcu_head;
+	struct rcu_head		rcu_head;
+	spinlock_t		busylock;
 };
 
 static inline bool qdisc_is_running(struct Qdisc *qdisc)
diff --git a/net/core/dev.c b/net/core/dev.c
index 2733226d90b..ffca5c1066f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2040,8 +2040,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 				 struct netdev_queue *txq)
 {
 	spinlock_t *root_lock = qdisc_lock(q);
+	bool contended = qdisc_is_running(q);
 	int rc;
 
+	/*
+	 * Heuristic to force contended enqueues to serialize on a
+	 * separate lock before trying to get qdisc main lock.
+	 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
+	 * and dequeue packets faster.
+	 */
+	if (unlikely(contended))
+		spin_lock(&q->busylock);
+
 	spin_lock(root_lock);
 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
 		kfree_skb(skb);
@@ -2056,19 +2066,30 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
 			skb_dst_force(skb);
 		__qdisc_update_bstats(q, skb->len);
-		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
+		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
+			if (unlikely(contended)) {
+				spin_unlock(&q->busylock);
+				contended = false;
+			}
 			__qdisc_run(q);
-		else
+		} else
 			qdisc_run_end(q);
 
 		rc = NET_XMIT_SUCCESS;
 	} else {
 		skb_dst_force(skb);
 		rc = qdisc_enqueue_root(skb, q);
-		qdisc_run(q);
+		if (qdisc_run_begin(q)) {
+			if (unlikely(contended)) {
+				spin_unlock(&q->busylock);
+				contended = false;
+			}
+			__qdisc_run(q);
+		}
 	}
 	spin_unlock(root_lock);
-
+	if (unlikely(contended))
+		spin_unlock(&q->busylock);
 	return rc;
 }
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 37b86eab677..d20fcd2a551 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -561,6 +561,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 
 	INIT_LIST_HEAD(&sch->list);
 	skb_queue_head_init(&sch->q);
+	spin_lock_init(&sch->busylock);
 	sch->ops = ops;
 	sch->enqueue = ops->enqueue;
 	sch->dequeue = ops->dequeue;
-- 
cgit v1.2.3-70-g09d2


From c2d9ba9bce8d7323ca96f239e1f505c14d6244fb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 1 Jun 2010 06:51:19 +0000
Subject: net: CONFIG_NET_NS reduction

Use read_pnet() and write_pnet() to reduce number of ifdef CONFIG_NET_NS

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h            |  6 +-----
 include/net/cfg80211.h               | 15 ++-------------
 include/net/genetlink.h              | 15 ++-------------
 include/net/netfilter/nf_conntrack.h |  6 +-----
 include/net/sock.h                   | 10 ++--------
 net/ipv6/addrlabel.c                 |  6 +-----
 net/netfilter/nf_conntrack_core.c    |  8 ++------
 7 files changed, 11 insertions(+), 55 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a24916156f4..bd6b75317d5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1087,11 +1087,7 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 static inline
 struct net *dev_net(const struct net_device *dev)
 {
-#ifdef CONFIG_NET_NS
-	return dev->nd_net;
-#else
-	return &init_net;
-#endif
+	return read_pnet(&dev->nd_net);
 }
 
 static inline
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index b44a2e5321a..e7ebeb8bdf7 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1330,26 +1330,15 @@ struct wiphy {
 	char priv[0] __attribute__((__aligned__(NETDEV_ALIGN)));
 };
 
-#ifdef CONFIG_NET_NS
-static inline struct net *wiphy_net(struct wiphy *wiphy)
-{
-	return wiphy->_net;
-}
-
-static inline void wiphy_net_set(struct wiphy *wiphy, struct net *net)
-{
-	wiphy->_net = net;
-}
-#else
 static inline struct net *wiphy_net(struct wiphy *wiphy)
 {
-	return &init_net;
+	return read_pnet(&wiphy->_net);
 }
 
 static inline void wiphy_net_set(struct wiphy *wiphy, struct net *net)
 {
+	write_pnet(&wiphy->_net, net);
 }
-#endif
 
 /**
  * wiphy_priv - return priv from wiphy
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index eb551baafc0..f7dcd2c7041 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -68,26 +68,15 @@ struct genl_info {
 #endif
 };
 
-#ifdef CONFIG_NET_NS
 static inline struct net *genl_info_net(struct genl_info *info)
 {
-	return info->_net;
+	return read_pnet(&info->_net);
 }
 
 static inline void genl_info_net_set(struct genl_info *info, struct net *net)
 {
-	info->_net = net;
+	write_pnet(&info->_net, net);
 }
-#else
-static inline struct net *genl_info_net(struct genl_info *info)
-{
-	return &init_net;
-}
-
-static inline void genl_info_net_set(struct genl_info *info, struct net *net)
-{
-}
-#endif
 
 /**
  * struct genl_ops - generic netlink operations
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index bde095f7e84..bbfdd945308 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -152,11 +152,7 @@ extern struct net init_net;
 
 static inline struct net *nf_ct_net(const struct nf_conn *ct)
 {
-#ifdef CONFIG_NET_NS
-	return ct->ct_net;
-#else
-	return &init_net;
-#endif
+	return read_pnet(&ct->ct_net);
 }
 
 /* Alter reply tuple (maybe alter helper). */
diff --git a/include/net/sock.h b/include/net/sock.h
index ca241ea1487..3461e5d1e9a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1724,19 +1724,13 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_e
 static inline
 struct net *sock_net(const struct sock *sk)
 {
-#ifdef CONFIG_NET_NS
-	return sk->sk_net;
-#else
-	return &init_net;
-#endif
+	return read_pnet(&sk->sk_net);
 }
 
 static inline
 void sock_net_set(struct sock *sk, struct net *net)
 {
-#ifdef CONFIG_NET_NS
-	sk->sk_net = net;
-#endif
+	write_pnet(&sk->sk_net, net);
 }
 
 /*
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 8c4348cb195..f0e774cea38 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -53,11 +53,7 @@ static struct ip6addrlbl_table
 static inline
 struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
 {
-#ifdef CONFIG_NET_NS
-	return lbl->lbl_net;
-#else
-	return &init_net;
-#endif
+	return read_pnet(&lbl->lbl_net);
 }
 
 /*
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index eeeb8bc7398..77288980fae 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -619,9 +619,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
 	ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
 	/* Don't set timer yet: wait for confirmation */
 	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
-#ifdef CONFIG_NET_NS
-	ct->ct_net = net;
-#endif
+	write_pnet(&ct->ct_net, net);
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	if (zone) {
 		struct nf_conntrack_zone *nf_ct_zone;
@@ -1363,9 +1361,7 @@ static int nf_conntrack_init_init_net(void)
 		goto err_extend;
 #endif
 	/* Set up fake conntrack: to never be deleted, not in any hashes */
-#ifdef CONFIG_NET_NS
-	nf_conntrack_untracked.ct_net = &init_net;
-#endif
+	write_pnet(&nf_conntrack_untracked.ct_net, &init_net);
 	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
 	/*  - and look it like as a confirmed connection */
 	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
-- 
cgit v1.2.3-70-g09d2


From 614f60fa9d73a9e8fdff3df83381907fea7c5649 Mon Sep 17 00:00:00 2001
From: Scott McMillan <scott.a.mcmillan@intel.com>
Date: Wed, 2 Jun 2010 05:53:56 -0700
Subject: packet_mmap: expose hw packet timestamps to network packet capture
 utilities

This patch adds a setting, PACKET_TIMESTAMP, to specify the packet
timestamp source that is exported to capture utilities like tcpdump by
packet_mmap.

PACKET_TIMESTAMP accepts the same integer bit field as
SO_TIMESTAMPING.  However, only the SOF_TIMESTAMPING_SYS_HARDWARE and
SOF_TIMESTAMPING_RAW_HARDWARE values are currently recognized by
PACKET_TIMESTAMP.  SOF_TIMESTAMPING_SYS_HARDWARE takes precedence over
SOF_TIMESTAMPING_RAW_HARDWARE if both bits are set.

If PACKET_TIMESTAMP is not set, a software timestamp generated inside
the networking stack is used (the behavior before this setting was
added).

Signed-off-by: Scott McMillan <scott.a.mcmillan@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/packet_mmap.txt | 26 ++++++++++++++++++++++
 include/linux/if_packet.h                |  1 +
 net/packet/af_packet.c                   | 37 ++++++++++++++++++++++++++++++--
 3 files changed, 62 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index 98f71a5cef0..2546aa4dc23 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -493,6 +493,32 @@ The user can also use poll() to check if a buffer is available:
     pfd.events = POLLOUT;
     retval = poll(&pfd, 1, timeout);
 
+-------------------------------------------------------------------------------
++ PACKET_TIMESTAMP
+-------------------------------------------------------------------------------
+
+The PACKET_TIMESTAMP setting determines the source of the timestamp in
+the packet meta information.  If your NIC is capable of timestamping
+packets in hardware, you can request those hardware timestamps to be used.
+Note: you may need to enable the generation of hardware timestamps with
+SIOCSHWTSTAMP.
+
+PACKET_TIMESTAMP accepts the same integer bit field as
+SO_TIMESTAMPING.  However, only the SOF_TIMESTAMPING_SYS_HARDWARE
+and SOF_TIMESTAMPING_RAW_HARDWARE values are recognized by
+PACKET_TIMESTAMP.  SOF_TIMESTAMPING_SYS_HARDWARE takes precedence over
+SOF_TIMESTAMPING_RAW_HARDWARE if both bits are set.
+
+    int req = 0;
+    req |= SOF_TIMESTAMPING_SYS_HARDWARE;
+    setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req, sizeof(req))
+
+If PACKET_TIMESTAMP is not set, a software timestamp generated inside
+the networking stack is used (the behavior before this setting was added).
+
+See include/linux/net_tstamp.h and Documentation/networking/timestamping
+for more information on hardware timestamps.
+
 --------------------------------------------------------------------------------
 + THANKS
 --------------------------------------------------------------------------------
diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index 6ac23ef1801..72bfa5a034d 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -48,6 +48,7 @@ struct sockaddr_ll {
 #define PACKET_LOSS			14
 #define PACKET_VNET_HDR			15
 #define PACKET_TX_TIMESTAMP		16
+#define PACKET_TIMESTAMP		17
 
 struct tpacket_stats {
 	unsigned int	tp_packets;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2078a277e06..9a17f28b125 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -83,6 +83,7 @@
 #include <linux/if_vlan.h>
 #include <linux/virtio_net.h>
 #include <linux/errqueue.h>
+#include <linux/net_tstamp.h>
 
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
@@ -202,6 +203,7 @@ struct packet_sock {
 	unsigned int		tp_hdrlen;
 	unsigned int		tp_reserve;
 	unsigned int		tp_loss:1;
+	unsigned int		tp_tstamp;
 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
 };
 
@@ -656,6 +658,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	struct sk_buff *copy_skb = NULL;
 	struct timeval tv;
 	struct timespec ts;
+	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto drop;
@@ -737,7 +740,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		h.h1->tp_snaplen = snaplen;
 		h.h1->tp_mac = macoff;
 		h.h1->tp_net = netoff;
-		if (skb->tstamp.tv64)
+		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
+				&& shhwtstamps->syststamp.tv64)
+			tv = ktime_to_timeval(shhwtstamps->syststamp);
+		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
+				&& shhwtstamps->hwtstamp.tv64)
+			tv = ktime_to_timeval(shhwtstamps->hwtstamp);
+		else if (skb->tstamp.tv64)
 			tv = ktime_to_timeval(skb->tstamp);
 		else
 			do_gettimeofday(&tv);
@@ -750,7 +759,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		h.h2->tp_snaplen = snaplen;
 		h.h2->tp_mac = macoff;
 		h.h2->tp_net = netoff;
-		if (skb->tstamp.tv64)
+		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
+				&& shhwtstamps->syststamp.tv64)
+			ts = ktime_to_timespec(shhwtstamps->syststamp);
+		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
+				&& shhwtstamps->hwtstamp.tv64)
+			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
+		else if (skb->tstamp.tv64)
 			ts = ktime_to_timespec(skb->tstamp);
 		else
 			getnstimeofday(&ts);
@@ -2027,6 +2042,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		po->has_vnet_hdr = !!val;
 		return 0;
 	}
+	case PACKET_TIMESTAMP:
+	{
+		int val;
+
+		if (optlen != sizeof(val))
+			return -EINVAL;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+
+		po->tp_tstamp = val;
+		return 0;
+	}
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -2119,6 +2146,12 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 		val = po->tp_loss;
 		data = &val;
 		break;
+	case PACKET_TIMESTAMP:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		val = po->tp_tstamp;
+		data = &val;
+		break;
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
cgit v1.2.3-70-g09d2


From 20c59de2e6b6bc74bbf714dcd4e720afe8d516cf Mon Sep 17 00:00:00 2001
From: Arnaud Ebalard <arno@natisbad.org>
Date: Tue, 1 Jun 2010 21:35:01 +0000
Subject: ipv6: Refactor update of IPv6 flowi destination address for srcrt
 (RH) option

There are more than a dozen occurrences of following code in the
IPv6 stack:

    if (opt && opt->srcrt) {
            struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
            ipv6_addr_copy(&final, &fl.fl6_dst);
            ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
            final_p = &final;
    }

Replace those with a helper. Note that the helper overrides final_p
in all cases. This is ok as final_p was previously initialized to
NULL when declared.

Signed-off-by: Arnaud Ebalard <arno@natisbad.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h               |  4 ++++
 net/dccp/ipv6.c                  | 30 ++++++------------------------
 net/ipv6/af_inet6.c              |  9 ++-------
 net/ipv6/datagram.c              | 18 ++++--------------
 net/ipv6/exthdrs.c               | 24 ++++++++++++++++++++++++
 net/ipv6/inet6_connection_sock.c |  9 ++-------
 net/ipv6/raw.c                   | 10 ++--------
 net/ipv6/syncookies.c            |  9 ++-------
 net/ipv6/tcp_ipv6.c              | 27 ++++++---------------------
 net/ipv6/udp.c                   | 11 +++--------
 10 files changed, 55 insertions(+), 96 deletions(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 2600b69757b..f5808d596aa 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -551,6 +551,10 @@ extern int 			ipv6_ext_hdr(u8 nexthdr);
 
 extern int ipv6_find_tlv(struct sk_buff *skb, int offset, int type);
 
+extern struct in6_addr *fl6_update_dst(struct flowi *fl,
+				       const struct ipv6_txoptions *opt,
+				       struct in6_addr *orig);
+
 /*
  *	socket options (ipv6_sockglue.c)
  */
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 09169889959..6e3f32575df 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -248,7 +248,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff *skb;
 	struct ipv6_txoptions *opt = NULL;
-	struct in6_addr *final_p = NULL, final;
+	struct in6_addr *final_p, final;
 	struct flowi fl;
 	int err = -1;
 	struct dst_entry *dst;
@@ -265,13 +265,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 
 	opt = np->opt;
 
-	if (opt != NULL && opt->srcrt != NULL) {
-		const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
-
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, opt, &final);
 
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
@@ -545,19 +539,13 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		goto out_overflow;
 
 	if (dst == NULL) {
-		struct in6_addr *final_p = NULL, final;
+		struct in6_addr *final_p, final;
 		struct flowi fl;
 
 		memset(&fl, 0, sizeof(fl));
 		fl.proto = IPPROTO_DCCP;
 		ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
-		if (opt != NULL && opt->srcrt != NULL) {
-			const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
-
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
+		final_p = fl6_update_dst(&fl, opt, &final);
 		ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
@@ -885,7 +873,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct in6_addr *saddr = NULL, *final_p = NULL, final;
+	struct in6_addr *saddr = NULL, *final_p, final;
 	struct flowi fl;
 	struct dst_entry *dst;
 	int addr_type;
@@ -988,13 +976,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	fl.fl_ip_sport = inet->inet_sport;
 	security_sk_classify_flow(sk, &fl);
 
-	if (np->opt != NULL && np->opt->srcrt != NULL) {
-		const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, np->opt, &final);
 
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e733942dafe..94b1b9c954b 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -651,7 +651,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
 
 	if (dst == NULL) {
 		struct inet_sock *inet = inet_sk(sk);
-		struct in6_addr *final_p = NULL, final;
+		struct in6_addr *final_p, final;
 		struct flowi fl;
 
 		memset(&fl, 0, sizeof(fl));
@@ -665,12 +665,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
 		fl.fl_ip_sport = inet->inet_sport;
 		security_sk_classify_flow(sk, &fl);
 
-		if (np->opt && np->opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
+		final_p = fl6_update_dst(&fl, np->opt, &final);
 
 		err = ip6_dst_lookup(sk, &dst, &fl);
 		if (err) {
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 712684687c9..7d929a22cbc 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -38,10 +38,11 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	struct sockaddr_in6	*usin = (struct sockaddr_in6 *) uaddr;
 	struct inet_sock      	*inet = inet_sk(sk);
 	struct ipv6_pinfo      	*np = inet6_sk(sk);
-	struct in6_addr		*daddr, *final_p = NULL, final;
+	struct in6_addr		*daddr, *final_p, final;
 	struct dst_entry	*dst;
 	struct flowi		fl;
 	struct ip6_flowlabel	*flowlabel = NULL;
+	struct ipv6_txoptions   *opt;
 	int			addr_type;
 	int			err;
 
@@ -155,19 +156,8 @@ ipv4_connected:
 
 	security_sk_classify_flow(sk, &fl);
 
-	if (flowlabel) {
-		if (flowlabel->opt && flowlabel->opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
-	} else if (np->opt && np->opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	opt = flowlabel ? flowlabel->opt : np->opt;
+	final_p = fl6_update_dst(&fl, opt, &final);
 
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 8a659f92d17..853a633a94d 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -874,3 +874,27 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
 	return opt;
 }
 
+/**
+ * fl6_update_dst - update flowi destination address with info given
+ *                  by srcrt option, if any.
+ *
+ * @fl: flowi for which fl6_dst is to be updated
+ * @opt: struct ipv6_txoptions in which to look for srcrt opt
+ * @orig: copy of original fl6_dst address if modified
+ *
+ * Returns NULL if no txoptions or no srcrt, otherwise returns orig
+ * and initial value of fl->fl6_dst set in orig
+ */
+struct in6_addr *fl6_update_dst(struct flowi *fl,
+				const struct ipv6_txoptions *opt,
+				struct in6_addr *orig)
+{
+	if (!opt || !opt->srcrt)
+		return NULL;
+
+	ipv6_addr_copy(orig, &fl->fl6_dst);
+	ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr);
+	return orig;
+}
+
+EXPORT_SYMBOL_GPL(fl6_update_dst);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 0c5e3c3b7fd..8a1628023bd 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -185,7 +185,7 @@ int inet6_csk_xmit(struct sk_buff *skb)
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct flowi fl;
 	struct dst_entry *dst;
-	struct in6_addr *final_p = NULL, final;
+	struct in6_addr *final_p, final;
 
 	memset(&fl, 0, sizeof(fl));
 	fl.proto = sk->sk_protocol;
@@ -199,12 +199,7 @@ int inet6_csk_xmit(struct sk_buff *skb)
 	fl.fl_ip_dport = inet->inet_dport;
 	security_sk_classify_flow(sk, &fl);
 
-	if (np->opt && np->opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, np->opt, &final);
 
 	dst = __inet6_csk_dst_check(sk, np->dst_cookie);
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4a4dcbe4f8b..864eb8e03b1 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -725,7 +725,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 {
 	struct ipv6_txoptions opt_space;
 	struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
-	struct in6_addr *daddr, *final_p = NULL, final;
+	struct in6_addr *daddr, *final_p, final;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct raw6_sock *rp = raw6_sk(sk);
@@ -847,13 +847,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
 		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 
-	/* merge ip6_build_xmit from ip6_output */
-	if (opt && opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, opt, &final);
 
 	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
 		fl.oif = np->mcast_oif;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 34d1f0690d7..12383705dba 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -240,17 +240,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	 * me if there is a preferred way.
 	 */
 	{
-		struct in6_addr *final_p = NULL, final;
+		struct in6_addr *final_p, final;
 		struct flowi fl;
 		memset(&fl, 0, sizeof(fl));
 		fl.proto = IPPROTO_TCP;
 		ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
-		if (np->opt && np->opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
+		final_p = fl6_update_dst(&fl, np->opt, &final);
 		ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
 		fl.mark = sk->sk_mark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2b7c3a100e2..e487080d02d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -129,7 +129,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct in6_addr *saddr = NULL, *final_p = NULL, final;
+	struct in6_addr *saddr = NULL, *final_p, final;
 	struct flowi fl;
 	struct dst_entry *dst;
 	int addr_type;
@@ -250,12 +250,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	fl.fl_ip_dport = usin->sin6_port;
 	fl.fl_ip_sport = inet->inet_sport;
 
-	if (np->opt && np->opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, np->opt, &final);
 
 	security_sk_classify_flow(sk, &fl);
 
@@ -477,7 +472,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff * skb;
 	struct ipv6_txoptions *opt = NULL;
-	struct in6_addr * final_p = NULL, final;
+	struct in6_addr * final_p, final;
 	struct flowi fl;
 	struct dst_entry *dst;
 	int err = -1;
@@ -494,12 +489,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 	security_req_classify_flow(req, &fl);
 
 	opt = np->opt;
-	if (opt && opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, opt, &final);
 
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
@@ -1392,18 +1382,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		goto out_overflow;
 
 	if (dst == NULL) {
-		struct in6_addr *final_p = NULL, final;
+		struct in6_addr *final_p, final;
 		struct flowi fl;
 
 		memset(&fl, 0, sizeof(fl));
 		fl.proto = IPPROTO_TCP;
 		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
-		if (opt && opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
+		final_p = fl6_update_dst(&fl, opt, &final);
 		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
 		fl.mark = sk->sk_mark;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3048f906c04..4aea57dec75 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -929,7 +929,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
-	struct in6_addr *daddr, *final_p = NULL, final;
+	struct in6_addr *daddr, *final_p, final;
 	struct ipv6_txoptions *opt = NULL;
 	struct ip6_flowlabel *flowlabel = NULL;
 	struct flowi fl;
@@ -1099,14 +1099,9 @@ do_udp_sendmsg:
 		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 	fl.fl_ip_sport = inet->inet_sport;
 
-	/* merge ip6_build_xmit from ip6_output */
-	if (opt && opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
+	final_p = fl6_update_dst(&fl, opt, &final);
+	if (final_p)
 		connected = 0;
-	}
 
 	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) {
 		fl.oif = np->mcast_oif;
-- 
cgit v1.2.3-70-g09d2


From ab95bfe01f9872459c8678572ccadbf646badad0 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 1 Jun 2010 21:52:08 +0000
Subject: net: replace hooks in __netif_receive_skb V5

This patch removes two receive frame hooks (for bridge and for macvlan)
from __netif_receive_skb and replaces them with a single hook for both.
Only one hook per device is supported because it makes no sense to do
bridging and macvlan on the same device.

Then a network driver (of virtual netdev like macvlan or bridge) can register
an rx_handler for needed net device.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c      |  19 +++++---
 include/linux/if_bridge.h  |   2 -
 include/linux/if_macvlan.h |   4 --
 include/linux/netdevice.h  |   7 +++
 net/bridge/br.c            |   2 -
 net/bridge/br_if.c         |   8 +++
 net/bridge/br_input.c      |  12 +++--
 net/bridge/br_private.h    |   3 +-
 net/core/dev.c             | 119 +++++++++++++++++++++------------------------
 9 files changed, 93 insertions(+), 83 deletions(-)

(limited to 'include')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 87e8d4cb405..53422ce26f7 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -145,15 +145,16 @@ static void macvlan_broadcast(struct sk_buff *skb,
 }
 
 /* called under rcu_read_lock() from netif_receive_skb */
-static struct sk_buff *macvlan_handle_frame(struct macvlan_port *port,
-					    struct sk_buff *skb)
+static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
 {
+	struct macvlan_port *port;
 	const struct ethhdr *eth = eth_hdr(skb);
 	const struct macvlan_dev *vlan;
 	const struct macvlan_dev *src;
 	struct net_device *dev;
 	unsigned int len;
 
+	port = rcu_dereference(skb->dev->macvlan_port);
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (!src)
@@ -515,6 +516,7 @@ static int macvlan_port_create(struct net_device *dev)
 {
 	struct macvlan_port *port;
 	unsigned int i;
+	int err;
 
 	if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK)
 		return -EINVAL;
@@ -528,13 +530,21 @@ static int macvlan_port_create(struct net_device *dev)
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
 	rcu_assign_pointer(dev->macvlan_port, port);
-	return 0;
+
+	err = netdev_rx_handler_register(dev, macvlan_handle_frame);
+	if (err) {
+		rcu_assign_pointer(dev->macvlan_port, NULL);
+		kfree(port);
+	}
+
+	return err;
 }
 
 static void macvlan_port_destroy(struct net_device *dev)
 {
 	struct macvlan_port *port = dev->macvlan_port;
 
+	netdev_rx_handler_unregister(dev);
 	rcu_assign_pointer(dev->macvlan_port, NULL);
 	synchronize_rcu();
 	kfree(port);
@@ -767,14 +777,12 @@ static int __init macvlan_init_module(void)
 	int err;
 
 	register_netdevice_notifier(&macvlan_notifier_block);
-	macvlan_handle_frame_hook = macvlan_handle_frame;
 
 	err = macvlan_link_register(&macvlan_link_ops);
 	if (err < 0)
 		goto err1;
 	return 0;
 err1:
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 	return err;
 }
@@ -782,7 +790,6 @@ err1:
 static void __exit macvlan_cleanup_module(void)
 {
 	rtnl_link_unregister(&macvlan_link_ops);
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 }
 
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 938b7e81df9..0d241a5c490 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -102,8 +102,6 @@ struct __fdb_entry {
 #include <linux/netdevice.h>
 
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
-extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					       struct sk_buff *skb);
 extern int (*br_should_route_hook)(struct sk_buff *skb);
 
 #endif
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 9ea047aca79..c26a0e4f0ce 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -84,8 +84,4 @@ extern int macvlan_link_register(struct rtnl_link_ops *ops);
 extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev);
 
-
-extern struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *,
-						    struct sk_buff *);
-
 #endif /* _LINUX_IF_MACVLAN_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bd6b75317d5..5156b806924 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -381,6 +381,8 @@ enum gro_result {
 };
 typedef enum gro_result gro_result_t;
 
+typedef struct sk_buff *rx_handler_func_t(struct sk_buff *skb);
+
 extern void __napi_schedule(struct napi_struct *n);
 
 static inline int napi_disable_pending(struct napi_struct *n)
@@ -957,6 +959,7 @@ struct net_device {
 #endif
 
 	struct netdev_queue	rx_queue;
+	rx_handler_func_t	*rx_handler;
 
 	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
 
@@ -1689,6 +1692,10 @@ static inline void napi_free_frags(struct napi_struct *napi)
 	napi->skb = NULL;
 }
 
+extern int netdev_rx_handler_register(struct net_device *dev,
+				      rx_handler_func_t *rx_handler);
+extern void netdev_rx_handler_unregister(struct net_device *dev);
+
 extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 76357b54775..c8436fa3134 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -63,7 +63,6 @@ static int __init br_init(void)
 		goto err_out4;
 
 	brioctl_set(br_ioctl_deviceless_stub);
-	br_handle_frame_hook = br_handle_frame;
 
 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
 	br_fdb_test_addr_hook = br_fdb_test_addr;
@@ -100,7 +99,6 @@ static void __exit br_deinit(void)
 	br_fdb_test_addr_hook = NULL;
 #endif
 
-	br_handle_frame_hook = NULL;
 	br_fdb_fini();
 }
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 18b245e2c00..d9242342837 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -147,6 +147,7 @@ static void del_nbp(struct net_bridge_port *p)
 
 	list_del_rcu(&p->list);
 
+	netdev_rx_handler_unregister(dev);
 	rcu_assign_pointer(dev->br_port, NULL);
 
 	br_multicast_del_port(p);
@@ -429,6 +430,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		goto err2;
 
 	rcu_assign_pointer(dev->br_port, p);
+
+	err = netdev_rx_handler_register(dev, br_handle_frame);
+	if (err)
+		goto err3;
+
 	dev_disable_lro(dev);
 
 	list_add_rcu(&p->list, &br->port_list);
@@ -451,6 +457,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	br_netpoll_enable(br, dev);
 
 	return 0;
+err3:
+	rcu_assign_pointer(dev->br_port, NULL);
 err2:
 	br_fdb_delete_by_port(br, p, 1);
 err1:
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index d36e700f7a2..99647d8f95c 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -131,15 +131,19 @@ static inline int is_link_local(const unsigned char *dest)
 }
 
 /*
- * Called via br_handle_frame_hook.
  * Return NULL if skb is handled
- * note: already called with rcu_read_lock (preempt_disabled)
+ * note: already called with rcu_read_lock (preempt_disabled) from
+ * netif_receive_skb
  */
-struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
+struct sk_buff *br_handle_frame(struct sk_buff *skb)
 {
+	struct net_bridge_port *p;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	int (*rhook)(struct sk_buff *skb);
 
+	if (skb->pkt_type == PACKET_LOOPBACK)
+		return skb;
+
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto drop;
 
@@ -147,6 +151,8 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 	if (!skb)
 		return NULL;
 
+	p = rcu_dereference(skb->dev->br_port);
+
 	if (unlikely(is_link_local(dest))) {
 		/* Pause frames shouldn't be passed up by driver anyway */
 		if (skb->protocol == htons(ETH_P_PAUSE))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0f4a74bc6a9..c83519b555b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -331,8 +331,7 @@ extern void br_features_recompute(struct net_bridge *br);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
-				       struct sk_buff *skb);
+extern struct sk_buff *br_handle_frame(struct sk_buff *skb);
 
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
diff --git a/net/core/dev.c b/net/core/dev.c
index ffca5c1066f..ec01a5998d7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2604,70 +2604,14 @@ static inline int deliver_skb(struct sk_buff *skb,
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
-#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
+    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 /* This hook is defined here for ATM LANE */
 int (*br_fdb_test_addr_hook)(struct net_device *dev,
 			     unsigned char *addr) __read_mostly;
 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
-/*
- * If bridge module is loaded call bridging hook.
- *  returns NULL if packet was consumed.
- */
-struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(br_handle_frame_hook);
-
-static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
-					    struct packet_type **pt_prev, int *ret,
-					    struct net_device *orig_dev)
-{
-	struct net_bridge_port *port;
-
-	if (skb->pkt_type == PACKET_LOOPBACK ||
-	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-
-	return br_handle_frame_hook(port, skb);
-}
-#else
-#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
-#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
-					     struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
-
-static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
-					     struct packet_type **pt_prev,
-					     int *ret,
-					     struct net_device *orig_dev)
-{
-	struct macvlan_port *port;
-
-	port = rcu_dereference(skb->dev->macvlan_port);
-	if (!port)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-	return macvlan_handle_frame_hook(port, skb);
-}
-#else
-#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
 #ifdef CONFIG_NET_CLS_ACT
 /* TODO: Maybe we should just force sch_ingress to be compiled in
  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
@@ -2763,6 +2707,47 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
+/**
+ *	netdev_rx_handler_register - register receive handler
+ *	@dev: device to register a handler for
+ *	@rx_handler: receive handler to register
+ *
+ *	Register a receive handler for a device. This handler will then be
+ *	called from __netif_receive_skb. A negative errno code is returned
+ *	on a failure.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int netdev_rx_handler_register(struct net_device *dev,
+			       rx_handler_func_t *rx_handler)
+{
+	ASSERT_RTNL();
+
+	if (dev->rx_handler)
+		return -EBUSY;
+
+	rcu_assign_pointer(dev->rx_handler, rx_handler);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
+
+/**
+ *	netdev_rx_handler_unregister - unregister receive handler
+ *	@dev: device to unregister a handler from
+ *
+ *	Unregister a receive handler from a device.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+void netdev_rx_handler_unregister(struct net_device *dev)
+{
+
+	ASSERT_RTNL();
+	rcu_assign_pointer(dev->rx_handler, NULL);
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+
 static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
 					      struct net_device *master)
 {
@@ -2815,6 +2800,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
+	rx_handler_func_t *rx_handler;
 	struct net_device *orig_dev;
 	struct net_device *master;
 	struct net_device *null_or_orig;
@@ -2877,12 +2863,17 @@ static int __netif_receive_skb(struct sk_buff *skb)
 ncls:
 #endif
 
-	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
-	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
+	/* Handle special case of bridge or macvlan */
+	rx_handler = rcu_dereference(skb->dev->rx_handler);
+	if (rx_handler) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		skb = rx_handler(skb);
+		if (!skb)
+			goto out;
+	}
 
 	/*
 	 * Make sure frames received on VLAN interfaces stacked on
-- 
cgit v1.2.3-70-g09d2


From 77c2061d10a408d0220c2b0e7faefe52d9c41008 Mon Sep 17 00:00:00 2001
From: Walter Goldens <goldenstranger@yahoo.com>
Date: Tue, 18 May 2010 04:44:54 -0700
Subject: wireless: fix several minor description typos

Signed-off-by: Walter Goldens <goldenstranger@yahoo.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath5k/eeprom.c   | 2 +-
 drivers/net/wireless/ath/ath9k/hw.h       | 2 +-
 drivers/net/wireless/iwmc3200wifi/hal.c   | 2 +-
 drivers/net/wireless/libertas/scan.c      | 2 +-
 drivers/net/wireless/orinoco/hermes_dld.c | 2 +-
 drivers/net/wireless/rt2x00/rt2x00dev.c   | 2 +-
 drivers/net/wireless/zd1211rw/zd_mac.c    | 2 +-
 drivers/net/wireless/zd1211rw/zd_usb.c    | 2 +-
 include/linux/nl80211.h                   | 2 +-
 include/net/cfg80211.h                    | 2 +-
 net/mac80211/mlme.c                       | 2 +-
 net/mac80211/status.c                     | 2 +-
 net/mac80211/work.c                       | 2 +-
 13 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ath5k/eeprom.c b/drivers/net/wireless/ath/ath5k/eeprom.c
index ed0263672d6..8490348379a 100644
--- a/drivers/net/wireless/ath/ath5k/eeprom.c
+++ b/drivers/net/wireless/ath/ath5k/eeprom.c
@@ -715,7 +715,7 @@ ath5k_eeprom_convert_pcal_info_5111(struct ath5k_hw *ah, int mode,
 
 		/* Only one curve for RF5111
 		 * find out which one and place
-		 * in in pd_curves.
+		 * in pd_curves.
 		 * Note: ee_x_gain is reversed here */
 		for (idx = 0; idx < AR5K_EEPROM_N_PD_CURVES; idx++) {
 
diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h
index 5cf0714f069..ffc9249b02c 100644
--- a/drivers/net/wireless/ath/ath9k/hw.h
+++ b/drivers/net/wireless/ath/ath9k/hw.h
@@ -461,7 +461,7 @@ struct ath9k_hw_version {
 #define AR_GENTMR_BIT(_index)	(1 << (_index))
 
 /*
- * Using de Bruijin sequence to to look up 1's index in a 32 bit number
+ * Using de Bruijn sequence to look up 1's index in a 32 bit number
  * debruijn32 = 0000 0111 0111 1100 1011 0101 0011 0001
  */
 #define debruijn32 0x077CB531U
diff --git a/drivers/net/wireless/iwmc3200wifi/hal.c b/drivers/net/wireless/iwmc3200wifi/hal.c
index 9531b18cf72..907ac890997 100644
--- a/drivers/net/wireless/iwmc3200wifi/hal.c
+++ b/drivers/net/wireless/iwmc3200wifi/hal.c
@@ -54,7 +54,7 @@
  *   LMAC. If you look at LMAC commands you'll se that they
  *   are actually regular iwlwifi target commands encapsulated
  *   into a special UMAC command called UMAC passthrough.
- *   This is due to the fact the the host talks exclusively
+ *   This is due to the fact the host talks exclusively
  *   to the UMAC and so there needs to be a special UMAC
  *   command for talking to the LMAC.
  *   This is how a wifi command is layed out:
diff --git a/drivers/net/wireless/libertas/scan.c b/drivers/net/wireless/libertas/scan.c
index 24cd54b3a80..7d82f13bdf1 100644
--- a/drivers/net/wireless/libertas/scan.c
+++ b/drivers/net/wireless/libertas/scan.c
@@ -666,7 +666,7 @@ void lbs_scan_worker(struct work_struct *work)
 /**
  *  @brief Interpret a BSS scan response returned from the firmware
  *
- *  Parse the various fixed fields and IEs passed back for a a BSS probe
+ *  Parse the various fixed fields and IEs passed back for a BSS probe
  *  response or beacon from the scan command.  Record information as needed
  *  in the scan table struct bss_descriptor for that entry.
  *
diff --git a/drivers/net/wireless/orinoco/hermes_dld.c b/drivers/net/wireless/orinoco/hermes_dld.c
index 6da85e75fce..f750f49bbd4 100644
--- a/drivers/net/wireless/orinoco/hermes_dld.c
+++ b/drivers/net/wireless/orinoco/hermes_dld.c
@@ -68,7 +68,7 @@ struct dblock {
 } __attribute__ ((packed));
 
 /*
- * Plug Data References are located in in the image after the last data
+ * Plug Data References are located in the image after the last data
  * block.  They refer to areas in the adapter memory where the plug data
  * items with matching ID should be written.
  */
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 3ae468c4d76..2ed32e02a06 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -224,7 +224,7 @@ void rt2x00lib_txdone(struct queue_entry *entry,
 	/*
 	 * If the IV/EIV data was stripped from the frame before it was
 	 * passed to the hardware, we should now reinsert it again because
-	 * mac80211 will expect the the same data to be present it the
+	 * mac80211 will expect the same data to be present in the
 	 * frame as it was passed to us.
 	 */
 	if (test_bit(CONFIG_SUPPORT_HW_CRYPTO, &rt2x00dev->flags))
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index b0b666019a9..163a8a06b22 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -855,7 +855,7 @@ int zd_mac_rx(struct ieee80211_hw *hw, const u8 *buffer, unsigned int length)
 	if (skb == NULL)
 		return -ENOMEM;
 	if (need_padding) {
-		/* Make sure the the payload data is 4 byte aligned. */
+		/* Make sure the payload data is 4 byte aligned. */
 		skb_reserve(skb, 2);
 	}
 
diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c
index c257940b71b..818e1480ca9 100644
--- a/drivers/net/wireless/zd1211rw/zd_usb.c
+++ b/drivers/net/wireless/zd1211rw/zd_usb.c
@@ -844,7 +844,7 @@ out:
  * @usb: a &struct zd_usb pointer
  * @urb: URB to be freed
  *
- * Frees the the transmission URB, which means to put it on the free URB
+ * Frees the transmission URB, which means to put it on the free URB
  * list.
  */
 static void free_tx_urb(struct zd_usb *usb, struct urb *urb)
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index b7c77f9712f..64fb32b93a2 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -132,7 +132,7 @@
  * 	%NL80211_ATTR_REG_RULE_POWER_MAX_ANT_GAIN and
  * 	%NL80211_ATTR_REG_RULE_POWER_MAX_EIRP.
  * @NL80211_CMD_REQ_SET_REG: ask the wireless core to set the regulatory domain
- * 	to the the specified ISO/IEC 3166-1 alpha2 country code. The core will
+ * 	to the specified ISO/IEC 3166-1 alpha2 country code. The core will
  * 	store this as a valid request and then query userspace for it.
  *
  * @NL80211_CMD_GET_MESH_PARAMS: Get mesh networking properties for the
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index b44a2e5321a..049e507d2f8 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -89,7 +89,7 @@ enum ieee80211_channel_flags {
  * @max_power: maximum transmission power (in dBm)
  * @beacon_found: helper to regulatory code to indicate when a beacon
  *	has been found on this channel. Use regulatory_hint_found_beacon()
- *	to enable this, this is is useful only on 5 GHz band.
+ *	to enable this, this is useful only on 5 GHz band.
  * @orig_mag: internal use
  * @orig_mpwr: internal use
  */
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0839c4e8fd2..31e3386b8d4 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1763,7 +1763,7 @@ static void ieee80211_sta_work(struct work_struct *work)
 
 	/*
 	 * ieee80211_queue_work() should have picked up most cases,
-	 * here we'll pick the the rest.
+	 * here we'll pick the rest.
 	 */
 	if (WARN(local->suspended, "STA MLME work scheduled while "
 		 "going to suspend\n"))
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 94613af009f..34da67995d9 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -47,7 +47,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 	/*
 	 * This skb 'survived' a round-trip through the driver, and
 	 * hopefully the driver didn't mangle it too badly. However,
-	 * we can definitely not rely on the the control information
+	 * we can definitely not rely on the control information
 	 * being correct. Clear it so we don't get junk there, and
 	 * indicate that it needs new processing, but must not be
 	 * modified/encrypted again.
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index be3d4a69869..4157717ed78 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -840,7 +840,7 @@ static void ieee80211_work_work(struct work_struct *work)
 
 	/*
 	 * ieee80211_queue_work() should have picked up most cases,
-	 * here we'll pick the the rest.
+	 * here we'll pick the rest.
 	 */
 	if (WARN(local->suspended, "work scheduled while going to suspend\n"))
 		return;
-- 
cgit v1.2.3-70-g09d2


From 252aa631f88080920a7083ac5a5844ffc5463629 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 May 2010 12:17:12 +0200
Subject: cfg80211: make action channel type optional

When sending action frames, we want to verify
that we do that on the correct channel. However,
checking the channel type in addition can get in
the way, since the channel type could change on
the fly during an association, and it's not
useful to have the channel type anyway since it
has no effect on the transmission. Therefore,
make it optional to specify so that if wanted,
it can still be checked, but is not required.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h     | 1 +
 net/mac80211/cfg.c         | 4 +++-
 net/mac80211/ieee80211_i.h | 1 +
 net/mac80211/mlme.c        | 9 ++++++---
 net/wireless/core.h        | 1 +
 net/wireless/mlme.c        | 3 ++-
 net/wireless/nl80211.c     | 3 +++
 7 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 049e507d2f8..0c3c214772e 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1168,6 +1168,7 @@ struct cfg80211_ops {
 	int	(*action)(struct wiphy *wiphy, struct net_device *dev,
 			  struct ieee80211_channel *chan,
 			  enum nl80211_channel_type channel_type,
+			  bool channel_type_valid,
 			  const u8 *buf, size_t len, u64 *cookie);
 
 	int	(*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index c7000a6ca37..f8c49c5ad8a 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1554,10 +1554,12 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
 static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev,
 			    struct ieee80211_channel *chan,
 			    enum nl80211_channel_type channel_type,
+			    bool channel_type_valid,
 			    const u8 *buf, size_t len, u64 *cookie)
 {
 	return ieee80211_mgd_action(IEEE80211_DEV_TO_SUB_IF(dev), chan,
-				    channel_type, buf, len, cookie);
+				    channel_type, channel_type_valid,
+				    buf, len, cookie);
 }
 
 struct cfg80211_ops mac80211_config_ops = {
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 1a9e2da37a9..d4677efd3a3 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -988,6 +988,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
 int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata,
 			 struct ieee80211_channel *chan,
 			 enum nl80211_channel_type channel_type,
+			 bool channel_type_valid,
 			 const u8 *buf, size_t len, u64 *cookie);
 ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata,
 					  struct sk_buff *skb);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 31e3386b8d4..29c3a75a7ad 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2308,6 +2308,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
 int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata,
 			 struct ieee80211_channel *chan,
 			 enum nl80211_channel_type channel_type,
+			 bool channel_type_valid,
 			 const u8 *buf, size_t len, u64 *cookie)
 {
 	struct ieee80211_local *local = sdata->local;
@@ -2315,9 +2316,11 @@ int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata,
 	struct sk_buff *skb;
 
 	/* Check that we are on the requested channel for transmission */
-	if ((chan != local->tmp_channel ||
-	     channel_type != local->tmp_channel_type) &&
-	    (chan != local->oper_channel ||
+	if (chan != local->tmp_channel &&
+	    chan != local->oper_channel)
+		return -EBUSY;
+	if (channel_type_valid &&
+	    (channel_type != local->tmp_channel_type &&
 	     channel_type != local->_oper_channel_type))
 		return -EBUSY;
 
diff --git a/net/wireless/core.h b/net/wireless/core.h
index ae930acf75e..63d57ae399c 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -339,6 +339,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev,
 			 struct net_device *dev,
 			 struct ieee80211_channel *chan,
 			 enum nl80211_channel_type channel_type,
+			 bool channel_type_valid,
 			 const u8 *buf, size_t len, u64 *cookie);
 
 /* SME */
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 48ead6f0426..f69ae19f497 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -827,6 +827,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev,
 			 struct net_device *dev,
 			 struct ieee80211_channel *chan,
 			 enum nl80211_channel_type channel_type,
+			 bool channel_type_valid,
 			 const u8 *buf, size_t len, u64 *cookie)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -855,7 +856,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev,
 
 	/* Transmit the Action frame as requested by user space */
 	return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type,
-				 buf, len, cookie);
+				 channel_type_valid, buf, len, cookie);
 }
 
 bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index db71150b804..90ab3c8519b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4681,6 +4681,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
 	struct net_device *dev;
 	struct ieee80211_channel *chan;
 	enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
+	bool channel_type_valid = false;
 	u32 freq;
 	int err;
 	void *hdr;
@@ -4722,6 +4723,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
 			err = -EINVAL;
 			goto out;
 		}
+		channel_type_valid = true;
 	}
 
 	freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
@@ -4745,6 +4747,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
 		goto free_msg;
 	}
 	err = cfg80211_mlme_action(rdev, dev, chan, channel_type,
+				   channel_type_valid,
 				   nla_data(info->attrs[NL80211_ATTR_FRAME]),
 				   nla_len(info->attrs[NL80211_ATTR_FRAME]),
 				   &cookie);
-- 
cgit v1.2.3-70-g09d2


From b5f7e7554753e2cc3ef3bef0271fdb32027df2ba Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 2 Jun 2010 12:05:27 +0000
Subject: ipv4: add LINUX_MIB_IPRPFILTER snmp counter

Christoph Lameter mentioned that packets could be dropped in the input
path because of rp_filter settings, without any SNMP counter being
incremented. System administrators can have a hard time tracking down
the problem.

This patch introduces a new counter, LINUX_MIB_IPRPFILTER, incremented
each time we drop a packet because Reverse Path Filter triggers.

(We receive an IPv4 datagram on a given interface, and find the route to
send an answer would use another interface)

netstat -s | grep IPReversePathFilter
    IPReversePathFilter: 21714

Reported-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h    |  1 +
 net/ipv4/fib_frontend.c |  6 ++++--
 net/ipv4/ip_input.c     |  3 +++
 net/ipv4/proc.c         |  1 +
 net/ipv4/route.c        | 31 ++++++++++++++++++-------------
 5 files changed, 27 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 52797714ade..ebb0c80ffd6 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -229,6 +229,7 @@ enum
 	LINUX_MIB_TCPBACKLOGDROP,
 	LINUX_MIB_TCPMINTTLDROP, /* RFC 5082 */
 	LINUX_MIB_TCPDEFERACCEPTDROP,
+	LINUX_MIB_IPRPFILTER, /* IP Reverse Path Filter (rp_filter) */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4f0ed458c88..e830f7a123b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -284,7 +284,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 	if (no_addr)
 		goto last_resort;
 	if (rpf == 1)
-		goto e_inval;
+		goto e_rpf;
 	fl.oif = dev->ifindex;
 
 	ret = 0;
@@ -299,7 +299,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 
 last_resort:
 	if (rpf)
-		goto e_inval;
+		goto e_rpf;
 	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
 	*itag = 0;
 	return 0;
@@ -308,6 +308,8 @@ e_inval_res:
 	fib_res_put(&res);
 e_inval:
 	return -EINVAL;
+e_rpf:
+	return -EXDEV;
 }
 
 static inline __be32 sk_extract_addr(struct sockaddr *addr)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d930dc5e4d8..d52c9da644c 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -340,6 +340,9 @@ static int ip_rcv_finish(struct sk_buff *skb)
 			else if (err == -ENETUNREACH)
 				IP_INC_STATS_BH(dev_net(skb->dev),
 						IPSTATS_MIB_INNOROUTES);
+			else if (err == -EXDEV)
+				NET_INC_STATS_BH(dev_net(skb->dev),
+						 LINUX_MIB_IPRPFILTER);
 			goto drop;
 		}
 	}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 3dc9914c1dc..e320ca6b3ef 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -252,6 +252,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP),
 	SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
 	SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
+	SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8495bceec76..d377b45005f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1851,6 +1851,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	__be32 spec_dst;
 	struct in_device *in_dev = in_dev_get(dev);
 	u32 itag = 0;
+	int err;
 
 	/* Primary sanity checks. */
 
@@ -1865,10 +1866,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		if (!ipv4_is_local_multicast(daddr))
 			goto e_inval;
 		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
-	} else if (fib_validate_source(saddr, 0, tos, 0,
-					dev, &spec_dst, &itag, 0) < 0)
-		goto e_inval;
-
+	} else {
+		err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
+					  &itag, 0);
+		if (err < 0)
+			goto e_err;
+	}
 	rth = dst_alloc(&ipv4_dst_ops);
 	if (!rth)
 		goto e_nobufs;
@@ -1920,8 +1923,10 @@ e_nobufs:
 	return -ENOBUFS;
 
 e_inval:
+	err = -EINVAL;
+e_err:
 	in_dev_put(in_dev);
-	return -EINVAL;
+	return err;
 }
 
 
@@ -1985,7 +1990,6 @@ static int __mkroute_input(struct sk_buff *skb,
 		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
 					 saddr);
 
-		err = -EINVAL;
 		goto cleanup;
 	}
 
@@ -2157,13 +2161,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		goto brd_input;
 
 	if (res.type == RTN_LOCAL) {
-		int result;
-		result = fib_validate_source(saddr, daddr, tos,
+		err = fib_validate_source(saddr, daddr, tos,
 					     net->loopback_dev->ifindex,
 					     dev, &spec_dst, &itag, skb->mark);
-		if (result < 0)
-			goto martian_source;
-		if (result)
+		if (err < 0)
+			goto martian_source_keep_err;
+		if (err)
 			flags |= RTCF_DIRECTSRC;
 		spec_dst = daddr;
 		goto local_input;
@@ -2191,7 +2194,7 @@ brd_input:
 		err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
 					  &itag, skb->mark);
 		if (err < 0)
-			goto martian_source;
+			goto martian_source_keep_err;
 		if (err)
 			flags |= RTCF_DIRECTSRC;
 	}
@@ -2272,8 +2275,10 @@ e_nobufs:
 	goto done;
 
 martian_source:
+	err = -EINVAL;
+martian_source_keep_err:
 	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
-	goto e_inval;
+	goto done;
 }
 
 int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-- 
cgit v1.2.3-70-g09d2


From bc10502dba37d3b210efd9f3867212298f13b78e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 3 Jun 2010 03:21:52 -0700
Subject: net: use __packed annotation

cleanup patch.

Use new __packed annotation in net/ and include/
(except netfilter)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h   |  2 +-
 include/linux/if_fddi.h    |  8 ++---
 include/linux/if_frad.h    |  2 +-
 include/linux/if_hippi.h   |  8 ++---
 include/linux/if_pppox.h   |  8 ++---
 include/linux/ipv6.h       |  4 +--
 include/linux/isdnif.h     |  2 +-
 include/linux/mISDNif.h    |  2 +-
 include/linux/nbd.h        |  2 +-
 include/linux/ncp.h        | 10 +++---
 include/linux/ncp_fs_sb.h  | 14 ++++----
 include/linux/phonet.h     |  4 +--
 include/linux/rds.h        | 12 +++----
 include/linux/sctp.h       | 80 +++++++++++++++++++++++-----------------------
 include/linux/wlp.h        | 22 ++++++-------
 include/net/dn_dev.h       |  8 ++---
 include/net/dn_nsp.h       | 16 +++++-----
 include/net/ip6_tunnel.h   |  2 +-
 include/net/ipx.h          |  8 ++---
 include/net/mip6.h         |  2 +-
 include/net/ndisc.h        |  2 +-
 include/net/sctp/structs.h |  4 +--
 include/rxrpc/packet.h     |  8 ++---
 net/bluetooth/bnep/bnep.h  |  8 ++---
 net/compat.c               |  6 ++--
 net/iucv/iucv.c            | 14 ++++----
 net/mac80211/cfg.c         |  2 +-
 net/mac80211/ieee80211_i.h |  2 +-
 net/mac80211/rx.c          |  2 +-
 net/sctp/sm_make_chunk.c   |  2 +-
 30 files changed, 133 insertions(+), 133 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index bed7a4682b9..c831467774d 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -119,7 +119,7 @@ struct ethhdr {
 	unsigned char	h_dest[ETH_ALEN];	/* destination eth addr	*/
 	unsigned char	h_source[ETH_ALEN];	/* source ether addr	*/
 	__be16		h_proto;		/* packet type ID field	*/
-} __attribute__((packed));
+} __packed;
 
 #ifdef __KERNEL__
 #include <linux/skbuff.h>
diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h
index 5459c5c0993..9947c39e62f 100644
--- a/include/linux/if_fddi.h
+++ b/include/linux/if_fddi.h
@@ -67,7 +67,7 @@ struct fddi_8022_1_hdr {
 	__u8	dsap;					/* destination service access point */
 	__u8	ssap;					/* source service access point */
 	__u8	ctrl;					/* control byte #1 */
-} __attribute__ ((packed));
+} __packed;
 
 /* Define 802.2 Type 2 header */
 struct fddi_8022_2_hdr {
@@ -75,7 +75,7 @@ struct fddi_8022_2_hdr {
 	__u8	ssap;					/* source service access point */
 	__u8	ctrl_1;					/* control byte #1 */
 	__u8	ctrl_2;					/* control byte #2 */
-} __attribute__ ((packed));
+} __packed;
 
 /* Define 802.2 SNAP header */
 #define FDDI_K_OUI_LEN	3
@@ -85,7 +85,7 @@ struct fddi_snap_hdr {
 	__u8	ctrl;					/* always 0x03 */
 	__u8	oui[FDDI_K_OUI_LEN];	/* organizational universal id */
 	__be16	ethertype;				/* packet type ID field */
-} __attribute__ ((packed));
+} __packed;
 
 /* Define FDDI LLC frame header */
 struct fddihdr {
@@ -98,7 +98,7 @@ struct fddihdr {
 		struct fddi_8022_2_hdr		llc_8022_2;
 		struct fddi_snap_hdr		llc_snap;
 		} hdr;
-} __attribute__ ((packed));
+} __packed;
 
 #ifdef __KERNEL__
 #include <linux/netdevice.h>
diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h
index 80b3a1056a5..191ee0869bc 100644
--- a/include/linux/if_frad.h
+++ b/include/linux/if_frad.h
@@ -135,7 +135,7 @@ struct frhdr
    __be16 PID;
 
 #define IP_NLPID pad 
-} __attribute__((packed));
+} __packed;
 
 /* see RFC 1490 for the definition of the following */
 #define FRAD_I_UI		0x03
diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h
index 8d038eb8db5..5fe5f307c6f 100644
--- a/include/linux/if_hippi.h
+++ b/include/linux/if_hippi.h
@@ -104,7 +104,7 @@ struct hippi_fp_hdr {
 	__be32		fixed;
 #endif
 	__be32		d2_size;
-} __attribute__ ((packed));
+} __packed;
 
 struct hippi_le_hdr {
 #if defined (__BIG_ENDIAN_BITFIELD)
@@ -129,7 +129,7 @@ struct hippi_le_hdr {
 	__u8		daddr[HIPPI_ALEN];
 	__u16		locally_administered;
 	__u8		saddr[HIPPI_ALEN];
-} __attribute__ ((packed));
+} __packed;
 
 #define HIPPI_OUI_LEN	3
 /*
@@ -142,12 +142,12 @@ struct hippi_snap_hdr {
 	__u8	ctrl;			/* always 0x03 */
 	__u8	oui[HIPPI_OUI_LEN];	/* organizational universal id (zero)*/
 	__be16	ethertype;		/* packet type ID field */
-} __attribute__ ((packed));
+} __packed;
 
 struct hippi_hdr {
 	struct hippi_fp_hdr	fp;
 	struct hippi_le_hdr	le;
 	struct hippi_snap_hdr	snap;
-} __attribute__ ((packed));
+} __packed;
 
 #endif	/* _LINUX_IF_HIPPI_H */
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index a6577af0c4e..1925e0c3f16 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -59,7 +59,7 @@ struct sockaddr_pppox {
        union{ 
                struct pppoe_addr       pppoe; 
        }sa_addr; 
-}__attribute__ ((packed)); 
+} __packed;
 
 /* The use of the above union isn't viable because the size of this
  * struct must stay fixed over time -- applications use sizeof(struct
@@ -70,7 +70,7 @@ struct sockaddr_pppol2tp {
 	sa_family_t     sa_family;      /* address family, AF_PPPOX */
 	unsigned int    sa_protocol;    /* protocol identifier */
 	struct pppol2tp_addr pppol2tp;
-}__attribute__ ((packed));
+} __packed;
 
 /* The L2TPv3 protocol changes tunnel and session ids from 16 to 32
  * bits. So we need a different sockaddr structure.
@@ -79,7 +79,7 @@ struct sockaddr_pppol2tpv3 {
 	sa_family_t     sa_family;      /* address family, AF_PPPOX */
 	unsigned int    sa_protocol;    /* protocol identifier */
 	struct pppol2tpv3_addr pppol2tp;
-} __attribute__ ((packed));
+} __packed;
 
 /*********************************************************************
  *
@@ -129,7 +129,7 @@ struct pppoe_hdr {
 	__be16 sid;
 	__be16 length;
 	struct pppoe_tag tag[0];
-} __attribute__ ((packed));
+} __packed;
 
 /* Length of entire PPPoE + PPP header */
 #define PPPOE_SES_HLEN	8
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 99e1ab7e3ee..940e2159535 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -58,7 +58,7 @@ struct ipv6_opt_hdr {
 	/* 
 	 * TLV encoded option data follows.
 	 */
-} __attribute__ ((packed));	/* required for some archs */
+} __packed;	/* required for some archs */
 
 #define ipv6_destopt_hdr ipv6_opt_hdr
 #define ipv6_hopopt_hdr  ipv6_opt_hdr
@@ -99,7 +99,7 @@ struct ipv6_destopt_hao {
 	__u8			type;
 	__u8			length;
 	struct in6_addr		addr;
-} __attribute__ ((__packed__));
+} __packed;
 
 /*
  *	IPv6 fixed header
diff --git a/include/linux/isdnif.h b/include/linux/isdnif.h
index b9b5a684ed6..b8c23f88dd5 100644
--- a/include/linux/isdnif.h
+++ b/include/linux/isdnif.h
@@ -317,7 +317,7 @@ typedef struct T30_s {
 	__u8 r_scantime;
 	__u8 r_id[FAXIDLEN];
 	__u8 r_code;
-} __attribute__((packed)) T30_s;
+} __packed T30_s;
 
 #define ISDN_TTY_FAX_CONN_IN	0
 #define ISDN_TTY_FAX_CONN_OUT	1
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index 78c3bed1c3f..b5e7f220248 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -251,7 +251,7 @@
 struct mISDNhead {
 	unsigned int	prim;
 	unsigned int	id;
-}  __attribute__((packed));
+}  __packed;
 
 #define MISDN_HEADER_LEN	sizeof(struct mISDNhead)
 #define MAX_DATA_SIZE		2048
diff --git a/include/linux/nbd.h b/include/linux/nbd.h
index 155719dab81..bb58854a806 100644
--- a/include/linux/nbd.h
+++ b/include/linux/nbd.h
@@ -88,7 +88,7 @@ struct nbd_request {
 	char handle[8];
 	__be64 from;
 	__be32 len;
-} __attribute__ ((packed));
+} __packed;
 
 /*
  * This is the reply packet that nbd-server sends back to the client after
diff --git a/include/linux/ncp.h b/include/linux/ncp.h
index 99f0adeeb3f..3ace8370e61 100644
--- a/include/linux/ncp.h
+++ b/include/linux/ncp.h
@@ -27,7 +27,7 @@ struct ncp_request_header {
 	__u8 conn_high;
 	__u8 function;
 	__u8 data[0];
-} __attribute__((packed));
+} __packed;
 
 #define NCP_REPLY                (0x3333)
 #define NCP_WATCHDOG		 (0x3E3E)
@@ -42,7 +42,7 @@ struct ncp_reply_header {
 	__u8 completion_code;
 	__u8 connection_state;
 	__u8 data[0];
-} __attribute__((packed));
+} __packed;
 
 #define NCP_VOLNAME_LEN (16)
 #define NCP_NUMBER_OF_VOLUMES (256)
@@ -158,7 +158,7 @@ struct nw_info_struct {
 #ifdef __KERNEL__
 	struct nw_nfs_info nfs;
 #endif
-} __attribute__((packed));
+} __packed;
 
 /* modify mask - use with MODIFY_DOS_INFO structure */
 #define DM_ATTRIBUTES		  (cpu_to_le32(0x02))
@@ -190,12 +190,12 @@ struct nw_modify_dos_info {
 	__u16 inheritanceGrantMask;
 	__u16 inheritanceRevokeMask;
 	__u32 maximumSpace;
-} __attribute__((packed));
+} __packed;
 
 struct nw_search_sequence {
 	__u8 volNumber;
 	__u32 dirBase;
 	__u32 sequence;
-} __attribute__((packed));
+} __packed;
 
 #endif				/* _LINUX_NCP_H */
diff --git a/include/linux/ncp_fs_sb.h b/include/linux/ncp_fs_sb.h
index 5ec9ca67168..8da05bc098c 100644
--- a/include/linux/ncp_fs_sb.h
+++ b/include/linux/ncp_fs_sb.h
@@ -104,13 +104,13 @@ struct ncp_server {
 
 		unsigned int state;		/* STREAM only: receiver state */
 		struct {
-			__u32 magic __attribute__((packed));
-			__u32 len __attribute__((packed));
-			__u16 type __attribute__((packed));
-			__u16 p1 __attribute__((packed));
-			__u16 p2 __attribute__((packed));
-			__u16 p3 __attribute__((packed));
-			__u16 type2 __attribute__((packed));
+			__u32 magic __packed;
+			__u32 len __packed;
+			__u16 type __packed;
+			__u16 p1 __packed;
+			__u16 p2 __packed;
+			__u16 p3 __packed;
+			__u16 type2 __packed;
 		} buf;				/* STREAM only: temporary buffer */
 		unsigned char* ptr;		/* STREAM only: pointer to data */
 		size_t len;			/* STREAM only: length of data to receive */
diff --git a/include/linux/phonet.h b/include/linux/phonet.h
index e5126cff9b2..24426c3d6b5 100644
--- a/include/linux/phonet.h
+++ b/include/linux/phonet.h
@@ -56,7 +56,7 @@ struct phonethdr {
 	__be16	pn_length;
 	__u8	pn_robj;
 	__u8	pn_sobj;
-} __attribute__((packed));
+} __packed;
 
 /* Common Phonet payload header */
 struct phonetmsg {
@@ -98,7 +98,7 @@ struct sockaddr_pn {
 	__u8 spn_dev;
 	__u8 spn_resource;
 	__u8 spn_zero[sizeof(struct sockaddr) - sizeof(sa_family_t) - 3];
-} __attribute__ ((packed));
+} __packed;
 
 /* Well known address */
 #define PN_DEV_PC	0x10
diff --git a/include/linux/rds.h b/include/linux/rds.h
index cab4994c2f6..24bce3ded9e 100644
--- a/include/linux/rds.h
+++ b/include/linux/rds.h
@@ -100,7 +100,7 @@
 struct rds_info_counter {
 	u_int8_t	name[32];
 	u_int64_t	value;
-} __attribute__((packed));
+} __packed;
 
 #define RDS_INFO_CONNECTION_FLAG_SENDING	0x01
 #define RDS_INFO_CONNECTION_FLAG_CONNECTING	0x02
@@ -115,7 +115,7 @@ struct rds_info_connection {
 	__be32		faddr;
 	u_int8_t	transport[TRANSNAMSIZ];		/* null term ascii */
 	u_int8_t	flags;
-} __attribute__((packed));
+} __packed;
 
 struct rds_info_flow {
 	__be32		laddr;
@@ -123,7 +123,7 @@ struct rds_info_flow {
 	u_int32_t	bytes;
 	__be16		lport;
 	__be16		fport;
-} __attribute__((packed));
+} __packed;
 
 #define RDS_INFO_MESSAGE_FLAG_ACK               0x01
 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK          0x02
@@ -136,7 +136,7 @@ struct rds_info_message {
 	__be16		lport;
 	__be16		fport;
 	u_int8_t	flags;
-} __attribute__((packed));
+} __packed;
 
 struct rds_info_socket {
 	u_int32_t	sndbuf;
@@ -146,7 +146,7 @@ struct rds_info_socket {
 	__be16		connected_port;
 	u_int32_t	rcvbuf;
 	u_int64_t	inum;
-} __attribute__((packed));
+} __packed;
 
 struct rds_info_tcp_socket {
 	__be32          local_addr;
@@ -158,7 +158,7 @@ struct rds_info_tcp_socket {
 	u_int32_t       last_sent_nxt;
 	u_int32_t       last_expected_una;
 	u_int32_t       last_seen_una;
-} __attribute__((packed));
+} __packed;
 
 #define RDS_IB_GID_LEN	16
 struct rds_info_rdma_connection {
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index c20d3ce673c..c11a28706fa 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -61,7 +61,7 @@ typedef struct sctphdr {
 	__be16 dest;
 	__be32 vtag;
 	__le32 checksum;
-} __attribute__((packed)) sctp_sctphdr_t;
+} __packed sctp_sctphdr_t;
 
 #ifdef __KERNEL__
 #include <linux/skbuff.h>
@@ -77,7 +77,7 @@ typedef struct sctp_chunkhdr {
 	__u8 type;
 	__u8 flags;
 	__be16 length;
-} __attribute__((packed)) sctp_chunkhdr_t;
+} __packed sctp_chunkhdr_t;
 
 
 /* Section 3.2.  Chunk Type Values.
@@ -167,7 +167,7 @@ enum { SCTP_CHUNK_FLAG_T = 0x01 };
 typedef struct sctp_paramhdr {
 	__be16 type;
 	__be16 length;
-} __attribute__((packed)) sctp_paramhdr_t;
+} __packed sctp_paramhdr_t;
 
 typedef enum {
 
@@ -228,12 +228,12 @@ typedef struct sctp_datahdr {
 	__be16 ssn;
 	__be32 ppid;
 	__u8  payload[0];
-} __attribute__((packed)) sctp_datahdr_t;
+} __packed sctp_datahdr_t;
 
 typedef struct sctp_data_chunk {
         sctp_chunkhdr_t chunk_hdr;
         sctp_datahdr_t  data_hdr;
-} __attribute__((packed)) sctp_data_chunk_t;
+} __packed sctp_data_chunk_t;
 
 /* DATA Chuck Specific Flags */
 enum {
@@ -259,78 +259,78 @@ typedef struct sctp_inithdr {
 	__be16 num_inbound_streams;
 	__be32 initial_tsn;
 	__u8  params[0];
-} __attribute__((packed)) sctp_inithdr_t;
+} __packed sctp_inithdr_t;
 
 typedef struct sctp_init_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_inithdr_t init_hdr;
-} __attribute__((packed)) sctp_init_chunk_t;
+} __packed sctp_init_chunk_t;
 
 
 /* Section 3.3.2.1. IPv4 Address Parameter (5) */
 typedef struct sctp_ipv4addr_param {
 	sctp_paramhdr_t param_hdr;
 	struct in_addr  addr;
-} __attribute__((packed)) sctp_ipv4addr_param_t;
+} __packed sctp_ipv4addr_param_t;
 
 /* Section 3.3.2.1. IPv6 Address Parameter (6) */
 typedef struct sctp_ipv6addr_param {
 	sctp_paramhdr_t param_hdr;
 	struct in6_addr addr;
-} __attribute__((packed)) sctp_ipv6addr_param_t;
+} __packed sctp_ipv6addr_param_t;
 
 /* Section 3.3.2.1 Cookie Preservative (9) */
 typedef struct sctp_cookie_preserve_param {
 	sctp_paramhdr_t param_hdr;
 	__be32          lifespan_increment;
-} __attribute__((packed)) sctp_cookie_preserve_param_t;
+} __packed sctp_cookie_preserve_param_t;
 
 /* Section 3.3.2.1 Host Name Address (11) */
 typedef struct sctp_hostname_param {
 	sctp_paramhdr_t param_hdr;
 	uint8_t hostname[0];
-} __attribute__((packed)) sctp_hostname_param_t;
+} __packed sctp_hostname_param_t;
 
 /* Section 3.3.2.1 Supported Address Types (12) */
 typedef struct sctp_supported_addrs_param {
 	sctp_paramhdr_t param_hdr;
 	__be16 types[0];
-} __attribute__((packed)) sctp_supported_addrs_param_t;
+} __packed sctp_supported_addrs_param_t;
 
 /* Appendix A. ECN Capable (32768) */
 typedef struct sctp_ecn_capable_param {
 	sctp_paramhdr_t param_hdr;
-} __attribute__((packed)) sctp_ecn_capable_param_t;
+} __packed sctp_ecn_capable_param_t;
 
 /* ADDIP Section 3.2.6 Adaptation Layer Indication */
 typedef struct sctp_adaptation_ind_param {
 	struct sctp_paramhdr param_hdr;
 	__be32 adaptation_ind;
-} __attribute__((packed)) sctp_adaptation_ind_param_t;
+} __packed sctp_adaptation_ind_param_t;
 
 /* ADDIP Section 4.2.7 Supported Extensions Parameter */
 typedef struct sctp_supported_ext_param {
 	struct sctp_paramhdr param_hdr;
 	__u8 chunks[0];
-} __attribute__((packed)) sctp_supported_ext_param_t;
+} __packed sctp_supported_ext_param_t;
 
 /* AUTH Section 3.1 Random */
 typedef struct sctp_random_param {
 	sctp_paramhdr_t param_hdr;
 	__u8 random_val[0];
-} __attribute__((packed)) sctp_random_param_t;
+} __packed sctp_random_param_t;
 
 /* AUTH Section 3.2 Chunk List */
 typedef struct sctp_chunks_param {
 	sctp_paramhdr_t param_hdr;
 	__u8 chunks[0];
-} __attribute__((packed)) sctp_chunks_param_t;
+} __packed sctp_chunks_param_t;
 
 /* AUTH Section 3.3 HMAC Algorithm */
 typedef struct sctp_hmac_algo_param {
 	sctp_paramhdr_t param_hdr;
 	__be16 hmac_ids[0];
-} __attribute__((packed)) sctp_hmac_algo_param_t;
+} __packed sctp_hmac_algo_param_t;
 
 /* RFC 2960.  Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2):
  *   The INIT ACK chunk is used to acknowledge the initiation of an SCTP
@@ -342,13 +342,13 @@ typedef sctp_init_chunk_t sctp_initack_chunk_t;
 typedef struct sctp_cookie_param {
 	sctp_paramhdr_t p;
 	__u8 body[0];
-} __attribute__((packed)) sctp_cookie_param_t;
+} __packed sctp_cookie_param_t;
 
 /* Section 3.3.3.1 Unrecognized Parameters (8) */
 typedef struct sctp_unrecognized_param {
 	sctp_paramhdr_t param_hdr;
 	sctp_paramhdr_t unrecognized;
-} __attribute__((packed)) sctp_unrecognized_param_t;
+} __packed sctp_unrecognized_param_t;
 
 
@@ -363,7 +363,7 @@ typedef struct sctp_unrecognized_param {
 typedef struct sctp_gap_ack_block {
 	__be16 start;
 	__be16 end;
-} __attribute__((packed)) sctp_gap_ack_block_t;
+} __packed sctp_gap_ack_block_t;
 
 typedef __be32 sctp_dup_tsn_t;
 
@@ -378,12 +378,12 @@ typedef struct sctp_sackhdr {
 	__be16 num_gap_ack_blocks;
 	__be16 num_dup_tsns;
 	sctp_sack_variable_t variable[0];
-} __attribute__((packed)) sctp_sackhdr_t;
+} __packed sctp_sackhdr_t;
 
 typedef struct sctp_sack_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_sackhdr_t sack_hdr;
-} __attribute__((packed)) sctp_sack_chunk_t;
+} __packed sctp_sack_chunk_t;
 
 
 /* RFC 2960.  Section 3.3.5 Heartbeat Request (HEARTBEAT) (4):
@@ -395,12 +395,12 @@ typedef struct sctp_sack_chunk {
 
 typedef struct sctp_heartbeathdr {
 	sctp_paramhdr_t info;
-} __attribute__((packed)) sctp_heartbeathdr_t;
+} __packed sctp_heartbeathdr_t;
 
 typedef struct sctp_heartbeat_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_heartbeathdr_t hb_hdr;
-} __attribute__((packed)) sctp_heartbeat_chunk_t;
+} __packed sctp_heartbeat_chunk_t;
 
 
 /* For the abort and shutdown ACK we must carry the init tag in the
@@ -409,7 +409,7 @@ typedef struct sctp_heartbeat_chunk {
  */
 typedef struct sctp_abort_chunk {
         sctp_chunkhdr_t uh;
-} __attribute__((packed)) sctp_abort_chunk_t;
+} __packed sctp_abort_chunk_t;
 
 
 /* For the graceful shutdown we must carry the tag (in common header)
@@ -417,12 +417,12 @@ typedef struct sctp_abort_chunk {
  */
 typedef struct sctp_shutdownhdr {
 	__be32 cum_tsn_ack;
-} __attribute__((packed)) sctp_shutdownhdr_t;
+} __packed sctp_shutdownhdr_t;
 
 struct sctp_shutdown_chunk_t {
         sctp_chunkhdr_t    chunk_hdr;
         sctp_shutdownhdr_t shutdown_hdr;
-} __attribute__ ((packed));
+} __packed;
 
 /* RFC 2960.  Section 3.3.10 Operation Error (ERROR) (9) */
 
@@ -430,12 +430,12 @@ typedef struct sctp_errhdr {
 	__be16 cause;
 	__be16 length;
 	__u8  variable[0];
-} __attribute__((packed)) sctp_errhdr_t;
+} __packed sctp_errhdr_t;
 
 typedef struct sctp_operr_chunk {
         sctp_chunkhdr_t chunk_hdr;
 	sctp_errhdr_t   err_hdr;
-} __attribute__((packed)) sctp_operr_chunk_t;
+} __packed sctp_operr_chunk_t;
 
 /* RFC 2960 3.3.10 - Operation Error
  *
@@ -525,7 +525,7 @@ typedef struct sctp_ecnehdr {
 typedef struct sctp_ecne_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_ecnehdr_t ence_hdr;
-} __attribute__((packed)) sctp_ecne_chunk_t;
+} __packed sctp_ecne_chunk_t;
 
 /* RFC 2960.  Appendix A.  Explicit Congestion Notification.
  *   Congestion Window Reduced (CWR) (13)
@@ -537,7 +537,7 @@ typedef struct sctp_cwrhdr {
 typedef struct sctp_cwr_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_cwrhdr_t cwr_hdr;
-} __attribute__((packed)) sctp_cwr_chunk_t;
+} __packed sctp_cwr_chunk_t;
 
 /* PR-SCTP
  * 3.2 Forward Cumulative TSN Chunk Definition (FORWARD TSN)
@@ -588,17 +588,17 @@ typedef struct sctp_cwr_chunk {
 struct sctp_fwdtsn_skip {
 	__be16 stream;
 	__be16 ssn;
-} __attribute__((packed));
+} __packed;
 
 struct sctp_fwdtsn_hdr {
 	__be32 new_cum_tsn;
 	struct sctp_fwdtsn_skip skip[0];
-} __attribute((packed));
+} __packed;
 
 struct sctp_fwdtsn_chunk {
 	struct sctp_chunkhdr chunk_hdr;
 	struct sctp_fwdtsn_hdr fwdtsn_hdr;
-} __attribute((packed));
+} __packed;
 
 
 /* ADDIP
@@ -636,17 +636,17 @@ struct sctp_fwdtsn_chunk {
 typedef struct sctp_addip_param {
 	sctp_paramhdr_t	param_hdr;
 	__be32		crr_id;
-} __attribute__((packed)) sctp_addip_param_t;
+} __packed sctp_addip_param_t;
 
 typedef struct sctp_addiphdr {
 	__be32	serial;
 	__u8	params[0];
-} __attribute__((packed)) sctp_addiphdr_t;
+} __packed sctp_addiphdr_t;
 
 typedef struct sctp_addip_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_addiphdr_t addip_hdr;
-} __attribute__((packed)) sctp_addip_chunk_t;
+} __packed sctp_addip_chunk_t;
 
 /* AUTH
  * Section 4.1  Authentication Chunk (AUTH)
@@ -701,11 +701,11 @@ typedef struct sctp_authhdr {
 	__be16 shkey_id;
 	__be16 hmac_id;
 	__u8   hmac[0];
-} __attribute__((packed)) sctp_authhdr_t;
+} __packed sctp_authhdr_t;
 
 typedef struct sctp_auth_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_authhdr_t auth_hdr;
-} __attribute__((packed)) sctp_auth_chunk_t;
+} __packed sctp_auth_chunk_t;
 
 #endif /* __LINUX_SCTP_H__ */
diff --git a/include/linux/wlp.h b/include/linux/wlp.h
index ac95ce6606a..c76fe239250 100644
--- a/include/linux/wlp.h
+++ b/include/linux/wlp.h
@@ -300,7 +300,7 @@ struct wlp_ie {
 	__le16 cycle_param;
 	__le16 acw_anchor_addr;
 	u8 wssid_hash_list[];
-} __attribute__((packed));
+} __packed;
 
 static inline int wlp_ie_hash_length(struct wlp_ie *ie)
 {
@@ -324,7 +324,7 @@ static inline void wlp_ie_set_hash_length(struct wlp_ie *ie, int hash_length)
  */
 struct wlp_nonce {
 	u8 data[16];
-} __attribute__((packed));
+} __packed;
 
 /**
  * WLP UUID
@@ -336,7 +336,7 @@ struct wlp_nonce {
  */
 struct wlp_uuid {
 	u8 data[16];
-} __attribute__((packed));
+} __packed;
 
 
 /**
@@ -348,7 +348,7 @@ struct wlp_dev_type {
 	u8 OUI[3];
 	u8 OUIsubdiv;
 	__le16 subID;
-} __attribute__((packed));
+} __packed;
 
 /**
  * WLP frame header
@@ -357,7 +357,7 @@ struct wlp_dev_type {
 struct wlp_frame_hdr {
 	__le16 mux_hdr;			/* WLP_PROTOCOL_ID */
 	enum wlp_frame_type type:8;
-} __attribute__((packed));
+} __packed;
 
 /**
  * WLP attribute field header
@@ -368,7 +368,7 @@ struct wlp_frame_hdr {
 struct wlp_attr_hdr {
 	__le16 type;
 	__le16 length;
-} __attribute__((packed));
+} __packed;
 
 /**
  * Device information commonly used together
@@ -401,13 +401,13 @@ struct wlp_device_info {
 struct wlp_attr_##name {						\
 	struct wlp_attr_hdr hdr;					\
 	type name;							\
-} __attribute__((packed));
+} __packed;
 
 #define wlp_attr_array(type, name)					\
 struct wlp_attr_##name {						\
 	struct wlp_attr_hdr hdr;					\
 	type name[];							\
-} __attribute__((packed));
+} __packed;
 
 /**
  * WLP association attribute fields
@@ -483,7 +483,7 @@ struct wlp_wss_info {
 	struct wlp_attr_accept_enrl accept;
 	struct wlp_attr_wss_sec_status sec_stat;
 	struct wlp_attr_wss_bcast bcast;
-} __attribute__((packed));
+} __packed;
 
 /* WLP WSS Information */
 wlp_attr_array(struct wlp_wss_info, wss_info)
@@ -520,7 +520,7 @@ wlp_attr(u8, wlp_assc_err)
 struct wlp_frame_std_abbrv_hdr {
 	struct wlp_frame_hdr hdr;
 	u8 tag;
-} __attribute__((packed));
+} __packed;
 
 /**
  * WLP association frames
@@ -533,7 +533,7 @@ struct wlp_frame_assoc {
 	struct wlp_attr_version version;
 	struct wlp_attr_msg_type msg_type;
 	u8 attr[];
-} __attribute__((packed));
+} __packed;
 
 /* Ethernet to dev address mapping */
 struct wlp_eda {
diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h
index 511a459ec10..0916bbf3bdf 100644
--- a/include/net/dn_dev.h
+++ b/include/net/dn_dev.h
@@ -101,7 +101,7 @@ struct dn_short_packet {
 	__le16 dstnode;
 	__le16 srcnode;
 	__u8   forward;
-} __attribute__((packed));
+} __packed;
 
 struct dn_long_packet {
 	__u8   msgflg;
@@ -115,7 +115,7 @@ struct dn_long_packet {
 	__u8   visit_ct;
 	__u8   s_class;
 	__u8   pt;
-} __attribute__((packed));
+} __packed;
 
 /*------------------------- DRP - Routing messages ---------------------*/
 
@@ -132,7 +132,7 @@ struct endnode_hello_message {
 	__u8   mpd;
 	__u8   datalen;
 	__u8   data[2];
-} __attribute__((packed));
+} __packed;
 
 struct rtnode_hello_message {
 	__u8   msgflg;
@@ -144,7 +144,7 @@ struct rtnode_hello_message {
 	__u8   area;
 	__le16  timer;
 	__u8   mpd;
-} __attribute__((packed));
+} __packed;
 
 
 extern void dn_dev_init(void);
diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h
index 17d43d2db5e..e43a2893f13 100644
--- a/include/net/dn_nsp.h
+++ b/include/net/dn_nsp.h
@@ -74,18 +74,18 @@ struct nsp_data_seg_msg {
 	__u8   msgflg;
 	__le16 dstaddr;
 	__le16 srcaddr;
-} __attribute__((packed));
+} __packed;
 
 struct nsp_data_opt_msg {
 	__le16 acknum;
 	__le16 segnum;
 	__le16 lsflgs;
-} __attribute__((packed));
+} __packed;
 
 struct nsp_data_opt_msg1 {
 	__le16 acknum;
 	__le16 segnum;
-} __attribute__((packed));
+} __packed;
 
 
 /* Acknowledgment Message (data/other data)                             */
@@ -94,13 +94,13 @@ struct nsp_data_ack_msg {
 	__le16 dstaddr;
 	__le16 srcaddr;
 	__le16 acknum;
-} __attribute__((packed));
+} __packed;
 
 /* Connect Acknowledgment Message */
 struct  nsp_conn_ack_msg {
 	__u8 msgflg;
 	__le16 dstaddr;
-} __attribute__((packed));
+} __packed;
 
 
 /* Connect Initiate/Retransmit Initiate/Connect Confirm */
@@ -117,7 +117,7 @@ struct  nsp_conn_init_msg {
 #define NSP_FC_MASK   0x0c            /* FC type mask         */
 	__u8   info;
 	__le16 segsize;
-} __attribute__((packed));
+} __packed;
 
 /* Disconnect Initiate/Disconnect Confirm */
 struct  nsp_disconn_init_msg {
@@ -125,7 +125,7 @@ struct  nsp_disconn_init_msg {
 	__le16 dstaddr;
 	__le16 srcaddr;
 	__le16 reason;
-} __attribute__((packed));
+} __packed;
 
 
@@ -135,7 +135,7 @@ struct  srcobj_fmt {
 	__le16 grpcode;
 	__le16 usrcode;
 	__u8   dlen;
-} __attribute__((packed));
+} __packed;
 
 /*
  * A collection of functions for manipulating the sequence
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index fbf9d1cda27..fc94ec568a5 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -27,6 +27,6 @@ struct ipv6_tlv_tnl_enc_lim {
 	__u8 type;		/* type-code for option         */
 	__u8 length;		/* option length                */
 	__u8 encap_limit;	/* tunnel encapsulation limit   */
-} __attribute__ ((packed));
+} __packed;
 
 #endif
diff --git a/include/net/ipx.h b/include/net/ipx.h
index ef51a668ba1..05d7e4a88b4 100644
--- a/include/net/ipx.h
+++ b/include/net/ipx.h
@@ -27,9 +27,9 @@ struct ipx_address {
 #define IPX_MAX_PPROP_HOPS 8
 
 struct ipxhdr {
-	__be16			ipx_checksum __attribute__ ((packed));
+	__be16			ipx_checksum __packed;
 #define IPX_NO_CHECKSUM	cpu_to_be16(0xFFFF)
-	__be16			ipx_pktsize __attribute__ ((packed));
+	__be16			ipx_pktsize __packed;
 	__u8			ipx_tctrl;
 	__u8			ipx_type;
 #define IPX_TYPE_UNKNOWN	0x00
@@ -38,8 +38,8 @@ struct ipxhdr {
 #define IPX_TYPE_SPX		0x05	/* SPX protocol */
 #define IPX_TYPE_NCP		0x11	/* $lots for docs on this (SPIT) */
 #define IPX_TYPE_PPROP		0x14	/* complicated flood fill brdcast */
-	struct ipx_address	ipx_dest __attribute__ ((packed));
-	struct ipx_address	ipx_source __attribute__ ((packed));
+	struct ipx_address	ipx_dest __packed;
+	struct ipx_address	ipx_source __packed;
 };
 
 static __inline__ struct ipxhdr *ipx_hdr(struct sk_buff *skb)
diff --git a/include/net/mip6.h b/include/net/mip6.h
index a83ad1982a9..26ba99b5a4b 100644
--- a/include/net/mip6.h
+++ b/include/net/mip6.h
@@ -39,7 +39,7 @@ struct ip6_mh {
 	__u16	ip6mh_cksum;
 	/* Followed by type specific messages */
 	__u8	data[0];
-} __attribute__ ((__packed__));
+} __packed;
 
 #define IP6_MH_TYPE_BRR		0   /* Binding Refresh Request */
 #define IP6_MH_TYPE_HOTI	1   /* HOTI Message   */
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index f76f22d0572..895997bc2ea 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -82,7 +82,7 @@ struct ra_msg {
 struct nd_opt_hdr {
 	__u8		nd_opt_type;
 	__u8		nd_opt_len;
-} __attribute__((__packed__));
+} __packed;
 
 
 extern int			ndisc_init(void);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 4b860116e09..f9e7473613b 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -443,7 +443,7 @@ struct sctp_signed_cookie {
 	__u8 signature[SCTP_SECRET_SIZE];
 	__u32 __pad;		/* force sctp_cookie alignment to 64 bits */
 	struct sctp_cookie c;
-} __attribute__((packed));
+} __packed;
 
 /* This is another convenience type to allocate memory for address
  * params for the maximum size and pass such structures around
@@ -488,7 +488,7 @@ typedef struct sctp_sender_hb_info {
 	union sctp_addr daddr;
 	unsigned long sent_at;
 	__u64 hb_nonce;
-} __attribute__((packed)) sctp_sender_hb_info_t;
+} __packed sctp_sender_hb_info_t;
 
 /*
  *  RFC 2960 1.3.2 Sequenced Delivery within Streams
diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h
index b69e6e173ea..9b2c30897e5 100644
--- a/include/rxrpc/packet.h
+++ b/include/rxrpc/packet.h
@@ -65,7 +65,7 @@ struct rxrpc_header {
 	};
 	__be16		serviceId;	/* service ID */
 
-} __attribute__((packed));
+} __packed;
 
 #define __rxrpc_header_off(X) offsetof(struct rxrpc_header,X)
 
@@ -120,7 +120,7 @@ struct rxrpc_ackpacket {
 #define RXRPC_ACK_TYPE_NACK		0
 #define RXRPC_ACK_TYPE_ACK		1
 
-} __attribute__((packed));
+} __packed;
 
 /*
  * ACK packets can have a further piece of information tagged on the end
@@ -141,7 +141,7 @@ struct rxkad_challenge {
 	__be32		nonce;		/* encrypted random number */
 	__be32		min_level;	/* minimum security level */
 	__be32		__padding;	/* padding to 8-byte boundary */
-} __attribute__((packed));
+} __packed;
 
 /*****************************************************************************/
 /*
@@ -164,7 +164,7 @@ struct rxkad_response {
 
 	__be32		kvno;		/* Kerberos key version number */
 	__be32		ticket_len;	/* Kerberos ticket length  */
-} __attribute__((packed));
+} __packed;
 
 /*****************************************************************************/
 /*
diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h
index 0d9e506f5d5..70672544db8 100644
--- a/net/bluetooth/bnep/bnep.h
+++ b/net/bluetooth/bnep/bnep.h
@@ -86,26 +86,26 @@ struct bnep_setup_conn_req {
 	__u8  ctrl;
 	__u8  uuid_size;
 	__u8  service[0];
-} __attribute__((packed));
+} __packed;
 
 struct bnep_set_filter_req {
 	__u8  type;
 	__u8  ctrl;
 	__be16 len;
 	__u8  list[0];
-} __attribute__((packed));
+} __packed;
 
 struct bnep_control_rsp {
 	__u8  type;
 	__u8  ctrl;
 	__be16 resp;
-} __attribute__((packed));
+} __packed;
 
 struct bnep_ext_hdr {
 	__u8  type;
 	__u8  len;
 	__u8  data[0];
-} __attribute__((packed));
+} __packed;
 
 /* BNEP ioctl defines */
 #define BNEPCONNADD	_IOW('B', 200, int)
diff --git a/net/compat.c b/net/compat.c
index ec24d9edb02..1cf75905f13 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -531,7 +531,7 @@ struct compat_group_req {
 	__u32				 gr_interface;
 	struct __kernel_sockaddr_storage gr_group
 		__attribute__ ((aligned(4)));
-} __attribute__ ((packed));
+} __packed;
 
 struct compat_group_source_req {
 	__u32				 gsr_interface;
@@ -539,7 +539,7 @@ struct compat_group_source_req {
 		__attribute__ ((aligned(4)));
 	struct __kernel_sockaddr_storage gsr_source
 		__attribute__ ((aligned(4)));
-} __attribute__ ((packed));
+} __packed;
 
 struct compat_group_filter {
 	__u32				 gf_interface;
@@ -549,7 +549,7 @@ struct compat_group_filter {
 	__u32				 gf_numsrc;
 	struct __kernel_sockaddr_storage gf_slist[1]
 		__attribute__ ((aligned(4)));
-} __attribute__ ((packed));
+} __packed;
 
 #define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \
 			sizeof(struct __kernel_sockaddr_storage))
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index f28ad2cc842..499c045d691 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1463,7 +1463,7 @@ struct iucv_path_pending {
 	u32 res3;
 	u8  ippollfg;
 	u8  res4[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_path_pending(struct iucv_irq_data *data)
 {
@@ -1524,7 +1524,7 @@ struct iucv_path_complete {
 	u32 res3;
 	u8  ippollfg;
 	u8  res4[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_path_complete(struct iucv_irq_data *data)
 {
@@ -1554,7 +1554,7 @@ struct iucv_path_severed {
 	u32 res4;
 	u8  ippollfg;
 	u8  res5[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_path_severed(struct iucv_irq_data *data)
 {
@@ -1590,7 +1590,7 @@ struct iucv_path_quiesced {
 	u32 res4;
 	u8  ippollfg;
 	u8  res5[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_path_quiesced(struct iucv_irq_data *data)
 {
@@ -1618,7 +1618,7 @@ struct iucv_path_resumed {
 	u32 res4;
 	u8  ippollfg;
 	u8  res5[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_path_resumed(struct iucv_irq_data *data)
 {
@@ -1649,7 +1649,7 @@ struct iucv_message_complete {
 	u32 ipbfln2f;
 	u8  ippollfg;
 	u8  res2[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_message_complete(struct iucv_irq_data *data)
 {
@@ -1694,7 +1694,7 @@ struct iucv_message_pending {
 	u32 ipbfln2f;
 	u8  ippollfg;
 	u8  res2[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_message_pending(struct iucv_irq_data *data)
 {
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index c7000a6ca37..a2ed0f7b556 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -600,7 +600,7 @@ struct iapp_layer2_update {
 	u8 ssap;		/* 0 */
 	u8 control;
 	u8 xid_info[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void ieee80211_send_layer2_update(struct sta_info *sta)
 {
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 1a9e2da37a9..ec3e5c3e27b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1084,7 +1084,7 @@ struct ieee80211_tx_status_rtap_hdr {
 	u8 padding_for_rate;
 	__le16 tx_flags;
 	u8 data_retries;
-} __attribute__ ((packed));
+} __packed;
 
 
 /* HT */
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 6e2a7bcd8cb..2d9a2ee94e1 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2139,7 +2139,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
 		u8 rate_or_pad;
 		__le16 chan_freq;
 		__le16 chan_flags;
-	} __attribute__ ((packed)) *rthdr;
+	} __packed *rthdr;
 	struct sk_buff *skb = rx->skb, *skb2;
 	struct net_device *prev_dev = NULL;
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index bd2a50b482a..246f9292465 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1817,7 +1817,7 @@ malformed:
 struct __sctp_missing {
 	__be32 num_missing;
 	__be16 type;
-}  __attribute__((packed));
+}  __packed;
 
 /*
  * Report a missing mandatory parameter.
-- 
cgit v1.2.3-70-g09d2


From 095dfdb0c479661f437b24b85e31f0d0b841eab6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 26 May 2010 17:19:25 +0200
Subject: mac80211: remove tx status ampdu_ack_map

There's a single use of this struct member, but
as it is write-only it is clearly not necessary.
Thus we can free up some space here, even if we
don't need it right now it seems pointless to
carry around the variable.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-agn-tx.c | 1 -
 include/net/mac80211.h                    | 5 +----
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-tx.c b/drivers/net/wireless/iwlwifi/iwl-agn-tx.c
index 52bec104046..bde342b5df8 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-tx.c
@@ -1252,7 +1252,6 @@ static int iwlagn_tx_status_reply_compressed_ba(struct iwl_priv *priv,
 	info->flags |= IEEE80211_TX_STAT_ACK;
 	info->flags |= IEEE80211_TX_STAT_AMPDU;
 	info->status.ampdu_ack_len = successes;
-	info->status.ampdu_ack_map = bitmap;
 	info->status.ampdu_len = agg->frame_count;
 	iwlagn_hwrate_to_tx_control(priv, agg->rate_n_flags, info);
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index de22cbfef23..f26440a46df 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -412,8 +412,6 @@ struct ieee80211_tx_rate {
  * @driver_data: array of driver_data pointers
  * @ampdu_ack_len: number of acked aggregated frames.
  * 	relevant only if IEEE80211_TX_STAT_AMPDU was set.
- * @ampdu_ack_map: block ack bit map for the aggregation.
- * 	relevant only if IEEE80211_TX_STAT_AMPDU was set.
  * @ampdu_len: number of aggregated frames.
  * 	relevant only if IEEE80211_TX_STAT_AMPDU was set.
  * @ack_signal: signal strength of the ACK frame
@@ -448,10 +446,9 @@ struct ieee80211_tx_info {
 		struct {
 			struct ieee80211_tx_rate rates[IEEE80211_TX_MAX_RATES];
 			u8 ampdu_ack_len;
-			u64 ampdu_ack_map;
 			int ack_signal;
 			u8 ampdu_len;
-			/* 7 bytes free */
+			/* 15 bytes free */
 		} status;
 		struct {
 			struct ieee80211_tx_rate driver_rates[
-- 
cgit v1.2.3-70-g09d2


From 2b2c009ecf71f4c66ff8420b63dddbc9737e04e3 Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Thu, 27 May 2010 15:32:13 +0300
Subject: mac80211: Add support for hardware ARP query filtering

Some hardware allows extended filtering of ARP frames not intended for
the host. To perform such filtering, the hardware needs to know the current
IP address(es) of the host, bound to its interface.

Add support for ARP filtering to mac80211 by adding a new op to the driver
interface, allowing the current IP addresses to be configured. This op is called
upon association with the currently configured address(es), and when
associated whenever the IP address(es) change.

This patch adds configuration of IPv4 addresses only, as IPv6 addresses don't
need ARP filtering.

Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h      | 14 ++++++++++
 net/mac80211/driver-ops.h   | 17 ++++++++++++
 net/mac80211/driver-trace.h | 25 ++++++++++++++++++
 net/mac80211/ieee80211_i.h  |  2 ++
 net/mac80211/main.c         | 63 ++++++++++++++++++++++++++++++++++++++++++++-
 net/mac80211/mlme.c         | 11 +++++++-
 6 files changed, 130 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index f26440a46df..74b9b49ddfa 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -19,6 +19,7 @@
 #include <linux/wireless.h>
 #include <linux/device.h>
 #include <linux/ieee80211.h>
+#include <linux/inetdevice.h>
 #include <net/cfg80211.h>
 
 /**
@@ -1532,6 +1533,16 @@ enum ieee80211_ampdu_mlme_action {
  *	of the bss parameters has changed when a call is made. The callback
  *	can sleep.
  *
+ * @configure_arp_filter: Configuration function for hardware ARP query filter.
+ *	This function is called with all the IP addresses configured to the
+ *	interface as argument - all ARP queries targeted to any of these
+ *	addresses must pass through. If the hardware filter does not support
+ *	enought addresses, hardware filtering must be disabled. The ifa_list
+ *	argument may be NULL, indicating that filtering must be disabled.
+ *	This function is called upon association complete with current
+ *	address(es), and while associated whenever the IP address(es) change.
+ *	The callback can sleep.
+ *
  * @prepare_multicast: Prepare for multicast filter configuration.
  *	This callback is optional, and its return value is passed
  *	to configure_filter(). This callback must be atomic.
@@ -1671,6 +1682,9 @@ struct ieee80211_ops {
 				 struct ieee80211_vif *vif,
 				 struct ieee80211_bss_conf *info,
 				 u32 changed);
+	int (*configure_arp_filter)(struct ieee80211_hw *hw,
+				    struct ieee80211_vif *vif,
+				    struct in_ifaddr *ifa_list);
 	u64 (*prepare_multicast)(struct ieee80211_hw *hw,
 				 struct netdev_hw_addr_list *mc_list);
 	void (*configure_filter)(struct ieee80211_hw *hw,
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 4f227131665..978850ee3a5 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -83,6 +83,23 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local,
 	trace_drv_bss_info_changed(local, sdata, info, changed);
 }
 
+struct in_ifaddr;
+static inline int drv_configure_arp_filter(struct ieee80211_local *local,
+					   struct ieee80211_vif *vif,
+					   struct in_ifaddr *ifa_list)
+{
+	int ret = 0;
+
+	might_sleep();
+
+	if (local->ops->configure_arp_filter)
+		ret = local->ops->configure_arp_filter(&local->hw, vif,
+						       ifa_list);
+
+	trace_drv_configure_arp_filter(local, vif_to_sdata(vif), ifa_list, ret);
+	return ret;
+}
+
 static inline u64 drv_prepare_multicast(struct ieee80211_local *local,
 					struct netdev_hw_addr_list *mc_list)
 {
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 6a9b2342a9c..577460da2ea 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -219,6 +219,31 @@ TRACE_EVENT(drv_bss_info_changed,
 	)
 );
 
+TRACE_EVENT(drv_configure_arp_filter,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 struct in_ifaddr *ifa_list, int ret),
+
+	TP_ARGS(local, sdata, ifa_list, ret),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		VIF_ENTRY
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		VIF_ASSIGN;
+		__entry->ret = ret;
+	),
+
+	TP_printk(
+		VIF_PR_FMT LOCAL_PR_FMT " ret:%d",
+		VIF_PR_ARG, LOCAL_PR_ARG, __entry->ret
+	)
+);
+
 TRACE_EVENT(drv_prepare_multicast,
 	TP_PROTO(struct ieee80211_local *local, int mc_count, u64 ret),
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index d4677efd3a3..47d67537f17 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -851,6 +851,7 @@ struct ieee80211_local {
 	struct work_struct dynamic_ps_disable_work;
 	struct timer_list dynamic_ps_timer;
 	struct notifier_block network_latency_notifier;
+	struct notifier_block ifa_notifier;
 
 	int user_power_level; /* in dBm */
 	int power_constr_level; /* in dBm */
@@ -997,6 +998,7 @@ void ieee80211_send_pspoll(struct ieee80211_local *local,
 void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency);
 int ieee80211_max_network_latency(struct notifier_block *nb,
 				  unsigned long data, void *dummy);
+int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 				      struct ieee80211_channel_sw_ie *sw_elem,
 				      struct ieee80211_bss *bss,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index c8548e61f86..4051b232c6e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -329,6 +329,58 @@ static void ieee80211_recalc_smps_work(struct work_struct *work)
 	mutex_unlock(&local->iflist_mtx);
 }
 
+int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata)
+{
+	struct in_device *idev;
+	int ret = 0;
+
+	BUG_ON(!sdata);
+	ASSERT_RTNL();
+
+	idev = sdata->dev->ip_ptr;
+	if (!idev)
+		return 0;
+
+	ret = drv_configure_arp_filter(sdata->local, &sdata->vif,
+				       idev->ifa_list);
+	return ret;
+}
+
+static int ieee80211_ifa_changed(struct notifier_block *nb,
+				 unsigned long data, void *arg)
+{
+	struct in_ifaddr *ifa = arg;
+	struct ieee80211_local *local =
+		container_of(nb, struct ieee80211_local,
+			     ifa_notifier);
+	struct net_device *ndev = ifa->ifa_dev->dev;
+	struct wireless_dev *wdev = ndev->ieee80211_ptr;
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_if_managed *ifmgd;
+
+	/* Make sure it's our interface that got changed */
+	if (!wdev)
+		return NOTIFY_DONE;
+
+	if (wdev->wiphy != local->hw.wiphy)
+		return NOTIFY_DONE;
+
+	/* We are concerned about IP addresses only when associated */
+	sdata = IEEE80211_DEV_TO_SUB_IF(ndev);
+
+	/* ARP filtering is only supported in managed mode */
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
+		return NOTIFY_DONE;
+
+	ifmgd = &sdata->u.mgd;
+	mutex_lock(&ifmgd->mtx);
+	if (ifmgd->associated)
+		ieee80211_set_arp_filter(sdata);
+	mutex_unlock(&ifmgd->mtx);
+
+	return NOTIFY_DONE;
+}
+
 struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 					const struct ieee80211_ops *ops)
 {
@@ -612,14 +664,22 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		ieee80211_max_network_latency;
 	result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY,
 				     &local->network_latency_notifier);
-
 	if (result) {
 		rtnl_lock();
 		goto fail_pm_qos;
 	}
 
+	local->ifa_notifier.notifier_call = ieee80211_ifa_changed;
+	result = register_inetaddr_notifier(&local->ifa_notifier);
+	if (result)
+		goto fail_ifa;
+
 	return 0;
 
+ fail_ifa:
+	pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY,
+			       &local->network_latency_notifier);
+	rtnl_lock();
  fail_pm_qos:
 	ieee80211_led_exit(local);
 	ieee80211_remove_interfaces(local);
@@ -647,6 +707,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 
 	pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY,
 			       &local->network_latency_notifier);
+	unregister_inetaddr_notifier(&local->ifa_notifier);
 
 	rtnl_lock();
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 29c3a75a7ad..7e720133358 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2078,8 +2078,17 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk,
 			cfg80211_send_assoc_timeout(wk->sdata->dev,
 						    wk->filter_ta);
 			return WORK_DONE_DESTROY;
+		} else {
+			mutex_unlock(&wk->sdata->u.mgd.mtx);
+
+			/*
+			 * configure ARP filter IP addresses to the driver,
+			 * intentionally outside the mgd mutex.
+			 */
+			rtnl_lock();
+			ieee80211_set_arp_filter(wk->sdata);
+			rtnl_unlock();
 		}
-		mutex_unlock(&wk->sdata->u.mgd.mtx);
 	}
 
 	cfg80211_send_rx_assoc(wk->sdata->dev, skb->data, skb->len);
-- 
cgit v1.2.3-70-g09d2


From 6a8579d0e62c0eac428184ce45e86bc46677724a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 27 May 2010 14:41:07 +0200
Subject: mac80211: clean up ieee80211_stop_tx_ba_session

There's no sense in letting anything but internal
mac80211 functions set the initiator to anything
but WLAN_BACK_INITIATOR, since WLAN_BACK_RECIPIENT
is only valid when we have received a frame from
the peer, which we react to directly in mac80211.

The debugfs code I recently added got this wrong
as well.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-agn-rs.c |  3 +--
 include/net/mac80211.h                    |  6 ++----
 net/mac80211/agg-tx.c                     |  7 +++----
 net/mac80211/debugfs_sta.c                |  3 +--
 net/mac80211/driver-trace.h               | 10 ++++------
 5 files changed, 11 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
index cf4a95bae4f..40933a5de02 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
@@ -313,8 +313,7 @@ static int rs_tl_turn_on_agg_for_tid(struct iwl_priv *priv,
 			 */
 			IWL_DEBUG_HT(priv, "Fail start Tx agg on tid: %d\n",
 				tid);
-			ieee80211_stop_tx_ba_session(sta, tid,
-						WLAN_BACK_INITIATOR);
+			ieee80211_stop_tx_ba_session(sta, tid);
 		}
 	} else
 		IWL_ERR(priv, "Fail finding valid aggregation tid: %d\n", tid);
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 74b9b49ddfa..2e728611c57 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2345,16 +2345,14 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra,
  * ieee80211_stop_tx_ba_session - Stop a Block Ack session.
  * @sta: the station whose BA session to stop
  * @tid: the TID to stop BA.
- * @initiator: if indicates initiator DELBA frame will be sent.
  *
- * Return: error if no sta with matching da found, success otherwise
+ * Return: negative error if the TID is invalid, or no aggregation active
  *
  * Although mac80211/low level driver/user space application can estimate
  * the need to stop aggregation on a certain RA/TID, the session level
  * will be managed by the mac80211.
  */
-int ieee80211_stop_tx_ba_session(struct ieee80211_sta *sta, u16 tid,
-				 enum ieee80211_back_parties initiator);
+int ieee80211_stop_tx_ba_session(struct ieee80211_sta *sta, u16 tid);
 
 /**
  * ieee80211_stop_tx_ba_cb - low level driver ready to stop aggregate.
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index c163d0a149f..feb15c4a1fa 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -538,14 +538,13 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 	return ret;
 }
 
-int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
-				 enum ieee80211_back_parties initiator)
+int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
 {
 	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	struct ieee80211_local *local = sdata->local;
 
-	trace_api_stop_tx_ba_session(pubsta, tid, initiator);
+	trace_api_stop_tx_ba_session(pubsta, tid);
 
 	if (!local->ops->ampdu_action)
 		return -EINVAL;
@@ -553,7 +552,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
 	if (tid >= STA_TID_NUM)
 		return -EINVAL;
 
-	return __ieee80211_stop_tx_ba_session(sta, tid, initiator);
+	return __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR);
 }
 EXPORT_SYMBOL(ieee80211_stop_tx_ba_session);
 
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index e763f1529dd..9f140612224 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -210,8 +210,7 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu
 		if (start)
 			ret = ieee80211_start_tx_ba_session(&sta->sta, tid);
 		else
-			ret = ieee80211_stop_tx_ba_session(&sta->sta, tid,
-							   WLAN_BACK_RECIPIENT);
+			ret = ieee80211_stop_tx_ba_session(&sta->sta, tid);
 	} else {
 		__ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, 3);
 		ret = 0;
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 577460da2ea..6b90630151a 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -876,25 +876,23 @@ TRACE_EVENT(api_start_tx_ba_cb,
 );
 
 TRACE_EVENT(api_stop_tx_ba_session,
-	TP_PROTO(struct ieee80211_sta *sta, u16 tid, u16 initiator),
+	TP_PROTO(struct ieee80211_sta *sta, u16 tid),
 
-	TP_ARGS(sta, tid, initiator),
+	TP_ARGS(sta, tid),
 
 	TP_STRUCT__entry(
 		STA_ENTRY
 		__field(u16, tid)
-		__field(u16, initiator)
 	),
 
 	TP_fast_assign(
 		STA_ASSIGN;
 		__entry->tid = tid;
-		__entry->initiator = initiator;
 	),
 
 	TP_printk(
-		STA_PR_FMT " tid:%d initiator:%d",
-		STA_PR_ARG, __entry->tid, __entry->initiator
+		STA_PR_FMT " tid:%d",
+		STA_PR_ARG, __entry->tid
 	)
 );
 
-- 
cgit v1.2.3-70-g09d2


From 724df615928b7050d33b6243f60b12bd87484fc7 Mon Sep 17 00:00:00 2001
From: "Justin P. Mattock" <justinmattock@gmail.com>
Date: Wed, 26 May 2010 09:22:40 -0700
Subject: fix comment typo in netdevice.h

Fix missing "of" in comment.

 Signed-off-by: Justin P. Mattock <justinmattock@gmail.com>

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a1bff651816..c761c903772 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -775,7 +775,7 @@ struct net_device {
 	/*
 	 * This is the first field of the "visible" part of this structure
 	 * (i.e. as seen by users in the "Space.c" file).  It is the name
-	 * the interface.
+	 * of the interface.
 	 */
 	char			name[IFNAMSIZ];
 
-- 
cgit v1.2.3-70-g09d2


From 38a6cc7538d3c44b76f9dcea607a171adcc0208e Mon Sep 17 00:00:00 2001
From: Sujith <Sujith.Manoharan@atheros.com>
Date: Wed, 19 May 2010 11:32:30 +0530
Subject: mac80211: Remove deprecated sta_notify commands

STA_NOTIFY_ADD and STA_NOTIFY_REMOVE have no users anymore,
and station addition/removal are indicated to drivers
using sta_add() and sta_remove(), which can sleep.

Signed-off-by: Sujith <Sujith.Manoharan@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h    | 6 +-----
 net/mac80211/driver-ops.h | 6 ------
 2 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 2e728611c57..e3c1d479400 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -879,16 +879,12 @@ struct ieee80211_sta {
  * enum sta_notify_cmd - sta notify command
  *
  * Used with the sta_notify() callback in &struct ieee80211_ops, this
- * indicates addition and removal of a station to station table,
- * or if a associated station made a power state transition.
+ * indicates if an associated station made a power state transition.
  *
- * @STA_NOTIFY_ADD: (DEPRECATED) a station was added to the station table
- * @STA_NOTIFY_REMOVE: (DEPRECATED) a station being removed from the station table
  * @STA_NOTIFY_SLEEP: a station is now sleeping
  * @STA_NOTIFY_AWAKE: a sleeping station woke up
  */
 enum sta_notify_cmd {
-	STA_NOTIFY_ADD, STA_NOTIFY_REMOVE,
 	STA_NOTIFY_SLEEP, STA_NOTIFY_AWAKE,
 };
 
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 978850ee3a5..d1139e4f88a 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -269,9 +269,6 @@ static inline int drv_sta_add(struct ieee80211_local *local,
 
 	if (local->ops->sta_add)
 		ret = local->ops->sta_add(&local->hw, &sdata->vif, sta);
-	else if (local->ops->sta_notify)
-		local->ops->sta_notify(&local->hw, &sdata->vif,
-					STA_NOTIFY_ADD, sta);
 
 	trace_drv_sta_add(local, sdata, sta, ret);
 
@@ -286,9 +283,6 @@ static inline void drv_sta_remove(struct ieee80211_local *local,
 
 	if (local->ops->sta_remove)
 		local->ops->sta_remove(&local->hw, &sdata->vif, sta);
-	else if (local->ops->sta_notify)
-		local->ops->sta_notify(&local->hw, &sdata->vif,
-					STA_NOTIFY_REMOVE, sta);
 
 	trace_drv_sta_remove(local, sdata, sta);
 }
-- 
cgit v1.2.3-70-g09d2


From 14f92952bf74a365ca7f9dfbec158e7c933ea723 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Thu, 3 Jun 2010 19:37:27 -0700
Subject: ssb: add dma_dev to ssb_device structure

Add dma_dev, a pointer to struct device, to struct ssb_device.  We pass it
to the generic DMA API with SSB_BUSTYPE_PCI and SSB_BUSTYPE_SSB.
ssb_devices_register() sets it up properly.

This is preparation for replacing the ssb bus specific DMA API (ssb_dma_*)
with the generic DMA API.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Michael Buesch <mb@bu3sch.de>
Cc: Gary Zambrano <zambrano@broadcom.com>
Cc: Stefano Brivio <stefano.brivio@polimi.it>
Cc: Larry Finger <Larry.Finger@lwfinger.net>
Cc: John W. Linville <linville@tuxdriver.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/main.c      | 2 ++
 include/linux/ssb/ssb.h | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index 51275aac5b3..a732b396e60 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -486,6 +486,7 @@ static int ssb_devices_register(struct ssb_bus *bus)
 #ifdef CONFIG_SSB_PCIHOST
 			sdev->irq = bus->host_pci->irq;
 			dev->parent = &bus->host_pci->dev;
+			sdev->dma_dev = dev->parent;
 #endif
 			break;
 		case SSB_BUSTYPE_PCMCIA:
@@ -501,6 +502,7 @@ static int ssb_devices_register(struct ssb_bus *bus)
 			break;
 		case SSB_BUSTYPE_SSB:
 			dev->dma_mask = &dev->coherent_dma_mask;
+			sdev->dma_dev = dev;
 			break;
 		}
 
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index a2608bff9c7..0d5f04316b3 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -167,7 +167,7 @@ struct ssb_device {
 	 * is an optimization. */
 	const struct ssb_bus_ops *ops;
 
-	struct device *dev;
+	struct device *dev, *dma_dev;
 
 	struct ssb_bus *bus;
 	struct ssb_device_id id;
-- 
cgit v1.2.3-70-g09d2


From 467429b475e56f154f93b3b14fd75f238d14597a Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Thu, 3 Jun 2010 19:37:44 -0700
Subject: ssb: remove the ssb DMA API

Now they are unnecessary.  We can use the generic DMA API with any bus.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Michael Buesch <mb@bu3sch.de>
Cc: Gary Zambrano <zambrano@broadcom.com>
Cc: Stefano Brivio <stefano.brivio@polimi.it>
Cc: Larry Finger <Larry.Finger@lwfinger.net>
Cc: John W. Linville <linville@tuxdriver.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/main.c      |  74 -----------------------
 include/linux/ssb/ssb.h | 157 ------------------------------------------------
 2 files changed, 231 deletions(-)

(limited to 'include')

diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index a732b396e60..7cee7f4eb60 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -1228,80 +1228,6 @@ u32 ssb_dma_translation(struct ssb_device *dev)
 }
 EXPORT_SYMBOL(ssb_dma_translation);
 
-int ssb_dma_set_mask(struct ssb_device *dev, u64 mask)
-{
-#ifdef CONFIG_SSB_PCIHOST
-	int err;
-#endif
-
-	switch (dev->bus->bustype) {
-	case SSB_BUSTYPE_PCI:
-#ifdef CONFIG_SSB_PCIHOST
-		err = pci_set_dma_mask(dev->bus->host_pci, mask);
-		if (err)
-			return err;
-		err = pci_set_consistent_dma_mask(dev->bus->host_pci, mask);
-		return err;
-#endif
-	case SSB_BUSTYPE_SSB:
-		return dma_set_mask(dev->dev, mask);
-	default:
-		__ssb_dma_not_implemented(dev);
-	}
-	return -ENOSYS;
-}
-EXPORT_SYMBOL(ssb_dma_set_mask);
-
-void * ssb_dma_alloc_consistent(struct ssb_device *dev, size_t size,
-				dma_addr_t *dma_handle, gfp_t gfp_flags)
-{
-	switch (dev->bus->bustype) {
-	case SSB_BUSTYPE_PCI:
-#ifdef CONFIG_SSB_PCIHOST
-		if (gfp_flags & GFP_DMA) {
-			/* Workaround: The PCI API does not support passing
-			 * a GFP flag. */
-			return dma_alloc_coherent(&dev->bus->host_pci->dev,
-						  size, dma_handle, gfp_flags);
-		}
-		return pci_alloc_consistent(dev->bus->host_pci, size, dma_handle);
-#endif
-	case SSB_BUSTYPE_SSB:
-		return dma_alloc_coherent(dev->dev, size, dma_handle, gfp_flags);
-	default:
-		__ssb_dma_not_implemented(dev);
-	}
-	return NULL;
-}
-EXPORT_SYMBOL(ssb_dma_alloc_consistent);
-
-void ssb_dma_free_consistent(struct ssb_device *dev, size_t size,
-			     void *vaddr, dma_addr_t dma_handle,
-			     gfp_t gfp_flags)
-{
-	switch (dev->bus->bustype) {
-	case SSB_BUSTYPE_PCI:
-#ifdef CONFIG_SSB_PCIHOST
-		if (gfp_flags & GFP_DMA) {
-			/* Workaround: The PCI API does not support passing
-			 * a GFP flag. */
-			dma_free_coherent(&dev->bus->host_pci->dev,
-					  size, vaddr, dma_handle);
-			return;
-		}
-		pci_free_consistent(dev->bus->host_pci, size,
-				    vaddr, dma_handle);
-		return;
-#endif
-	case SSB_BUSTYPE_SSB:
-		dma_free_coherent(dev->dev, size, vaddr, dma_handle);
-		return;
-	default:
-		__ssb_dma_not_implemented(dev);
-	}
-}
-EXPORT_SYMBOL(ssb_dma_free_consistent);
-
 int ssb_bus_may_powerdown(struct ssb_bus *bus)
 {
 	struct ssb_chipcommon *cc;
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 0d5f04316b3..623b704fdc4 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -470,14 +470,6 @@ extern u32 ssb_dma_translation(struct ssb_device *dev);
 #define SSB_DMA_TRANSLATION_MASK	0xC0000000
 #define SSB_DMA_TRANSLATION_SHIFT	30
 
-extern int ssb_dma_set_mask(struct ssb_device *dev, u64 mask);
-
-extern void * ssb_dma_alloc_consistent(struct ssb_device *dev, size_t size,
-				       dma_addr_t *dma_handle, gfp_t gfp_flags);
-extern void ssb_dma_free_consistent(struct ssb_device *dev, size_t size,
-				    void *vaddr, dma_addr_t dma_handle,
-				    gfp_t gfp_flags);
-
 static inline void __cold __ssb_dma_not_implemented(struct ssb_device *dev)
 {
 #ifdef CONFIG_SSB_DEBUG
@@ -486,155 +478,6 @@ static inline void __cold __ssb_dma_not_implemented(struct ssb_device *dev)
 #endif /* DEBUG */
 }
 
-static inline int ssb_dma_mapping_error(struct ssb_device *dev, dma_addr_t addr)
-{
-	switch (dev->bus->bustype) {
-	case SSB_BUSTYPE_PCI:
-#ifdef CONFIG_SSB_PCIHOST
-		return pci_dma_mapping_error(dev->bus->host_pci, addr);
-#endif
-		break;
-	case SSB_BUSTYPE_SSB:
-		return dma_mapping_error(dev->dev, addr);
-	default:
-		break;
-	}
-	__ssb_dma_not_implemented(dev);
-	return -ENOSYS;
-}
-
-static inline dma_addr_t ssb_dma_map_single(struct ssb_device *dev, void *p,
-					    size_t size, enum dma_data_direction dir)
-{
-	switch (dev->bus->bustype) {
-	case SSB_BUSTYPE_PCI:
-#ifdef CONFIG_SSB_PCIHOST
-		return pci_map_single(dev->bus->host_pci, p, size, dir);
-#endif
-		break;
-	case SSB_BUSTYPE_SSB:
-		return dma_map_single(dev->dev, p, size, dir);
-	default:
-		break;
-	}
-	__ssb_dma_not_implemented(dev);
-	return 0;
-}
-
-static inline void ssb_dma_unmap_single(struct ssb_device *dev, dma_addr_t dma_addr,
-					size_t size, enum dma_data_direction dir)
-{
-	switch (dev->bus->bustype) {
-	case SSB_BUSTYPE_PCI:
-#ifdef CONFIG_SSB_PCIHOST
-		pci_unmap_single(dev->bus->host_pci, dma_addr, size, dir);
-		return;
-#endif
-		break;
-	case SSB_BUSTYPE_SSB:
-		dma_unmap_single(dev->dev, dma_addr, size, dir);
-		return;
-	default:
-		break;
-	}
-	__ssb_dma_not_implemented(dev);
-}
-
-static inline void ssb_dma_sync_single_for_cpu(struct ssb_device *dev,
-					       dma_addr_t dma_addr,
-					       size_t size,
-					       enum dma_data_direction dir)
-{
-	switch (dev->bus->bustype) {
-	case SSB_BUSTYPE_PCI:
-#ifdef CONFIG_SSB_PCIHOST
-		pci_dma_sync_single_for_cpu(dev->bus->host_pci, dma_addr,
-					    size, dir);
-		return;
-#endif
-		break;
-	case SSB_BUSTYPE_SSB:
-		dma_sync_single_for_cpu(dev->dev, dma_addr, size, dir);
-		return;
-	default:
-		break;
-	}
-	__ssb_dma_not_implemented(dev);
-}
-
-static inline void ssb_dma_sync_single_for_device(struct ssb_device *dev,
-						  dma_addr_t dma_addr,
-						  size_t size,
-						  enum dma_data_direction dir)
-{
-	switch (dev->bus->bustype) {
-	case SSB_BUSTYPE_PCI:
-#ifdef CONFIG_SSB_PCIHOST
-		pci_dma_sync_single_for_device(dev->bus->host_pci, dma_addr,
-					       size, dir);
-		return;
-#endif
-		break;
-	case SSB_BUSTYPE_SSB:
-		dma_sync_single_for_device(dev->dev, dma_addr, size, dir);
-		return;
-	default:
-		break;
-	}
-	__ssb_dma_not_implemented(dev);
-}
-
-static inline void ssb_dma_sync_single_range_for_cpu(struct ssb_device *dev,
-						     dma_addr_t dma_addr,
-						     unsigned long offset,
-						     size_t size,
-						     enum dma_data_direction dir)
-{
-	switch (dev->bus->bustype) {
-	case SSB_BUSTYPE_PCI:
-#ifdef CONFIG_SSB_PCIHOST
-		/* Just sync everything. That's all the PCI API can do. */
-		pci_dma_sync_single_for_cpu(dev->bus->host_pci, dma_addr,
-					    offset + size, dir);
-		return;
-#endif
-		break;
-	case SSB_BUSTYPE_SSB:
-		dma_sync_single_range_for_cpu(dev->dev, dma_addr, offset,
-					      size, dir);
-		return;
-	default:
-		break;
-	}
-	__ssb_dma_not_implemented(dev);
-}
-
-static inline void ssb_dma_sync_single_range_for_device(struct ssb_device *dev,
-							dma_addr_t dma_addr,
-							unsigned long offset,
-							size_t size,
-							enum dma_data_direction dir)
-{
-	switch (dev->bus->bustype) {
-	case SSB_BUSTYPE_PCI:
-#ifdef CONFIG_SSB_PCIHOST
-		/* Just sync everything. That's all the PCI API can do. */
-		pci_dma_sync_single_for_device(dev->bus->host_pci, dma_addr,
-					       offset + size, dir);
-		return;
-#endif
-		break;
-	case SSB_BUSTYPE_SSB:
-		dma_sync_single_range_for_device(dev->dev, dma_addr, offset,
-						 size, dir);
-		return;
-	default:
-		break;
-	}
-	__ssb_dma_not_implemented(dev);
-}
-
-
 #ifdef CONFIG_SSB_PCIHOST
 /* PCI-host wrapper driver */
 extern int ssb_pcihost_register(struct pci_driver *driver);
-- 
cgit v1.2.3-70-g09d2


From 5360bd776f73d0a7da571d72a09a03f237e99900 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 28 May 2010 23:01:00 -0400
Subject: Fix up the "generic" unistd.h ABI to be more useful.

Reserve 16 "architecture-specific" syscall numbers starting at 244.

Allow use of the sys_sync_file_range2() API with the generic unistd.h
by specifying __ARCH_WANT_SYNC_FILE_RANGE2 before including it.

Allow using the generic unistd.h to create the "compat" syscall table
by specifying __SYSCALL_COMPAT before including it.

Use sys_fadvise64_64 for __NR3264_fadvise64 in both 32- and 64-bit mode.

Request the appropriate __ARCH_WANT_COMPAT_SYS_xxx values when
some deprecated syscall modes are selected.

As part of this change to fix up the syscalls, also provide a couple
of missing signal-related syscall prototypes in <linux/syscalls.h>.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/unistd.h | 26 ++++++++++++++++++++------
 include/linux/syscalls.h     |  4 ++++
 2 files changed, 24 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 6a0b30f78a6..30218b4fa4e 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -18,7 +18,7 @@
 #define __SYSCALL(x, y)
 #endif
 
-#if __BITS_PER_LONG == 32
+#if __BITS_PER_LONG == 32 || defined(__SYSCALL_COMPAT)
 #define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _32)
 #else
 #define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _64)
@@ -241,8 +241,13 @@ __SYSCALL(__NR_sync, sys_sync)
 __SYSCALL(__NR_fsync, sys_fsync)
 #define __NR_fdatasync 83
 __SYSCALL(__NR_fdatasync, sys_fdatasync)
+#ifdef __ARCH_WANT_SYNC_FILE_RANGE2
+#define __NR_sync_file_range2 84
+__SYSCALL(__NR_sync_file_range2, sys_sync_file_range2)
+#else
 #define __NR_sync_file_range 84
-__SYSCALL(__NR_sync_file_range, sys_sync_file_range) /* .long sys_sync_file_range2, */
+__SYSCALL(__NR_sync_file_range, sys_sync_file_range)
+#endif
 
 /* fs/timerfd.c */
 #define __NR_timerfd_create 85
@@ -580,7 +585,7 @@ __SYSCALL(__NR_execve, sys_execve)	/* .long sys_execve_wrapper */
 __SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap)
 /* mm/fadvise.c */
 #define __NR3264_fadvise64 223
-__SC_3264(__NR3264_fadvise64, sys_fadvise64_64, sys_fadvise64)
+__SYSCALL(__NR3264_fadvise64, sys_fadvise64_64)
 
 /* mm/, CONFIG_MMU only */
 #ifndef __ARCH_NOMMU
@@ -627,8 +632,14 @@ __SYSCALL(__NR_accept4, sys_accept4)
 #define __NR_recvmmsg 243
 __SYSCALL(__NR_recvmmsg, sys_recvmmsg)
 
+/*
+ * Architectures may provide up to 16 syscalls of their own
+ * starting with this value.
+ */
+#define __NR_arch_specific_syscall 244
+
 #undef __NR_syscalls
-#define __NR_syscalls 244
+#define __NR_syscalls 260
 
 /*
  * All syscalls below here should go away really,
@@ -694,7 +705,8 @@ __SYSCALL(__NR_signalfd, sys_signalfd)
 #define __NR_syscalls (__NR_signalfd+1)
 #endif /* __ARCH_WANT_SYSCALL_NO_FLAGS */
 
-#if __BITS_PER_LONG == 32 && defined(__ARCH_WANT_SYSCALL_OFF_T)
+#if (__BITS_PER_LONG == 32 || defined(__SYSCALL_COMPAT)) && \
+     defined(__ARCH_WANT_SYSCALL_OFF_T)
 #define __NR_sendfile 1046
 __SYSCALL(__NR_sendfile, sys_sendfile)
 #define __NR_ftruncate 1047
@@ -740,6 +752,7 @@ __SYSCALL(__NR_getpgrp, sys_getpgrp)
 __SYSCALL(__NR_pause, sys_pause)
 #define __NR_time 1062
 #define __ARCH_WANT_SYS_TIME
+#define __ARCH_WANT_COMPAT_SYS_TIME
 __SYSCALL(__NR_time, sys_time)
 #define __NR_utime 1063
 #define __ARCH_WANT_SYS_UTIME
@@ -801,7 +814,7 @@ __SYSCALL(__NR_fork, sys_ni_syscall)
  * Here we map the numbers so that both versions
  * use the same syscall table layout.
  */
-#if __BITS_PER_LONG == 64
+#if __BITS_PER_LONG == 64 && !defined(__SYSCALL_COMPAT)
 #define __NR_fcntl __NR3264_fcntl
 #define __NR_statfs __NR3264_statfs
 #define __NR_fstatfs __NR3264_fstatfs
@@ -848,6 +861,7 @@ __SYSCALL(__NR_fork, sys_ni_syscall)
 #endif
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
+#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 
 /*
  * "Conditional" syscalls
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a1a86a53bc7..4a19d9bb836 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -364,9 +364,13 @@ asmlinkage long sys_init_module(void __user *umod, unsigned long len,
 asmlinkage long sys_delete_module(const char __user *name_user,
 				unsigned int flags);
 
+asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act,
+				 struct sigaction __user *oact,
+				 size_t sigsetsize);
 asmlinkage long sys_rt_sigprocmask(int how, sigset_t __user *set,
 				sigset_t __user *oset, size_t sigsetsize);
 asmlinkage long sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize);
+asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize);
 asmlinkage long sys_rt_sigtimedwait(const sigset_t __user *uthese,
 				siginfo_t __user *uinfo,
 				const struct timespec __user *uts,
-- 
cgit v1.2.3-70-g09d2


From b78462ebc6a4ef9074aa80abebcdd470dc5f0ce0 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Wed, 2 Jun 2010 12:24:37 +0000
Subject: skbuff: add check for non-linear to warn_if_lro and needs_linearize

We can avoid an unnecessary cache miss by checking if the skb is non-linear
before accessing gso_size/gso_type in skb_warn_if_lro.  The same can also be
done to avoid a cache miss on nr_frags if data_len is 0.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 3 ++-
 net/core/dev.c         | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bf243fc5495..645e78d395f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2129,7 +2129,8 @@ static inline bool skb_warn_if_lro(const struct sk_buff *skb)
 	/* LRO sets gso_size but not gso_type, whereas if GSO is really
 	 * wanted then gso_type will be set. */
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
-	if (shinfo->gso_size != 0 && unlikely(shinfo->gso_type == 0)) {
+	if (skb_is_nonlinear(skb) && shinfo->gso_size != 0 &&
+	    unlikely(shinfo->gso_type == 0)) {
 		__skb_warn_lro_forwarding(skb);
 		return true;
 	}
diff --git a/net/core/dev.c b/net/core/dev.c
index ec01a5998d7..3abb3a6058b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2103,9 +2103,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 static inline int skb_needs_linearize(struct sk_buff *skb,
 				      struct net_device *dev)
 {
-	return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
-	       (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
-					      illegal_highdma(dev, skb)));
+	return skb_is_nonlinear(skb) &&
+	       ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
+	        (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
+					      illegal_highdma(dev, skb))));
 }
 
 /**
-- 
cgit v1.2.3-70-g09d2


From bb1d912323d5dd50e1079e389f4e964be14f0ae3 Mon Sep 17 00:00:00 2001
From: Andy Gospodarek <andy@greyhouse.net>
Date: Wed, 2 Jun 2010 08:40:18 +0000
Subject: bonding: allow user-controlled output slave selection

v2: changed bonding module version, modified to apply on top of changes
from previous patch in series, and updated documentation to elaborate on
multiqueue awareness that now exists in bonding driver.

This patch gives the user the ability to control the output slave for
round-robin and active-backup bonding.  Similar functionality was
discussed in the past, but Jay Vosburgh indicated he would rather see a
feature like this added to existing modes rather than creating a
completely new mode.  Jay's thoughts as well as Neil's input surrounding
some of the issues with the first implementation pushed us toward a
design that relied on the queue_mapping rather than skb marks.
Round-robin and active-backup modes were chosen as the first users of
this slave selection as they seemed like the most logical choices when
considering a multi-switch environment.

Round-robin mode works without any modification, but active-backup does
require inclusion of the first patch in this series and setting
the 'all_slaves_active' flag.  This will allow reception of unicast traffic on
any of the backup interfaces.

This was tested with IPv4-based filters as well as VLAN-based filters
with good results.

More information as well as a configuration example is available in the
patch to Documentation/networking/bonding.txt.

Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/bonding.txt |  84 ++++++++++++++++++++++++-
 drivers/net/bonding/bond_main.c      |  75 +++++++++++++++++++++-
 drivers/net/bonding/bond_sysfs.c     | 116 +++++++++++++++++++++++++++++++++++
 drivers/net/bonding/bonding.h        |   9 ++-
 include/linux/if_bonding.h           |   1 +
 5 files changed, 278 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 61f516b135b..d0914781830 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -49,6 +49,7 @@ Table of Contents
 3.3	Configuring Bonding Manually with Ifenslave
 3.3.1		Configuring Multiple Bonds Manually
 3.4	Configuring Bonding Manually via Sysfs
+3.5	Overriding Configuration for Special Cases
 
 4. Querying Bonding Configuration
 4.1	Bonding Configuration
@@ -1318,8 +1319,87 @@ echo 2000 > /sys/class/net/bond1/bonding/arp_interval
 echo +eth2 > /sys/class/net/bond1/bonding/slaves
 echo +eth3 > /sys/class/net/bond1/bonding/slaves
 
-
-4. Querying Bonding Configuration 
+3.5 Overriding Configuration for Special Cases
+----------------------------------------------
+When using the bonding driver, the physical port which transmits a frame is
+typically selected by the bonding driver, and is not relevant to the user or
+system administrator.  The output port is simply selected using the policies of
+the selected bonding mode.  On occasion however, it is helpful to direct certain
+classes of traffic to certain physical interfaces on output to implement
+slightly more complex policies.  For example, to reach a web server over a
+bonded interface in which eth0 connects to a private network, while eth1
+connects via a public network, it may be desirous to bias the bond to send said
+traffic over eth0 first, using eth1 only as a fall back, while all other traffic
+can safely be sent over either interface.  Such configurations may be achieved
+using the traffic control utilities inherent in linux.
+
+By default the bonding driver is multiqueue aware and 16 queues are created
+when the driver initializes (see Documentation/networking/multiqueue.txt
+for details).  If more or fewer queues are desired the module parameter
+tx_queues can be used to change this value.  There is no sysfs parameter
+available as the allocation is done at module init time.
+
+The output of the file /proc/net/bonding/bondX has changed so the output Queue
+ID is now printed for each slave:
+
+Bonding Mode: fault-tolerance (active-backup)
+Primary Slave: None
+Currently Active Slave: eth0
+MII Status: up
+MII Polling Interval (ms): 0
+Up Delay (ms): 0
+Down Delay (ms): 0
+
+Slave Interface: eth0
+MII Status: up
+Link Failure Count: 0
+Permanent HW addr: 00:1a:a0:12:8f:cb
+Slave queue ID: 0
+
+Slave Interface: eth1
+MII Status: up
+Link Failure Count: 0
+Permanent HW addr: 00:1a:a0:12:8f:cc
+Slave queue ID: 2
+
+The queue_id for a slave can be set using the command:
+
+# echo "eth1:2" > /sys/class/net/bond0/bonding/queue_id
+
+Any interface that needs a queue_id set should set it with multiple calls
+like the one above until proper priorities are set for all interfaces.  On
+distributions that allow configuration via initscripts, multiple 'queue_id'
+arguments can be added to BONDING_OPTS to set all needed slave queues.
+
+These queue id's can be used in conjunction with the tc utility to configure
+a multiqueue qdisc and filters to bias certain traffic to transmit on certain
+slave devices.  For instance, say we wanted, in the above configuration to
+force all traffic bound to 192.168.1.100 to use eth1 in the bond as its output
+device. The following commands would accomplish this:
+
+# tc qdisc add dev bond0 handle 1 root multiq
+
+# tc filter add dev bond0 protocol ip parent 1: prio 1 u32 match ip dst \
+	192.168.1.100 action skbedit queue_mapping 2
+
+These commands tell the kernel to attach a multiqueue queue discipline to the
+bond0 interface and filter traffic enqueued to it, such that packets with a dst
+ip of 192.168.1.100 have their output queue mapping value overwritten to 2.
+This value is then passed into the driver, causing the normal output path
+selection policy to be overridden, selecting instead qid 2, which maps to eth1.
+
+Note that qid values begin at 1.  Qid 0 is reserved to indicate to the driver
+that normal output policy selection should take place.  One benefit to simply
+leaving the qid for a slave to 0 is the multiqueue awareness in the bonding
+driver that is now present.  This awareness allows tc filters to be placed on
+slave devices as well as bond devices and the bonding driver will simply act as
+a pass-through for selecting output queues on the slave device rather than 
+output port selection.
+
+This feature first appeared in bonding driver version 3.7.0 and support for
+output slave selection was limited to round-robin and active-backup modes.
+
+4. Querying Bonding Configuration
 =================================
 
 4.1 Bonding Configuration
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index f22f6bf4385..1b19276cff1 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -90,6 +90,7 @@
 #define BOND_LINK_ARP_INTERV	0
 
 static int max_bonds	= BOND_DEFAULT_MAX_BONDS;
+static int tx_queues	= BOND_DEFAULT_TX_QUEUES;
 static int num_grat_arp = 1;
 static int num_unsol_na = 1;
 static int miimon	= BOND_LINK_MON_INTERV;
@@ -111,6 +112,8 @@ static struct bond_params bonding_defaults;
 
 module_param(max_bonds, int, 0);
 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
+module_param(tx_queues, int, 0);
+MODULE_PARM_DESC(tx_queues, "Max number of transmit queues (default = 16)");
 module_param(num_grat_arp, int, 0644);
 MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event");
 module_param(num_unsol_na, int, 0644);
@@ -1540,6 +1543,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 		goto err_undo_flags;
 	}
 
+	/*
+	 * Set the new_slave's queue_id to be zero.  Queue ID mapping
+	 * is set via sysfs or module option if desired.
+	 */
+	new_slave->queue_id = 0;
+
 	/* Save slave's original mtu and then set it to match the bond */
 	new_slave->original_mtu = slave_dev->mtu;
 	res = dev_set_mtu(slave_dev, bond->dev->mtu);
@@ -3285,6 +3294,7 @@ static void bond_info_show_slave(struct seq_file *seq,
 		else
 			seq_puts(seq, "Aggregator ID: N/A\n");
 	}
+	seq_printf(seq, "Slave queue ID: %d\n", slave->queue_id);
 }
 
 static int bond_info_seq_show(struct seq_file *seq, void *v)
@@ -4421,9 +4431,59 @@ static void bond_set_xmit_hash_policy(struct bonding *bond)
 	}
 }
 
+/*
+ * Lookup the slave that corresponds to a qid
+ */
+static inline int bond_slave_override(struct bonding *bond,
+				      struct sk_buff *skb)
+{
+	int i, res = 1;
+	struct slave *slave = NULL;
+	struct slave *check_slave;
+
+	read_lock(&bond->lock);
+
+	if (!BOND_IS_OK(bond) || !skb->queue_mapping)
+		goto out;
+
+	/* Find out if any slaves have the same mapping as this skb. */
+	bond_for_each_slave(bond, check_slave, i) {
+		if (check_slave->queue_id == skb->queue_mapping) {
+			slave = check_slave;
+			break;
+		}
+	}
+
+	/* If the slave isn't UP, use default transmit policy. */
+	if (slave && slave->queue_id && IS_UP(slave->dev) &&
+	    (slave->link == BOND_LINK_UP)) {
+		res = bond_dev_queue_xmit(bond, skb, slave->dev);
+	}
+
+out:
+	read_unlock(&bond->lock);
+	return res;
+}
+
+static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb)
+{
+	/*
+	 * This helper function exists to help dev_pick_tx get the correct
+	 * destination queue.  Using a helper function skips the a call to
+	 * skb_tx_hash and will put the skbs in the queue we expect on their
+	 * way down to the bonding driver.
+	 */
+	return skb->queue_mapping;
+}
+
 static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	const struct bonding *bond = netdev_priv(dev);
+	struct bonding *bond = netdev_priv(dev);
+
+	if (TX_QUEUE_OVERRIDE(bond->params.mode)) {
+		if (!bond_slave_override(bond, skb))
+			return NETDEV_TX_OK;
+	}
 
 	switch (bond->params.mode) {
 	case BOND_MODE_ROUNDROBIN:
@@ -4508,6 +4568,7 @@ static const struct net_device_ops bond_netdev_ops = {
 	.ndo_open		= bond_open,
 	.ndo_stop		= bond_close,
 	.ndo_start_xmit		= bond_start_xmit,
+	.ndo_select_queue	= bond_select_queue,
 	.ndo_get_stats		= bond_get_stats,
 	.ndo_do_ioctl		= bond_do_ioctl,
 	.ndo_set_multicast_list	= bond_set_multicast_list,
@@ -4776,6 +4837,13 @@ static int bond_check_params(struct bond_params *params)
 		}
 	}
 
+	if (tx_queues < 1 || tx_queues > 255) {
+		pr_warning("Warning: tx_queues (%d) should be between "
+			   "1 and 255, resetting to %d\n",
+			   tx_queues, BOND_DEFAULT_TX_QUEUES);
+		tx_queues = BOND_DEFAULT_TX_QUEUES;
+	}
+
 	if ((all_slaves_active != 0) && (all_slaves_active != 1)) {
 		pr_warning("Warning: all_slaves_active module parameter (%d), "
 			   "not of valid value (0/1), so it was set to "
@@ -4953,6 +5021,7 @@ static int bond_check_params(struct bond_params *params)
 	params->primary[0] = 0;
 	params->primary_reselect = primary_reselect_value;
 	params->fail_over_mac = fail_over_mac_value;
+	params->tx_queues = tx_queues;
 	params->all_slaves_active = all_slaves_active;
 
 	if (primary) {
@@ -5040,8 +5109,8 @@ int bond_create(struct net *net, const char *name)
 
 	rtnl_lock();
 
-	bond_dev = alloc_netdev(sizeof(struct bonding), name ? name : "",
-				bond_setup);
+	bond_dev = alloc_netdev_mq(sizeof(struct bonding), name ? name : "",
+				bond_setup, tx_queues);
 	if (!bond_dev) {
 		pr_err("%s: eek! can't alloc netdev!\n", name);
 		rtnl_unlock();
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 066311a5e08..f9a034361a8 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -1411,6 +1411,121 @@ static ssize_t bonding_show_ad_partner_mac(struct device *d,
 }
 static DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, NULL);
 
+/*
+ * Show the queue_ids of the slaves in the current bond.
+ */
+static ssize_t bonding_show_queue_id(struct device *d,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct slave *slave;
+	int i, res = 0;
+	struct bonding *bond = to_bond(d);
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	read_lock(&bond->lock);
+	bond_for_each_slave(bond, slave, i) {
+		if (res > (PAGE_SIZE - 6)) {
+			/* not enough space for another interface name */
+			if ((PAGE_SIZE - res) > 10)
+				res = PAGE_SIZE - 10;
+			res += sprintf(buf + res, "++more++ ");
+			break;
+		}
+		res += sprintf(buf + res, "%s:%d ",
+			       slave->dev->name, slave->queue_id);
+	}
+	read_unlock(&bond->lock);
+	if (res)
+		buf[res-1] = '\n'; /* eat the leftover space */
+	rtnl_unlock();
+	return res;
+}
+
+/*
+ * Set the queue_ids of the slaves in the current bond.  The named
+ * interface must already be enslaved to the bond for this to work.
+ */
+static ssize_t bonding_store_queue_id(struct device *d,
+				      struct device_attribute *attr,
+				      const char *buffer, size_t count)
+{
+	struct slave *slave, *update_slave;
+	struct bonding *bond = to_bond(d);
+	u16 qid;
+	int i, ret = count;
+	char *delim;
+	struct net_device *sdev = NULL;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	/* delim will point to queue id if successful */
+	delim = strchr(buffer, ':');
+	if (!delim)
+		goto err_no_cmd;
+
+	/*
+	 * Terminate string that points to device name and bump it
+	 * up one, so we can read the queue id there.
+	 */
+	*delim = '\0';
+	if (sscanf(++delim, "%hd\n", &qid) != 1)
+		goto err_no_cmd;
+
+	/* Check buffer length, valid ifname and queue id */
+	if (strlen(buffer) > IFNAMSIZ ||
+	    !dev_valid_name(buffer) ||
+	    qid > bond->params.tx_queues)
+		goto err_no_cmd;
+
+	/* Get the pointer to that interface if it exists */
+	sdev = __dev_get_by_name(dev_net(bond->dev), buffer);
+	if (!sdev)
+		goto err_no_cmd;
+
+	read_lock(&bond->lock);
+
+	/* Search for the slave and check for duplicate qids */
+	update_slave = NULL;
+	bond_for_each_slave(bond, slave, i) {
+		if (sdev == slave->dev)
+			/*
+			 * We don't need to check the matching
+			 * slave for dups, since we're overwriting it
+			 */
+			update_slave = slave;
+		else if (qid && qid == slave->queue_id) {
+			goto err_no_cmd_unlock;
+		}
+	}
+
+	if (!update_slave)
+		goto err_no_cmd_unlock;
+
+	/* Actually set the qids for the slave */
+	update_slave->queue_id = qid;
+
+	read_unlock(&bond->lock);
+out:
+	rtnl_unlock();
+	return ret;
+
+err_no_cmd_unlock:
+	read_unlock(&bond->lock);
+err_no_cmd:
+	pr_info("invalid input for queue_id set for %s.\n",
+		bond->dev->name);
+	ret = -EPERM;
+	goto out;
+}
+
+static DEVICE_ATTR(queue_id, S_IRUGO | S_IWUSR, bonding_show_queue_id,
+		   bonding_store_queue_id);
+
+
 /*
  * Show and set the all_slaves_active flag.
  */
@@ -1489,6 +1604,7 @@ static struct attribute *per_bond_attrs[] = {
 	&dev_attr_ad_actor_key.attr,
 	&dev_attr_ad_partner_key.attr,
 	&dev_attr_ad_partner_mac.attr,
+	&dev_attr_queue_id.attr,
 	&dev_attr_all_slaves_active.attr,
 	NULL,
 };
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index cecdea2a629..c6fdd851579 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -23,8 +23,8 @@
 #include "bond_3ad.h"
 #include "bond_alb.h"
 
-#define DRV_VERSION	"3.6.0"
-#define DRV_RELDATE	"September 26, 2009"
+#define DRV_VERSION	"3.7.0"
+#define DRV_RELDATE	"June 2, 2010"
 #define DRV_NAME	"bonding"
 #define DRV_DESCRIPTION	"Ethernet Channel Bonding Driver"
 
@@ -60,6 +60,9 @@
 		 ((mode) == BOND_MODE_TLB)          ||	\
 		 ((mode) == BOND_MODE_ALB))
 
+#define TX_QUEUE_OVERRIDE(mode)				\
+			(((mode) == BOND_MODE_ACTIVEBACKUP) ||	\
+			 ((mode) == BOND_MODE_ROUNDROBIN))
 /*
  * Less bad way to call ioctl from within the kernel; this needs to be
  * done some other way to get the call out of interrupt context.
@@ -131,6 +134,7 @@ struct bond_params {
 	char primary[IFNAMSIZ];
 	int primary_reselect;
 	__be32 arp_targets[BOND_MAX_ARP_TARGETS];
+	int tx_queues;
 	int all_slaves_active;
 };
 
@@ -165,6 +169,7 @@ struct slave {
 	u8     perm_hwaddr[ETH_ALEN];
 	u16    speed;
 	u8     duplex;
+	u16    queue_id;
 	struct ad_slave_info ad_info; /* HUGE - better to dynamically alloc */
 	struct tlb_slave_info tlb_info;
 };
diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h
index cd525fae3c9..2c7994372bd 100644
--- a/include/linux/if_bonding.h
+++ b/include/linux/if_bonding.h
@@ -83,6 +83,7 @@
 
 #define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
 
+#define BOND_DEFAULT_TX_QUEUES 16   /* Default number of tx queues per device */
 /* hashing types */
 #define BOND_XMIT_POLICY_LAYER2		0 /* layer 2 (MAC only), default */
 #define BOND_XMIT_POLICY_LAYER34	1 /* layer 3+4 (IP ^ (TCP || UDP)) */
-- 
cgit v1.2.3-70-g09d2


From a8b690f98baf9fb1902b8eeab801351ea603fa3a Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 7 Jun 2010 00:43:42 -0700
Subject: tcp: Fix slowness in read /proc/net/tcp

This patch addresses a serious performance issue in reading the
TCP sockets table (/proc/net/tcp).

Reading the full table is done by a number of sequential read
operations.  At each read operation, a seek is done to find the
last socket that was previously read.  This seek operation requires
that the sockets in the table need to be counted up to the current
file position, and to count each of these requires taking a lock for
each non-empty bucket.  The whole algorithm is O(n^2).

The fix is to cache the last bucket value, offset within the bucket,
and the file position returned by the last read operation.   On the
next sequential read, the bucket and offset are used to find the
last read socket immediately without needing to scan the previous
buckets of the table.  This algorithm to read the whole table is O(n).

The improvement offered by this patch is easily shown by
cat'ing /proc/net/tcp on a machine with a lot of connections.  With
about 182K connections in the table, I see the following:

- Without patch
time cat /proc/net/tcp > /dev/null

real	1m56.729s
user	0m0.214s
sys	1m56.344s

- With patch
time cat /proc/net/tcp > /dev/null

real	0m0.894s
user	0m0.290s
sys	0m0.594s

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h   |  3 +-
 net/ipv4/tcp_ipv4.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 86 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a1449144848..57316648441 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1413,7 +1413,8 @@ struct tcp_iter_state {
 	sa_family_t		family;
 	enum tcp_seq_states	state;
 	struct sock		*syn_wait_sk;
-	int			bucket, sbucket, num, uid;
+	int			bucket, offset, sbucket, num, uid;
+	loff_t			last_pos;
 };
 
 extern int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index acdc4c98985..7f976af27bf 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1980,6 +1980,11 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
 }
 
+/*
+ * Get next listener socket following cur.  If cur is NULL, get first socket
+ * starting from bucket given in st->bucket; when st->bucket is zero the
+ * very first socket in the hash table is returned.
+ */
 static void *listening_get_next(struct seq_file *seq, void *cur)
 {
 	struct inet_connection_sock *icsk;
@@ -1990,14 +1995,15 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 	struct net *net = seq_file_net(seq);
 
 	if (!sk) {
-		st->bucket = 0;
-		ilb = &tcp_hashinfo.listening_hash[0];
+		ilb = &tcp_hashinfo.listening_hash[st->bucket];
 		spin_lock_bh(&ilb->lock);
 		sk = sk_nulls_head(&ilb->head);
+		st->offset = 0;
 		goto get_sk;
 	}
 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
 	++st->num;
+	++st->offset;
 
 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
 		struct request_sock *req = cur;
@@ -2012,6 +2018,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 				}
 				req = req->dl_next;
 			}
+			st->offset = 0;
 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
 				break;
 get_req:
@@ -2047,6 +2054,7 @@ start_req:
 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 	}
 	spin_unlock_bh(&ilb->lock);
+	st->offset = 0;
 	if (++st->bucket < INET_LHTABLE_SIZE) {
 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
 		spin_lock_bh(&ilb->lock);
@@ -2060,7 +2068,12 @@ out:
 
 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
 {
-	void *rc = listening_get_next(seq, NULL);
+	struct tcp_iter_state *st = seq->private;
+	void *rc;
+
+	st->bucket = 0;
+	st->offset = 0;
+	rc = listening_get_next(seq, NULL);
 
 	while (rc && *pos) {
 		rc = listening_get_next(seq, rc);
@@ -2075,13 +2088,18 @@ static inline int empty_bucket(struct tcp_iter_state *st)
 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
 }
 
+/*
+ * Get first established socket starting from bucket given in st->bucket.
+ * If st->bucket is zero, the very first socket in the hash is returned.
+ */
 static void *established_get_first(struct seq_file *seq)
 {
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 	void *rc = NULL;
 
-	for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
+	st->offset = 0;
+	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
 		struct sock *sk;
 		struct hlist_nulls_node *node;
 		struct inet_timewait_sock *tw;
@@ -2126,6 +2144,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
 	struct net *net = seq_file_net(seq);
 
 	++st->num;
+	++st->offset;
 
 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
 		tw = cur;
@@ -2142,6 +2161,7 @@ get_tw:
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 
 		/* Look for next non empty bucket */
+		st->offset = 0;
 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
 				empty_bucket(st))
 			;
@@ -2169,7 +2189,11 @@ out:
 
 static void *established_get_idx(struct seq_file *seq, loff_t pos)
 {
-	void *rc = established_get_first(seq);
+	struct tcp_iter_state *st = seq->private;
+	void *rc;
+
+	st->bucket = 0;
+	rc = established_get_first(seq);
 
 	while (rc && pos) {
 		rc = established_get_next(seq, rc);
@@ -2194,24 +2218,72 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
 	return rc;
 }
 
+static void *tcp_seek_last_pos(struct seq_file *seq)
+{
+	struct tcp_iter_state *st = seq->private;
+	int offset = st->offset;
+	int orig_num = st->num;
+	void *rc = NULL;
+
+	switch (st->state) {
+	case TCP_SEQ_STATE_OPENREQ:
+	case TCP_SEQ_STATE_LISTENING:
+		if (st->bucket >= INET_LHTABLE_SIZE)
+			break;
+		st->state = TCP_SEQ_STATE_LISTENING;
+		rc = listening_get_next(seq, NULL);
+		while (offset-- && rc)
+			rc = listening_get_next(seq, rc);
+		if (rc)
+			break;
+		st->bucket = 0;
+		/* Fallthrough */
+	case TCP_SEQ_STATE_ESTABLISHED:
+	case TCP_SEQ_STATE_TIME_WAIT:
+		st->state = TCP_SEQ_STATE_ESTABLISHED;
+		if (st->bucket > tcp_hashinfo.ehash_mask)
+			break;
+		rc = established_get_first(seq);
+		while (offset-- && rc)
+			rc = established_get_next(seq, rc);
+	}
+
+	st->num = orig_num;
+
+	return rc;
+}
+
 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct tcp_iter_state *st = seq->private;
+	void *rc;
+
+	if (*pos && *pos == st->last_pos) {
+		rc = tcp_seek_last_pos(seq);
+		if (rc)
+			goto out;
+	}
+
 	st->state = TCP_SEQ_STATE_LISTENING;
 	st->num = 0;
-	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+	st->bucket = 0;
+	st->offset = 0;
+	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+
+out:
+	st->last_pos = *pos;
+	return rc;
 }
 
 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
+	struct tcp_iter_state *st = seq->private;
 	void *rc = NULL;
-	struct tcp_iter_state *st;
 
 	if (v == SEQ_START_TOKEN) {
 		rc = tcp_get_idx(seq, 0);
 		goto out;
 	}
-	st = seq->private;
 
 	switch (st->state) {
 	case TCP_SEQ_STATE_OPENREQ:
@@ -2219,6 +2291,8 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		rc = listening_get_next(seq, v);
 		if (!rc) {
 			st->state = TCP_SEQ_STATE_ESTABLISHED;
+			st->bucket = 0;
+			st->offset = 0;
 			rc	  = established_get_first(seq);
 		}
 		break;
@@ -2229,6 +2303,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	}
 out:
 	++*pos;
+	st->last_pos = *pos;
 	return rc;
 }
 
@@ -2267,6 +2342,7 @@ static int tcp_seq_open(struct inode *inode, struct file *file)
 
 	s = ((struct seq_file *)file->private_data)->private;
 	s->family		= afinfo->family;
+	s->last_pos 		= 0;
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 139ef32b0e6b88b00b5e3e74d052d938f178dc9b Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Mon, 7 Jun 2010 08:48:13 -0400
Subject: Revert adding some arch-specific signal syscalls to
 <linux/syscalls.h>.

It turns out there is some variance on the calling conventions for
these syscalls, and <asm-generic/syscalls.h> is already the mechanism
used to handle this.  Switch arch/tile over to using that mechanism and
tweak the calling conventions for a couple of tile syscalls to match
<asm-generic/syscalls.h>.

Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/syscalls.h | 22 +---------------------
 arch/tile/kernel/process.c       |  2 +-
 arch/tile/kernel/signal.c        |  4 ++--
 arch/tile/kernel/sys.c           |  2 +-
 include/linux/syscalls.h         |  4 ----
 5 files changed, 5 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h
index e1be54d1a7d..9f2b8e2f69d 100644
--- a/arch/tile/include/asm/syscalls.h
+++ b/arch/tile/include/asm/syscalls.h
@@ -22,21 +22,7 @@
 #include <linux/linkage.h>
 #include <linux/signal.h>
 #include <linux/types.h>
-
-/* kernel/process.c */
-int sys_fork(struct pt_regs *);
-int sys_vfork(struct pt_regs *);
-int sys_clone(unsigned long clone_flags, unsigned long newsp,
-	      int __user *parent_tidptr, int __user *child_tidptr,
-	      struct pt_regs *);
-int sys_execve(char __user *path, char __user *__user *argv,
-	       char __user *__user *envp, struct pt_regs *);
-
-/* kernel/signal.c */
-int sys_sigaltstack(const stack_t __user *, stack_t __user *,
-		    struct pt_regs *);
-long sys_rt_sigreturn(struct pt_regs *);
-int sys_raise_fpe(int code, unsigned long addr, struct pt_regs*);
+#include <asm-generic/syscalls.h>
 
 /* kernel/sys.c */
 ssize_t sys32_readahead(int fd, u32 offset_lo, u32 offset_hi, u32 count);
@@ -45,12 +31,6 @@ long sys32_fadvise64(int fd, u32 offset_lo, u32 offset_hi,
 int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi,
 		       u32 len_lo, u32 len_hi, int advice);
 long sys_flush_cache(void);
-long sys_mmap(unsigned long addr, unsigned long len,
-	      unsigned long prot, unsigned long flags,
-	      unsigned long fd, unsigned long offset);
-long sys_mmap2(unsigned long addr, unsigned long len,
-	       unsigned long prot, unsigned long flags,
-	       unsigned long fd, unsigned long offset);
 
 #ifndef __tilegx__
 /* mm/fault.c */
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 824f230e6d1..c70ff14a48e 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -502,7 +502,7 @@ int _sys_fork(struct pt_regs *regs)
 }
 
 int _sys_clone(unsigned long clone_flags, unsigned long newsp,
-	       int __user *parent_tidptr, int __user *child_tidptr,
+	       void __user *parent_tidptr, void __user *child_tidptr,
 	       struct pt_regs *regs)
 {
 	if (!newsp)
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
index 7ea85eb8524..45835cfad40 100644
--- a/arch/tile/kernel/signal.c
+++ b/arch/tile/kernel/signal.c
@@ -43,8 +43,8 @@
 /* Caller before callee in this file; other callee is in assembler */
 void do_signal(struct pt_regs *regs);
 
-int _sys_sigaltstack(const stack_t __user *uss,
-		     stack_t __user *uoss, struct pt_regs *regs)
+long _sys_sigaltstack(const stack_t __user *uss,
+                      stack_t __user *uoss, struct pt_regs *regs)
 {
 	return do_sigaltstack(uss, uoss, regs->sp);
 }
diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c
index a3d982b212b..0427978cea0 100644
--- a/arch/tile/kernel/sys.c
+++ b/arch/tile/kernel/sys.c
@@ -95,7 +95,7 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
  */
 SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
 		unsigned long, prot, unsigned long, flags,
-		unsigned long, fd, unsigned long, offset)
+		unsigned long, fd, off_t, offset)
 {
 	if (offset & ((1 << PAGE_SHIFT) - 1))
 		return -EINVAL;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 1e3cd5fec7e..7f614ce274a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -364,13 +364,9 @@ asmlinkage long sys_init_module(void __user *umod, unsigned long len,
 asmlinkage long sys_delete_module(const char __user *name_user,
 				unsigned int flags);
 
-asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act,
-				 struct sigaction __user *oact,
-				 size_t sigsetsize);
 asmlinkage long sys_rt_sigprocmask(int how, sigset_t __user *set,
 				sigset_t __user *oset, size_t sigsetsize);
 asmlinkage long sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize);
-asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize);
 asmlinkage long sys_rt_sigtimedwait(const sigset_t __user *uthese,
 				siginfo_t __user *uinfo,
 				const struct timespec __user *uts,
-- 
cgit v1.2.3-70-g09d2


From abbceff7d7a884968e876e52578da1db4a4f6b54 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 10 May 2010 15:15:12 -0400
Subject: swiotlb: add the swiotlb initialization function with iotlb memory

This enables the caller to initialize swiotlb with its own iotlb
memory.

See "swiotlb: swiotlb: add swiotlb_tbl_map_single library function" for
full description of patchset.

[v2: changed ..with_tlb to ..with_tbl]

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Albert Herranz <albert_herranz@yahoo.es>
---
 include/linux/swiotlb.h |  1 +
 lib/swiotlb.c           | 48 ++++++++++++++++++++++++++++++------------------
 2 files changed, 31 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 81a4e213c6c..b406261d888 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -23,6 +23,7 @@ extern int swiotlb_force;
 #define IO_TLB_SHIFT 11
 
 extern void swiotlb_init(int verbose);
+extern void swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
 
 extern void
 *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 783aff00024..ec61e1507d0 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -140,28 +140,14 @@ void swiotlb_print_info(void)
 	       (unsigned long long)pend);
 }
 
-/*
- * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the DMA API.
- */
-void __init
-swiotlb_init_with_default_size(size_t default_size, int verbose)
+void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 {
 	unsigned long i, bytes;
 
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-	}
+	bytes = nslabs << IO_TLB_SHIFT;
 
-	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-
-	/*
-	 * Get IO TLB memory from the low pages
-	 */
-	io_tlb_start = alloc_bootmem_low_pages(bytes);
-	if (!io_tlb_start)
-		panic("Cannot allocate SWIOTLB buffer");
+	io_tlb_nslabs = nslabs;
+	io_tlb_start = tlb;
 	io_tlb_end = io_tlb_start + bytes;
 
 	/*
@@ -185,6 +171,32 @@ swiotlb_init_with_default_size(size_t default_size, int verbose)
 		swiotlb_print_info();
 }
 
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer data
+ * structures for the software IO TLB used to implement the DMA API.
+ */
+void __init
+swiotlb_init_with_default_size(size_t default_size, int verbose)
+{
+	unsigned long bytes;
+
+	if (!io_tlb_nslabs) {
+		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
+		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+	}
+
+	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+
+	/*
+	 * Get IO TLB memory from the low pages
+	 */
+	io_tlb_start = alloc_bootmem_low_pages(bytes);
+	if (!io_tlb_start)
+		panic("Cannot allocate SWIOTLB buffer");
+
+	swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose);
+}
+
 void __init
 swiotlb_init(int verbose)
 {
-- 
cgit v1.2.3-70-g09d2


From 22d48269984fc93a71f65a54aa422aacf5fdb926 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 10 May 2010 15:01:15 -0500
Subject: swiotlb: search and replace "int dir" with "enum dma_data_direction
 dir"

.. to catch anybody doing something funky.

See "swiotlb: swiotlb: add swiotlb_tbl_map_single library function" for
full description of patchset.

[v2: swiotlb_sync_single_range_* no more - removed usage]
[v3: enum dma_data_direction direction -> enum dma_data_direction dir]

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Tested-by: Albert Herranz <albert_herranz@yahoo.es>
---
 include/linux/swiotlb.h |  4 ++--
 lib/swiotlb.c           | 23 +++++++++++++----------
 2 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index b406261d888..250d766f17f 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -43,11 +43,11 @@ extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
 
 extern int
 swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
-	       int direction);
+	       enum dma_data_direction dir);
 
 extern void
 swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
-		 int direction);
+		 enum dma_data_direction dir);
 
 extern int
 swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 1fc15bf6394..5f60157f31d 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -374,7 +374,8 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
 }
 
 void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
-			     phys_addr_t phys, size_t size, int dir)
+			     phys_addr_t phys, size_t size,
+			     enum dma_data_direction dir)
 {
 	unsigned long flags;
 	char *dma_addr;
@@ -481,7 +482,8 @@ found:
  */
 
 static void *
-map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
+map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+	   enum dma_data_direction dir)
 {
 	dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start);
 
@@ -493,7 +495,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
  */
 static void
 swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
-			int dir)
+			enum dma_data_direction dir)
 {
 	unsigned long flags;
 	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -534,7 +536,7 @@ swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
 
 static void
 swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
-	    int dir, int target)
+	    enum dma_data_direction dir, int target)
 {
 	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
 	phys_addr_t phys = io_tlb_orig_addr[index];
@@ -624,7 +626,8 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 EXPORT_SYMBOL(swiotlb_free_coherent);
 
 static void
-swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
+swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
+	     int do_panic)
 {
 	/*
 	 * Ran out of IOMMU space for this operation. This is very bad.
@@ -702,7 +705,7 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
  * whatever the device wrote there.
  */
 static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
-			 size_t size, int dir)
+			 size_t size, enum dma_data_direction dir)
 {
 	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
@@ -745,7 +748,7 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
  */
 static void
 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
-		    size_t size, int dir, int target)
+		    size_t size, enum dma_data_direction dir, int target)
 {
 	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
@@ -832,7 +835,7 @@ EXPORT_SYMBOL(swiotlb_map_sg_attrs);
 
 int
 swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
-	       int dir)
+	       enum dma_data_direction dir)
 {
 	return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
 }
@@ -859,7 +862,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
 
 void
 swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
-		 int dir)
+		 enum dma_data_direction dir)
 {
 	return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
 }
@@ -874,7 +877,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg);
  */
 static void
 swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
-		int nelems, int dir, int target)
+		int nelems, enum dma_data_direction dir, int target)
 {
 	struct scatterlist *sg;
 	int i;
-- 
cgit v1.2.3-70-g09d2


From d7ef1533a90f432615d25729c2477bac9e72051d Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 28 May 2010 11:37:10 -0400
Subject: swiotlb: Make swiotlb bookkeeping functions visible in the header
 file.

Declare the functions that operate on the SWIOTLB buffer in the header
file and make them non-static. Also export them via EXPORT_SYMBOL_GPL.

See "swiotlb: swiotlb: add swiotlb_tbl_map_single library function" for
full description of patchset.

[v2: swiotlb_sync_single_range_for_* no more. Remove usage.]

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Tested-by: Albert Herranz <albert_herranz@yahoo.es>
---
 include/linux/swiotlb.h | 22 ++++++++++++++++++++++
 lib/swiotlb.c           | 29 ++++++++++++++---------------
 2 files changed, 36 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 250d766f17f..8c0e349f4a6 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -25,6 +25,28 @@ extern int swiotlb_force;
 extern void swiotlb_init(int verbose);
 extern void swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
 
+/*
+ * Enumeration for sync targets
+ */
+enum dma_sync_target {
+	SYNC_FOR_CPU = 0,
+	SYNC_FOR_DEVICE = 1,
+};
+extern void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
+				    phys_addr_t phys, size_t size,
+				    enum dma_data_direction dir);
+
+extern void swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr,
+				     size_t size, enum dma_data_direction dir);
+
+extern void swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr,
+				    size_t size, enum dma_data_direction dir,
+				    enum dma_sync_target target);
+
+/* Accessory functions. */
+extern void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
+			   enum dma_data_direction dir);
+
 extern void
 *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			dma_addr_t *dma_handle, gfp_t flags);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 5f60157f31d..34e3082632d 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -50,14 +50,6 @@
  */
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
 
-/*
- * Enumeration for sync targets
- */
-enum dma_sync_target {
-	SYNC_FOR_CPU = 0,
-	SYNC_FOR_DEVICE = 1,
-};
-
 int swiotlb_force;
 
 /*
@@ -335,8 +327,8 @@ static int is_swiotlb_buffer(phys_addr_t paddr)
 /*
  * Bounce: copy the swiotlb buffer back to the original dma location
  */
-static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
-			   enum dma_data_direction dir)
+void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
+		    enum dma_data_direction dir)
 {
 	unsigned long pfn = PFN_DOWN(phys);
 
@@ -372,6 +364,7 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
 			memcpy(phys_to_virt(phys), dma_addr, size);
 	}
 }
+EXPORT_SYMBOL_GPL(swiotlb_bounce);
 
 void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
 			     phys_addr_t phys, size_t size,
@@ -476,6 +469,7 @@ found:
 
 	return dma_addr;
 }
+EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
 
 /*
  * Allocates bounce buffer and returns its kernel virtual address.
@@ -493,7 +487,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size,
 /*
  * dma_addr is the kernel virtual address of the bounce buffer to unmap.
  */
-static void
+void
 swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
 			enum dma_data_direction dir)
 {
@@ -533,10 +527,12 @@ swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
 	}
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
 }
+EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
 
-static void
+void
 swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
-	    enum dma_data_direction dir, int target)
+			enum dma_data_direction dir,
+			enum dma_sync_target target)
 {
 	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
 	phys_addr_t phys = io_tlb_orig_addr[index];
@@ -560,6 +556,7 @@ swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
 		BUG();
 	}
 }
+EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);
 
 void *
 swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -748,7 +745,8 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
  */
 static void
 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
-		    size_t size, enum dma_data_direction dir, int target)
+		    size_t size, enum dma_data_direction dir,
+		    enum dma_sync_target target)
 {
 	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
@@ -877,7 +875,8 @@ EXPORT_SYMBOL(swiotlb_unmap_sg);
  */
 static void
 swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
-		int nelems, enum dma_data_direction dir, int target)
+		int nelems, enum dma_data_direction dir,
+		enum dma_sync_target target)
 {
 	struct scatterlist *sg;
 	int i;
-- 
cgit v1.2.3-70-g09d2


From 66018506e15bea62de4eefc3298f170b4bfcf5ef Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 7 Jun 2010 03:12:08 +0000
Subject: ip: Router Alert RCU conversion

Straightforward conversion to RCU.

One rwlock becomes a spinlock, and is static.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h       |  2 +-
 net/ipv4/ip_input.c    | 11 +++--------
 net/ipv4/ip_sockglue.c | 23 ++++++++++++++---------
 3 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 452f229c380..9982c97f0bd 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -62,10 +62,10 @@ struct ip_ra_chain {
 	struct ip_ra_chain	*next;
 	struct sock		*sk;
 	void			(*destructor)(struct sock *);
+	struct rcu_head		rcu;
 };
 
 extern struct ip_ra_chain *ip_ra_chain;
-extern rwlock_t ip_ra_lock;
 
 /* IP flags. */
 #define IP_CE		0x8000		/* Flag: "Congestion"		*/
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d52c9da644c..d274078b166 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -146,7 +146,7 @@
 #include <linux/netlink.h>
 
 /*
- *	Process Router Attention IP option
+ *	Process Router Attention IP option (RFC 2113)
  */
 int ip_call_ra_chain(struct sk_buff *skb)
 {
@@ -155,8 +155,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
 	struct sock *last = NULL;
 	struct net_device *dev = skb->dev;
 
-	read_lock(&ip_ra_lock);
-	for (ra = ip_ra_chain; ra; ra = ra->next) {
+	for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
 		struct sock *sk = ra->sk;
 
 		/* If socket is bound to an interface, only report
@@ -167,10 +166,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
 		     sk->sk_bound_dev_if == dev->ifindex) &&
 		    net_eq(sock_net(sk), dev_net(dev))) {
 			if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-				if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) {
-					read_unlock(&ip_ra_lock);
+				if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN))
 					return 1;
-				}
 			}
 			if (last) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
@@ -183,10 +180,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
 
 	if (last) {
 		raw_rcv(last, skb);
-		read_unlock(&ip_ra_lock);
 		return 1;
 	}
-	read_unlock(&ip_ra_lock);
 	return 0;
 }
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ce231780a2b..08b9519a24f 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -239,7 +239,12 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
    sent to multicast group to reach destination designated router.
  */
 struct ip_ra_chain *ip_ra_chain;
-DEFINE_RWLOCK(ip_ra_lock);
+static DEFINE_SPINLOCK(ip_ra_lock);
+
+static void ip_ra_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct ip_ra_chain, rcu));
+}
 
 int ip_ra_control(struct sock *sk, unsigned char on,
 		  void (*destructor)(struct sock *))
@@ -251,35 +256,35 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 
 	new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
 
-	write_lock_bh(&ip_ra_lock);
+	spin_lock_bh(&ip_ra_lock);
 	for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
 		if (ra->sk == sk) {
 			if (on) {
-				write_unlock_bh(&ip_ra_lock);
+				spin_unlock_bh(&ip_ra_lock);
 				kfree(new_ra);
 				return -EADDRINUSE;
 			}
-			*rap = ra->next;
-			write_unlock_bh(&ip_ra_lock);
+			rcu_assign_pointer(*rap, ra->next);
+			spin_unlock_bh(&ip_ra_lock);
 
 			if (ra->destructor)
 				ra->destructor(sk);
 			sock_put(sk);
-			kfree(ra);
+			call_rcu(&ra->rcu, ip_ra_free_rcu);
 			return 0;
 		}
 	}
 	if (new_ra == NULL) {
-		write_unlock_bh(&ip_ra_lock);
+		spin_unlock_bh(&ip_ra_lock);
 		return -ENOBUFS;
 	}
 	new_ra->sk = sk;
 	new_ra->destructor = destructor;
 
 	new_ra->next = ra;
-	*rap = new_ra;
+	rcu_assign_pointer(*rap, new_ra);
 	sock_hold(sk);
-	write_unlock_bh(&ip_ra_lock);
+	spin_unlock_bh(&ip_ra_lock);
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From bb69ae049fcc986fcd742eb90ca0d44a7a49c9f1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 7 Jun 2010 11:42:13 +0000
Subject: anycast: Some RCU conversions

- dev_get_by_flags() changed to dev_get_by_flags_rcu()

- ipv6_sock_ac_join() don't touch dev & idev refcounts
- ipv6_sock_ac_drop() don't touch dev & idev refcounts
- ipv6_sock_ac_close() don't touch dev & idev refcounts
- ipv6_dev_ac_dec() don't touch idev refcount
- ipv6_chk_acast_addr() don't touch idev refcount

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 +--
 net/core/dev.c            | 14 +++-----
 net/ipv6/anycast.c        | 90 ++++++++++++++++++++++-------------------------
 3 files changed, 49 insertions(+), 59 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5156b806924..c319f28d699 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1271,8 +1271,8 @@ extern void		dev_add_pack(struct packet_type *pt);
 extern void		dev_remove_pack(struct packet_type *pt);
 extern void		__dev_remove_pack(struct packet_type *pt);
 
-extern struct net_device	*dev_get_by_flags(struct net *net, unsigned short flags,
-						  unsigned short mask);
+extern struct net_device	*dev_get_by_flags_rcu(struct net *net, unsigned short flags,
+						      unsigned short mask);
 extern struct net_device	*dev_get_by_name(struct net *net, const char *name);
 extern struct net_device	*dev_get_by_name_rcu(struct net *net, const char *name);
 extern struct net_device	*__dev_get_by_name(struct net *net, const char *name);
diff --git a/net/core/dev.c b/net/core/dev.c
index c8d127718ff..6f330cee79a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -803,35 +803,31 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 
 /**
- *	dev_get_by_flags - find any device with given flags
+ *	dev_get_by_flags_rcu - find any device with given flags
  *	@net: the applicable net namespace
  *	@if_flags: IFF_* values
  *	@mask: bitmask of bits in if_flags to check
  *
  *	Search for any interface with the given flags. Returns NULL if a device
- *	is not found or a pointer to the device. The device returned has
- *	had a reference added and the pointer is safe until the user calls
- *	dev_put to indicate they have finished with it.
+ *	is not found or a pointer to the device. Must be called inside
+ *	rcu_read_lock(), and result refcount is unchanged.
  */
 
-struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
+struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
 				    unsigned short mask)
 {
 	struct net_device *dev, *ret;
 
 	ret = NULL;
-	rcu_read_lock();
 	for_each_netdev_rcu(net, dev) {
 		if (((dev->flags ^ if_flags) & mask) == 0) {
-			dev_hold(dev);
 			ret = dev;
 			break;
 		}
 	}
-	rcu_read_unlock();
 	return ret;
 }
-EXPORT_SYMBOL(dev_get_by_flags);
+EXPORT_SYMBOL(dev_get_by_flags_rcu);
 
 /**
  *	dev_valid_name - check if name is okay for network device
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index b5b07054508..f058fbd808c 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -77,41 +77,40 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 	pac->acl_next = NULL;
 	ipv6_addr_copy(&pac->acl_addr, addr);
 
+	rcu_read_lock();
 	if (ifindex == 0) {
 		struct rt6_info *rt;
 
 		rt = rt6_lookup(net, addr, NULL, 0, 0);
 		if (rt) {
 			dev = rt->rt6i_dev;
-			dev_hold(dev);
 			dst_release(&rt->u.dst);
 		} else if (ishost) {
 			err = -EADDRNOTAVAIL;
-			goto out_free_pac;
+			goto error;
 		} else {
 			/* router, no matching interface: just pick one */
-
-			dev = dev_get_by_flags(net, IFF_UP, IFF_UP|IFF_LOOPBACK);
+			dev = dev_get_by_flags_rcu(net, IFF_UP,
+						   IFF_UP | IFF_LOOPBACK);
 		}
 	} else
-		dev = dev_get_by_index(net, ifindex);
+		dev = dev_get_by_index_rcu(net, ifindex);
 
 	if (dev == NULL) {
 		err = -ENODEV;
-		goto out_free_pac;
+		goto error;
 	}
 
-	idev = in6_dev_get(dev);
+	idev = __in6_dev_get(dev);
 	if (!idev) {
 		if (ifindex)
 			err = -ENODEV;
 		else
 			err = -EADDRNOTAVAIL;
-		goto out_dev_put;
+		goto error;
 	}
 	/* reset ishost, now that we have a specific device */
 	ishost = !idev->cnf.forwarding;
-	in6_dev_put(idev);
 
 	pac->acl_ifindex = dev->ifindex;
 
@@ -124,26 +123,22 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 		if (ishost)
 			err = -EADDRNOTAVAIL;
 		if (err)
-			goto out_dev_put;
+			goto error;
 	}
 
 	err = ipv6_dev_ac_inc(dev, addr);
-	if (err)
-		goto out_dev_put;
-
-	write_lock_bh(&ipv6_sk_ac_lock);
-	pac->acl_next = np->ipv6_ac_list;
-	np->ipv6_ac_list = pac;
-	write_unlock_bh(&ipv6_sk_ac_lock);
-
-	dev_put(dev);
-
-	return 0;
+	if (!err) {
+		write_lock_bh(&ipv6_sk_ac_lock);
+		pac->acl_next = np->ipv6_ac_list;
+		np->ipv6_ac_list = pac;
+		write_unlock_bh(&ipv6_sk_ac_lock);
+		pac = NULL;
+	}
 
-out_dev_put:
-	dev_put(dev);
-out_free_pac:
-	sock_kfree_s(sk, pac, sizeof(*pac));
+error:
+	rcu_read_unlock();
+	if (pac)
+		sock_kfree_s(sk, pac, sizeof(*pac));
 	return err;
 }
 
@@ -176,11 +171,12 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
 
 	write_unlock_bh(&ipv6_sk_ac_lock);
 
-	dev = dev_get_by_index(net, pac->acl_ifindex);
-	if (dev) {
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
+	if (dev)
 		ipv6_dev_ac_dec(dev, &pac->acl_addr);
-		dev_put(dev);
-	}
+	rcu_read_unlock();
+
 	sock_kfree_s(sk, pac, sizeof(*pac));
 	return 0;
 }
@@ -199,13 +195,12 @@ void ipv6_sock_ac_close(struct sock *sk)
 	write_unlock_bh(&ipv6_sk_ac_lock);
 
 	prev_index = 0;
+	rcu_read_lock();
 	while (pac) {
 		struct ipv6_ac_socklist *next = pac->acl_next;
 
 		if (pac->acl_ifindex != prev_index) {
-			if (dev)
-				dev_put(dev);
-			dev = dev_get_by_index(net, pac->acl_ifindex);
+			dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
 			prev_index = pac->acl_ifindex;
 		}
 		if (dev)
@@ -213,8 +208,7 @@ void ipv6_sock_ac_close(struct sock *sk)
 		sock_kfree_s(sk, pac, sizeof(*pac));
 		pac = next;
 	}
-	if (dev)
-		dev_put(dev);
+	rcu_read_unlock();
 }
 
 #if 0
@@ -363,33 +357,32 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
 	return 0;
 }
 
+/* called with rcu_read_lock() */
 static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
 {
-	int ret;
-	struct inet6_dev *idev = in6_dev_get(dev);
+	struct inet6_dev *idev = __in6_dev_get(dev);
+
 	if (idev == NULL)
 		return -ENODEV;
-	ret = __ipv6_dev_ac_dec(idev, addr);
-	in6_dev_put(idev);
-	return ret;
+	return __ipv6_dev_ac_dec(idev, addr);
 }
 
 /*
  *	check if the interface has this anycast address
+ *	called with rcu_read_lock()
  */
 static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
 {
 	struct inet6_dev *idev;
 	struct ifacaddr6 *aca;
 
-	idev = in6_dev_get(dev);
+	idev = __in6_dev_get(dev);
 	if (idev) {
 		read_lock_bh(&idev->lock);
 		for (aca = idev->ac_list; aca; aca = aca->aca_next)
 			if (ipv6_addr_equal(&aca->aca_addr, addr))
 				break;
 		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
 		return aca != NULL;
 	}
 	return 0;
@@ -403,14 +396,15 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 {
 	int found = 0;
 
-	if (dev)
-		return ipv6_chk_acast_dev(dev, addr);
 	rcu_read_lock();
-	for_each_netdev_rcu(net, dev)
-		if (ipv6_chk_acast_dev(dev, addr)) {
-			found = 1;
-			break;
-		}
+	if (dev)
+		found = ipv6_chk_acast_dev(dev, addr);
+	else
+		for_each_netdev_rcu(net, dev)
+			if (ipv6_chk_acast_dev(dev, addr)) {
+				found = 1;
+				break;
+			}
 	rcu_read_unlock();
 	return found;
 }
-- 
cgit v1.2.3-70-g09d2


From 339bb99e4a8ba1f8960eed21d50be808b35ad22a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 8 Jun 2010 14:11:19 +0200
Subject: netfilter: xt_rateest: Better struct xt_rateest layout

We currently dirty two cache lines in struct xt_rateest, which hurts SMP
performance.

This patch moves lock/bstats/rstats at beginning of structure so that
they share a single cache line.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/xt_rateest.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
index ddbf37e1961..b1d780e21ce 100644
--- a/include/net/netfilter/xt_rateest.h
+++ b/include/net/netfilter/xt_rateest.h
@@ -2,13 +2,17 @@
 #define _XT_RATEEST_H
 
 struct xt_rateest {
+	/* keep lock and bstats on same cache line to speedup xt_rateest_tg() */
+	struct gnet_stats_basic_packed	bstats;
+	spinlock_t			lock;
+	/* keep rstats and lock on same cache line to speedup xt_rateest_mt() */
+	struct gnet_stats_rate_est	rstats;
+
+	/* following fields not accessed in hot path */
 	struct hlist_node		list;
 	char				name[IFNAMSIZ];
 	unsigned int			refcnt;
-	spinlock_t			lock;
 	struct gnet_estimator		params;
-	struct gnet_stats_rate_est	rstats;
-	struct gnet_stats_basic_packed	bstats;
 };
 
 extern struct xt_rateest *xt_rateest_lookup(const char *name);
-- 
cgit v1.2.3-70-g09d2


From abe37c4b84502d6931e04e94c9c2c45b4da8c889 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 7 Jun 2010 11:12:27 +0200
Subject: wireless: fix kernel-doc

Fix a whole bunch of kernel-doc warnings
and errors that crop up when running it on
mac80211 and cfg80211; the latter isn't
normally done so lots of bit-rot happened.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 117 +++++++++++++++++++++++++++++++++++++------------
 include/net/mac80211.h |   4 +-
 2 files changed, 90 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0c3c214772e..22ab9d88cf4 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -37,6 +37,7 @@
  *
  * @IEEE80211_BAND_2GHZ: 2.4GHz ISM band
  * @IEEE80211_BAND_5GHZ: around 5GHz band (4.9-5.7)
+ * @IEEE80211_NUM_BANDS: number of defined bands
  */
 enum ieee80211_band {
 	IEEE80211_BAND_2GHZ = NL80211_BAND_2GHZ,
@@ -188,6 +189,7 @@ struct ieee80211_sta_ht_cap {
  *	in this band. Must be sorted to give a valid "supported
  *	rates" IE, i.e. CCK rates first, then OFDM.
  * @n_bitrates: Number of bitrates in @bitrates
+ * @ht_cap: HT capabilities in this band
  */
 struct ieee80211_supported_band {
 	struct ieee80211_channel *channels;
@@ -225,6 +227,7 @@ struct vif_params {
  * @seq: sequence counter (IV/PN) for TKIP and CCMP keys, only used
  *	with the get_key() callback, must be in little endian,
  *	length given by @seq_len.
+ * @seq_len: length of @seq.
  */
 struct key_params {
 	u8 *key;
@@ -237,6 +240,8 @@ struct key_params {
 /**
  * enum survey_info_flags - survey information flags
  *
+ * @SURVEY_INFO_NOISE_DBM: noise (in dBm) was filled in
+ *
  * Used by the driver to indicate which info in &struct survey_info
  * it has filled in during the get_survey().
  */
@@ -247,13 +252,13 @@ enum survey_info_flags {
 /**
  * struct survey_info - channel survey response
  *
- * Used by dump_survey() to report back per-channel survey information.
- *
  * @channel: the channel this survey record reports, mandatory
  * @filled: bitflag of flags from &enum survey_info_flags
  * @noise: channel noise in dBm. This and all following fields are
  *     optional
  *
+ * Used by dump_survey() to report back per-channel survey information.
+ *
  * This structure can later be expanded with things like
  * channel duty cycle etc.
  */
@@ -288,7 +293,7 @@ struct beacon_parameters {
  *
  * @PLINK_ACTION_INVALID: action 0 is reserved
  * @PLINK_ACTION_OPEN: start mesh peer link establishment
- * @PLINK_ACTION_BLOCL: block traffic from this mesh peer
+ * @PLINK_ACTION_BLOCK: block traffic from this mesh peer
  */
 enum plink_actions {
 	PLINK_ACTION_INVALID,
@@ -311,6 +316,8 @@ enum plink_actions {
  *	(bitmask of BIT(NL80211_STA_FLAG_...))
  * @listen_interval: listen interval or -1 for no change
  * @aid: AID or zero for no change
+ * @plink_action: plink action to take
+ * @ht_capa: HT capabilities of station
  */
 struct station_parameters {
 	u8 *supported_rates;
@@ -448,13 +455,13 @@ enum monitor_flags {
  * Used by the driver to indicate which info in &struct mpath_info it has filled
  * in during get_station() or dump_station().
  *
- * MPATH_INFO_FRAME_QLEN: @frame_qlen filled
- * MPATH_INFO_SN: @sn filled
- * MPATH_INFO_METRIC: @metric filled
- * MPATH_INFO_EXPTIME: @exptime filled
- * MPATH_INFO_DISCOVERY_TIMEOUT: @discovery_timeout filled
- * MPATH_INFO_DISCOVERY_RETRIES: @discovery_retries filled
- * MPATH_INFO_FLAGS: @flags filled
+ * @MPATH_INFO_FRAME_QLEN: @frame_qlen filled
+ * @MPATH_INFO_SN: @sn filled
+ * @MPATH_INFO_METRIC: @metric filled
+ * @MPATH_INFO_EXPTIME: @exptime filled
+ * @MPATH_INFO_DISCOVERY_TIMEOUT: @discovery_timeout filled
+ * @MPATH_INFO_DISCOVERY_RETRIES: @discovery_retries filled
+ * @MPATH_INFO_FLAGS: @flags filled
  */
 enum mpath_info_flags {
 	MPATH_INFO_FRAME_QLEN		= BIT(0),
@@ -587,6 +594,7 @@ struct cfg80211_ssid {
  * @ie_len: length of ie in octets
  * @wiphy: the wiphy this was for
  * @dev: the interface
+ * @aborted: (internal) scan request was notified as aborted
  */
 struct cfg80211_scan_request {
 	struct cfg80211_ssid *ssids;
@@ -623,6 +631,7 @@ enum cfg80211_signal_type {
  * This structure describes a BSS (which may also be a mesh network)
  * for use in scan results and similar.
  *
+ * @channel: channel this BSS is on
  * @bssid: BSSID of the BSS
  * @tsf: timestamp of last received update
  * @beacon_interval: the beacon interval as from the frame
@@ -826,8 +835,8 @@ struct cfg80211_ibss_params {
  * @ssid: SSID
  * @ssid_len: Length of ssid in octets
  * @auth_type: Authentication type (algorithm)
- * @assoc_ie: IEs for association request
- * @assoc_ie_len: Length of assoc_ie in octets
+ * @ie: IEs for association request
+ * @ie_len: Length of @ie in octets
  * @privacy: indicates whether privacy-enabled APs should be used
  * @crypto: crypto settings
  * @key_len: length of WEP key for shared key authentication
@@ -850,10 +859,11 @@ struct cfg80211_connect_params {
 
 /**
  * enum wiphy_params_flags - set_wiphy_params bitfield values
- * WIPHY_PARAM_RETRY_SHORT: wiphy->retry_short has changed
- * WIPHY_PARAM_RETRY_LONG: wiphy->retry_long has changed
- * WIPHY_PARAM_FRAG_THRESHOLD: wiphy->frag_threshold has changed
- * WIPHY_PARAM_RTS_THRESHOLD: wiphy->rts_threshold has changed
+ * @WIPHY_PARAM_RETRY_SHORT: wiphy->retry_short has changed
+ * @WIPHY_PARAM_RETRY_LONG: wiphy->retry_long has changed
+ * @WIPHY_PARAM_FRAG_THRESHOLD: wiphy->frag_threshold has changed
+ * @WIPHY_PARAM_RTS_THRESHOLD: wiphy->rts_threshold has changed
+ * @WIPHY_PARAM_COVERAGE_CLASS: coverage class changed
  */
 enum wiphy_params_flags {
 	WIPHY_PARAM_RETRY_SHORT		= 1 << 0,
@@ -949,10 +959,16 @@ struct cfg80211_pmksa {
  * @del_beacon: Remove beacon configuration and stop sending the beacon.
  *
  * @add_station: Add a new station.
- *
  * @del_station: Remove a station; @mac may be NULL to remove all stations.
- *
  * @change_station: Modify a given station.
+ * @get_station: get station information for the station identified by @mac
+ * @dump_station: dump station callback -- resume dump at index @idx
+ *
+ * @add_mpath: add a fixed mesh path
+ * @del_mpath: delete a given mesh path
+ * @change_mpath: change a given mesh path
+ * @get_mpath: get a mesh path for the given parameters
+ * @dump_mpath: dump mesh path callback -- resume dump at index @idx
  *
  * @get_mesh_params: Put the current mesh parameters into *params
  *
@@ -960,8 +976,6 @@ struct cfg80211_pmksa {
  *	The mask is a bitfield which tells us which parameters to
  *	set, and which to leave alone.
  *
- * @set_mesh_cfg: set mesh parameters (by now, just mesh id)
- *
  * @change_bss: Modify parameters for a given BSS.
  *
  * @set_txq_params: Set TX queue parameters
@@ -1002,6 +1016,8 @@ struct cfg80211_pmksa {
  * @get_tx_power: store the current TX power into the dbm variable;
  *	return 0 if successful
  *
+ * @set_wds_peer: set the WDS peer for a WDS interface
+ *
  * @rfkill_poll: polls the hw rfkill line, use cfg80211 reporting
  *	functions to adjust rfkill hw state
  *
@@ -1019,6 +1035,8 @@ struct cfg80211_pmksa {
  *
  * @testmode_cmd: run a test mode command
  *
+ * @set_bitrate_mask: set the bitrate mask configuration
+ *
  * @set_pmksa: Cache a PMKID for a BSSID. This is mostly useful for fullmac
  *	devices running firmwares capable of generating the (re) association
  *	RSN IE. It allows for faster roaming between WPA2 BSSIDs.
@@ -1231,8 +1249,6 @@ struct mac_address {
 
 /**
  * struct wiphy - wireless hardware description
- * @idx: the wiphy index assigned to this item
- * @class_dev: the class device representing /sys/class/ieee80211/<wiphy-name>
  * @reg_notifier: the driver's regulatory notification callback
  * @regd: the driver's regulatory domain, if one was requested via
  * 	the regulatory_hint() API. This can be used by the driver
@@ -1246,7 +1262,7 @@ struct mac_address {
  * @frag_threshold: Fragmentation threshold (dot11FragmentationThreshold);
  *	-1 = fragmentation disabled, only odd values >= 256 used
  * @rts_threshold: RTS threshold (dot11RTSThreshold); -1 = RTS/CTS disabled
- * @net: the network namespace this wiphy currently lives in
+ * @_net: the network namespace this wiphy currently lives in
  * @perm_addr: permanent MAC address of this device
  * @addr_mask: If the device supports multiple MAC addresses by masking,
  *	set this to a mask with variable bits set to 1, e.g. if the last
@@ -1259,6 +1275,28 @@ struct mac_address {
  *	by default for perm_addr. In this case, the mask should be set to
  *	all-zeroes. In this case it is assumed that the device can handle
  *	the same number of arbitrary MAC addresses.
+ * @debugfsdir: debugfs directory used for this wiphy, will be renamed
+ *	automatically on wiphy renames
+ * @dev: (virtual) struct device for this wiphy
+ * @wext: wireless extension handlers
+ * @priv: driver private data (sized according to wiphy_new() parameter)
+ * @interface_modes: bitmask of interfaces types valid for this wiphy,
+ *	must be set by driver
+ * @flags: wiphy flags, see &enum wiphy_flags
+ * @bss_priv_size: each BSS struct has private data allocated with it,
+ *	this variable determines its size
+ * @max_scan_ssids: maximum number of SSIDs the device can scan for in
+ *	any given scan
+ * @max_scan_ie_len: maximum length of user-controlled IEs device can
+ *	add to probe request frames transmitted during a scan, must not
+ *	include fixed IEs like supported rates
+ * @coverage_class: current coverage class
+ * @fw_version: firmware version for ethtool reporting
+ * @hw_version: hardware version for ethtool reporting
+ * @max_num_pmkids: maximum number of PMKIDs supported by device
+ * @privid: a pointer that drivers can use to identify if an arbitrary
+ *	wiphy is theirs, e.g. in global notifiers
+ * @bands: information about bands/channels supported by this device
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -1472,13 +1510,14 @@ struct cfg80211_cached_keys;
  * @ssid: (private) Used by the internal configuration code
  * @ssid_len: (private) Used by the internal configuration code
  * @wext: (private) Used by the internal wireless extensions compat code
- * @wext_bssid: (private) Used by the internal wireless extensions compat code
  * @use_4addr: indicates 4addr mode is used on this interface, must be
  *	set by driver (if supported) on add_interface BEFORE registering the
  *	netdev and may otherwise be used by driver read-only, will be update
  *	by cfg80211 on change_interface
  * @action_registrations: list of registrations for action frames
  * @action_registrations_lock: lock for the list
+ * @mtx: mutex used to lock data in this struct
+ * @cleanup_work: work struct used for cleanup that can't be done directly
  */
 struct wireless_dev {
 	struct wiphy *wiphy;
@@ -1552,11 +1591,13 @@ static inline void *wdev_priv(struct wireless_dev *wdev)
 
 /**
  * ieee80211_channel_to_frequency - convert channel number to frequency
+ * @chan: channel number
  */
 extern int ieee80211_channel_to_frequency(int chan);
 
 /**
  * ieee80211_frequency_to_channel - convert frequency to channel number
+ * @freq: center frequency
  */
 extern int ieee80211_frequency_to_channel(int freq);
 
@@ -1571,6 +1612,8 @@ extern struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy,
 							 int freq);
 /**
  * ieee80211_get_channel - get channel struct from wiphy for specified frequency
+ * @wiphy: the struct wiphy to get the channel for
+ * @freq: the center frequency of the channel
  */
 static inline struct ieee80211_channel *
 ieee80211_get_channel(struct wiphy *wiphy, int freq)
@@ -1631,9 +1674,6 @@ struct ieee80211_radiotap_vendor_namespaces {
  * @is_radiotap_ns: indicates whether the current namespace is the default
  *	radiotap namespace or not
  *
- * @overrides: override standard radiotap fields
- * @n_overrides: number of overrides
- *
  * @_rtheader: pointer to the radiotap header we are walking through
  * @_max_length: length of radiotap header in cpu byte ordering
  * @_arg_index: next argument index
@@ -1949,10 +1989,12 @@ int cfg80211_wext_giwap(struct net_device *dev,
 void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted);
 
 /**
- * cfg80211_inform_bss - inform cfg80211 of a new BSS
+ * cfg80211_inform_bss_frame - inform cfg80211 of a received BSS frame
  *
  * @wiphy: the wiphy reporting the BSS
- * @bss: the found BSS
+ * @channel: The channel the frame was received on
+ * @mgmt: the management frame (probe response or beacon)
+ * @len: length of the management frame
  * @signal: the signal strength, type depends on the wiphy's signal_type
  * @gfp: context flags
  *
@@ -1965,6 +2007,23 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
 			  struct ieee80211_mgmt *mgmt, size_t len,
 			  s32 signal, gfp_t gfp);
 
+/**
+ * cfg80211_inform_bss - inform cfg80211 of a new BSS
+ *
+ * @wiphy: the wiphy reporting the BSS
+ * @channel: The channel the frame was received on
+ * @bssid: the BSSID of the BSS
+ * @timestamp: the TSF timestamp sent by the peer
+ * @capability: the capability field sent by the peer
+ * @beacon_interval: the beacon interval announced by the peer
+ * @ie: additional IEs sent by the peer
+ * @ielen: length of the additional IEs
+ * @signal: the signal strength, type depends on the wiphy's signal_type
+ * @gfp: context flags
+ *
+ * This informs cfg80211 that BSS information was found and
+ * the BSS should be updated/added.
+ */
 struct cfg80211_bss*
 cfg80211_inform_bss(struct wiphy *wiphy,
 		    struct ieee80211_channel *channel,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e3c1d479400..abb3b1a9ddc 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -313,9 +313,10 @@ enum mac80211_tx_control_flags {
 	IEEE80211_TX_INTFL_NL80211_FRAME_TX	= BIT(21),
 	IEEE80211_TX_CTL_LDPC			= BIT(22),
 	IEEE80211_TX_CTL_STBC			= BIT(23) | BIT(24),
-#define IEEE80211_TX_CTL_STBC_SHIFT		23
 };
 
+#define IEEE80211_TX_CTL_STBC_SHIFT		23
+
 /**
  * enum mac80211_rate_control_flags - per-rate flags set by the
  *	Rate Control algorithm.
@@ -813,7 +814,6 @@ enum ieee80211_key_flags {
  *	encrypted in hardware.
  * @alg: The key algorithm.
  * @flags: key flags, see &enum ieee80211_key_flags.
- * @ap_addr: AP's MAC address
  * @keyidx: the key index (0-3)
  * @keylen: key material length
  * @key: key material. For ALG_TKIP the key is encoded as a 256-bit (32 byte)
-- 
cgit v1.2.3-70-g09d2


From 5bfddbd46a95c978f4d3c992339cbdf4f4b790a3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 8 Jun 2010 16:09:52 +0200
Subject: netfilter: nf_conntrack: IPS_UNTRACKED bit

NOTRACK makes all cpus share a cache line on nf_conntrack_untracked
twice per packet. This is bad for performance.
__read_mostly annotation is also a bad choice.

This patch introduces the IPS_UNTRACKED bit so that we can later use a
per_cpu untracked structure more easily.

A new helper, nf_ct_untracked_get() returns a pointer to
nf_conntrack_untracked.

Another one, nf_ct_untracked_status_or() is used by nf_nat_init() to add
IPS_NAT_DONE_MASK bits to untracked status.

nf_ct_is_untracked() prototype is changed to work on a nf_conn pointer.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_common.h  |  4 ++++
 include/net/netfilter/nf_conntrack.h           | 12 +++++++++---
 include/net/netfilter/nf_conntrack_core.h      |  2 +-
 net/ipv4/netfilter/nf_nat_core.c               |  2 +-
 net/ipv4/netfilter/nf_nat_standalone.c         |  2 +-
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  2 +-
 net/netfilter/nf_conntrack_core.c              | 11 ++++++++---
 net/netfilter/nf_conntrack_netlink.c           |  2 +-
 net/netfilter/xt_CT.c                          |  4 ++--
 net/netfilter/xt_NOTRACK.c                     |  2 +-
 net/netfilter/xt_TEE.c                         |  4 ++--
 net/netfilter/xt_cluster.c                     |  2 +-
 net/netfilter/xt_conntrack.c                   | 11 ++++++-----
 net/netfilter/xt_socket.c                      |  2 +-
 net/netfilter/xt_state.c                       | 14 ++++++++------
 15 files changed, 47 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 14e6d32002c..1afd18c855e 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -76,6 +76,10 @@ enum ip_conntrack_status {
 	/* Conntrack is a template */
 	IPS_TEMPLATE_BIT = 11,
 	IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT),
+
+	/* Conntrack is a fake untracked entry */
+	IPS_UNTRACKED_BIT = 12,
+	IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
 };
 
 /* Connection tracking event types */
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index bde095f7e84..3bc38c70bbb 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -261,7 +261,13 @@ extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
 			       u32 seq);
 
 /* Fake conntrack entry for untracked connections */
-extern struct nf_conn nf_conntrack_untracked;
+static inline struct nf_conn *nf_ct_untracked_get(void)
+{
+	extern struct nf_conn nf_conntrack_untracked;
+
+	return &nf_conntrack_untracked;
+}
+extern void nf_ct_untracked_status_or(unsigned long bits);
 
 /* Iterate over all conntracks: if iter returns true, it's deleted. */
 extern void
@@ -289,9 +295,9 @@ static inline int nf_ct_is_dying(struct nf_conn *ct)
 	return test_bit(IPS_DYING_BIT, &ct->status);
 }
 
-static inline int nf_ct_is_untracked(const struct sk_buff *skb)
+static inline int nf_ct_is_untracked(const struct nf_conn *ct)
 {
-	return (skb->nfct == &nf_conntrack_untracked.ct_general);
+	return test_bit(IPS_UNTRACKED_BIT, &ct->status);
 }
 
 extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 3d7524fba19..aced085132e 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -60,7 +60,7 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
 	struct nf_conn *ct = (struct nf_conn *)skb->nfct;
 	int ret = NF_ACCEPT;
 
-	if (ct && ct != &nf_conntrack_untracked) {
+	if (ct && !nf_ct_is_untracked(ct)) {
 		if (!nf_ct_is_confirmed(ct))
 			ret = __nf_conntrack_confirm(skb);
 		if (likely(ret == NF_ACCEPT))
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 4f8bddb760c..c7719b283ad 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -742,7 +742,7 @@ static int __init nf_nat_init(void)
 	spin_unlock_bh(&nf_nat_lock);
 
 	/* Initialize fake conntrack so that NAT will skip it */
-	nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
+	nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
 
 	l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
 
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index beb25819c9c..6723c682250 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -98,7 +98,7 @@ nf_nat_fn(unsigned int hooknum,
 		return NF_ACCEPT;
 
 	/* Don't try to NAT if this packet is not conntracked */
-	if (ct == &nf_conntrack_untracked)
+	if (nf_ct_is_untracked(ct))
 		return NF_ACCEPT;
 
 	nat = nfct_nat(ct);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 9be81776415..1df3c8b6bf4 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -208,7 +208,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
 	type = icmp6h->icmp6_type - 130;
 	if (type >= 0 && type < sizeof(noct_valid_new) &&
 	    noct_valid_new[type]) {
-		skb->nfct = &nf_conntrack_untracked.ct_general;
+		skb->nfct = &nf_ct_untracked_get()->ct_general;
 		skb->nfctinfo = IP_CT_NEW;
 		nf_conntrack_get(skb->nfct);
 		return NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index eeeb8bc7398..6c1da212380 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 unsigned int nf_conntrack_max __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_max);
 
-struct nf_conn nf_conntrack_untracked __read_mostly;
+struct nf_conn nf_conntrack_untracked;
 EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
 
 static int nf_conntrack_hash_rnd_initted;
@@ -1321,6 +1321,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
 		  &nf_conntrack_htable_size, 0600);
 
+void nf_ct_untracked_status_or(unsigned long bits)
+{
+	nf_conntrack_untracked.status |= bits;
+}
+EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
+
 static int nf_conntrack_init_init_net(void)
 {
 	int max_factor = 8;
@@ -1368,8 +1374,7 @@ static int nf_conntrack_init_init_net(void)
 #endif
 	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
 	/*  - and look it like as a confirmed connection */
-	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
-
+	nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
 	return 0;
 
 #ifdef CONFIG_NF_CONNTRACK_ZONES
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c42ff6aa441..5bae1cd15ee 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -480,7 +480,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	int err;
 
 	/* ignore our fake conntrack entry */
-	if (ct == &nf_conntrack_untracked)
+	if (nf_ct_is_untracked(ct))
 		return 0;
 
 	if (events & (1 << IPCT_DESTROY)) {
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 562bf3266e0..0cb6053f02f 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -67,7 +67,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 		return -EINVAL;
 
 	if (info->flags & XT_CT_NOTRACK) {
-		ct = &nf_conntrack_untracked;
+		ct = nf_ct_untracked_get();
 		atomic_inc(&ct->ct_general.use);
 		goto out;
 	}
@@ -132,7 +132,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
 	struct nf_conn *ct = info->ct;
 	struct nf_conn_help *help;
 
-	if (ct != &nf_conntrack_untracked) {
+	if (!nf_ct_is_untracked(ct)) {
 		help = nfct_help(ct);
 		if (help)
 			module_put(help->helper->me);
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index 512b9123252..9d782181b6c 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -23,7 +23,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	   If there is a real ct entry correspondig to this packet,
 	   it'll hang aroun till timing out. We don't deal with it
 	   for performance reasons. JK */
-	skb->nfct = &nf_conntrack_untracked.ct_general;
+	skb->nfct = &nf_ct_untracked_get()->ct_general;
 	skb->nfctinfo = IP_CT_NEW;
 	nf_conntrack_get(skb->nfct);
 
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 859d9fd429c..7a118267c4c 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -104,7 +104,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 #ifdef WITH_CONNTRACK
 	/* Avoid counting cloned packets towards the original connection. */
 	nf_conntrack_put(skb->nfct);
-	skb->nfct     = &nf_conntrack_untracked.ct_general;
+	skb->nfct     = &nf_ct_untracked_get()->ct_general;
 	skb->nfctinfo = IP_CT_NEW;
 	nf_conntrack_get(skb->nfct);
 #endif
@@ -177,7 +177,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 
 #ifdef WITH_CONNTRACK
 	nf_conntrack_put(skb->nfct);
-	skb->nfct     = &nf_conntrack_untracked.ct_general;
+	skb->nfct     = &nf_ct_untracked_get()->ct_general;
 	skb->nfctinfo = IP_CT_NEW;
 	nf_conntrack_get(skb->nfct);
 #endif
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 30b95a1c1c8..f4af1bfafb1 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -120,7 +120,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	if (ct == NULL)
 		return false;
 
-	if (ct == &nf_conntrack_untracked)
+	if (nf_ct_is_untracked(ct))
 		return false;
 
 	if (ct->master)
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 39681f10291..e536710ad91 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -123,11 +123,12 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
 
 	ct = nf_ct_get(skb, &ctinfo);
 
-	if (ct == &nf_conntrack_untracked)
-		statebit = XT_CONNTRACK_STATE_UNTRACKED;
-	else if (ct != NULL)
-		statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
-	else
+	if (ct) {
+		if (nf_ct_is_untracked(ct))
+			statebit = XT_CONNTRACK_STATE_UNTRACKED;
+		else
+			statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
+	} else
 		statebit = XT_CONNTRACK_STATE_INVALID;
 
 	if (info->match_flags & XT_CONNTRACK_STATE) {
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 3d54c236a1b..1ca89908cba 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -127,7 +127,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 	 * reply packet of an established SNAT-ted connection. */
 
 	ct = nf_ct_get(skb, &ctinfo);
-	if (ct && (ct != &nf_conntrack_untracked) &&
+	if (ct && !nf_ct_is_untracked(ct) &&
 	    ((iph->protocol != IPPROTO_ICMP &&
 	      ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) ||
 	     (iph->protocol == IPPROTO_ICMP &&
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index e12e053d378..a507922d80c 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -26,14 +26,16 @@ state_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct xt_state_info *sinfo = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
 	unsigned int statebit;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 
-	if (nf_ct_is_untracked(skb))
-		statebit = XT_STATE_UNTRACKED;
-	else if (!nf_ct_get(skb, &ctinfo))
+	if (!ct)
 		statebit = XT_STATE_INVALID;
-	else
-		statebit = XT_STATE_BIT(ctinfo);
-
+	else {
+		if (nf_ct_is_untracked(ct))
+			statebit = XT_STATE_UNTRACKED;
+		else
+			statebit = XT_STATE_BIT(ctinfo);
+	}
 	return (sinfo->statemask & statebit);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 50a323b73069b169385a8ac65633dee837a7d13f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 8 Jun 2010 21:40:36 +0200
Subject: sched: define and use CPU_PRI_* enums for cpu notifier priorities

Instead of hardcoding priority 10 and 20 in sched and perf, collect
them into CPU_PRI_* enums.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/cpu.h        | 9 +++++++++
 include/linux/perf_event.h | 2 +-
 kernel/sched.c             | 2 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index e287863ac05..2d9073883ea 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -48,6 +48,15 @@ extern ssize_t arch_cpu_release(const char *, size_t);
 #endif
 struct notifier_block;
 
+/*
+ * CPU notifier priorities.
+ */
+enum {
+	/* migration should happen before other stuff but after perf */
+	CPU_PRI_PERF		= 20,
+	CPU_PRI_MIGRATION	= 10,
+};
+
 #ifdef CONFIG_SMP
 /* Need to know about CPUs going up/down? */
 #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5d0266d9498..469e03e96fe 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1068,7 +1068,7 @@ static inline void perf_event_disable(struct perf_event *event)		{ }
 #define perf_cpu_notifier(fn)					\
 do {								\
 	static struct notifier_block fn##_nb __cpuinitdata =	\
-		{ .notifier_call = fn, .priority = 20 };	\
+		{ .notifier_call = fn, .priority = CPU_PRI_PERF }; \
 	fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,		\
 		(void *)(unsigned long)smp_processor_id());	\
 	fn(&fn##_nb, (unsigned long)CPU_STARTING,		\
diff --git a/kernel/sched.c b/kernel/sched.c
index f8b8996228d..552faf8d358 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5801,7 +5801,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
  */
 static struct notifier_block __cpuinitdata migration_notifier = {
 	.notifier_call = migration_call,
-	.priority = 10
+	.priority = CPU_PRI_MIGRATION,
 };
 
 static int __init migration_init(void)
-- 
cgit v1.2.3-70-g09d2


From 3a101d0548e925ab16ca6aaa8cf4f767d322ddb0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 8 Jun 2010 21:40:36 +0200
Subject: sched: adjust when cpu_active and cpuset configurations are updated
 during cpu on/offlining

Currently, when a cpu goes down, cpu_active is cleared before
CPU_DOWN_PREPARE starts and cpuset configuration is updated from a
default priority cpu notifier.  When a cpu is coming up, cpu_active is
set before CPU_ONLINE but cpuset configuration is again updated from
the same cpu notifier.

For cpu notifiers, this presents an inconsistent state.  Threads which
a CPU_DOWN_PREPARE notifier expects to be bound to the CPU can be
migrated to other cpus because the cpu is no longer active.

Fix it by updating cpu_active in the highest priority cpu notifier and
cpuset configuration in the second highest when a cpu is coming up.
Down path is updated similarly.  This guarantees that all other cpu
notifiers see consistent cpu_active and cpuset configuration.

The cpuset_track_online_cpus() notifier is converted to
cpuset_update_active_cpus(), which just updates the configuration and
is now called from the cpuset_cpu_[in]active() notifiers registered from
sched_init_smp().  If cpuset is disabled, cpuset_update_active_cpus()
degenerates into partition_sched_domains(), making a separate notifier
for !CONFIG_CPUSETS unnecessary.

This problem is triggered by cmwq.  During CPU_DOWN_PREPARE, a hotplug
callback creates a kthread and kthread_bind()s it to the target cpu,
and the thread is expected to run on that cpu.

* Ingo's test discovered __cpuinit/exit markups were incorrect.
  Fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Menage <menage@google.com>
---
 include/linux/cpu.h    | 16 ++++++++++++
 include/linux/cpuset.h |  6 +++++
 kernel/cpu.c           |  6 -----
 kernel/cpuset.c        | 21 ++--------------
 kernel/sched.c         | 67 +++++++++++++++++++++++++++++++++++++-------------
 5 files changed, 74 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 2d9073883ea..de6b1722cdc 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -52,6 +52,22 @@ struct notifier_block;
  * CPU notifier priorities.
  */
 enum {
+	/*
+	 * SCHED_ACTIVE marks a cpu which is coming up active during
+	 * CPU_ONLINE and CPU_DOWN_FAILED and must be the first
+	 * notifier.  CPUSET_ACTIVE adjusts cpuset according to
+	 * cpu_active mask right after SCHED_ACTIVE.  During
+	 * CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
+	 * ordered in the similar way.
+	 *
+	 * This ordering guarantees consistent cpu_active mask and
+	 * migration behavior to all cpu notifiers.
+	 */
+	CPU_PRI_SCHED_ACTIVE	= INT_MAX,
+	CPU_PRI_CPUSET_ACTIVE	= INT_MAX - 1,
+	CPU_PRI_SCHED_INACTIVE	= INT_MIN + 1,
+	CPU_PRI_CPUSET_INACTIVE	= INT_MIN,
+
 	/* migration should happen before other stuff but after perf */
 	CPU_PRI_PERF		= 20,
 	CPU_PRI_MIGRATION	= 10,
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 457ed765a11..f20eb8f1602 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -20,6 +20,7 @@ extern int number_of_cpusets;	/* How many cpusets are defined in system? */
 
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
+extern void cpuset_update_active_cpus(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
 extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
@@ -132,6 +133,11 @@ static inline void set_mems_allowed(nodemask_t nodemask)
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
 
+static inline void cpuset_update_active_cpus(void)
+{
+	partition_sched_domains(1, NULL, NULL);
+}
+
 static inline void cpuset_cpus_allowed(struct task_struct *p,
 				       struct cpumask *mask)
 {
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 97d1b426a4a..f6e726f1849 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -235,11 +235,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 		return -EINVAL;
 
 	cpu_hotplug_begin();
-	set_cpu_active(cpu, false);
 	err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
 	if (err) {
-		set_cpu_active(cpu, true);
-
 		nr_calls--;
 		__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
 		printk("%s: attempt to take down CPU %u failed\n",
@@ -249,7 +246,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 
 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 	if (err) {
-		set_cpu_active(cpu, true);
 		/* CPU didn't die: tell everyone.  Can't complain. */
 		cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
 
@@ -321,8 +317,6 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
 		goto out_notify;
 	BUG_ON(!cpu_online(cpu));
 
-	set_cpu_active(cpu, true);
-
 	/* Now call notifier in preparation. */
 	cpu_notify(CPU_ONLINE | mod, hcpu);
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 02b9611eadd..05727dcaa80 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2113,31 +2113,17 @@ static void scan_for_empty_cpusets(struct cpuset *root)
  * but making no active use of cpusets.
  *
  * This routine ensures that top_cpuset.cpus_allowed tracks
- * cpu_online_map on each CPU hotplug (cpuhp) event.
+ * cpu_active_mask on each CPU hotplug (cpuhp) event.
  *
  * Called within get_online_cpus().  Needs to call cgroup_lock()
  * before calling generate_sched_domains().
  */
-static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
-				unsigned long phase, void *unused_cpu)
+void __cpuexit cpuset_update_active_cpus(void)
 {
 	struct sched_domain_attr *attr;
 	cpumask_var_t *doms;
 	int ndoms;
 
-	switch (phase) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
-		break;
-
-	default:
-		return NOTIFY_DONE;
-	}
-
 	cgroup_lock();
 	mutex_lock(&callback_mutex);
 	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
@@ -2148,8 +2134,6 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 
 	/* Have scheduler rebuild the domains */
 	partition_sched_domains(ndoms, doms, attr);
-
-	return NOTIFY_OK;
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -2203,7 +2187,6 @@ void __init cpuset_init_smp(void)
 	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
-	hotcpu_notifier(cpuset_track_online_cpus, 0);
 	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
 
 	cpuset_wq = create_singlethread_workqueue("cpuset");
diff --git a/kernel/sched.c b/kernel/sched.c
index 552faf8d358..2b942e49d0f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5804,17 +5804,46 @@ static struct notifier_block __cpuinitdata migration_notifier = {
 	.priority = CPU_PRI_MIGRATION,
 };
 
+static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
+				      unsigned long action, void *hcpu)
+{
+	switch (action & ~CPU_TASKS_FROZEN) {	/* match the *_FROZEN variants too */
+	case CPU_ONLINE:
+	case CPU_DOWN_FAILED:	/* offline attempt failed, CPU remains up */
+		set_cpu_active((long)hcpu, true);	/* hcpu carries the cpu number */
+		return NOTIFY_OK;
+	default:	/* not an event this notifier acts on */
+		return NOTIFY_DONE;
+	}
+}
+
+static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	switch (action & ~CPU_TASKS_FROZEN) {	/* match the *_FROZEN variant too */
+	case CPU_DOWN_PREPARE:	/* CPU is about to be taken down */
+		set_cpu_active((long)hcpu, false);	/* hcpu carries the cpu number */
+		return NOTIFY_OK;
+	default:	/* not an event this notifier acts on */
+		return NOTIFY_DONE;
+	}
+}
+
 static int __init migration_init(void)
 {
 	void *cpu = (void *)(long)smp_processor_id();
 	int err;
 
-	/* Start one for the boot CPU: */
+	/* Initialize migration for the boot CPU */
 	err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
 	BUG_ON(err == NOTIFY_BAD);
 	migration_call(&migration_notifier, CPU_ONLINE, cpu);
 	register_cpu_notifier(&migration_notifier);
 
+	/* Register cpu active notifiers */
+	cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
+	cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
+
 	return 0;
 }
 early_initcall(migration_init);
@@ -7273,29 +7302,35 @@ int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
 }
 #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 
-#ifndef CONFIG_CPUSETS
 /*
- * Add online and remove offline CPUs from the scheduler domains.
- * When cpusets are enabled they take over this function.
+ * Update cpusets according to cpu_active mask.  If cpusets are
+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper
+ * around partition_sched_domains().
  */
-static int update_sched_domains(struct notifier_block *nfb,
-				unsigned long action, void *hcpu)
+static int __cpuexit cpuset_cpu_active(struct notifier_block *nfb,
+				       unsigned long action, void *hcpu)
 {
-	switch (action) {
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
 	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
-		partition_sched_domains(1, NULL, NULL);
+		cpuset_update_active_cpus();
 		return NOTIFY_OK;
+	default:
+		return NOTIFY_DONE;
+	}
+}
 
+static int __cpuexit cpuset_cpu_inactive(struct notifier_block *nfb,
+					 unsigned long action, void *hcpu)
+{
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DOWN_PREPARE:
+		cpuset_update_active_cpus();
+		return NOTIFY_OK;
 	default:
 		return NOTIFY_DONE;
 	}
 }
-#endif
 
 static int update_runtime(struct notifier_block *nfb,
 				unsigned long action, void *hcpu)
@@ -7341,10 +7376,8 @@ void __init sched_init_smp(void)
 	mutex_unlock(&sched_domains_mutex);
 	put_online_cpus();
 
-#ifndef CONFIG_CPUSETS
-	/* XXX: Theoretical race here - CPU may be hotplugged now */
-	hotcpu_notifier(update_sched_domains, 0);
-#endif
+	hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
+	hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
 
 	/* RT runtime code needs to handle some hotplug events */
 	hotcpu_notifier(update_runtime, 0);
-- 
cgit v1.2.3-70-g09d2


From 21aa9af03d06cb1d19a3738e5cf12acff984e69b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 8 Jun 2010 21:40:37 +0200
Subject: sched: add hooks for workqueue

Concurrency managed workqueue (cmwq) needs to know when workers go to
sleep and when they wake up.  Using these two hooks, cmwq keeps track
of the current concurrency level: it throttles execution of new works
if the level is too high, and wakes up another worker from the sleep
hook if it becomes too low.

This patch introduces PF_WQ_WORKER to identify workqueue workers and
adds the following two hooks.

* wq_worker_waking_up(): called when a worker is woken up.

* wq_worker_sleeping(): called when a worker is going to sleep and may
  return a pointer to a local task which should be woken up.  The
  returned task is woken up using try_to_wake_up_local(), a simplified
  try_to_wake_up() that is called under the rq lock and can only wake
  up local tasks.

Both hooks are currently defined as no-ops in kernel/workqueue_sched.h.
A later cmwq patch will replace them with proper
implementations.

These hooks are hard coded as they'll always be enabled.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h    |  1 +
 kernel/fork.c            |  2 +-
 kernel/sched.c           | 53 ++++++++++++++++++++++++++++++++++++++++++++++--
 kernel/workqueue_sched.h | 16 +++++++++++++++
 4 files changed, 69 insertions(+), 3 deletions(-)
 create mode 100644 kernel/workqueue_sched.h

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f118809c953..edc3dd168d8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1696,6 +1696,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #define PF_EXITING	0x00000004	/* getting shut down */
 #define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
 #define PF_VCPU		0x00000010	/* I'm a virtual CPU */
+#define PF_WQ_WORKER	0x00000020	/* I'm a workqueue worker */
 #define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
 #define PF_MCE_PROCESS  0x00000080      /* process policy on mce errors */
 #define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
diff --git a/kernel/fork.c b/kernel/fork.c
index b6cce14ba04..a82a65cef74 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -907,7 +907,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
 	unsigned long new_flags = p->flags;
 
-	new_flags &= ~PF_SUPERPRIV;
+	new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
 	new_flags |= PF_FORKNOEXEC;
 	new_flags |= PF_STARTING;
 	p->flags = new_flags;
diff --git a/kernel/sched.c b/kernel/sched.c
index 96eafd5f345..edd5a54b95d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -77,6 +77,7 @@
 #include <asm/irq_regs.h>
 
 #include "sched_cpupri.h"
+#include "workqueue_sched.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
@@ -2306,6 +2307,9 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
 		rq->idle_stamp = 0;
 	}
 #endif
+	/* if a worker is waking up, notify workqueue */
+	if ((p->flags & PF_WQ_WORKER) && success)
+		wq_worker_waking_up(p, cpu_of(rq));
 }
 
 /**
@@ -2413,6 +2417,37 @@ out:
 	return success;
 }
 
+/**
+ * try_to_wake_up_local - try to wake up a local task with rq lock held
+ * @p: the thread to be awakened
+ *
+ * Put @p on the run-queue if it's not already there.  The caller must
+ * ensure that this_rq() is locked, @p is bound to this_rq() and not
+ * the current task.  this_rq() stays locked over invocation.
+ */
+static void try_to_wake_up_local(struct task_struct *p)
+{
+	struct rq *rq = task_rq(p);
+	bool success = false;
+
+	BUG_ON(rq != this_rq());	/* @p must sit on the local runqueue */
+	BUG_ON(p == current);	/* the running task cannot wake itself here */
+	lockdep_assert_held(&rq->lock);
+
+	if (!(p->state & TASK_NORMAL))	/* no TASK_NORMAL bit set: leave @p alone */
+		return;
+
+	if (!p->se.on_rq) {	/* not queued yet: activate it */
+		if (likely(!task_running(rq, p))) {
+			schedstat_inc(rq, ttwu_count);
+			schedstat_inc(rq, ttwu_local);
+		}
+		ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP);
+		success = true;
+	}
+	ttwu_post_activation(p, rq, 0, success);	/* runs even if @p was already queued */
+}
+
 /**
  * wake_up_process - Wake up a specific process
  * @p: The process to be woken up.
@@ -3618,10 +3653,24 @@ need_resched_nonpreemptible:
 	clear_tsk_need_resched(prev);
 
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
-		if (unlikely(signal_pending_state(prev->state, prev)))
+		if (unlikely(signal_pending_state(prev->state, prev))) {
 			prev->state = TASK_RUNNING;
-		else
+		} else {
+			/*
+			 * If a worker is going to sleep, notify and
+			 * ask workqueue whether it wants to wake up a
+			 * task to maintain concurrency.  If so, wake
+			 * up the task.
+			 */
+			if (prev->flags & PF_WQ_WORKER) {
+				struct task_struct *to_wakeup;
+
+				to_wakeup = wq_worker_sleeping(prev, cpu);
+				if (to_wakeup)
+					try_to_wake_up_local(to_wakeup);
+			}
 			deactivate_task(rq, prev, DEQUEUE_SLEEP);
+		}
 		switch_count = &prev->nvcsw;
 	}
 
diff --git a/kernel/workqueue_sched.h b/kernel/workqueue_sched.h
new file mode 100644
index 00000000000..af040babb74
--- /dev/null
+++ b/kernel/workqueue_sched.h
@@ -0,0 +1,16 @@
+/*
+ * kernel/workqueue_sched.h - scheduler hooks for concurrency managed
+ * workqueue.  wq_worker_waking_up() is the wakeup hook; wq_worker_sleeping()
+ * is the sleep hook and may return a task to wake.  Both are stubs here.
+ * Only to be included from sched.c and workqueue.c.
+ */
+static inline void wq_worker_waking_up(struct task_struct *task,
+				       unsigned int cpu)
+{
+}
+
+static inline struct task_struct *wq_worker_sleeping(struct task_struct *task,
+						     unsigned int cpu)
+{
+	return NULL;	/* stub: never asks for another task to be woken */
+}
-- 
cgit v1.2.3-70-g09d2


From b0f82b81fe6bbcf78d478071f33e44554726bc81 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 20 May 2010 07:47:21 +0200
Subject: perf: Drop the skip argument from perf_arch_fetch_regs_caller

Drop this argument now that we always want to rewind only to the
state of the first caller.
It means frame pointers are not necessary anymore to reliably get
the source of an event. But this also means we need this helper
to be a macro now, as an inline function is not an option since
we need to know when to provide a default implementation.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: David Miller <davem@davemloft.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/powerpc/include/asm/perf_event.h | 12 ++++++++++++
 arch/powerpc/kernel/misc.S            | 26 --------------------------
 arch/sparc/include/asm/perf_event.h   |  8 ++++++++
 arch/sparc/kernel/helpers.S           |  6 +++---
 arch/x86/include/asm/perf_event.h     | 13 +++++++++++++
 arch/x86/include/asm/stacktrace.h     |  7 ++-----
 arch/x86/kernel/cpu/perf_event.c      | 16 ----------------
 include/linux/perf_event.h            | 32 +++++++-------------------------
 include/trace/ftrace.h                |  2 +-
 kernel/perf_event.c                   |  5 -----
 kernel/trace/trace_event_perf.c       |  2 --
 11 files changed, 46 insertions(+), 83 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index e6d4ce69b12..5c16b891d50 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -21,3 +21,15 @@
 #ifdef CONFIG_FSL_EMB_PERF_EVENT
 #include <asm/perf_event_fsl_emb.h>
 #endif
+
+#ifdef CONFIG_PERF_EVENTS
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+/* Fill in nip from @__ip, gpr[1] from the word at the current SP, and msr. */
+#define perf_arch_fetch_caller_regs(regs, __ip)			\
+	do {							\
+		(regs)->nip = (__ip);				\
+		(regs)->gpr[1] = *(unsigned long *)__get_SP();	\
+		asm volatile("mfmsr %0" : "=r" ((regs)->msr));	\
+	} while (0)
+#endif
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 22e507c8a55..2d29752cbe1 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -127,29 +127,3 @@ _GLOBAL(__setup_cpu_power7)
 _GLOBAL(__restore_cpu_power7)
 	/* place holder */
 	blr
-
-/*
- * Get a minimal set of registers for our caller's nth caller.
- * r3 = regs pointer, r5 = n.
- *
- * We only get R1 (stack pointer), NIP (next instruction pointer)
- * and LR (link register).  These are all we can get in the
- * general case without doing complicated stack unwinding, but
- * fortunately they are enough to do a stack backtrace, which
- * is all we need them for.
- */
-_GLOBAL(perf_arch_fetch_caller_regs)
-	mr	r6,r1
-	cmpwi	r5,0
-	mflr	r4
-	ble	2f
-	mtctr	r5
-1:	PPC_LL	r6,0(r6)
-	bdnz	1b
-	PPC_LL	r4,PPC_LR_STKOFF(r6)
-2:	PPC_LL	r7,0(r6)
-	PPC_LL	r7,PPC_LR_STKOFF(r7)
-	PPC_STL	r6,GPR1-STACK_FRAME_OVERHEAD(r3)
-	PPC_STL	r4,_NIP-STACK_FRAME_OVERHEAD(r3)
-	PPC_STL	r7,_LINK-STACK_FRAME_OVERHEAD(r3)
-	blr
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
index 7e2669894ce..74c4e0cd889 100644
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -6,7 +6,15 @@ extern void set_perf_event_pending(void);
 #define	PERF_EVENT_INDEX_OFFSET	0
 
 #ifdef CONFIG_PERF_EVENTS
+#include <asm/ptrace.h>
+
 extern void init_hw_perf_events(void);
+
+extern void
+__perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
+/* Only the first caller's state is ever wanted, so skip is always 1. */
+#define perf_arch_fetch_caller_regs(pt_regs, ip)	\
+	__perf_arch_fetch_caller_regs(pt_regs, ip, 1)
 #else
 static inline void init_hw_perf_events(void)	{ }
 #endif
diff --git a/arch/sparc/kernel/helpers.S b/arch/sparc/kernel/helpers.S
index 92090cc9e82..682fee06a16 100644
--- a/arch/sparc/kernel/helpers.S
+++ b/arch/sparc/kernel/helpers.S
@@ -47,9 +47,9 @@ stack_trace_flush:
 	.size		stack_trace_flush,.-stack_trace_flush
 
 #ifdef CONFIG_PERF_EVENTS
-	.globl		perf_arch_fetch_caller_regs
-	.type		perf_arch_fetch_caller_regs,#function
-perf_arch_fetch_caller_regs:
+	.globl		__perf_arch_fetch_caller_regs
+	.type		__perf_arch_fetch_caller_regs,#function
+__perf_arch_fetch_caller_regs:
 	/* We always read the %pstate into %o5 since we will use
 	 * that to construct a fake %tstate to store into the regs.
 	 */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 254883d0c7e..02de29830ff 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -140,6 +140,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs)	perf_misc_flags(regs)
 
+#include <asm/stacktrace.h>
+
+/*
+ * Snapshot the caller's ip/bp/cs.  flags is zeroed because bit 3 is abused
+ * to pass exact information, see perf_misc_flags and PERF_EFLAGS_EXACT.
+ */
+#define perf_arch_fetch_caller_regs(regs, __ip)		do {	\
+	(regs)->ip = (__ip);					\
+	(regs)->bp = caller_frame_pointer();			\
+	(regs)->cs = __KERNEL_CS;				\
+	(regs)->flags = 0;					\
+} while (0)
+
 #else
 static inline void init_hw_perf_events(void)		{ }
 static inline void perf_events_lapic_init(void)	{ }
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index a957463d3c7..2b16a2ad23d 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -78,17 +78,14 @@ struct stack_frame_ia32 {
     u32 return_address;
 };
 
-static inline unsigned long rewind_frame_pointer(int n)
+static inline unsigned long caller_frame_pointer(void)
 {
 	struct stack_frame *frame;
 
 	get_bp(frame);
 
 #ifdef CONFIG_FRAME_POINTER
-	while (n--) {
-		if (probe_kernel_address(&frame->next_frame, frame))
-			break;
-	}
+	frame = frame->next_frame;
 #endif
 
 	return (unsigned long)frame;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9632fb61e8f..2c075fe573d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1706,22 +1706,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 	return entry;
 }
 
-void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
-{
-	regs->ip = ip;
-	/*
-	 * perf_arch_fetch_caller_regs adds another call, we need to increment
-	 * the skip level
-	 */
-	regs->bp = rewind_frame_pointer(skip + 1);
-	regs->cs = __KERNEL_CS;
-	/*
-	 * We abuse bit 3 to pass exact information, see perf_misc_flags
-	 * and the comment with PERF_EFLAGS_EXACT.
-	 */
-	regs->flags = 0;
-}
-
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
 	unsigned long ip;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index fb6c91eac7e..bea785cef49 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -905,8 +905,10 @@ extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
 
-extern void
-perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
+#ifndef perf_arch_fetch_caller_regs	/* an arch may provide this as a macro */
+static inline void
+perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
+#endif /* !perf_arch_fetch_caller_regs */
 
 /*
  * Take a snapshot of the regs. Skip ip and frame pointer to
@@ -916,31 +918,11 @@ perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
  * - bp for callchains
  * - eflags, for future purposes, just in case
  */
-static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip)
+static inline void perf_fetch_caller_regs(struct pt_regs *regs)
 {
-	unsigned long ip;
-
 	memset(regs, 0, sizeof(*regs));
 
-	switch (skip) {
-	case 1 :
-		ip = CALLER_ADDR0;
-		break;
-	case 2 :
-		ip = CALLER_ADDR1;
-		break;
-	case 3 :
-		ip = CALLER_ADDR2;
-		break;
-	case 4:
-		ip = CALLER_ADDR3;
-		break;
-	/* No need to support further for now */
-	default:
-		ip = 0;
-	}
-
-	return perf_arch_fetch_caller_regs(regs, ip, skip);
+	perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
 }
 
 static inline void
@@ -950,7 +932,7 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 		struct pt_regs hot_regs;
 
 		if (!regs) {
-			perf_fetch_caller_regs(&hot_regs, 1);
+			perf_fetch_caller_regs(&hot_regs);
 			regs = &hot_regs;
 		}
 		__perf_sw_event(event_id, nr, nmi, regs, addr);
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 3d685d1f2a0..8ee8b6e6b25 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -705,7 +705,7 @@ perf_trace_##call(void *__data, proto)					\
 	int __data_size;						\
 	int rctx;							\
 									\
-	perf_fetch_caller_regs(&__regs, 1);				\
+	perf_fetch_caller_regs(&__regs);				\
 									\
 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
 	__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index e099650cd24..9ae4dbcdf46 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2851,11 +2851,6 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 	return NULL;
 }
 
-__weak
-void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
-{
-}
-
 
 /*
  * We assume there is only KVM supporting the callbacks.
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index cb6f365016e..21db1d3a48d 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,8 +9,6 @@
 #include <linux/kprobes.h>
 #include "trace.h"
 
-EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
-
 static char *perf_trace_buf[4];
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 30dbb20e68e6f7df974b77d2350ebad5eb6f6c9e Mon Sep 17 00:00:00 2001
From: Américo Wang <xiyou.wangcong@gmail.com>
Date: Wed, 26 May 2010 18:57:53 +0800
Subject: tracing: Remove boot tracer

The boot tracer is useless. It simply logs the initcalls
but in fact these initcalls are also logged through printk
while using the initcall_debug kernel parameter.

Nobody seems to be using it so far, so just remove it.

Signed-off-by: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Chase Douglas <chase.douglas@canonical.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <20100526105753.GA5677@cr0.nay.redhat.com>
[ remove the hooks in main.c, and the headers ]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/trace/boot.h         |  60 --------------
 init/main.c                  |  27 +++----
 kernel/trace/Kconfig         |  17 ----
 kernel/trace/Makefile        |   1 -
 kernel/trace/trace.c         |   3 -
 kernel/trace/trace.h         |   8 --
 kernel/trace/trace_boot.c    | 185 -------------------------------------------
 kernel/trace/trace_entries.h |  27 -------
 8 files changed, 10 insertions(+), 318 deletions(-)
 delete mode 100644 include/trace/boot.h
 delete mode 100644 kernel/trace/trace_boot.c

(limited to 'include')

diff --git a/include/trace/boot.h b/include/trace/boot.h
deleted file mode 100644
index 088ea089e31..00000000000
--- a/include/trace/boot.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef _LINUX_TRACE_BOOT_H
-#define _LINUX_TRACE_BOOT_H
-
-#include <linux/module.h>
-#include <linux/kallsyms.h>
-#include <linux/init.h>
-
-/*
- * Structure which defines the trace of an initcall
- * while it is called.
- * You don't have to fill the func field since it is
- * only used internally by the tracer.
- */
-struct boot_trace_call {
-	pid_t			caller;
-	char			func[KSYM_SYMBOL_LEN];
-};
-
-/*
- * Structure which defines the trace of an initcall
- * while it returns.
- */
-struct boot_trace_ret {
-	char			func[KSYM_SYMBOL_LEN];
-	int				result;
-	unsigned long long	duration;		/* nsecs */
-};
-
-#ifdef CONFIG_BOOT_TRACER
-/* Append the traces on the ring-buffer */
-extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn);
-extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn);
-
-/* Tells the tracer that smp_pre_initcall is finished.
- * So we can start the tracing
- */
-extern void start_boot_trace(void);
-
-/* Resume the tracing of other necessary events
- * such as sched switches
- */
-extern void enable_boot_trace(void);
-
-/* Suspend this tracing. Actually, only sched_switches tracing have
- * to be suspended. Initcalls doesn't need it.)
- */
-extern void disable_boot_trace(void);
-#else
-static inline
-void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { }
-
-static inline
-void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { }
-
-static inline void start_boot_trace(void) { }
-static inline void enable_boot_trace(void) { }
-static inline void disable_boot_trace(void) { }
-#endif /* CONFIG_BOOT_TRACER */
-
-#endif /* __LINUX_TRACE_BOOT_H */
diff --git a/init/main.c b/init/main.c
index 3bdb152f412..94f65efdc65 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,7 +70,6 @@
 #include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
-#include <trace/boot.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -715,38 +714,33 @@ int initcall_debug;
 core_param(initcall_debug, initcall_debug, bool, 0644);
 
 static char msgbuf[64];
-static struct boot_trace_call call;
-static struct boot_trace_ret ret;
 
 int do_one_initcall(initcall_t fn)
 {
 	int count = preempt_count();
 	ktime_t calltime, delta, rettime;
+	unsigned long long duration;
+	int ret;
 
 	if (initcall_debug) {
-		call.caller = task_pid_nr(current);
-		printk("calling  %pF @ %i\n", fn, call.caller);
+		printk("calling  %pF @ %i\n", fn, task_pid_nr(current));
 		calltime = ktime_get();
-		trace_boot_call(&call, fn);
-		enable_boot_trace();
 	}
 
-	ret.result = fn();
+	ret = fn();
 
 	if (initcall_debug) {
-		disable_boot_trace();
 		rettime = ktime_get();
 		delta = ktime_sub(rettime, calltime);
-		ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10;
-		trace_boot_ret(&ret, fn);
-		printk("initcall %pF returned %d after %Ld usecs\n", fn,
-			ret.result, ret.duration);
+		duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+		printk("initcall %pF returned %d after %lld usecs\n", fn,
+			ret, duration);
 	}
 
 	msgbuf[0] = 0;
 
-	if (ret.result && ret.result != -ENODEV && initcall_debug)
-		sprintf(msgbuf, "error code %d ", ret.result);
+	if (ret && ret != -ENODEV && initcall_debug)
+		sprintf(msgbuf, "error code %d ", ret);
 
 	if (preempt_count() != count) {
 		strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -760,7 +754,7 @@ int do_one_initcall(initcall_t fn)
 		printk("initcall %pF returned with %s\n", fn, msgbuf);
 	}
 
-	return ret.result;
+	return ret;
 }
 
 
@@ -880,7 +874,6 @@ static int __init kernel_init(void * unused)
 	smp_prepare_cpus(setup_max_cpus);
 
 	do_pre_smp_initcalls();
-	start_boot_trace();
 
 	smp_init();
 	sched_init_smp();
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8b1797c4545..572992abc71 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -229,23 +229,6 @@ config FTRACE_SYSCALLS
 	help
 	  Basic tracer to catch the syscall entry and exit events.
 
-config BOOT_TRACER
-	bool "Trace boot initcalls"
-	select GENERIC_TRACER
-	select CONTEXT_SWITCH_TRACER
-	help
-	  This tracer helps developers to optimize boot times: it records
-	  the timings of the initcalls and traces key events and the identity
-	  of tasks that can cause boot delays, such as context-switches.
-
-	  Its aim is to be parsed by the scripts/bootgraph.pl tool to
-	  produce pretty graphics about boot inefficiencies, giving a visual
-	  representation of the delays during initcalls - but the raw
-	  /debug/tracing/trace text output is readable too.
-
-	  You must pass in initcall_debug and ftrace=initcall to the kernel
-	  command line to enable this on bootup.
-
 config TRACE_BRANCH_PROFILING
 	bool
 	select GENERIC_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index ffb1a5b0550..c3aaeba8237 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -38,7 +38,6 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
 obj-$(CONFIG_NOP_TRACER) += trace_nop.o
 obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
-obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
 obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
 obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 55e48511d7c..036fbc22858 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4603,9 +4603,6 @@ __init static int tracer_alloc_buffers(void)
 
 	register_tracer(&nop_trace);
 	current_trace = &nop_trace;
-#ifdef CONFIG_BOOT_TRACER
-	register_tracer(&boot_tracer);
-#endif
 	/* All seems OK, enable tracing */
 	tracing_disabled = 0;
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2cd96399463..75a5e800a73 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,10 +9,8 @@
 #include <linux/mmiotrace.h>
 #include <linux/tracepoint.h>
 #include <linux/ftrace.h>
-#include <trace/boot.h>
 #include <linux/kmemtrace.h>
 #include <linux/hw_breakpoint.h>
-
 #include <linux/trace_seq.h>
 #include <linux/ftrace_event.h>
 
@@ -29,8 +27,6 @@ enum trace_type {
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
 	TRACE_BRANCH,
-	TRACE_BOOT_CALL,
-	TRACE_BOOT_RET,
 	TRACE_GRAPH_RET,
 	TRACE_GRAPH_ENT,
 	TRACE_USER_STACK,
@@ -48,8 +44,6 @@ enum kmemtrace_type_id {
 	KMEMTRACE_TYPE_PAGES,		/* __get_free_pages() and friends. */
 };
 
-extern struct tracer boot_tracer;
-
 #undef __field
 #define __field(type, item)		type	item;
 
@@ -209,8 +203,6 @@ extern void __ftrace_bad_type(void);
 			  TRACE_MMIO_RW);				\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_map,		\
 			  TRACE_MMIO_MAP);				\
-		IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
-		IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
 		IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
 		IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry,	\
 			  TRACE_GRAPH_ENT);		\
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
deleted file mode 100644
index c21d5f3956a..00000000000
--- a/kernel/trace/trace_boot.c
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * ring buffer based initcalls tracer
- *
- * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
- *
- */
-
-#include <linux/init.h>
-#include <linux/debugfs.h>
-#include <linux/ftrace.h>
-#include <linux/kallsyms.h>
-#include <linux/time.h>
-
-#include "trace.h"
-#include "trace_output.h"
-
-static struct trace_array *boot_trace;
-static bool pre_initcalls_finished;
-
-/* Tells the boot tracer that the pre_smp_initcalls are finished.
- * So we are ready .
- * It doesn't enable sched events tracing however.
- * You have to call enable_boot_trace to do so.
- */
-void start_boot_trace(void)
-{
-	pre_initcalls_finished = true;
-}
-
-void enable_boot_trace(void)
-{
-	if (boot_trace && pre_initcalls_finished)
-		tracing_start_sched_switch_record();
-}
-
-void disable_boot_trace(void)
-{
-	if (boot_trace && pre_initcalls_finished)
-		tracing_stop_sched_switch_record();
-}
-
-static int boot_trace_init(struct trace_array *tr)
-{
-	boot_trace = tr;
-
-	if (!tr)
-		return 0;
-
-	tracing_reset_online_cpus(tr);
-
-	tracing_sched_switch_assign_trace(tr);
-	return 0;
-}
-
-static enum print_line_t
-initcall_call_print_line(struct trace_iterator *iter)
-{
-	struct trace_entry *entry = iter->ent;
-	struct trace_seq *s = &iter->seq;
-	struct trace_boot_call *field;
-	struct boot_trace_call *call;
-	u64 ts;
-	unsigned long nsec_rem;
-	int ret;
-
-	trace_assign_type(field, entry);
-	call = &field->boot_call;
-	ts = iter->ts;
-	nsec_rem = do_div(ts, NSEC_PER_SEC);
-
-	ret = trace_seq_printf(s, "[%5ld.%09ld] calling  %s @ %i\n",
-			(unsigned long)ts, nsec_rem, call->func, call->caller);
-
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-	else
-		return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-initcall_ret_print_line(struct trace_iterator *iter)
-{
-	struct trace_entry *entry = iter->ent;
-	struct trace_seq *s = &iter->seq;
-	struct trace_boot_ret *field;
-	struct boot_trace_ret *init_ret;
-	u64 ts;
-	unsigned long nsec_rem;
-	int ret;
-
-	trace_assign_type(field, entry);
-	init_ret = &field->boot_ret;
-	ts = iter->ts;
-	nsec_rem = do_div(ts, NSEC_PER_SEC);
-
-	ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
-			"returned %d after %llu msecs\n",
-			(unsigned long) ts,
-			nsec_rem,
-			init_ret->func, init_ret->result, init_ret->duration);
-
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-	else
-		return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t initcall_print_line(struct trace_iterator *iter)
-{
-	struct trace_entry *entry = iter->ent;
-
-	switch (entry->type) {
-	case TRACE_BOOT_CALL:
-		return initcall_call_print_line(iter);
-	case TRACE_BOOT_RET:
-		return initcall_ret_print_line(iter);
-	default:
-		return TRACE_TYPE_UNHANDLED;
-	}
-}
-
-struct tracer boot_tracer __read_mostly =
-{
-	.name		= "initcall",
-	.init		= boot_trace_init,
-	.reset		= tracing_reset_online_cpus,
-	.print_line	= initcall_print_line,
-};
-
-void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
-{
-	struct ftrace_event_call *call = &event_boot_call;
-	struct ring_buffer_event *event;
-	struct ring_buffer *buffer;
-	struct trace_boot_call *entry;
-	struct trace_array *tr = boot_trace;
-
-	if (!tr || !pre_initcalls_finished)
-		return;
-
-	/* Get its name now since this function could
-	 * disappear because it is in the .init section.
-	 */
-	sprint_symbol(bt->func, (unsigned long)fn);
-	preempt_disable();
-
-	buffer = tr->buffer;
-	event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL,
-					  sizeof(*entry), 0, 0);
-	if (!event)
-		goto out;
-	entry	= ring_buffer_event_data(event);
-	entry->boot_call = *bt;
-	if (!filter_check_discard(call, entry, buffer, event))
-		trace_buffer_unlock_commit(buffer, event, 0, 0);
- out:
-	preempt_enable();
-}
-
-void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
-{
-	struct ftrace_event_call *call = &event_boot_ret;
-	struct ring_buffer_event *event;
-	struct ring_buffer *buffer;
-	struct trace_boot_ret *entry;
-	struct trace_array *tr = boot_trace;
-
-	if (!tr || !pre_initcalls_finished)
-		return;
-
-	sprint_symbol(bt->func, (unsigned long)fn);
-	preempt_disable();
-
-	buffer = tr->buffer;
-	event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET,
-					  sizeof(*entry), 0, 0);
-	if (!event)
-		goto out;
-	entry	= ring_buffer_event_data(event);
-	entry->boot_ret = *bt;
-	if (!filter_check_discard(call, entry, buffer, event))
-		trace_buffer_unlock_commit(buffer, event, 0, 0);
- out:
-	preempt_enable();
-}
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index dc008c1240d..c293364c984 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -271,33 +271,6 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
 		 __entry->map_id, __entry->opcode)
 );
 
-FTRACE_ENTRY(boot_call, trace_boot_call,
-
-	TRACE_BOOT_CALL,
-
-	F_STRUCT(
-		__field_struct(	struct boot_trace_call,	boot_call	)
-		__field_desc(	pid_t,	boot_call,	caller		)
-		__array_desc(	char,	boot_call,	func,	KSYM_SYMBOL_LEN)
-	),
-
-	F_printk("%d  %s", __entry->caller, __entry->func)
-);
-
-FTRACE_ENTRY(boot_ret, trace_boot_ret,
-
-	TRACE_BOOT_RET,
-
-	F_STRUCT(
-		__field_struct(	struct boot_trace_ret,	boot_ret	)
-		__array_desc(	char,	boot_ret,	func,	KSYM_SYMBOL_LEN)
-		__field_desc(	int,	boot_ret,	result		)
-		__field_desc(	unsigned long, boot_ret, duration	)
-	),
-
-	F_printk("%s %d %lx",
-		 __entry->func, __entry->result, __entry->duration)
-);
 
 #define TRACE_FUNC_SIZE 30
 #define TRACE_FILE_SIZE 20
-- 
cgit v1.2.3-70-g09d2


From c676329abb2b8359d9a5d734dec0c81779823fd6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 25 May 2010 10:48:51 +0200
Subject: sched_clock: Add local_clock() API and improve documentation

For people who otherwise get to write: cpu_clock(smp_processor_id()),
there is now: local_clock().

Also, as per suggestion from Andrew, provide some documentation on
the various clock interfaces, and minimize the unsigned long long vs
u64 mess.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jens Axboe <jaxboe@fusionio.com>
LKML-Reference: <1275052414.1645.52.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/parisc/kernel/ftrace.c |  4 +-
 include/linux/sched.h       | 37 ++++++++++--------
 kernel/lockdep.c            |  2 +-
 kernel/perf_event.c         |  2 +-
 kernel/rcutorture.c         |  3 +-
 kernel/sched.c              |  2 +-
 kernel/sched_clock.c        | 95 ++++++++++++++++++++++++++++++++++++++++-----
 kernel/trace/trace_clock.c  |  2 +-
 8 files changed, 113 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 9877372ffdb..5beb97bafbb 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -82,7 +82,7 @@ unsigned long ftrace_return_to_handler(unsigned long retval0,
 	unsigned long ret;
 
 	pop_return_trace(&trace, &ret);
-	trace.rettime = cpu_clock(raw_smp_processor_id());
+	trace.rettime = local_clock();
 	ftrace_graph_return(&trace);
 
 	if (unlikely(!ret)) {
@@ -126,7 +126,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	calltime = cpu_clock(raw_smp_processor_id());
+	calltime = local_clock();
 
 	if (push_return_trace(old, calltime,
 				self_addr, &trace.depth) == -EBUSY) {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index edc3dd168d8..c2d4316a04b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1791,20 +1791,23 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 #endif
 
 /*
- * Architectures can set this to 1 if they have specified
- * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
- * but then during bootup it turns out that sched_clock()
- * is reliable after all:
+ * Do not use outside of architecture code which knows its limitations.
+ *
+ * sched_clock() has no promise of monotonicity or bounded drift between
+ * CPUs; using it (which you should not) requires disabling IRQs.
+ *
+ * Please use one of the three interfaces below.
  */
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-extern int sched_clock_stable;
-#endif
-
-/* ftrace calls sched_clock() directly */
 extern unsigned long long notrace sched_clock(void);
+/*
+ * See the comment in kernel/sched_clock.c
+ */
+extern u64 cpu_clock(int cpu);
+extern u64 local_clock(void);
+extern u64 sched_clock_cpu(int cpu);
+
 
 extern void sched_clock_init(void);
-extern u64 sched_clock_cpu(int cpu);
 
 #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 static inline void sched_clock_tick(void)
@@ -1819,17 +1822,19 @@ static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
 {
 }
 #else
+/*
+ * Architectures can set this to 1 if they have specified
+ * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
+ * but then during bootup it turns out that sched_clock()
+ * is reliable after all:
+ */
+extern int sched_clock_stable;
+
 extern void sched_clock_tick(void);
 extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 #endif
 
-/*
- * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
- * clock constructed from sched_clock():
- */
-extern unsigned long long cpu_clock(int cpu);
-
 extern unsigned long long
 task_sched_runtime(struct task_struct *task);
 extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 54286798c37..f2852a51023 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -146,7 +146,7 @@ static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
 
 static inline u64 lockstat_clock(void)
 {
-	return cpu_clock(smp_processor_id());
+	return local_clock();
 }
 
 static int lock_point(unsigned long points[], unsigned long ip)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 31d6afe9259..109c5ec8893 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -214,7 +214,7 @@ static void perf_unpin_context(struct perf_event_context *ctx)
 
 static inline u64 perf_clock(void)
 {
-	return cpu_clock(raw_smp_processor_id());
+	return local_clock();
 }
 
 /*
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 6535ac8bc6a..2e2726d790b 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -239,8 +239,7 @@ static unsigned long
 rcu_random(struct rcu_random_state *rrsp)
 {
 	if (--rrsp->rrs_count < 0) {
-		rrsp->rrs_state +=
-			(unsigned long)cpu_clock(raw_smp_processor_id());
+		rrsp->rrs_state += (unsigned long)local_clock();
 		rrsp->rrs_count = RCU_RANDOM_REFRESH;
 	}
 	rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD;
diff --git a/kernel/sched.c b/kernel/sched.c
index 8f351c56567..3abd8f780da 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1647,7 +1647,7 @@ static void update_shares(struct sched_domain *sd)
 	if (root_task_group_empty())
 		return;
 
-	now = cpu_clock(raw_smp_processor_id());
+	now = local_clock();
 	elapsed = now - sd->last_update;
 
 	if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 906a0f718cb..52f1a149bfb 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -10,19 +10,55 @@
  *   Ingo Molnar <mingo@redhat.com>
  *   Guillaume Chazarain <guichaz@gmail.com>
  *
- * Create a semi stable clock from a mixture of other events, including:
- *  - gtod
+ *
+ * What:
+ *
+ * cpu_clock(i) provides a fast (execution time) high resolution
+ * clock with bounded drift between CPUs. The value of cpu_clock(i)
+ * is monotonic for constant i. The timestamp returned is in nanoseconds.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ *
+ * There is no strict promise about the base, although it tends to start
+ * at 0 on boot (but people really shouldn't rely on that).
+ *
+ * cpu_clock(i)       -- can be used from any context, including NMI.
+ * sched_clock_cpu(i) -- must be used with local IRQs disabled (implied by NMI)
+ * local_clock()      -- is cpu_clock() on the current cpu.
+ *
+ * How:
+ *
+ * The implementation either uses sched_clock() when
+ * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, which means in that case the
+ * sched_clock() is assumed to provide these properties (mostly it means
+ * the architecture provides a globally synchronized highres time source).
+ *
+ * Otherwise it tries to create a semi stable clock from a mixture of other
+ * clocks, including:
+ *
+ *  - GTOD (clock monotonic)
  *  - sched_clock()
  *  - explicit idle events
  *
- * We use gtod as base and the unstable clock deltas. The deltas are filtered,
- * making it monotonic and keeping it within an expected window.
+ * We use GTOD as base and use sched_clock() deltas to improve resolution. The
+ * deltas are filtered to provide monotonicity and keeping it within an
+ * expected window.
  *
  * Furthermore, explicit sleep and wakeup hooks allow us to account for time
  * that is otherwise invisible (TSC gets stopped).
  *
- * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
- * consistent between cpus (never more than 2 jiffies difference).
+ *
+ * Notes:
+ *
+ * The !IRQ-safety of sched_clock() and sched_clock_cpu() comes from things
+ * like cpufreq interrupts that can change the base clock (TSC) multiplier
+ * and cause funny jumps in time -- although the filtering provided by
+ * sched_clock_cpu() should mitigate serious artifacts we cannot rely on it
+ * in general since for !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK we fully rely on
+ * sched_clock().
  */
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
@@ -170,6 +206,11 @@ again:
 	return val;
 }
 
+/*
+ * Similar to cpu_clock(), but requires local IRQs to be disabled.
+ *
+ * See cpu_clock().
+ */
 u64 sched_clock_cpu(int cpu)
 {
 	struct sched_clock_data *scd;
@@ -237,9 +278,19 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
-unsigned long long cpu_clock(int cpu)
+/*
+ * As outlined at the top, provides a fast, high resolution, nanosecond
+ * time source that is monotonic per cpu argument and has bounded drift
+ * between cpus.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ */
+u64 cpu_clock(int cpu)
 {
-	unsigned long long clock;
+	u64 clock;
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -249,6 +300,25 @@ unsigned long long cpu_clock(int cpu)
 	return clock;
 }
 
+/*
+ * Similar to cpu_clock() for the current cpu. Time will only be observed
+ * to be monotonic if care is taken to only compare timestamps taken on the
+ * same CPU.
+ *
+ * See cpu_clock().
+ */
+u64 local_clock(void)
+{
+	u64 clock;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	clock = sched_clock_cpu(smp_processor_id());
+	local_irq_restore(flags);
+
+	return clock;
+}
+
 #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 void sched_clock_init(void)
@@ -264,12 +334,17 @@ u64 sched_clock_cpu(int cpu)
 	return sched_clock();
 }
 
-
-unsigned long long cpu_clock(int cpu)
+u64 cpu_clock(int cpu)
 {
 	return sched_clock_cpu(cpu);
 }
 
+u64 local_clock(void)
+{
+	return sched_clock_cpu(0);
+}
+
 #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 EXPORT_SYMBOL_GPL(cpu_clock);
+EXPORT_SYMBOL_GPL(local_clock);
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 9d589d8dcd1..1723e2b8c58 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -56,7 +56,7 @@ u64 notrace trace_clock_local(void)
  */
 u64 notrace trace_clock(void)
 {
-	return cpu_clock(raw_smp_processor_id());
+	return local_clock();
 }
 
 
-- 
cgit v1.2.3-70-g09d2


From 83cd4fe27ad8446619b2e030b171b858501de87d Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venki@google.com>
Date: Fri, 21 May 2010 17:09:41 -0700
Subject: sched: Change nohz idle load balancing logic to push model

In the new push model, all idle CPUs indeed go into nohz mode. There is
still the concept of idle load balancer (performing the load balancing
on behalf of all the idle cpu's in the system). Busy CPU kicks the nohz
balancer when any of the nohz CPUs need idle load balancing.
The kickee CPU does the idle load balancing on behalf of all idle CPUs
instead of the normal idle balance.

This addresses the below two problems with the current nohz ilb logic:
* the idle load balancer continued to have periodic ticks during idle and
  woke up frequently, even though it did not have any rebalancing to do on
  behalf of any of the idle CPUs.
* On x86 and CPUs that have APIC timer stoppage on idle CPUs, this
  periodic wakeup can result in a periodic additional interrupt on a CPU
  doing the timer broadcast.

Also, currently we are migrating the unpinned timers from an idle cpu to the
cpu doing idle load balancing (when all the cpus in the system are idle,
there is no idle load balancing cpu and timers get added to the same idle cpu
where the request was made, so the existing optimization works only on a
semi idle system).

And in a semi idle system, we no longer have periodic ticks on the idle load
balancer CPU. Using that cpu will add more delays to the timers than intended
(as that cpu's timer base may not be up to date wrt jiffies etc). This was
causing mysterious slowdowns during boot etc.

For now, in the semi idle case, use the nearest busy cpu for migrating timers
from an idle cpu.  This is good for power-savings anyway.

Signed-off-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <1274486981.2840.46.camel@sbs-t61.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h    |   9 +-
 kernel/hrtimer.c         |   8 +-
 kernel/sched.c           |  34 ++++-
 kernel/sched_fair.c      | 329 ++++++++++++++++++++++++++++-------------------
 kernel/time/tick-sched.c |   8 +-
 kernel/timer.c           |   8 +-
 6 files changed, 237 insertions(+), 159 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c2d4316a04b..a3e5b1cd043 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -271,14 +271,11 @@ extern int runqueue_is_locked(int cpu);
 
 extern cpumask_var_t nohz_cpu_mask;
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
-extern int select_nohz_load_balancer(int cpu);
-extern int get_nohz_load_balancer(void);
+extern void select_nohz_load_balancer(int stop_tick);
+extern int get_nohz_timer_target(void);
 extern int nohz_ratelimit(int cpu);
 #else
-static inline int select_nohz_load_balancer(int cpu)
-{
-	return 0;
-}
+static inline void select_nohz_load_balancer(int stop_tick) { }
 
 static inline int nohz_ratelimit(int cpu)
 {
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 5c69e996bd0..e934339fbbe 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -144,12 +144,8 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 static int hrtimer_get_target(int this_cpu, int pinned)
 {
 #ifdef CONFIG_NO_HZ
-	if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
-		int preferred_cpu = get_nohz_load_balancer();
-
-		if (preferred_cpu >= 0)
-			return preferred_cpu;
-	}
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu))
+		return get_nohz_timer_target();
 #endif
 	return this_cpu;
 }
diff --git a/kernel/sched.c b/kernel/sched.c
index a757f6b11cb..132950b33dd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -460,7 +460,7 @@ struct rq {
 	unsigned long last_load_update_tick;
 #ifdef CONFIG_NO_HZ
 	u64 nohz_stamp;
-	unsigned char in_nohz_recently;
+	unsigned char nohz_balance_kick;
 #endif
 	unsigned int skip_clock_update;
 
@@ -1194,6 +1194,27 @@ static void resched_cpu(int cpu)
 }
 
 #ifdef CONFIG_NO_HZ
+/*
+ * In the semi idle case, use the nearest busy cpu for migrating timers
+ * from an idle cpu.  This is good for power-savings.
+ *
+ * We don't do similar optimization for completely idle system, as
+ * selecting an idle cpu will add more delays to the timers than intended
+ * (as that cpu's timer base may not be uptodate wrt jiffies etc).
+ */
+int get_nohz_timer_target(void)
+{
+	int cpu = smp_processor_id();
+	int i;
+	struct sched_domain *sd;
+
+	for_each_domain(cpu, sd) {
+		for_each_cpu(i, sched_domain_span(sd))
+			if (!idle_cpu(i))
+				return i;
+	}
+	return cpu;
+}
 /*
  * When add_timer_on() enqueues a timer into the timer wheel of an
  * idle CPU then this timer might expire before the next timer event
@@ -7791,6 +7812,10 @@ void __init sched_init(void)
 		rq->idle_stamp = 0;
 		rq->avg_idle = 2*sysctl_sched_migration_cost;
 		rq_attach_root(rq, &def_root_domain);
+#ifdef CONFIG_NO_HZ
+		rq->nohz_balance_kick = 0;
+		init_sched_softirq_csd(&per_cpu(remote_sched_softirq_cb, i));
+#endif
 #endif
 		init_rq_hrtick(rq);
 		atomic_set(&rq->nr_iowait, 0);
@@ -7835,8 +7860,11 @@ void __init sched_init(void)
 	zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
 #ifdef CONFIG_SMP
 #ifdef CONFIG_NO_HZ
-	zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
-	alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
+	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
+	alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT);
+	atomic_set(&nohz.load_balancer, nr_cpu_ids);
+	atomic_set(&nohz.first_pick_cpu, nr_cpu_ids);
+	atomic_set(&nohz.second_pick_cpu, nr_cpu_ids);
 #endif
 	/* May be allocated at isolcpus cmdline parse time */
 	if (cpu_isolated_map == NULL)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 22b8b4f2b61..6ee2e0af665 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -3091,13 +3091,40 @@ out_unlock:
 }
 
 #ifdef CONFIG_NO_HZ
+
+static DEFINE_PER_CPU(struct call_single_data, remote_sched_softirq_cb);
+
+static void trigger_sched_softirq(void *data)
+{
+	raise_softirq_irqoff(SCHED_SOFTIRQ);
+}
+
+static inline void init_sched_softirq_csd(struct call_single_data *csd)
+{
+	csd->func = trigger_sched_softirq;
+	csd->info = NULL;
+	csd->flags = 0;
+	csd->priv = 0;
+}
+
+/*
+ * idle load balancing details
+ * - One of the idle CPUs nominates itself as idle load_balancer, while
+ *   entering idle.
+ * - This idle load balancer CPU will also go into tickless mode when
+ *   it is idle, just like all other idle CPUs
+ * - When one of the busy CPUs notice that there may be an idle rebalancing
+ *   needed, they will kick the idle load balancer, which then does idle
+ *   load balancing for all the idle CPUs.
+ */
 static struct {
 	atomic_t load_balancer;
-	cpumask_var_t cpu_mask;
-	cpumask_var_t ilb_grp_nohz_mask;
-} nohz ____cacheline_aligned = {
-	.load_balancer = ATOMIC_INIT(-1),
-};
+	atomic_t first_pick_cpu;
+	atomic_t second_pick_cpu;
+	cpumask_var_t idle_cpus_mask;
+	cpumask_var_t grp_idle_mask;
+	unsigned long next_balance;     /* in jiffy units */
+} nohz ____cacheline_aligned;
 
 int get_nohz_load_balancer(void)
 {
@@ -3151,17 +3178,17 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
  */
 static inline int is_semi_idle_group(struct sched_group *ilb_group)
 {
-	cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask,
+	cpumask_and(nohz.grp_idle_mask, nohz.idle_cpus_mask,
 					sched_group_cpus(ilb_group));
 
 	/*
 	 * A sched_group is semi-idle when it has atleast one busy cpu
 	 * and atleast one idle cpu.
 	 */
-	if (cpumask_empty(nohz.ilb_grp_nohz_mask))
+	if (cpumask_empty(nohz.grp_idle_mask))
 		return 0;
 
-	if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group)))
+	if (cpumask_equal(nohz.grp_idle_mask, sched_group_cpus(ilb_group)))
 		return 0;
 
 	return 1;
@@ -3194,7 +3221,7 @@ static int find_new_ilb(int cpu)
 	 * Optimize for the case when we have no idle CPUs or only one
 	 * idle CPU. Don't walk the sched_domain hierarchy in such cases
 	 */
-	if (cpumask_weight(nohz.cpu_mask) < 2)
+	if (cpumask_weight(nohz.idle_cpus_mask) < 2)
 		goto out_done;
 
 	for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
@@ -3202,7 +3229,7 @@ static int find_new_ilb(int cpu)
 
 		do {
 			if (is_semi_idle_group(ilb_group))
-				return cpumask_first(nohz.ilb_grp_nohz_mask);
+				return cpumask_first(nohz.grp_idle_mask);
 
 			ilb_group = ilb_group->next;
 
@@ -3210,98 +3237,116 @@ static int find_new_ilb(int cpu)
 	}
 
 out_done:
-	return cpumask_first(nohz.cpu_mask);
+	return nr_cpu_ids;
 }
 #else /*  (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
 static inline int find_new_ilb(int call_cpu)
 {
-	return cpumask_first(nohz.cpu_mask);
+	return nr_cpu_ids;
 }
 #endif
 
+/*
+ * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
+ * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle
+ * CPU (if there is one).
+ */
+static void nohz_balancer_kick(int cpu)
+{
+	int ilb_cpu;
+
+	nohz.next_balance++;
+
+	ilb_cpu = get_nohz_load_balancer();
+
+	if (ilb_cpu >= nr_cpu_ids) {
+		ilb_cpu = cpumask_first(nohz.idle_cpus_mask);
+		if (ilb_cpu >= nr_cpu_ids)
+			return;
+	}
+
+	if (!cpu_rq(ilb_cpu)->nohz_balance_kick) {
+		struct call_single_data *cp;
+
+		cpu_rq(ilb_cpu)->nohz_balance_kick = 1;
+		cp = &per_cpu(remote_sched_softirq_cb, cpu);
+		__smp_call_function_single(ilb_cpu, cp, 0);
+	}
+	return;
+}
+
 /*
  * This routine will try to nominate the ilb (idle load balancing)
  * owner among the cpus whose ticks are stopped. ilb owner will do the idle
- * load balancing on behalf of all those cpus. If all the cpus in the system
- * go into this tickless mode, then there will be no ilb owner (as there is
- * no need for one) and all the cpus will sleep till the next wakeup event
- * arrives...
- *
- * For the ilb owner, tick is not stopped. And this tick will be used
- * for idle load balancing. ilb owner will still be part of
- * nohz.cpu_mask..
+ * load balancing on behalf of all those cpus.
  *
- * While stopping the tick, this cpu will become the ilb owner if there
- * is no other owner. And will be the owner till that cpu becomes busy
- * or if all cpus in the system stop their ticks at which point
- * there is no need for ilb owner.
+ * When the ilb owner becomes busy, we will not have new ilb owner until some
+ * idle CPU wakes up and goes back to idle or some busy CPU tries to kick
+ * idle load balancing by kicking one of the idle CPUs.
  *
- * When the ilb owner becomes busy, it nominates another owner, during the
- * next busy scheduler_tick()
+ * Ticks are stopped for the ilb owner as well, with busy CPU kicking this
+ * ilb owner CPU in future (when there is a need for idle load balancing on
+ * behalf of all idle CPUs).
  */
-int select_nohz_load_balancer(int stop_tick)
+void select_nohz_load_balancer(int stop_tick)
 {
 	int cpu = smp_processor_id();
 
 	if (stop_tick) {
-		cpu_rq(cpu)->in_nohz_recently = 1;
-
 		if (!cpu_active(cpu)) {
 			if (atomic_read(&nohz.load_balancer) != cpu)
-				return 0;
+				return;
 
 			/*
 			 * If we are going offline and still the leader,
 			 * give up!
 			 */
-			if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
+			if (atomic_cmpxchg(&nohz.load_balancer, cpu,
+					   nr_cpu_ids) != cpu)
 				BUG();
 
-			return 0;
+			return;
 		}
 
-		cpumask_set_cpu(cpu, nohz.cpu_mask);
+		cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
 
-		/* time for ilb owner also to sleep */
-		if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) {
-			if (atomic_read(&nohz.load_balancer) == cpu)
-				atomic_set(&nohz.load_balancer, -1);
-			return 0;
-		}
+		if (atomic_read(&nohz.first_pick_cpu) == cpu)
+			atomic_cmpxchg(&nohz.first_pick_cpu, cpu, nr_cpu_ids);
+		if (atomic_read(&nohz.second_pick_cpu) == cpu)
+			atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids);
 
-		if (atomic_read(&nohz.load_balancer) == -1) {
-			/* make me the ilb owner */
-			if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1)
-				return 1;
-		} else if (atomic_read(&nohz.load_balancer) == cpu) {
+		if (atomic_read(&nohz.load_balancer) >= nr_cpu_ids) {
 			int new_ilb;
 
-			if (!(sched_smt_power_savings ||
-						sched_mc_power_savings))
-				return 1;
+			/* make me the ilb owner */
+			if (atomic_cmpxchg(&nohz.load_balancer, nr_cpu_ids,
+					   cpu) != nr_cpu_ids)
+				return;
+
 			/*
 			 * Check to see if there is a more power-efficient
 			 * ilb.
 			 */
 			new_ilb = find_new_ilb(cpu);
 			if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
-				atomic_set(&nohz.load_balancer, -1);
+				atomic_set(&nohz.load_balancer, nr_cpu_ids);
 				resched_cpu(new_ilb);
-				return 0;
+				return;
 			}
-			return 1;
+			return;
 		}
 	} else {
-		if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
-			return 0;
+		if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
+			return;
 
-		cpumask_clear_cpu(cpu, nohz.cpu_mask);
+		cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
 
 		if (atomic_read(&nohz.load_balancer) == cpu)
-			if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
+			if (atomic_cmpxchg(&nohz.load_balancer, cpu,
+					   nr_cpu_ids) != cpu)
 				BUG();
 	}
-	return 0;
+	return;
 }
 #endif
 
@@ -3383,11 +3428,101 @@ out:
 		rq->next_balance = next_balance;
 }
 
+#ifdef CONFIG_NO_HZ
 /*
- * run_rebalance_domains is triggered when needed from the scheduler tick.
- * In CONFIG_NO_HZ case, the idle load balance owner will do the
+ * In CONFIG_NO_HZ case, the idle balance kickee will do the
  * rebalancing for all the cpus for whom scheduler ticks are stopped.
  */
+static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
+{
+	struct rq *this_rq = cpu_rq(this_cpu);
+	struct rq *rq;
+	int balance_cpu;
+
+	if (idle != CPU_IDLE || !this_rq->nohz_balance_kick)
+		return;
+
+	for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
+		if (balance_cpu == this_cpu)
+			continue;
+
+		/*
+		 * If this cpu gets work to do, stop the load balancing
+		 * work being done for other cpus. Next load
+		 * balancing owner will pick it up.
+		 */
+		if (need_resched()) {
+			this_rq->nohz_balance_kick = 0;
+			break;
+		}
+
+		raw_spin_lock_irq(&this_rq->lock);
+		update_cpu_load(this_rq);
+		raw_spin_unlock_irq(&this_rq->lock);
+
+		rebalance_domains(balance_cpu, CPU_IDLE);
+
+		rq = cpu_rq(balance_cpu);
+		if (time_after(this_rq->next_balance, rq->next_balance))
+			this_rq->next_balance = rq->next_balance;
+	}
+	nohz.next_balance = this_rq->next_balance;
+	this_rq->nohz_balance_kick = 0;
+}
+
+/*
+ * Current heuristic for kicking the idle load balancer
+ * - first_pick_cpu is one of the busy CPUs. It will kick
+ *   idle load balancer when it has more than one process active. This
+ *   eliminates the need for idle load balancing altogether when we have
+ *   only one running process in the system (common case).
+ * - If there are more than one busy CPU, idle load balancer may have
+ *   to run for active_load_balance to happen (i.e., two busy CPUs are
+ *   SMT or core siblings and can run better if they move to different
+ *   physical CPUs). So, second_pick_cpu is the second of the busy CPUs
+ *   which will kick idle load balancer as soon as it has any load.
+ */
+static inline int nohz_kick_needed(struct rq *rq, int cpu)
+{
+	unsigned long now = jiffies;
+	int ret;
+	int first_pick_cpu, second_pick_cpu;
+
+	if (time_before(now, nohz.next_balance))
+		return 0;
+
+	if (!rq->nr_running)
+		return 0;
+
+	first_pick_cpu = atomic_read(&nohz.first_pick_cpu);
+	second_pick_cpu = atomic_read(&nohz.second_pick_cpu);
+
+	if (first_pick_cpu < nr_cpu_ids && first_pick_cpu != cpu &&
+	    second_pick_cpu < nr_cpu_ids && second_pick_cpu != cpu)
+		return 0;
+
+	ret = atomic_cmpxchg(&nohz.first_pick_cpu, nr_cpu_ids, cpu);
+	if (ret == nr_cpu_ids || ret == cpu) {
+		atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids);
+		if (rq->nr_running > 1)
+			return 1;
+	} else {
+		ret = atomic_cmpxchg(&nohz.second_pick_cpu, nr_cpu_ids, cpu);
+		if (ret == nr_cpu_ids || ret == cpu) {
+			if (rq->nr_running)
+				return 1;
+		}
+	}
+	return 0;
+}
+#else
+static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
+#endif
+
+/*
+ * run_rebalance_domains is triggered when needed from the scheduler tick.
+ * Also triggered for nohz idle balancing (with nohz_balance_kick set).
+ */
 static void run_rebalance_domains(struct softirq_action *h)
 {
 	int this_cpu = smp_processor_id();
@@ -3397,40 +3532,12 @@ static void run_rebalance_domains(struct softirq_action *h)
 
 	rebalance_domains(this_cpu, idle);
 
-#ifdef CONFIG_NO_HZ
 	/*
-	 * If this cpu is the owner for idle load balancing, then do the
+	 * If this cpu has a pending nohz_balance_kick, then do the
 	 * balancing on behalf of the other idle cpus whose ticks are
 	 * stopped.
 	 */
-	if (this_rq->idle_at_tick &&
-	    atomic_read(&nohz.load_balancer) == this_cpu) {
-		struct rq *rq;
-		int balance_cpu;
-
-		for_each_cpu(balance_cpu, nohz.cpu_mask) {
-			if (balance_cpu == this_cpu)
-				continue;
-
-			/*
-			 * If this cpu gets work to do, stop the load balancing
-			 * work being done for other cpus. Next load
-			 * balancing owner will pick it up.
-			 */
-			if (need_resched())
-				break;
-
-			rq = cpu_rq(balance_cpu);
-			raw_spin_lock_irq(&rq->lock);
-			update_cpu_load(rq);
-			raw_spin_unlock_irq(&rq->lock);
-			rebalance_domains(balance_cpu, CPU_IDLE);
-
-			if (time_after(this_rq->next_balance, rq->next_balance))
-				this_rq->next_balance = rq->next_balance;
-		}
-	}
-#endif
+	nohz_idle_balance(this_cpu, idle);
 }
 
 static inline int on_null_domain(int cpu)
@@ -3440,57 +3547,17 @@ static inline int on_null_domain(int cpu)
 
 /*
  * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing.
- *
- * In case of CONFIG_NO_HZ, this is the place where we nominate a new
- * idle load balancing owner or decide to stop the periodic load balancing,
- * if the whole system is idle.
  */
 static inline void trigger_load_balance(struct rq *rq, int cpu)
 {
-#ifdef CONFIG_NO_HZ
-	/*
-	 * If we were in the nohz mode recently and busy at the current
-	 * scheduler tick, then check if we need to nominate new idle
-	 * load balancer.
-	 */
-	if (rq->in_nohz_recently && !rq->idle_at_tick) {
-		rq->in_nohz_recently = 0;
-
-		if (atomic_read(&nohz.load_balancer) == cpu) {
-			cpumask_clear_cpu(cpu, nohz.cpu_mask);
-			atomic_set(&nohz.load_balancer, -1);
-		}
-
-		if (atomic_read(&nohz.load_balancer) == -1) {
-			int ilb = find_new_ilb(cpu);
-
-			if (ilb < nr_cpu_ids)
-				resched_cpu(ilb);
-		}
-	}
-
-	/*
-	 * If this cpu is idle and doing idle load balancing for all the
-	 * cpus with ticks stopped, is it time for that to stop?
-	 */
-	if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu &&
-	    cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
-		resched_cpu(cpu);
-		return;
-	}
-
-	/*
-	 * If this cpu is idle and the idle load balancing is done by
-	 * someone else, then no need raise the SCHED_SOFTIRQ
-	 */
-	if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu &&
-	    cpumask_test_cpu(cpu, nohz.cpu_mask))
-		return;
-#endif
 	/* Don't need to rebalance while attached to NULL domain */
 	if (time_after_eq(jiffies, rq->next_balance) &&
 	    likely(!on_null_domain(cpu)))
 		raise_softirq(SCHED_SOFTIRQ);
+#ifdef CONFIG_NO_HZ
+	else if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu)))
+		nohz_balancer_kick(cpu);
+#endif
 }
 
 static void rq_online_fair(struct rq *rq)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1d7b9bc1c03..5f171f04ab0 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -408,13 +408,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 		 * the scheduler tick in nohz_restart_sched_tick.
 		 */
 		if (!ts->tick_stopped) {
-			if (select_nohz_load_balancer(1)) {
-				/*
-				 * sched tick not stopped!
-				 */
-				cpumask_clear_cpu(cpu, nohz_cpu_mask);
-				goto out;
-			}
+			select_nohz_load_balancer(1);
 
 			ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
 			ts->tick_stopped = 1;
diff --git a/kernel/timer.c b/kernel/timer.c
index ee305c8d4e1..48d6aec0789 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -679,12 +679,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 	cpu = smp_processor_id();
 
 #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
-	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
-		int preferred_cpu = get_nohz_load_balancer();
-
-		if (preferred_cpu >= 0)
-			cpu = preferred_cpu;
-	}
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
+		cpu = get_nohz_timer_target();
 #endif
 	new_base = per_cpu(tvec_bases, cpu);
 
-- 
cgit v1.2.3-70-g09d2


From 9d5efe05eb0c904545a28b19c18b949f23334de0 Mon Sep 17 00:00:00 2001
From: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Date: Tue, 8 Jun 2010 14:57:02 +1000
Subject: sched: Fix capacity calculations for SMT4

Handle cpu capacity being reported as 0 on cores with a large number of
hardware threads. For example on a Power7 core with 4 hardware
threads, core power is 1177 and thus power of each hardware thread is
1177/4 = 294. This low power can lead to capacity for each hardware
thread being calculated as 0, which leads to tasks bouncing within the
core madly!

Fix this by reporting capacity for hardware threads as 1, provided
their power is not scaled down significantly because of frequency
scaling or real-time tasks usage of cpu.

Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arjan van de Ven <arjan@linux.intel.com>
LKML-Reference: <20100608045702.21D03CC895@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  2 +-
 kernel/sched_fair.c   | 53 +++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 44 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a3e5b1cd043..c731296e5e9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -857,7 +857,7 @@ struct sched_group {
 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
 	 * single CPU.
 	 */
-	unsigned int cpu_power;
+	unsigned int cpu_power, cpu_power_orig;
 
 	/*
 	 * The CPUs this group covers.
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6ee2e0af665..b9b3462483b 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2285,13 +2285,6 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 	unsigned long power = SCHED_LOAD_SCALE;
 	struct sched_group *sdg = sd->groups;
 
-	if (sched_feat(ARCH_POWER))
-		power *= arch_scale_freq_power(sd, cpu);
-	else
-		power *= default_scale_freq_power(sd, cpu);
-
-	power >>= SCHED_LOAD_SHIFT;
-
 	if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
 		if (sched_feat(ARCH_POWER))
 			power *= arch_scale_smt_power(sd, cpu);
@@ -2301,6 +2294,15 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 		power >>= SCHED_LOAD_SHIFT;
 	}
 
+	sdg->cpu_power_orig = power;
+
+	if (sched_feat(ARCH_POWER))
+		power *= arch_scale_freq_power(sd, cpu);
+	else
+		power *= default_scale_freq_power(sd, cpu);
+
+	power >>= SCHED_LOAD_SHIFT;
+
 	power *= scale_rt_power(cpu);
 	power >>= SCHED_LOAD_SHIFT;
 
@@ -2333,6 +2335,31 @@ static void update_group_power(struct sched_domain *sd, int cpu)
 	sdg->cpu_power = power;
 }
 
+/*
+ * Try and fix up capacity for tiny siblings, this is needed when
+ * things like SD_ASYM_PACKING need f_b_g to select another sibling
+ * which on its own isn't powerful enough.  Returns the capacity (0 or 1)
+ * to use for @group when its rounded capacity came out as 0.
+ * See update_sd_pick_busiest() and check_asym_packing().
+ */
+static inline int
+fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
+{
+	/*
+	 * Only siblings can have significantly less than SCHED_LOAD_SCALE
+	 */
+	if (sd->level != SD_LV_SIBLING)
+		return 0;
+
+	/*
+	 * If ~90% (29/32) of cpu_power_orig is still there, we're good.
+	 */
+	if (group->cpu_power * 32 > group->cpu_power_orig * 29)
+		return 1;
+
+	return 0;
+}
+
 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
  * @sd: The sched_domain whose statistics are to be updated.
@@ -2426,6 +2453,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 
 	sgs->group_capacity =
 		DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
+	if (!sgs->group_capacity)
+		sgs->group_capacity = fix_small_capacity(sd, group);
 }
 
 /**
@@ -2724,8 +2753,9 @@ ret:
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
 static struct rq *
-find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
-		   unsigned long imbalance, const struct cpumask *cpus)
+find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
+		   enum cpu_idle_type idle, unsigned long imbalance,
+		   const struct cpumask *cpus)
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
@@ -2736,6 +2766,9 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
 		unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
 		unsigned long wl;
 
+		if (!capacity)
+			capacity = fix_small_capacity(sd, group);
+
 		if (!cpumask_test_cpu(i, cpus))
 			continue;
 
@@ -2852,7 +2885,7 @@ redo:
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group, idle, imbalance, cpus);
+	busiest = find_busiest_queue(sd, group, idle, imbalance, cpus);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
-- 
cgit v1.2.3-70-g09d2


From 532cb4c401e225b084c14d6bd6a2f8ee561de2f1 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Tue, 8 Jun 2010 14:57:02 +1000
Subject: sched: Add asymmetric group packing option for sibling domain

Check to see if the group is packed in a sched domain.

This is primarily intended to be used at the sibling level.  Some cores
like POWER7 prefer to use lower numbered SMT threads.  In the case of
POWER7, it can move to lower SMT modes only when higher threads are
idle.  When in lower SMT modes, the threads will perform better since
they share less core resources.  Hence when we have idle threads, we
want them to be the higher ones.

This adds a hook into f_b_g() called check_asym_packing() to check the
packing.  This packing function is run on idle threads.  It checks to
see if the busiest CPU in this domain (core in the P7 case) has a
higher CPU number than the one the packing function is being run
on.  If it is, calculate the imbalance and return the higher busier
thread as the busiest group to f_b_g().  Here we are assuming a lower
CPU number will be equivalent to a lower SMT thread number.

It also creates a new SD_ASYM_PACKING flag to enable this feature at
any scheduler domain level.

It also creates an arch hook to enable this feature at the sibling
level.  The default function doesn't enable this feature.

Based heavily on patch from Peter Zijlstra.
Fixes from Srivatsa Vaddagiri.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <20100608045702.2936CCC897@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h    |   4 +-
 include/linux/topology.h |   1 +
 kernel/sched_fair.c      | 139 +++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 126 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c731296e5e9..ff154e10752 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -801,7 +801,7 @@ enum cpu_idle_type {
 #define SD_POWERSAVINGS_BALANCE	0x0100	/* Balance for power savings */
 #define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
-
+#define SD_ASYM_PACKING		0x0800  /* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
 
 enum powersavings_balance_level {
@@ -836,6 +836,8 @@ static inline int sd_balance_for_package_power(void)
 	return SD_PREFER_SIBLING;
 }
 
+extern int __weak arch_sd_sibiling_asym_packing(void);
+
 /*
  * Optimise SD flags for power savings:
  * SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings.
diff --git a/include/linux/topology.h b/include/linux/topology.h
index c44df50a05a..cf57f30d0dc 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -103,6 +103,7 @@ int arch_update_cpu_topology(void);
 				| 1*SD_SHARE_PKG_RESOURCES		\
 				| 0*SD_SERIALIZE			\
 				| 0*SD_PREFER_SIBLING			\
+				| arch_sd_sibiling_asym_packing()	\
 				,					\
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index b9b3462483b..593424f91a8 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2457,12 +2457,54 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 		sgs->group_capacity = fix_small_capacity(sd, group);
 }
 
+/**
+ * update_sd_pick_busiest - return true if @sg is a busier group
+ * @sd: sched_domain whose statistics are to be checked
+ * @sds: sched_domain statistics
+ * @sg: sched_group candidate to be checked for being the busiest
+ * @sgs: sched_group statistics
+ * @this_cpu: cpu for which load balance is currently performed
+ *
+ * Determine if @sg is busier than the previously selected busiest group.
+ */
+static bool update_sd_pick_busiest(struct sched_domain *sd,
+				   struct sd_lb_stats *sds,
+				   struct sched_group *sg,
+				   struct sg_lb_stats *sgs,
+				   int this_cpu)
+{
+	if (sgs->avg_load <= sds->max_load)
+		return false;
+
+	if (sgs->sum_nr_running > sgs->group_capacity)
+		return true;
+
+	if (sgs->group_imb)
+		return true;
+
+	/*
+	 * ASYM_PACKING needs to move all the work to the lowest
+	 * numbered CPUs in the group, therefore mark all groups
+	 * higher than ourself as busy.
+	 */
+	if ((sd->flags & SD_ASYM_PACKING) && sgs->sum_nr_running &&
+	    this_cpu < group_first_cpu(sg)) {
+		if (!sds->busiest)
+			return true;
+
+		if (group_first_cpu(sds->busiest) > group_first_cpu(sg))
+			return true;
+	}
+
+	return false;
+}
+
 /**
  * update_sd_lb_stats - Update sched_group's statistics for load balancing.
  * @sd: sched_domain whose statistics are to be updated.
  * @this_cpu: Cpu for which load balance is currently performed.
  * @idle: Idle status of this_cpu
- * @sd_idle: Idle status of the sched_domain containing group.
+ * @sd_idle: Idle status of the sched_domain containing sg.
  * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
  * @sds: variable to hold the statistics for this sched_domain.
@@ -2473,7 +2515,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
 			struct sd_lb_stats *sds)
 {
 	struct sched_domain *child = sd->child;
-	struct sched_group *group = sd->groups;
+	struct sched_group *sg = sd->groups;
 	struct sg_lb_stats sgs;
 	int load_idx, prefer_sibling = 0;
 
@@ -2486,21 +2528,20 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
 	do {
 		int local_group;
 
-		local_group = cpumask_test_cpu(this_cpu,
-					       sched_group_cpus(group));
+		local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg));
 		memset(&sgs, 0, sizeof(sgs));
-		update_sg_lb_stats(sd, group, this_cpu, idle, load_idx, sd_idle,
+		update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx, sd_idle,
 				local_group, cpus, balance, &sgs);
 
 		if (local_group && !(*balance))
 			return;
 
 		sds->total_load += sgs.group_load;
-		sds->total_pwr += group->cpu_power;
+		sds->total_pwr += sg->cpu_power;
 
 		/*
 		 * In case the child domain prefers tasks go to siblings
-		 * first, lower the group capacity to one so that we'll try
+		 * first, lower the sg capacity to one so that we'll try
 		 * and move all the excess tasks away.
 		 */
 		if (prefer_sibling)
@@ -2508,23 +2549,72 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
 
 		if (local_group) {
 			sds->this_load = sgs.avg_load;
-			sds->this = group;
+			sds->this = sg;
 			sds->this_nr_running = sgs.sum_nr_running;
 			sds->this_load_per_task = sgs.sum_weighted_load;
-		} else if (sgs.avg_load > sds->max_load &&
-			   (sgs.sum_nr_running > sgs.group_capacity ||
-				sgs.group_imb)) {
+		} else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
 			sds->max_load = sgs.avg_load;
-			sds->busiest = group;
+			sds->busiest = sg;
 			sds->busiest_nr_running = sgs.sum_nr_running;
 			sds->busiest_group_capacity = sgs.group_capacity;
 			sds->busiest_load_per_task = sgs.sum_weighted_load;
 			sds->group_imb = sgs.group_imb;
 		}
 
-		update_sd_power_savings_stats(group, sds, local_group, &sgs);
-		group = group->next;
-	} while (group != sd->groups);
+		update_sd_power_savings_stats(sg, sds, local_group, &sgs);
+		sg = sg->next;
+	} while (sg != sd->groups);
+}
+
+int __weak arch_sd_sibiling_asym_packing(void)
+{
+       return 0*SD_ASYM_PACKING;	/* weak default: packing disabled */
+}
+
+/**
+ * check_asym_packing - Check to see if the group is packed into the
+ *			sched domain.
+ *
+ * This is primarily intended to be used at the sibling level.  Some
+ * cores like POWER7 prefer to use lower numbered SMT threads.  In the
+ * case of POWER7, it can move to lower SMT modes only when higher
+ * threads are idle.  When in lower SMT modes, the threads will
+ * perform better since they share less core resources.  Hence when we
+ * have idle threads, we want them to be the higher ones.
+ *
+ * This packing function is run on idle threads.  It checks to see if
+ * the busiest CPU in this domain (core in the P7 case) has a higher
+ * CPU number than the CPU the packing function is being run on.  Here
+ * we are assuming a lower CPU number will be equivalent to a lower SMT
+ * thread number.
+ *
+ * @sd: The sched_domain whose packing is to be checked.
+ * @sds: Statistics of the sched_domain which is to be packed
+ * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
+ * @imbalance: returns the amount of imbalance due to packing.
+ *
+ * Returns 1 when packing is required and a task should be moved to
+ * this CPU.  The amount of the imbalance is returned in *imbalance.
+ */
+static int check_asym_packing(struct sched_domain *sd,
+			      struct sd_lb_stats *sds,
+			      int this_cpu, unsigned long *imbalance)
+{
+	int busiest_cpu;
+
+	if (!(sd->flags & SD_ASYM_PACKING))
+		return 0;
+
+	if (!sds->busiest)
+		return 0;
+
+	busiest_cpu = group_first_cpu(sds->busiest);
+	if (this_cpu > busiest_cpu)
+		return 0;
+
+	*imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power,
+				       SCHED_LOAD_SCALE);
+	return 1;
+}
 
 /**
@@ -2719,6 +2809,10 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 	if (!(*balance))
 		goto ret;
 
+	if ((idle == CPU_IDLE || idle == CPU_NEWLY_IDLE) &&
+	    check_asym_packing(sd, &sds, this_cpu, imbalance))
+		return sds.busiest;
+
 	if (!sds.busiest || sds.busiest_nr_running == 0)
 		goto out_balanced;
 
@@ -2808,9 +2902,19 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
 /* Working cpumask for load_balance and load_balance_newidle. */
 static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
 
-static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle)
+static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle,
+			       int busiest_cpu, int this_cpu)
 {
 	if (idle == CPU_NEWLY_IDLE) {
+
+		/*
+		 * ASYM_PACKING needs to force migrate tasks from busy but
+		 * higher numbered CPUs in order to pack all tasks in the
+		 * lowest numbered CPUs.
+		 */
+		if ((sd->flags & SD_ASYM_PACKING) && busiest_cpu > this_cpu)
+			return 1;
+
 		/*
 		 * The only task running in a non-idle cpu can be moved to this
 		 * cpu in an attempt to completely freeup the other CPU
@@ -2929,7 +3033,8 @@ redo:
 		schedstat_inc(sd, lb_failed[idle]);
 		sd->nr_balance_failed++;
 
-		if (need_active_balance(sd, sd_idle, idle)) {
+		if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest),
+					this_cpu)) {
 			raw_spin_lock_irqsave(&busiest->lock, flags);
 
 			/* don't kick the active_load_balance_cpu_stop,
-- 
cgit v1.2.3-70-g09d2


From ecc55f84b2e9741f29daa787ded93986df6cbe17 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 21 May 2010 15:11:34 +0200
Subject: perf, trace: Inline perf_swevent_put_recursion_context()

Inline perf_swevent_put_recursion_context into perf_tp_event(), this
shrinks the per trace template code footprint and saves a function
call.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace_event.h | 3 +--
 include/linux/perf_event.h   | 2 +-
 kernel/perf_event.c          | 8 ++++----
 3 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 3167f2df412..0af31cd335d 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -257,8 +257,7 @@ static inline void
 perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
 		       u64 count, struct pt_regs *regs, void *head)
 {
-	perf_tp_event(addr, count, raw_data, size, regs, head);
-	perf_swevent_put_recursion_context(rctx);
+	perf_tp_event(addr, count, raw_data, size, regs, head, rctx);
 }
 #endif
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5d0266d9498..c691a0b27bc 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1001,7 +1001,7 @@ static inline bool perf_paranoid_kernel(void)
 extern void perf_event_init(void);
 extern void perf_tp_event(u64 addr, u64 count, void *record,
 			  int entry_size, struct pt_regs *regs,
-			  struct hlist_head *head);
+			  struct hlist_head *head, int rctx);
 extern void perf_bp_event(struct perf_event *event, void *data);
 
 #ifndef perf_misc_flags
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ff86c558af4..4bd3b597bcc 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4213,14 +4213,12 @@ int perf_swevent_get_recursion_context(void)
 }
 EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
 
-void perf_swevent_put_recursion_context(int rctx)
+void inline perf_swevent_put_recursion_context(int rctx)
 {
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	barrier();
 	cpuctx->recursion[rctx]--;
 }
-EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
-
 
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
 			    struct pt_regs *regs, u64 addr)
@@ -4601,7 +4599,7 @@ static int perf_tp_event_match(struct perf_event *event,
 }
 
 void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
-		   struct pt_regs *regs, struct hlist_head *head)
+		   struct pt_regs *regs, struct hlist_head *head, int rctx)
 {
 	struct perf_sample_data data;
 	struct perf_event *event;
@@ -4621,6 +4619,8 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
 			perf_swevent_add(event, count, 1, &data, regs);
 	}
 	rcu_read_unlock();
+
+	perf_swevent_put_recursion_context(rctx);
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
 
-- 
cgit v1.2.3-70-g09d2


From 3af9e859281bda7eb7c20b51879cf43aa788ac2e Mon Sep 17 00:00:00 2001
From: Eric B Munson <ebmunson@us.ibm.com>
Date: Tue, 18 May 2010 15:30:49 +0100
Subject: perf: Add non-exec mmap() tracking

Add the capability to track data mmap()s. This can be used together
with PERF_SAMPLE_ADDR for data profiling.

Signed-off-by: Anton Blanchard <anton@samba.org>
[Updated code for stable perf ABI]
Signed-off-by: Eric B Munson <ebmunson@us.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1274193049-25997-1-git-send-email-ebmunson@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/exec.c                   |  1 +
 include/linux/perf_event.h  | 12 +++---------
 kernel/perf_event.c         | 34 +++++++++++++++++++++++-----------
 mm/mmap.c                   |  6 +++++-
 tools/perf/builtin-record.c |  4 +++-
 5 files changed, 35 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index e19de6a8033..97d91a03fb1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -653,6 +653,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
 	else
 		stack_base = vma->vm_start - stack_expand;
 #endif
+	current->mm->start_stack = bprm->p;
 	ret = expand_stack(vma, stack_base);
 	if (ret)
 		ret = -EFAULT;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c691a0b27bc..36efad90cd4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -214,8 +214,9 @@ struct perf_event_attr {
 				 *  See also PERF_RECORD_MISC_EXACT_IP
 				 */
 				precise_ip     :  2, /* skid constraint       */
+				mmap_data      :  1, /* non-exec mmap data    */
 
-				__reserved_1   : 47;
+				__reserved_1   : 46;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -962,14 +963,7 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 	}
 }
 
-extern void __perf_event_mmap(struct vm_area_struct *vma);
-
-static inline void perf_event_mmap(struct vm_area_struct *vma)
-{
-	if (vma->vm_flags & VM_EXEC)
-		__perf_event_mmap(vma);
-}
-
+extern void perf_event_mmap(struct vm_area_struct *vma);
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index b39bec346e8..227ed9c8ec3 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1891,7 +1891,7 @@ static void free_event(struct perf_event *event)
 
 	if (!event->parent) {
 		atomic_dec(&nr_events);
-		if (event->attr.mmap)
+		if (event->attr.mmap || event->attr.mmap_data)
 			atomic_dec(&nr_mmap_events);
 		if (event->attr.comm)
 			atomic_dec(&nr_comm_events);
@@ -3491,7 +3491,7 @@ perf_event_read_event(struct perf_event *event,
 /*
  * task tracking -- fork/exit
  *
- * enabled by: attr.comm | attr.mmap | attr.task
+ * enabled by: attr.comm | attr.mmap | attr.mmap_data | attr.task
  */
 
 struct perf_task_event {
@@ -3541,7 +3541,8 @@ static int perf_event_task_match(struct perf_event *event)
 	if (event->cpu != -1 && event->cpu != smp_processor_id())
 		return 0;
 
-	if (event->attr.comm || event->attr.mmap || event->attr.task)
+	if (event->attr.comm || event->attr.mmap ||
+	    event->attr.mmap_data || event->attr.task)
 		return 1;
 
 	return 0;
@@ -3766,7 +3767,8 @@ static void perf_event_mmap_output(struct perf_event *event,
 }
 
 static int perf_event_mmap_match(struct perf_event *event,
-				   struct perf_mmap_event *mmap_event)
+				   struct perf_mmap_event *mmap_event,
+				   int executable)
 {
 	if (event->state < PERF_EVENT_STATE_INACTIVE)
 		return 0;
@@ -3774,19 +3776,21 @@ static int perf_event_mmap_match(struct perf_event *event,
 	if (event->cpu != -1 && event->cpu != smp_processor_id())
 		return 0;
 
-	if (event->attr.mmap)
+	if ((!executable && event->attr.mmap_data) ||
+	    (executable && event->attr.mmap))
 		return 1;
 
 	return 0;
 }
 
 static void perf_event_mmap_ctx(struct perf_event_context *ctx,
-				  struct perf_mmap_event *mmap_event)
+				  struct perf_mmap_event *mmap_event,
+				  int executable)
 {
 	struct perf_event *event;
 
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
-		if (perf_event_mmap_match(event, mmap_event))
+		if (perf_event_mmap_match(event, mmap_event, executable))
 			perf_event_mmap_output(event, mmap_event);
 	}
 }
@@ -3830,6 +3834,14 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 		if (!vma->vm_mm) {
 			name = strncpy(tmp, "[vdso]", sizeof(tmp));
 			goto got_name;
+		} else if (vma->vm_start <= vma->vm_mm->start_brk &&
+				vma->vm_end >= vma->vm_mm->brk) {
+			name = strncpy(tmp, "[heap]", sizeof(tmp));
+			goto got_name;
+		} else if (vma->vm_start <= vma->vm_mm->start_stack &&
+				vma->vm_end >= vma->vm_mm->start_stack) {
+			name = strncpy(tmp, "[stack]", sizeof(tmp));
+			goto got_name;
 		}
 
 		name = strncpy(tmp, "//anon", sizeof(tmp));
@@ -3846,17 +3858,17 @@ got_name:
 
 	rcu_read_lock();
 	cpuctx = &get_cpu_var(perf_cpu_context);
-	perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
+	perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC);
 	ctx = rcu_dereference(current->perf_event_ctxp);
 	if (ctx)
-		perf_event_mmap_ctx(ctx, mmap_event);
+		perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC);
 	put_cpu_var(perf_cpu_context);
 	rcu_read_unlock();
 
 	kfree(buf);
 }
 
-void __perf_event_mmap(struct vm_area_struct *vma)
+void perf_event_mmap(struct vm_area_struct *vma)
 {
 	struct perf_mmap_event mmap_event;
 
@@ -4911,7 +4923,7 @@ done:
 
 	if (!event->parent) {
 		atomic_inc(&nr_events);
-		if (event->attr.mmap)
+		if (event->attr.mmap || event->attr.mmap_data)
 			atomic_inc(&nr_mmap_events);
 		if (event->attr.comm)
 			atomic_inc(&nr_comm_events);
diff --git a/mm/mmap.c b/mm/mmap.c
index 456ec6f2788..e38e910cb75 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1734,8 +1734,10 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 		grow = (address - vma->vm_end) >> PAGE_SHIFT;
 
 		error = acct_stack_growth(vma, size, grow);
-		if (!error)
+		if (!error) {
 			vma->vm_end = address;
+			perf_event_mmap(vma);
+		}
 	}
 	anon_vma_unlock(vma);
 	return error;
@@ -1781,6 +1783,7 @@ static int expand_downwards(struct vm_area_struct *vma,
 		if (!error) {
 			vma->vm_start = address;
 			vma->vm_pgoff -= grow;
+			perf_event_mmap(vma);
 		}
 	}
 	anon_vma_unlock(vma);
@@ -2208,6 +2211,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 	vma->vm_page_prot = vm_get_page_prot(flags);
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 out:
+	perf_event_mmap(vma);
 	mm->total_vm += len >> PAGE_SHIFT;
 	if (flags & VM_LOCKED) {
 		if (!mlock_vma_pages_range(vma, addr, addr + len))
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 5e5c6403a31..39c7247bc54 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -268,8 +268,10 @@ static void create_counter(int counter, int cpu)
 	if (inherit_stat)
 		attr->inherit_stat = 1;
 
-	if (sample_address)
+	if (sample_address) {
 		attr->sample_type	|= PERF_SAMPLE_ADDR;
+		attr->mmap_data = track;
+	}
 
 	if (call_graph)
 		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;
-- 
cgit v1.2.3-70-g09d2


From 8d2cacbbb8deadfae78aa16e4e1ee619bdd7019e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 25 May 2010 17:49:05 +0200
Subject: perf: Cleanup {start,commit,cancel}_txn details

Clarify some of the transactional group scheduling API details
and change it so that a successful ->commit_txn also closes
the transaction.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1274803086.5882.1752.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_event.c |  7 ++++---
 arch/sparc/kernel/perf_event.c   |  7 ++++---
 arch/x86/kernel/cpu/perf_event.c | 14 +++++---------
 include/linux/perf_event.h       | 27 ++++++++++++++++++++++-----
 kernel/perf_event.c              |  9 +--------
 5 files changed, 36 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 43b83c35cf5..ac2a8c2554d 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -754,7 +754,7 @@ static int power_pmu_enable(struct perf_event *event)
 	 * skip the schedulability test here, it will be peformed
 	 * at commit time(->commit_txn) as a whole
 	 */
-	if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED)
+	if (cpuhw->group_flag & PERF_EVENT_TXN)
 		goto nocheck;
 
 	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
@@ -858,7 +858,7 @@ void power_pmu_start_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-	cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+	cpuhw->group_flag |= PERF_EVENT_TXN;
 	cpuhw->n_txn_start = cpuhw->n_events;
 }
 
@@ -871,7 +871,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-	cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 }
 
 /*
@@ -897,6 +897,7 @@ int power_pmu_commit_txn(const struct pmu *pmu)
 	for (i = cpuhw->n_txn_start; i < n; ++i)
 		cpuhw->event[i]->hw.config = cpuhw->events[i];
 
+	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 	return 0;
 }
 
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 0ec92c8861d..beeb92fa3ac 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1005,7 +1005,7 @@ static int sparc_pmu_enable(struct perf_event *event)
 	 * skip the schedulability test here, it will be peformed
 	 * at commit time(->commit_txn) as a whole
 	 */
-	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+	if (cpuc->group_flag & PERF_EVENT_TXN)
 		goto nocheck;
 
 	if (check_excludes(cpuc->event, n0, 1))
@@ -1102,7 +1102,7 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-	cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+	cpuhw->group_flag |= PERF_EVENT_TXN;
 }
 
 /*
@@ -1114,7 +1114,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-	cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 }
 
 /*
@@ -1137,6 +1137,7 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
 	if (sparc_check_constraints(cpuc->event, cpuc->events, n))
 		return -EAGAIN;
 
+	cpuc->group_flag &= ~PERF_EVENT_TXN;
 	return 0;
 }
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5db5b7d65a1..af04c6fa59c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -969,7 +969,7 @@ static int x86_pmu_enable(struct perf_event *event)
 	 * skip the schedulability test here, it will be peformed
 	 * at commit time(->commit_txn) as a whole
 	 */
-	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+	if (cpuc->group_flag & PERF_EVENT_TXN)
 		goto out;
 
 	ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1096,7 +1096,7 @@ static void x86_pmu_disable(struct perf_event *event)
 	 * The events never got scheduled and ->cancel_txn will truncate
 	 * the event_list.
 	 */
-	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+	if (cpuc->group_flag & PERF_EVENT_TXN)
 		return;
 
 	x86_pmu_stop(event);
@@ -1388,7 +1388,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-	cpuc->group_flag |= PERF_EVENT_TXN_STARTED;
+	cpuc->group_flag |= PERF_EVENT_TXN;
 	cpuc->n_txn = 0;
 }
 
@@ -1401,7 +1401,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-	cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED;
+	cpuc->group_flag &= ~PERF_EVENT_TXN;
 	/*
 	 * Truncate the collected events.
 	 */
@@ -1435,11 +1435,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
 	 */
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 
-	/*
-	 * Clear out the txn count so that ->cancel_txn() which gets
-	 * run after ->commit_txn() doesn't undo things.
-	 */
-	cpuc->n_txn = 0;
+	cpuc->group_flag &= ~PERF_EVENT_TXN;
 
 	return 0;
 }
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 36efad90cd4..f1b6ba0770e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -549,7 +549,10 @@ struct hw_perf_event {
 
 struct perf_event;
 
-#define PERF_EVENT_TXN_STARTED 1
+/*
+ * Common implementation detail of pmu::{start,commit,cancel}_txn
+ */
+#define PERF_EVENT_TXN 0x1
 
 /**
  * struct pmu - generic performance monitoring unit
@@ -563,14 +566,28 @@ struct pmu {
 	void (*unthrottle)		(struct perf_event *event);
 
 	/*
-	 * group events scheduling is treated as a transaction,
-	 * add group events as a whole and perform one schedulability test.
-	 * If test fails, roll back the whole group
+	 * Group events scheduling is treated as a transaction, add group
+	 * events as a whole and perform one schedulability test. If the test
+	 * fails, roll back the whole group
 	 */
 
+	/*
+	 * Start the transaction, after this ->enable() doesn't need
+	 * to do schedulability tests.
+	 */
 	void (*start_txn)	(const struct pmu *pmu);
-	void (*cancel_txn)	(const struct pmu *pmu);
+	/*
+	 * If ->start_txn() disabled the ->enable() schedulability test
+	 * then ->commit_txn() is required to perform one. On success
+	 * the transaction is closed. On error the transaction is kept
+	 * open until ->cancel_txn() is called.
+	 */
 	int  (*commit_txn)	(const struct pmu *pmu);
+	/*
+	 * Will cancel the transaction, assumes ->disable() is called for
+	 * each successful ->enable() during the transaction.
+	 */
+	void (*cancel_txn)	(const struct pmu *pmu);
 };
 
 /**
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 227ed9c8ec3..6f60920772b 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -675,7 +675,6 @@ group_sched_in(struct perf_event *group_event,
 	struct perf_event *event, *partial_group = NULL;
 	const struct pmu *pmu = group_event->pmu;
 	bool txn = false;
-	int ret;
 
 	if (group_event->state == PERF_EVENT_STATE_OFF)
 		return 0;
@@ -703,15 +702,9 @@ group_sched_in(struct perf_event *group_event,
 		}
 	}
 
-	if (!txn)
+	if (!txn || !pmu->commit_txn(pmu))
 		return 0;
 
-	ret = pmu->commit_txn(pmu);
-	if (!ret) {
-		pmu->cancel_txn(pmu);
-		return 0;
-	}
-
 group_error:
 	/*
 	 * Groups can be scheduled in as one unit only, so undo any
-- 
cgit v1.2.3-70-g09d2


From ca5135e6b4a3cbc7e187737520fbc4b508f6f7a2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 28 May 2010 19:33:23 +0200
Subject: perf: Rename perf_mmap_data to perf_buffer

Rename to clarify code.

s/perf_mmap_data/perf_buffer/g and selective s/data/buffer/g

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |   6 +-
 kernel/perf_event.c        | 308 ++++++++++++++++++++++-----------------------
 2 files changed, 157 insertions(+), 157 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f1b6ba0770e..2a0da021c23 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -602,7 +602,7 @@ enum perf_event_active_state {
 
 struct file;
 
-struct perf_mmap_data {
+struct perf_buffer {
 	atomic_t			refcount;
 	struct rcu_head			rcu_head;
 #ifdef CONFIG_PERF_USE_VMALLOC
@@ -727,7 +727,7 @@ struct perf_event {
 	atomic_t			mmap_count;
 	int				mmap_locked;
 	struct user_struct		*mmap_user;
-	struct perf_mmap_data		*data;
+	struct perf_buffer		*buffer;
 
 	/* poll related */
 	wait_queue_head_t		waitq;
@@ -825,7 +825,7 @@ struct perf_cpu_context {
 
 struct perf_output_handle {
 	struct perf_event		*event;
-	struct perf_mmap_data		*data;
+	struct perf_buffer		*buffer;
 	unsigned long			wakeup;
 	unsigned long			size;
 	void				*addr;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 6f60920772b..93d545801e4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1876,7 +1876,7 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void perf_pending_sync(struct perf_event *event);
-static void perf_mmap_data_put(struct perf_mmap_data *data);
+static void perf_buffer_put(struct perf_buffer *buffer);
 
 static void free_event(struct perf_event *event)
 {
@@ -1892,9 +1892,9 @@ static void free_event(struct perf_event *event)
 			atomic_dec(&nr_task_events);
 	}
 
-	if (event->data) {
-		perf_mmap_data_put(event->data);
-		event->data = NULL;
+	if (event->buffer) {
+		perf_buffer_put(event->buffer);
+		event->buffer = NULL;
 	}
 
 	if (event->destroy)
@@ -2119,13 +2119,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 static unsigned int perf_poll(struct file *file, poll_table *wait)
 {
 	struct perf_event *event = file->private_data;
-	struct perf_mmap_data *data;
+	struct perf_buffer *buffer;
 	unsigned int events = POLL_HUP;
 
 	rcu_read_lock();
-	data = rcu_dereference(event->data);
-	if (data)
-		events = atomic_xchg(&data->poll, 0);
+	buffer = rcu_dereference(event->buffer);
+	if (buffer)
+		events = atomic_xchg(&buffer->poll, 0);
 	rcu_read_unlock();
 
 	poll_wait(file, &event->waitq, wait);
@@ -2335,14 +2335,14 @@ static int perf_event_index(struct perf_event *event)
 void perf_event_update_userpage(struct perf_event *event)
 {
 	struct perf_event_mmap_page *userpg;
-	struct perf_mmap_data *data;
+	struct perf_buffer *buffer;
 
 	rcu_read_lock();
-	data = rcu_dereference(event->data);
-	if (!data)
+	buffer = rcu_dereference(event->buffer);
+	if (!buffer)
 		goto unlock;
 
-	userpg = data->user_page;
+	userpg = buffer->user_page;
 
 	/*
 	 * Disable preemption so as to not let the corresponding user-space
@@ -2376,15 +2376,15 @@ unlock:
  */
 
 static struct page *
-perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
 {
-	if (pgoff > data->nr_pages)
+	if (pgoff > buffer->nr_pages)
 		return NULL;
 
 	if (pgoff == 0)
-		return virt_to_page(data->user_page);
+		return virt_to_page(buffer->user_page);
 
-	return virt_to_page(data->data_pages[pgoff - 1]);
+	return virt_to_page(buffer->data_pages[pgoff - 1]);
 }
 
 static void *perf_mmap_alloc_page(int cpu)
@@ -2400,42 +2400,42 @@ static void *perf_mmap_alloc_page(int cpu)
 	return page_address(page);
 }
 
-static struct perf_mmap_data *
-perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+static struct perf_buffer *
+perf_buffer_alloc(struct perf_event *event, int nr_pages)
 {
-	struct perf_mmap_data *data;
+	struct perf_buffer *buffer;
 	unsigned long size;
 	int i;
 
-	size = sizeof(struct perf_mmap_data);
+	size = sizeof(struct perf_buffer);
 	size += nr_pages * sizeof(void *);
 
-	data = kzalloc(size, GFP_KERNEL);
-	if (!data)
+	buffer = kzalloc(size, GFP_KERNEL);
+	if (!buffer)
 		goto fail;
 
-	data->user_page = perf_mmap_alloc_page(event->cpu);
-	if (!data->user_page)
+	buffer->user_page = perf_mmap_alloc_page(event->cpu);
+	if (!buffer->user_page)
 		goto fail_user_page;
 
 	for (i = 0; i < nr_pages; i++) {
-		data->data_pages[i] = perf_mmap_alloc_page(event->cpu);
-		if (!data->data_pages[i])
+		buffer->data_pages[i] = perf_mmap_alloc_page(event->cpu);
+		if (!buffer->data_pages[i])
 			goto fail_data_pages;
 	}
 
-	data->nr_pages = nr_pages;
+	buffer->nr_pages = nr_pages;
 
-	return data;
+	return buffer;
 
 fail_data_pages:
 	for (i--; i >= 0; i--)
-		free_page((unsigned long)data->data_pages[i]);
+		free_page((unsigned long)buffer->data_pages[i]);
 
-	free_page((unsigned long)data->user_page);
+	free_page((unsigned long)buffer->user_page);
 
 fail_user_page:
-	kfree(data);
+	kfree(buffer);
 
 fail:
 	return NULL;
@@ -2449,17 +2449,17 @@ static void perf_mmap_free_page(unsigned long addr)
 	__free_page(page);
 }
 
-static void perf_mmap_data_free(struct perf_mmap_data *data)
+static void perf_buffer_free(struct perf_buffer *buffer)
 {
 	int i;
 
-	perf_mmap_free_page((unsigned long)data->user_page);
-	for (i = 0; i < data->nr_pages; i++)
-		perf_mmap_free_page((unsigned long)data->data_pages[i]);
-	kfree(data);
+	perf_mmap_free_page((unsigned long)buffer->user_page);
+	for (i = 0; i < buffer->nr_pages; i++)
+		perf_mmap_free_page((unsigned long)buffer->data_pages[i]);
+	kfree(buffer);
 }
 
-static inline int page_order(struct perf_mmap_data *data)
+static inline int page_order(struct perf_buffer *buffer)
 {
 	return 0;
 }
@@ -2472,18 +2472,18 @@ static inline int page_order(struct perf_mmap_data *data)
  * Required for architectures that have d-cache aliasing issues.
  */
 
-static inline int page_order(struct perf_mmap_data *data)
+static inline int page_order(struct perf_buffer *buffer)
 {
-	return data->page_order;
+	return buffer->page_order;
 }
 
 static struct page *
-perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
 {
-	if (pgoff > (1UL << page_order(data)))
+	if (pgoff > (1UL << page_order(buffer)))
 		return NULL;
 
-	return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
+	return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE);
 }
 
 static void perf_mmap_unmark_page(void *addr)
@@ -2493,57 +2493,57 @@ static void perf_mmap_unmark_page(void *addr)
 	page->mapping = NULL;
 }
 
-static void perf_mmap_data_free_work(struct work_struct *work)
+static void perf_buffer_free_work(struct work_struct *work)
 {
-	struct perf_mmap_data *data;
+	struct perf_buffer *buffer;
 	void *base;
 	int i, nr;
 
-	data = container_of(work, struct perf_mmap_data, work);
-	nr = 1 << page_order(data);
+	buffer = container_of(work, struct perf_buffer, work);
+	nr = 1 << page_order(buffer);
 
-	base = data->user_page;
+	base = buffer->user_page;
 	for (i = 0; i < nr + 1; i++)
 		perf_mmap_unmark_page(base + (i * PAGE_SIZE));
 
 	vfree(base);
-	kfree(data);
+	kfree(buffer);
 }
 
-static void perf_mmap_data_free(struct perf_mmap_data *data)
+static void perf_buffer_free(struct perf_buffer *buffer)
 {
-	schedule_work(&data->work);
+	schedule_work(&buffer->work);
 }
 
-static struct perf_mmap_data *
-perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+static struct perf_buffer *
+perf_buffer_alloc(struct perf_event *event, int nr_pages)
 {
-	struct perf_mmap_data *data;
+	struct perf_buffer *buffer;
 	unsigned long size;
 	void *all_buf;
 
-	size = sizeof(struct perf_mmap_data);
+	size = sizeof(struct perf_buffer);
 	size += sizeof(void *);
 
-	data = kzalloc(size, GFP_KERNEL);
-	if (!data)
+	buffer = kzalloc(size, GFP_KERNEL);
+	if (!buffer)
 		goto fail;
 
-	INIT_WORK(&data->work, perf_mmap_data_free_work);
+	INIT_WORK(&buffer->work, perf_buffer_free_work);
 
 	all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
 	if (!all_buf)
 		goto fail_all_buf;
 
-	data->user_page = all_buf;
-	data->data_pages[0] = all_buf + PAGE_SIZE;
-	data->page_order = ilog2(nr_pages);
-	data->nr_pages = 1;
+	buffer->user_page = all_buf;
+	buffer->data_pages[0] = all_buf + PAGE_SIZE;
+	buffer->page_order = ilog2(nr_pages);
+	buffer->nr_pages = 1;
 
-	return data;
+	return buffer;
 
 fail_all_buf:
-	kfree(data);
+	kfree(buffer);
 
 fail:
 	return NULL;
@@ -2551,15 +2551,15 @@ fail:
 
 #endif
 
-static unsigned long perf_data_size(struct perf_mmap_data *data)
+static unsigned long perf_data_size(struct perf_buffer *buffer)
 {
-	return data->nr_pages << (PAGE_SHIFT + page_order(data));
+	return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer));
 }
 
 static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct perf_event *event = vma->vm_file->private_data;
-	struct perf_mmap_data *data;
+	struct perf_buffer *buffer;
 	int ret = VM_FAULT_SIGBUS;
 
 	if (vmf->flags & FAULT_FLAG_MKWRITE) {
@@ -2569,14 +2569,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	rcu_read_lock();
-	data = rcu_dereference(event->data);
-	if (!data)
+	buffer = rcu_dereference(event->buffer);
+	if (!buffer)
 		goto unlock;
 
 	if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
 		goto unlock;
 
-	vmf->page = perf_mmap_to_page(data, vmf->pgoff);
+	vmf->page = perf_mmap_to_page(buffer, vmf->pgoff);
 	if (!vmf->page)
 		goto unlock;
 
@@ -2592,51 +2592,51 @@ unlock:
 }
 
 static void
-perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
+perf_buffer_init(struct perf_event *event, struct perf_buffer *buffer)
 {
-	long max_size = perf_data_size(data);
+	long max_size = perf_data_size(buffer);
 
 	if (event->attr.watermark) {
-		data->watermark = min_t(long, max_size,
+		buffer->watermark = min_t(long, max_size,
 					event->attr.wakeup_watermark);
 	}
 
-	if (!data->watermark)
-		data->watermark = max_size / 2;
+	if (!buffer->watermark)
+		buffer->watermark = max_size / 2;
 
-	atomic_set(&data->refcount, 1);
-	rcu_assign_pointer(event->data, data);
+	atomic_set(&buffer->refcount, 1);
+	rcu_assign_pointer(event->buffer, buffer);
 }
 
-static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
+static void perf_buffer_free_rcu(struct rcu_head *rcu_head)
 {
-	struct perf_mmap_data *data;
+	struct perf_buffer *buffer;
 
-	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
-	perf_mmap_data_free(data);
+	buffer = container_of(rcu_head, struct perf_buffer, rcu_head);
+	perf_buffer_free(buffer);
 }
 
-static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event)
+static struct perf_buffer *perf_buffer_get(struct perf_event *event)
 {
-	struct perf_mmap_data *data;
+	struct perf_buffer *buffer;
 
 	rcu_read_lock();
-	data = rcu_dereference(event->data);
-	if (data) {
-		if (!atomic_inc_not_zero(&data->refcount))
-			data = NULL;
+	buffer = rcu_dereference(event->buffer);
+	if (buffer) {
+		if (!atomic_inc_not_zero(&buffer->refcount))
+			buffer = NULL;
 	}
 	rcu_read_unlock();
 
-	return data;
+	return buffer;
 }
 
-static void perf_mmap_data_put(struct perf_mmap_data *data)
+static void perf_buffer_put(struct perf_buffer *buffer)
 {
-	if (!atomic_dec_and_test(&data->refcount))
+	if (!atomic_dec_and_test(&buffer->refcount))
 		return;
 
-	call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
+	call_rcu(&buffer->rcu_head, perf_buffer_free_rcu);
 }
 
 static void perf_mmap_open(struct vm_area_struct *vma)
@@ -2651,16 +2651,16 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	struct perf_event *event = vma->vm_file->private_data;
 
 	if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
-		unsigned long size = perf_data_size(event->data);
+		unsigned long size = perf_data_size(event->buffer);
 		struct user_struct *user = event->mmap_user;
-		struct perf_mmap_data *data = event->data;
+		struct perf_buffer *buffer = event->buffer;
 
 		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
 		vma->vm_mm->locked_vm -= event->mmap_locked;
-		rcu_assign_pointer(event->data, NULL);
+		rcu_assign_pointer(event->buffer, NULL);
 		mutex_unlock(&event->mmap_mutex);
 
-		perf_mmap_data_put(data);
+		perf_buffer_put(buffer);
 		free_uid(user);
 	}
 }
@@ -2678,7 +2678,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	unsigned long user_locked, user_lock_limit;
 	struct user_struct *user = current_user();
 	unsigned long locked, lock_limit;
-	struct perf_mmap_data *data;
+	struct perf_buffer *buffer;
 	unsigned long vma_size;
 	unsigned long nr_pages;
 	long user_extra, extra;
@@ -2699,7 +2699,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	nr_pages = (vma_size / PAGE_SIZE) - 1;
 
 	/*
-	 * If we have data pages ensure they're a power-of-two number, so we
+	 * If we have buffer pages ensure they're a power-of-two number, so we
 	 * can do bitmasks instead of modulo.
 	 */
 	if (nr_pages != 0 && !is_power_of_2(nr_pages))
@@ -2713,9 +2713,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
 	WARN_ON_ONCE(event->ctx->parent_ctx);
 	mutex_lock(&event->mmap_mutex);
-	if (event->data) {
-		if (event->data->nr_pages == nr_pages)
-			atomic_inc(&event->data->refcount);
+	if (event->buffer) {
+		if (event->buffer->nr_pages == nr_pages)
+			atomic_inc(&event->buffer->refcount);
 		else
 			ret = -EINVAL;
 		goto unlock;
@@ -2745,17 +2745,17 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		goto unlock;
 	}
 
-	WARN_ON(event->data);
+	WARN_ON(event->buffer);
 
-	data = perf_mmap_data_alloc(event, nr_pages);
-	if (!data) {
+	buffer = perf_buffer_alloc(event, nr_pages);
+	if (!buffer) {
 		ret = -ENOMEM;
 		goto unlock;
 	}
 
-	perf_mmap_data_init(event, data);
+	perf_buffer_init(event, buffer);
 	if (vma->vm_flags & VM_WRITE)
-		event->data->writable = 1;
+		event->buffer->writable = 1;
 
 	atomic_long_add(user_extra, &user->locked_vm);
 	event->mmap_locked = extra;
@@ -2964,15 +2964,15 @@ EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
 /*
  * Output
  */
-static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
+static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail,
 			      unsigned long offset, unsigned long head)
 {
 	unsigned long mask;
 
-	if (!data->writable)
+	if (!buffer->writable)
 		return true;
 
-	mask = perf_data_size(data) - 1;
+	mask = perf_data_size(buffer) - 1;
 
 	offset = (offset - tail) & mask;
 	head   = (head   - tail) & mask;
@@ -2985,7 +2985,7 @@ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
 
 static void perf_output_wakeup(struct perf_output_handle *handle)
 {
-	atomic_set(&handle->data->poll, POLL_IN);
+	atomic_set(&handle->buffer->poll, POLL_IN);
 
 	if (handle->nmi) {
 		handle->event->pending_wakeup = 1;
@@ -3005,45 +3005,45 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
  */
 static void perf_output_get_handle(struct perf_output_handle *handle)
 {
-	struct perf_mmap_data *data = handle->data;
+	struct perf_buffer *buffer = handle->buffer;
 
 	preempt_disable();
-	local_inc(&data->nest);
-	handle->wakeup = local_read(&data->wakeup);
+	local_inc(&buffer->nest);
+	handle->wakeup = local_read(&buffer->wakeup);
 }
 
 static void perf_output_put_handle(struct perf_output_handle *handle)
 {
-	struct perf_mmap_data *data = handle->data;
+	struct perf_buffer *buffer = handle->buffer;
 	unsigned long head;
 
 again:
-	head = local_read(&data->head);
+	head = local_read(&buffer->head);
 
 	/*
 	 * IRQ/NMI can happen here, which means we can miss a head update.
 	 */
 
-	if (!local_dec_and_test(&data->nest))
+	if (!local_dec_and_test(&buffer->nest))
 		goto out;
 
 	/*
 	 * Publish the known good head. Rely on the full barrier implied
-	 * by atomic_dec_and_test() order the data->head read and this
+	 * by atomic_dec_and_test() order the buffer->head read and this
 	 * write.
 	 */
-	data->user_page->data_head = head;
+	buffer->user_page->data_head = head;
 
 	/*
 	 * Now check if we missed an update, rely on the (compiler)
-	 * barrier in atomic_dec_and_test() to re-read data->head.
+	 * barrier in atomic_dec_and_test() to re-read buffer->head.
 	 */
-	if (unlikely(head != local_read(&data->head))) {
-		local_inc(&data->nest);
+	if (unlikely(head != local_read(&buffer->head))) {
+		local_inc(&buffer->nest);
 		goto again;
 	}
 
-	if (handle->wakeup != local_read(&data->wakeup))
+	if (handle->wakeup != local_read(&buffer->wakeup))
 		perf_output_wakeup(handle);
 
  out:
@@ -3063,12 +3063,12 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
 		buf += size;
 		handle->size -= size;
 		if (!handle->size) {
-			struct perf_mmap_data *data = handle->data;
+			struct perf_buffer *buffer = handle->buffer;
 
 			handle->page++;
-			handle->page &= data->nr_pages - 1;
-			handle->addr = data->data_pages[handle->page];
-			handle->size = PAGE_SIZE << page_order(data);
+			handle->page &= buffer->nr_pages - 1;
+			handle->addr = buffer->data_pages[handle->page];
+			handle->size = PAGE_SIZE << page_order(buffer);
 		}
 	} while (len);
 }
@@ -3077,7 +3077,7 @@ int perf_output_begin(struct perf_output_handle *handle,
 		      struct perf_event *event, unsigned int size,
 		      int nmi, int sample)
 {
-	struct perf_mmap_data *data;
+	struct perf_buffer *buffer;
 	unsigned long tail, offset, head;
 	int have_lost;
 	struct {
@@ -3093,19 +3093,19 @@ int perf_output_begin(struct perf_output_handle *handle,
 	if (event->parent)
 		event = event->parent;
 
-	data = rcu_dereference(event->data);
-	if (!data)
+	buffer = rcu_dereference(event->buffer);
+	if (!buffer)
 		goto out;
 
-	handle->data	= data;
+	handle->buffer	= buffer;
 	handle->event	= event;
 	handle->nmi	= nmi;
 	handle->sample	= sample;
 
-	if (!data->nr_pages)
+	if (!buffer->nr_pages)
 		goto out;
 
-	have_lost = local_read(&data->lost);
+	have_lost = local_read(&buffer->lost);
 	if (have_lost)
 		size += sizeof(lost_event);
 
@@ -3117,30 +3117,30 @@ int perf_output_begin(struct perf_output_handle *handle,
 		 * tail pointer. So that all reads will be completed before the
 		 * write is issued.
 		 */
-		tail = ACCESS_ONCE(data->user_page->data_tail);
+		tail = ACCESS_ONCE(buffer->user_page->data_tail);
 		smp_rmb();
-		offset = head = local_read(&data->head);
+		offset = head = local_read(&buffer->head);
 		head += size;
-		if (unlikely(!perf_output_space(data, tail, offset, head)))
+		if (unlikely(!perf_output_space(buffer, tail, offset, head)))
 			goto fail;
-	} while (local_cmpxchg(&data->head, offset, head) != offset);
+	} while (local_cmpxchg(&buffer->head, offset, head) != offset);
 
-	if (head - local_read(&data->wakeup) > data->watermark)
-		local_add(data->watermark, &data->wakeup);
+	if (head - local_read(&buffer->wakeup) > buffer->watermark)
+		local_add(buffer->watermark, &buffer->wakeup);
 
-	handle->page = offset >> (PAGE_SHIFT + page_order(data));
-	handle->page &= data->nr_pages - 1;
-	handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1);
-	handle->addr = data->data_pages[handle->page];
+	handle->page = offset >> (PAGE_SHIFT + page_order(buffer));
+	handle->page &= buffer->nr_pages - 1;
+	handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1);
+	handle->addr = buffer->data_pages[handle->page];
 	handle->addr += handle->size;
-	handle->size = (PAGE_SIZE << page_order(data)) - handle->size;
+	handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size;
 
 	if (have_lost) {
 		lost_event.header.type = PERF_RECORD_LOST;
 		lost_event.header.misc = 0;
 		lost_event.header.size = sizeof(lost_event);
 		lost_event.id          = event->id;
-		lost_event.lost        = local_xchg(&data->lost, 0);
+		lost_event.lost        = local_xchg(&buffer->lost, 0);
 
 		perf_output_put(handle, lost_event);
 	}
@@ -3148,7 +3148,7 @@ int perf_output_begin(struct perf_output_handle *handle,
 	return 0;
 
 fail:
-	local_inc(&data->lost);
+	local_inc(&buffer->lost);
 	perf_output_put_handle(handle);
 out:
 	rcu_read_unlock();
@@ -3159,15 +3159,15 @@ out:
 void perf_output_end(struct perf_output_handle *handle)
 {
 	struct perf_event *event = handle->event;
-	struct perf_mmap_data *data = handle->data;
+	struct perf_buffer *buffer = handle->buffer;
 
 	int wakeup_events = event->attr.wakeup_events;
 
 	if (handle->sample && wakeup_events) {
-		int events = local_inc_return(&data->events);
+		int events = local_inc_return(&buffer->events);
 		if (events >= wakeup_events) {
-			local_sub(wakeup_events, &data->events);
-			local_inc(&data->wakeup);
+			local_sub(wakeup_events, &buffer->events);
+			local_inc(&buffer->wakeup);
 		}
 	}
 
@@ -5010,7 +5010,7 @@ err_size:
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
-	struct perf_mmap_data *data = NULL, *old_data = NULL;
+	struct perf_buffer *buffer = NULL, *old_buffer = NULL;
 	int ret = -EINVAL;
 
 	if (!output_event)
@@ -5040,19 +5040,19 @@ set:
 
 	if (output_event) {
 		/* get the buffer we want to redirect to */
-		data = perf_mmap_data_get(output_event);
-		if (!data)
+		buffer = perf_buffer_get(output_event);
+		if (!buffer)
 			goto unlock;
 	}
 
-	old_data = event->data;
-	rcu_assign_pointer(event->data, data);
+	old_buffer = event->buffer;
+	rcu_assign_pointer(event->buffer, buffer);
 	ret = 0;
 unlock:
 	mutex_unlock(&event->mmap_mutex);
 
-	if (old_data)
-		perf_mmap_data_put(old_data);
+	if (old_buffer)
+		perf_buffer_put(old_buffer);
 out:
 	return ret;
 }
-- 
cgit v1.2.3-70-g09d2


From d57e34fdd60be7ffd0b1d86bfa1a553df86b7172 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 28 May 2010 19:41:35 +0200
Subject: perf: Simplify the ring-buffer logic: make perf_buffer_alloc() do
 everything needed

Currently there are perf_buffer_alloc() + perf_buffer_init() + some
separate bits; fold them all into a single perf_buffer_alloc() and
leave only the attachment to the event separate.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |  2 ++
 kernel/perf_event.c        | 61 ++++++++++++++++++++++++++--------------------
 2 files changed, 36 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2a0da021c23..441992a9775 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -602,6 +602,8 @@ enum perf_event_active_state {
 
 struct file;
 
+#define PERF_BUFFER_WRITABLE		0x01
+
 struct perf_buffer {
 	atomic_t			refcount;
 	struct rcu_head			rcu_head;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 93d545801e4..f75c9c9c817 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2369,6 +2369,25 @@ unlock:
 	rcu_read_unlock();
 }
 
+static unsigned long perf_data_size(struct perf_buffer *buffer);
+
+static void
+perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags)
+{
+	long max_size = perf_data_size(buffer);
+
+	if (watermark)
+		buffer->watermark = min(max_size, watermark);
+
+	if (!buffer->watermark)
+		buffer->watermark = max_size / 2;
+
+	if (flags & PERF_BUFFER_WRITABLE)
+		buffer->writable = 1;
+
+	atomic_set(&buffer->refcount, 1);
+}
+
 #ifndef CONFIG_PERF_USE_VMALLOC
 
 /*
@@ -2401,7 +2420,7 @@ static void *perf_mmap_alloc_page(int cpu)
 }
 
 static struct perf_buffer *
-perf_buffer_alloc(struct perf_event *event, int nr_pages)
+perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
 {
 	struct perf_buffer *buffer;
 	unsigned long size;
@@ -2414,18 +2433,20 @@ perf_buffer_alloc(struct perf_event *event, int nr_pages)
 	if (!buffer)
 		goto fail;
 
-	buffer->user_page = perf_mmap_alloc_page(event->cpu);
+	buffer->user_page = perf_mmap_alloc_page(cpu);
 	if (!buffer->user_page)
 		goto fail_user_page;
 
 	for (i = 0; i < nr_pages; i++) {
-		buffer->data_pages[i] = perf_mmap_alloc_page(event->cpu);
+		buffer->data_pages[i] = perf_mmap_alloc_page(cpu);
 		if (!buffer->data_pages[i])
 			goto fail_data_pages;
 	}
 
 	buffer->nr_pages = nr_pages;
 
+	perf_buffer_init(buffer, watermark, flags);
+
 	return buffer;
 
 fail_data_pages:
@@ -2516,7 +2537,7 @@ static void perf_buffer_free(struct perf_buffer *buffer)
 }
 
 static struct perf_buffer *
-perf_buffer_alloc(struct perf_event *event, int nr_pages)
+perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
 {
 	struct perf_buffer *buffer;
 	unsigned long size;
@@ -2540,6 +2561,8 @@ perf_buffer_alloc(struct perf_event *event, int nr_pages)
 	buffer->page_order = ilog2(nr_pages);
 	buffer->nr_pages = 1;
 
+	perf_buffer_init(buffer, watermark, flags);
+
 	return buffer;
 
 fail_all_buf:
@@ -2591,23 +2614,6 @@ unlock:
 	return ret;
 }
 
-static void
-perf_buffer_init(struct perf_event *event, struct perf_buffer *buffer)
-{
-	long max_size = perf_data_size(buffer);
-
-	if (event->attr.watermark) {
-		buffer->watermark = min_t(long, max_size,
-					event->attr.wakeup_watermark);
-	}
-
-	if (!buffer->watermark)
-		buffer->watermark = max_size / 2;
-
-	atomic_set(&buffer->refcount, 1);
-	rcu_assign_pointer(event->buffer, buffer);
-}
-
 static void perf_buffer_free_rcu(struct rcu_head *rcu_head)
 {
 	struct perf_buffer *buffer;
@@ -2682,7 +2688,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	unsigned long vma_size;
 	unsigned long nr_pages;
 	long user_extra, extra;
-	int ret = 0;
+	int ret = 0, flags = 0;
 
 	/*
 	 * Don't allow mmap() of inherited per-task counters. This would
@@ -2747,15 +2753,16 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
 	WARN_ON(event->buffer);
 
-	buffer = perf_buffer_alloc(event, nr_pages);
+	if (vma->vm_flags & VM_WRITE)
+		flags |= PERF_BUFFER_WRITABLE;
+
+	buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark,
+				   event->cpu, flags);
 	if (!buffer) {
 		ret = -ENOMEM;
 		goto unlock;
 	}
-
-	perf_buffer_init(event, buffer);
-	if (vma->vm_flags & VM_WRITE)
-		event->buffer->writable = 1;
+	rcu_assign_pointer(event->buffer, buffer);
 
 	atomic_long_add(user_extra, &user->locked_vm);
 	event->mmap_locked = extra;
-- 
cgit v1.2.3-70-g09d2


From 1996bda2a42480c275656233e631ee0966574be4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 21 May 2010 14:05:13 +0200
Subject: arch: Implement local64_t

On 64bit, local_t is of size long, and thus we make local64_t an alias.
On 32bit, we fall back to atomic64_t. (An architecture can provide an
optimized 32-bit version.)

(This new facility is to be used by perf events optimizations.)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: linux-arch@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/alpha/include/asm/local64.h      |  1 +
 arch/arm/include/asm/local64.h        |  1 +
 arch/avr32/include/asm/local64.h      |  1 +
 arch/blackfin/include/asm/local64.h   |  1 +
 arch/cris/include/asm/local64.h       |  1 +
 arch/frv/include/asm/local64.h        |  1 +
 arch/frv/kernel/local64.h             |  1 +
 arch/h8300/include/asm/local64.h      |  1 +
 arch/ia64/include/asm/local64.h       |  1 +
 arch/m32r/include/asm/local64.h       |  1 +
 arch/m68k/include/asm/local64.h       |  1 +
 arch/microblaze/include/asm/local64.h |  1 +
 arch/mips/include/asm/local64.h       |  1 +
 arch/mn10300/include/asm/local64.h    |  1 +
 arch/parisc/include/asm/local64.h     |  1 +
 arch/powerpc/include/asm/local64.h    |  1 +
 arch/s390/include/asm/local64.h       |  1 +
 arch/score/include/asm/local64.h      |  1 +
 arch/sh/include/asm/local64.h         |  1 +
 arch/sparc/include/asm/local64.h      |  1 +
 arch/x86/include/asm/local64.h        |  1 +
 arch/xtensa/include/asm/local64.h     |  1 +
 include/asm-generic/local64.h         | 96 +++++++++++++++++++++++++++++++++++
 23 files changed, 118 insertions(+)
 create mode 100644 arch/alpha/include/asm/local64.h
 create mode 100644 arch/arm/include/asm/local64.h
 create mode 100644 arch/avr32/include/asm/local64.h
 create mode 100644 arch/blackfin/include/asm/local64.h
 create mode 100644 arch/cris/include/asm/local64.h
 create mode 100644 arch/frv/include/asm/local64.h
 create mode 100644 arch/frv/kernel/local64.h
 create mode 100644 arch/h8300/include/asm/local64.h
 create mode 100644 arch/ia64/include/asm/local64.h
 create mode 100644 arch/m32r/include/asm/local64.h
 create mode 100644 arch/m68k/include/asm/local64.h
 create mode 100644 arch/microblaze/include/asm/local64.h
 create mode 100644 arch/mips/include/asm/local64.h
 create mode 100644 arch/mn10300/include/asm/local64.h
 create mode 100644 arch/parisc/include/asm/local64.h
 create mode 100644 arch/powerpc/include/asm/local64.h
 create mode 100644 arch/s390/include/asm/local64.h
 create mode 100644 arch/score/include/asm/local64.h
 create mode 100644 arch/sh/include/asm/local64.h
 create mode 100644 arch/sparc/include/asm/local64.h
 create mode 100644 arch/x86/include/asm/local64.h
 create mode 100644 arch/xtensa/include/asm/local64.h
 create mode 100644 include/asm-generic/local64.h

(limited to 'include')

diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/alpha/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/arm/include/asm/local64.h b/arch/arm/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/arm/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/avr32/include/asm/local64.h b/arch/avr32/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/avr32/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/blackfin/include/asm/local64.h b/arch/blackfin/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/blackfin/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/cris/include/asm/local64.h b/arch/cris/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/cris/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/frv/include/asm/local64.h b/arch/frv/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/frv/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/frv/kernel/local64.h b/arch/frv/kernel/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/frv/kernel/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/h8300/include/asm/local64.h b/arch/h8300/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/h8300/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/ia64/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/m32r/include/asm/local64.h b/arch/m32r/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/m32r/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/m68k/include/asm/local64.h b/arch/m68k/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/m68k/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/microblaze/include/asm/local64.h b/arch/microblaze/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/microblaze/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/mips/include/asm/local64.h b/arch/mips/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/mips/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/mn10300/include/asm/local64.h b/arch/mn10300/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/mn10300/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/parisc/include/asm/local64.h b/arch/parisc/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/parisc/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/local64.h b/arch/powerpc/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/powerpc/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/s390/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/score/include/asm/local64.h b/arch/score/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/score/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/sh/include/asm/local64.h b/arch/sh/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/sh/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/sparc/include/asm/local64.h b/arch/sparc/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/sparc/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/x86/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/xtensa/include/asm/local64.h b/arch/xtensa/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/xtensa/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/include/asm-generic/local64.h b/include/asm-generic/local64.h
new file mode 100644
index 00000000000..02ac760c1a8
--- /dev/null
+++ b/include/asm-generic/local64.h
@@ -0,0 +1,96 @@
+#ifndef _ASM_GENERIC_LOCAL64_H
+#define _ASM_GENERIC_LOCAL64_H
+
+#include <linux/percpu.h>
+#include <asm/types.h>
+
+/*
+ * A signed 64-bit type for operations which are atomic for a single CPU.
+ * Usually used in combination with per-cpu variables.
+ *
+ * This is the default implementation: local_t on 64-bit, atomic64_t otherwise
+ * (the latter is rather pointless).  The whole point behind local64_t is that
+ * some processors can perform atomic adds and subtracts in a manner atomic wrt
+ * IRQs running on this CPU.  local64_t allows exploitation of such capabilities.
+ */
+
+/* Implement in terms of atomics. */
+
+#if BITS_PER_LONG == 64
+
+#include <asm/local.h>
+
+typedef struct {
+	local_t a;
+} local64_t;
+
+#define LOCAL64_INIT(i)	{ LOCAL_INIT(i) }
+
+#define local64_read(l)		local_read(&(l)->a)
+#define local64_set(l,i)	local_set((&(l)->a),(i))
+#define local64_inc(l)		local_inc(&(l)->a)
+#define local64_dec(l)		local_dec(&(l)->a)
+#define local64_add(i,l)	local_add((i),(&(l)->a))
+#define local64_sub(i,l)	local_sub((i),(&(l)->a))
+
+#define local64_sub_and_test(i, l) local_sub_and_test((i), (&(l)->a))
+#define local64_dec_and_test(l) local_dec_and_test(&(l)->a)
+#define local64_inc_and_test(l) local_inc_and_test(&(l)->a)
+#define local64_add_negative(i, l) local_add_negative((i), (&(l)->a))
+#define local64_add_return(i, l) local_add_return((i), (&(l)->a))
+#define local64_sub_return(i, l) local_sub_return((i), (&(l)->a))
+#define local64_inc_return(l)	local_inc_return(&(l)->a)
+
+#define local64_cmpxchg(l, o, n) local_cmpxchg((&(l)->a), (o), (n))
+#define local64_xchg(l, n)	local_xchg((&(l)->a), (n))
+#define local64_add_unless(l, _a, u) local_add_unless((&(l)->a), (_a), (u))
+#define local64_inc_not_zero(l)	local_inc_not_zero(&(l)->a)
+
+/* Non-atomic variants, ie. preemption disabled and won't be touched
+ * in interrupt, etc.  Some archs can optimize this case well. */
+#define __local64_inc(l)	local64_set((l), local64_read(l) + 1)
+#define __local64_dec(l)	local64_set((l), local64_read(l) - 1)
+#define __local64_add(i,l)	local64_set((l), local64_read(l) + (i))
+#define __local64_sub(i,l)	local64_set((l), local64_read(l) - (i))
+
+#else /* BITS_PER_LONG != 64 */
+
+#include <asm/atomic.h>
+
+/* Don't use typedef: don't want them to be mixed with atomic_t's. */
+typedef struct {
+	atomic64_t a;
+} local64_t;
+
+#define LOCAL64_INIT(i)	{ ATOMIC64_INIT(i) }	/* ->a is an atomic64_t here */
+
+#define local64_read(l)		atomic64_read(&(l)->a)
+#define local64_set(l,i)	atomic64_set((&(l)->a),(i))
+#define local64_inc(l)		atomic64_inc(&(l)->a)
+#define local64_dec(l)		atomic64_dec(&(l)->a)
+#define local64_add(i,l)	atomic64_add((i),(&(l)->a))
+#define local64_sub(i,l)	atomic64_sub((i),(&(l)->a))
+
+#define local64_sub_and_test(i, l) atomic64_sub_and_test((i), (&(l)->a))
+#define local64_dec_and_test(l) atomic64_dec_and_test(&(l)->a)
+#define local64_inc_and_test(l) atomic64_inc_and_test(&(l)->a)
+#define local64_add_negative(i, l) atomic64_add_negative((i), (&(l)->a))
+#define local64_add_return(i, l) atomic64_add_return((i), (&(l)->a))
+#define local64_sub_return(i, l) atomic64_sub_return((i), (&(l)->a))
+#define local64_inc_return(l)	atomic64_inc_return(&(l)->a)
+
+#define local64_cmpxchg(l, o, n) atomic64_cmpxchg((&(l)->a), (o), (n))
+#define local64_xchg(l, n)	atomic64_xchg((&(l)->a), (n))
+#define local64_add_unless(l, _a, u) atomic64_add_unless((&(l)->a), (_a), (u))
+#define local64_inc_not_zero(l)	atomic64_inc_not_zero(&(l)->a)
+
+/* Non-atomic variants, ie. preemption disabled and won't be touched
+ * in interrupt, etc.  Some archs can optimize this case well. */
+#define __local64_inc(l)	local64_set((l), local64_read(l) + 1)
+#define __local64_dec(l)	local64_set((l), local64_read(l) - 1)
+#define __local64_add(i,l)	local64_set((l), local64_read(l) + (i))
+#define __local64_sub(i,l)	local64_set((l), local64_read(l) - (i))
+
+#endif /* BITS_PER_LONG != 64 */
+
+#endif /* _ASM_GENERIC_LOCAL64_H */
-- 
cgit v1.2.3-70-g09d2


From a6e6dea68c18f705957573ee5596097c7e82d0e5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 21 May 2010 14:27:58 +0200
Subject: perf: Add perf_event::child_count

Only child counters adding back their values into the parent counter
are responsible for cross-cpu updates to event->count.

So if we pull that out into a new child_count variable, we get an
event->count that is only modified locally.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 1 +
 kernel/perf_event.c        | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 441992a9775..f34dab9b275 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -671,6 +671,7 @@ struct perf_event {
 	enum perf_event_active_state	state;
 	unsigned int			attach_state;
 	atomic64_t			count;
+	atomic64_t			child_count;
 
 	/*
 	 * These are the total time in nanoseconds that the event
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ab4c0ffc271..a395fda2d94 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1738,7 +1738,7 @@ static void __perf_event_read(void *info)
 
 static inline u64 perf_event_count(struct perf_event *event)
 {
-	return atomic64_read(&event->count);
+	return atomic64_read(&event->count) + atomic64_read(&event->child_count);
 }
 
 static u64 perf_event_read(struct perf_event *event)
@@ -5379,7 +5379,7 @@ static void sync_child_event(struct perf_event *child_event,
 	/*
 	 * Add back the child's count to the parent's count:
 	 */
-	atomic64_add(child_val, &parent_event->count);
+	atomic64_add(child_val, &parent_event->child_count);
 	atomic64_add(child_event->total_time_enabled,
 		     &parent_event->child_total_time_enabled);
 	atomic64_add(child_event->total_time_running,
-- 
cgit v1.2.3-70-g09d2


From e78505958cf123048fb48cb56b79cebb8edd15fb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 21 May 2010 14:43:08 +0200
Subject: perf: Convert perf_event to local_t

Since all modifications to event->count (and ->prev_count and
->period_left) are now local to a cpu, change them to local64_t so we
avoid the LOCK'ed ops.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/arm/kernel/perf_event.c     | 18 ++++++++---------
 arch/powerpc/kernel/perf_event.c | 34 ++++++++++++++++----------------
 arch/sh/kernel/perf_event.c      |  6 +++---
 arch/sparc/kernel/perf_event.c   | 18 ++++++++---------
 arch/x86/kernel/cpu/perf_event.c | 18 ++++++++---------
 include/linux/perf_event.h       |  7 ++++---
 kernel/perf_event.c              | 42 ++++++++++++++++++++--------------------
 7 files changed, 72 insertions(+), 71 deletions(-)

(limited to 'include')

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index c45768614c8..5b7cfafc072 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -164,20 +164,20 @@ armpmu_event_set_period(struct perf_event *event,
 			struct hw_perf_event *hwc,
 			int idx)
 {
-	s64 left = atomic64_read(&hwc->period_left);
+	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
 	int ret = 0;
 
 	if (unlikely(left <= -period)) {
 		left = period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
 
 	if (unlikely(left <= 0)) {
 		left += period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
@@ -185,7 +185,7 @@ armpmu_event_set_period(struct perf_event *event,
 	if (left > (s64)armpmu->max_period)
 		left = armpmu->max_period;
 
-	atomic64_set(&hwc->prev_count, (u64)-left);
+	local64_set(&hwc->prev_count, (u64)-left);
 
 	armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
 
@@ -204,18 +204,18 @@ armpmu_event_update(struct perf_event *event,
 	s64 delta;
 
 again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
+	prev_raw_count = local64_read(&hwc->prev_count);
 	new_raw_count = armpmu->read_counter(idx);
 
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	atomic64_add(delta, &event->count);
-	atomic64_sub(delta, &hwc->period_left);
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
 
 	return new_raw_count;
 }
@@ -478,7 +478,7 @@ __hw_perf_event_init(struct perf_event *event)
 	if (!hwc->sample_period) {
 		hwc->sample_period  = armpmu->max_period;
 		hwc->last_period    = hwc->sample_period;
-		atomic64_set(&hwc->period_left, hwc->sample_period);
+		local64_set(&hwc->period_left, hwc->sample_period);
 	}
 
 	err = 0;
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index ac2a8c2554d..af1d9a7c65d 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -410,15 +410,15 @@ static void power_pmu_read(struct perf_event *event)
 	 * Therefore we treat them like NMIs.
 	 */
 	do {
-		prev = atomic64_read(&event->hw.prev_count);
+		prev = local64_read(&event->hw.prev_count);
 		barrier();
 		val = read_pmc(event->hw.idx);
-	} while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
 
 	/* The counters are only 32 bits wide */
 	delta = (val - prev) & 0xfffffffful;
-	atomic64_add(delta, &event->count);
-	atomic64_sub(delta, &event->hw.period_left);
+	local64_add(delta, &event->count);
+	local64_sub(delta, &event->hw.period_left);
 }
 
 /*
@@ -444,10 +444,10 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
 		if (!event->hw.idx)
 			continue;
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
-		prev = atomic64_read(&event->hw.prev_count);
+		prev = local64_read(&event->hw.prev_count);
 		event->hw.idx = 0;
 		delta = (val - prev) & 0xfffffffful;
-		atomic64_add(delta, &event->count);
+		local64_add(delta, &event->count);
 	}
 }
 
@@ -462,7 +462,7 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
 		event = cpuhw->limited_counter[i];
 		event->hw.idx = cpuhw->limited_hwidx[i];
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
-		atomic64_set(&event->hw.prev_count, val);
+		local64_set(&event->hw.prev_count, val);
 		perf_event_update_userpage(event);
 	}
 }
@@ -666,11 +666,11 @@ void hw_perf_enable(void)
 		}
 		val = 0;
 		if (event->hw.sample_period) {
-			left = atomic64_read(&event->hw.period_left);
+			left = local64_read(&event->hw.period_left);
 			if (left < 0x80000000L)
 				val = 0x80000000L - left;
 		}
-		atomic64_set(&event->hw.prev_count, val);
+		local64_set(&event->hw.prev_count, val);
 		event->hw.idx = idx;
 		write_pmc(idx, val);
 		perf_event_update_userpage(event);
@@ -842,8 +842,8 @@ static void power_pmu_unthrottle(struct perf_event *event)
 	if (left < 0x80000000L)
 		val = 0x80000000L - left;
 	write_pmc(event->hw.idx, val);
-	atomic64_set(&event->hw.prev_count, val);
-	atomic64_set(&event->hw.period_left, left);
+	local64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.period_left, left);
 	perf_event_update_userpage(event);
 	perf_enable();
 	local_irq_restore(flags);
@@ -1109,7 +1109,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	event->hw.config = events[n];
 	event->hw.event_base = cflags[n];
 	event->hw.last_period = event->hw.sample_period;
-	atomic64_set(&event->hw.period_left, event->hw.last_period);
+	local64_set(&event->hw.period_left, event->hw.last_period);
 
 	/*
 	 * See if we need to reserve the PMU.
@@ -1147,16 +1147,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	int record = 0;
 
 	/* we don't have to worry about interrupts here */
-	prev = atomic64_read(&event->hw.prev_count);
+	prev = local64_read(&event->hw.prev_count);
 	delta = (val - prev) & 0xfffffffful;
-	atomic64_add(delta, &event->count);
+	local64_add(delta, &event->count);
 
 	/*
 	 * See if the total period for this event has expired,
 	 * and update for the next period.
 	 */
 	val = 0;
-	left = atomic64_read(&event->hw.period_left) - delta;
+	left = local64_read(&event->hw.period_left) - delta;
 	if (period) {
 		if (left <= 0) {
 			left += period;
@@ -1194,8 +1194,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	}
 
 	write_pmc(event->hw.idx, val);
-	atomic64_set(&event->hw.prev_count, val);
-	atomic64_set(&event->hw.period_left, left);
+	local64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.period_left, left);
 	perf_event_update_userpage(event);
 }
 
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 81b6de41ae5..7a3dc356725 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -185,10 +185,10 @@ static void sh_perf_event_update(struct perf_event *event,
 	 * this is the simplest approach for maintaining consistency.
 	 */
 again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
+	prev_raw_count = local64_read(&hwc->prev_count);
 	new_raw_count = sh_pmu->read(idx);
 
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
@@ -203,7 +203,7 @@ again:
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	atomic64_add(delta, &event->count);
+	local64_add(delta, &event->count);
 }
 
 static void sh_pmu_disable(struct perf_event *event)
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index beeb92fa3ac..8a6660da8e0 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -572,18 +572,18 @@ static u64 sparc_perf_event_update(struct perf_event *event,
 	s64 delta;
 
 again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
+	prev_raw_count = local64_read(&hwc->prev_count);
 	new_raw_count = read_pmc(idx);
 
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	atomic64_add(delta, &event->count);
-	atomic64_sub(delta, &hwc->period_left);
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
 
 	return new_raw_count;
 }
@@ -591,27 +591,27 @@ again:
 static int sparc_perf_event_set_period(struct perf_event *event,
 				       struct hw_perf_event *hwc, int idx)
 {
-	s64 left = atomic64_read(&hwc->period_left);
+	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
 	int ret = 0;
 
 	if (unlikely(left <= -period)) {
 		left = period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
 
 	if (unlikely(left <= 0)) {
 		left += period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
 	if (left > MAX_PERIOD)
 		left = MAX_PERIOD;
 
-	atomic64_set(&hwc->prev_count, (u64)-left);
+	local64_set(&hwc->prev_count, (u64)-left);
 
 	write_pmc(idx, (u64)(-left) & 0xffffffff);
 
@@ -1087,7 +1087,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if (!hwc->sample_period) {
 		hwc->sample_period = MAX_PERIOD;
 		hwc->last_period = hwc->sample_period;
-		atomic64_set(&hwc->period_left, hwc->sample_period);
+		local64_set(&hwc->period_left, hwc->sample_period);
 	}
 
 	return 0;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 79e199843db..2d0d2906927 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -296,10 +296,10 @@ x86_perf_event_update(struct perf_event *event)
 	 * count to the generic event atomically:
 	 */
 again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
+	prev_raw_count = local64_read(&hwc->prev_count);
 	rdmsrl(hwc->event_base + idx, new_raw_count);
 
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 					new_raw_count) != prev_raw_count)
 		goto again;
 
@@ -314,8 +314,8 @@ again:
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	atomic64_add(delta, &event->count);
-	atomic64_sub(delta, &hwc->period_left);
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
 
 	return new_raw_count;
 }
@@ -439,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event)
 	if (!hwc->sample_period) {
 		hwc->sample_period = x86_pmu.max_period;
 		hwc->last_period = hwc->sample_period;
-		atomic64_set(&hwc->period_left, hwc->sample_period);
+		local64_set(&hwc->period_left, hwc->sample_period);
 	} else {
 		/*
 		 * If we have a PMU initialized but no APIC
@@ -886,7 +886,7 @@ static int
 x86_perf_event_set_period(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	s64 left = atomic64_read(&hwc->period_left);
+	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
 	int ret = 0, idx = hwc->idx;
 
@@ -898,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event)
 	 */
 	if (unlikely(left <= -period)) {
 		left = period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
 
 	if (unlikely(left <= 0)) {
 		left += period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
@@ -924,7 +924,7 @@ x86_perf_event_set_period(struct perf_event *event)
 	 * The hw event starts counting from this event offset,
 	 * mark it to be able to extra future deltas:
 	 */
-	atomic64_set(&hwc->prev_count, (u64)-left);
+	local64_set(&hwc->prev_count, (u64)-left);
 
 	wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f34dab9b275..7342979f95f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -487,6 +487,7 @@ struct perf_guest_info_callbacks {
 #include <linux/cpu.h>
 #include <asm/atomic.h>
 #include <asm/local.h>
+#include <asm/local64.h>
 
 #define PERF_MAX_STACK_DEPTH		255
 
@@ -536,10 +537,10 @@ struct hw_perf_event {
 		struct arch_hw_breakpoint	info;
 #endif
 	};
-	atomic64_t			prev_count;
+	local64_t			prev_count;
 	u64				sample_period;
 	u64				last_period;
-	atomic64_t			period_left;
+	local64_t			period_left;
 	u64				interrupts;
 
 	u64				freq_time_stamp;
@@ -670,7 +671,7 @@ struct perf_event {
 
 	enum perf_event_active_state	state;
 	unsigned int			attach_state;
-	atomic64_t			count;
+	local64_t			count;
 	atomic64_t			child_count;
 
 	/*
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a395fda2d94..97c73018592 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1148,9 +1148,9 @@ static void __perf_event_sync_stat(struct perf_event *event,
 	 * In order to keep per-task stats reliable we need to flip the event
 	 * values when we flip the contexts.
 	 */
-	value = atomic64_read(&next_event->count);
-	value = atomic64_xchg(&event->count, value);
-	atomic64_set(&next_event->count, value);
+	value = local64_read(&next_event->count);
+	value = local64_xchg(&event->count, value);
+	local64_set(&next_event->count, value);
 
 	swap(event->total_time_enabled, next_event->total_time_enabled);
 	swap(event->total_time_running, next_event->total_time_running);
@@ -1540,10 +1540,10 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 
 	hwc->sample_period = sample_period;
 
-	if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+	if (local64_read(&hwc->period_left) > 8*sample_period) {
 		perf_disable();
 		perf_event_stop(event);
-		atomic64_set(&hwc->period_left, 0);
+		local64_set(&hwc->period_left, 0);
 		perf_event_start(event);
 		perf_enable();
 	}
@@ -1584,7 +1584,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 
 		perf_disable();
 		event->pmu->read(event);
-		now = atomic64_read(&event->count);
+		now = local64_read(&event->count);
 		delta = now - hwc->freq_count_stamp;
 		hwc->freq_count_stamp = now;
 
@@ -1738,7 +1738,7 @@ static void __perf_event_read(void *info)
 
 static inline u64 perf_event_count(struct perf_event *event)
 {
-	return atomic64_read(&event->count) + atomic64_read(&event->child_count);
+	return local64_read(&event->count) + atomic64_read(&event->child_count);
 }
 
 static u64 perf_event_read(struct perf_event *event)
@@ -2141,7 +2141,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 static void perf_event_reset(struct perf_event *event)
 {
 	(void)perf_event_read(event);
-	atomic64_set(&event->count, 0);
+	local64_set(&event->count, 0);
 	perf_event_update_userpage(event);
 }
 
@@ -2359,7 +2359,7 @@ void perf_event_update_userpage(struct perf_event *event)
 	userpg->index = perf_event_index(event);
 	userpg->offset = perf_event_count(event);
 	if (event->state == PERF_EVENT_STATE_ACTIVE)
-		userpg->offset -= atomic64_read(&event->hw.prev_count);
+		userpg->offset -= local64_read(&event->hw.prev_count);
 
 	userpg->time_enabled = event->total_time_enabled +
 			atomic64_read(&event->child_total_time_enabled);
@@ -4035,14 +4035,14 @@ static u64 perf_swevent_set_period(struct perf_event *event)
 	hwc->last_period = hwc->sample_period;
 
 again:
-	old = val = atomic64_read(&hwc->period_left);
+	old = val = local64_read(&hwc->period_left);
 	if (val < 0)
 		return 0;
 
 	nr = div64_u64(period + val, period);
 	offset = nr * period;
 	val -= offset;
-	if (atomic64_cmpxchg(&hwc->period_left, old, val) != old)
+	if (local64_cmpxchg(&hwc->period_left, old, val) != old)
 		goto again;
 
 	return nr;
@@ -4081,7 +4081,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	atomic64_add(nr, &event->count);
+	local64_add(nr, &event->count);
 
 	if (!regs)
 		return;
@@ -4092,7 +4092,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
 	if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
 		return perf_swevent_overflow(event, 1, nmi, data, regs);
 
-	if (atomic64_add_negative(nr, &hwc->period_left))
+	if (local64_add_negative(nr, &hwc->period_left))
 		return;
 
 	perf_swevent_overflow(event, 0, nmi, data, regs);
@@ -4383,8 +4383,8 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
 	u64 now;
 
 	now = cpu_clock(cpu);
-	prev = atomic64_xchg(&event->hw.prev_count, now);
-	atomic64_add(now - prev, &event->count);
+	prev = local64_xchg(&event->hw.prev_count, now);
+	local64_add(now - prev, &event->count);
 }
 
 static int cpu_clock_perf_event_enable(struct perf_event *event)
@@ -4392,7 +4392,7 @@ static int cpu_clock_perf_event_enable(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	int cpu = raw_smp_processor_id();
 
-	atomic64_set(&hwc->prev_count, cpu_clock(cpu));
+	local64_set(&hwc->prev_count, cpu_clock(cpu));
 	perf_swevent_start_hrtimer(event);
 
 	return 0;
@@ -4424,9 +4424,9 @@ static void task_clock_perf_event_update(struct perf_event *event, u64 now)
 	u64 prev;
 	s64 delta;
 
-	prev = atomic64_xchg(&event->hw.prev_count, now);
+	prev = local64_xchg(&event->hw.prev_count, now);
 	delta = now - prev;
-	atomic64_add(delta, &event->count);
+	local64_add(delta, &event->count);
 }
 
 static int task_clock_perf_event_enable(struct perf_event *event)
@@ -4436,7 +4436,7 @@ static int task_clock_perf_event_enable(struct perf_event *event)
 
 	now = event->ctx->time;
 
-	atomic64_set(&hwc->prev_count, now);
+	local64_set(&hwc->prev_count, now);
 
 	perf_swevent_start_hrtimer(event);
 
@@ -4879,7 +4879,7 @@ perf_event_alloc(struct perf_event_attr *attr,
 		hwc->sample_period = 1;
 	hwc->last_period = hwc->sample_period;
 
-	atomic64_set(&hwc->period_left, hwc->sample_period);
+	local64_set(&hwc->period_left, hwc->sample_period);
 
 	/*
 	 * we currently do not support PERF_FORMAT_GROUP on inherited events
@@ -5313,7 +5313,7 @@ inherit_event(struct perf_event *parent_event,
 		hwc->sample_period = sample_period;
 		hwc->last_period   = sample_period;
 
-		atomic64_set(&hwc->period_left, sample_period);
+		local64_set(&hwc->period_left, sample_period);
 	}
 
 	child_event->overflow_handler = parent_event->overflow_handler;
-- 
cgit v1.2.3-70-g09d2


From 7be7923633a142402266d642ccebf74f556a649b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 9 Jun 2010 11:57:23 +0200
Subject: perf: Fix build breakage for architectures without atomic64_t

The local64.h include dependency was not dependent on PERF_EVENT=y,
which meant that arch's without atomic64_t support ended up including
it and failed to build.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
---
 include/linux/perf_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7342979f95f..1218d05728b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -462,6 +462,7 @@ enum perf_callchain_context {
 
 #ifdef CONFIG_PERF_EVENTS
 # include <asm/perf_event.h>
+# include <asm/local64.h>
 #endif
 
 struct perf_guest_info_callbacks {
@@ -487,7 +488,6 @@ struct perf_guest_info_callbacks {
 #include <linux/cpu.h>
 #include <asm/atomic.h>
 #include <asm/local.h>
-#include <asm/local64.h>
 
 #define PERF_MAX_STACK_DEPTH		255
 
-- 
cgit v1.2.3-70-g09d2


From b3c5163fe0193a74016dba1bb22491e0d1e9aaa4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 9 Jun 2010 14:43:38 +0200
Subject: netfilter: nf_conntrack: per_cpu untracking

NOTRACK makes all cpus share a cache line on nf_conntrack_untracked
twice per packet, slowing down performance.

This patch converts it to a per_cpu variable.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack.h |  5 ++---
 net/netfilter/nf_conntrack_core.c    | 36 ++++++++++++++++++++++++++----------
 2 files changed, 28 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 3bc38c70bbb..84a4b6fec99 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -261,11 +261,10 @@ extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
 			       u32 seq);
 
 /* Fake conntrack entry for untracked connections */
+DECLARE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
 static inline struct nf_conn *nf_ct_untracked_get(void)
 {
-	extern struct nf_conn nf_conntrack_untracked;
-
-	return &nf_conntrack_untracked;
+	return &__raw_get_cpu_var(nf_conntrack_untracked);
 }
 extern void nf_ct_untracked_status_or(unsigned long bits);
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 6c1da212380..9c661413b82 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -62,8 +62,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 unsigned int nf_conntrack_max __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_max);
 
-struct nf_conn nf_conntrack_untracked;
-EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
+DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
+EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 
 static int nf_conntrack_hash_rnd_initted;
 static unsigned int nf_conntrack_hash_rnd;
@@ -1183,10 +1183,21 @@ static void nf_ct_release_dying_list(struct net *net)
 	spin_unlock_bh(&nf_conntrack_lock);
 }
 
+static int untrack_refs(void)
+{
+	int cnt = 0, cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
+
+		cnt += atomic_read(&ct->ct_general.use) - 1;
+	}
+	return cnt;
+}
+
 static void nf_conntrack_cleanup_init_net(void)
 {
-	/* wait until all references to nf_conntrack_untracked are dropped */
-	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
+	while (untrack_refs() > 0)
 		schedule();
 
 	nf_conntrack_helper_fini();
@@ -1323,14 +1334,17 @@ module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
 
 void nf_ct_untracked_status_or(unsigned long bits)
 {
-	nf_conntrack_untracked.status |= bits;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(nf_conntrack_untracked, cpu).status |= bits;
 }
 EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
 
 static int nf_conntrack_init_init_net(void)
 {
 	int max_factor = 8;
-	int ret;
+	int ret, cpu;
 
 	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
 	 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
@@ -1369,10 +1383,12 @@ static int nf_conntrack_init_init_net(void)
 		goto err_extend;
 #endif
 	/* Set up fake conntrack: to never be deleted, not in any hashes */
-#ifdef CONFIG_NET_NS
-	nf_conntrack_untracked.ct_net = &init_net;
-#endif
-	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
+	for_each_possible_cpu(cpu) {
+		struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
+
+		write_pnet(&ct->ct_net, &init_net);
+		atomic_set(&ct->ct_general.use, 1);
+	}
 	/*  - and look it like as a confirmed connection */
 	nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 039ca4e74a1cf60bd7487324a564ecf5c981f254 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 26 May 2010 17:22:17 +0800
Subject: tracing: Remove kmemtrace ftrace plugin

We have been resisting new ftrace plugins and removing existing
ones, and kmemtrace has been superseded by kmem trace events
and perf-kmem, so we remove it.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
[ remove kmemtrace from the makefile, handle slob too ]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 Documentation/ABI/testing/debugfs-kmemtrace |  71 ----
 Documentation/trace/kmemtrace.txt           | 126 -------
 MAINTAINERS                                 |   7 -
 include/linux/kmemtrace.h                   |  25 --
 include/linux/slab_def.h                    |   3 +-
 include/linux/slub_def.h                    |   3 +-
 init/main.c                                 |   2 -
 kernel/trace/Kconfig                        |  20 --
 kernel/trace/Makefile                       |   1 -
 kernel/trace/kmemtrace.c                    | 529 ----------------------------
 kernel/trace/trace.h                        |  12 -
 kernel/trace/trace_entries.h                |  35 --
 mm/slab.c                                   |   1 -
 mm/slob.c                                   |   4 +-
 mm/slub.c                                   |   1 -
 15 files changed, 7 insertions(+), 833 deletions(-)
 delete mode 100644 Documentation/ABI/testing/debugfs-kmemtrace
 delete mode 100644 Documentation/trace/kmemtrace.txt
 delete mode 100644 include/linux/kmemtrace.h
 delete mode 100644 kernel/trace/kmemtrace.c

(limited to 'include')

diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
deleted file mode 100644
index 5e6a92a02d8..00000000000
--- a/Documentation/ABI/testing/debugfs-kmemtrace
+++ /dev/null
@@ -1,71 +0,0 @@
-What:		/sys/kernel/debug/kmemtrace/
-Date:		July 2008
-Contact:	Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
-Description:
-
-In kmemtrace-enabled kernels, the following files are created:
-
-/sys/kernel/debug/kmemtrace/
-	cpu<n>		(0400)	Per-CPU tracing data, see below. (binary)
-	total_overruns	(0400)	Total number of bytes which were dropped from
-				cpu<n> files because of full buffer condition,
-				non-binary. (text)
-	abi_version	(0400)	Kernel's kmemtrace ABI version. (text)
-
-Each per-CPU file should be read according to the relay interface. That is,
-the reader should set affinity to that specific CPU and, as currently done by
-the userspace application (though there are other methods), use poll() with
-an infinite timeout before every read(). Otherwise, erroneous data may be
-read. The binary data has the following _core_ format:
-
-	Event ID	(1 byte)	Unsigned integer, one of:
-		0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
-		1 - represents a freeing of previously allocated memory
-		    (KMEMTRACE_EVENT_FREE)
-	Type ID		(1 byte)	Unsigned integer, one of:
-		0 - this is a kmalloc() / kfree()
-		1 - this is a kmem_cache_alloc() / kmem_cache_free()
-		2 - this is a __get_free_pages() et al.
-	Event size	(2 bytes)	Unsigned integer representing the
-					size of this event. Used to extend
-					kmemtrace. Discard the bytes you
-					don't know about.
-	Sequence number	(4 bytes)	Signed integer used to reorder data
-					logged on SMP machines. Wraparound
-					must be taken into account, although
-					it is unlikely.
-	Caller address	(8 bytes)	Return address to the caller.
-	Pointer to mem	(8 bytes)	Pointer to target memory area. Can be
-					NULL, but not all such calls might be
-					recorded.
-
-In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
-
-	Requested bytes	(8 bytes)	Total number of requested bytes,
-					unsigned, must not be zero.
-	Allocated bytes (8 bytes)	Total number of actually allocated
-					bytes, unsigned, must not be lower
-					than requested bytes.
-	Requested flags	(4 bytes)	GFP flags supplied by the caller.
-	Target CPU	(4 bytes)	Signed integer, valid for event id 1.
-					If equal to -1, target CPU is the same
-					as origin CPU, but the reverse might
-					not be true.
-
-The data is made available in the same endianness the machine has.
-
-Other event ids and type ids may be defined and added. Other fields may be
-added by increasing event size, but see below for details.
-Every modification to the ABI, including new id definitions, are followed
-by bumping the ABI version by one.
-
-Adding new data to the packet (features) is done at the end of the mandatory
-data:
-	Feature size	(2 byte)
-	Feature ID	(1 byte)
-	Feature data	(Feature size - 3 bytes)
-
-
-Users:
-	kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
-
diff --git a/Documentation/trace/kmemtrace.txt b/Documentation/trace/kmemtrace.txt
deleted file mode 100644
index 6308735e58c..00000000000
--- a/Documentation/trace/kmemtrace.txt
+++ /dev/null
@@ -1,126 +0,0 @@
-			kmemtrace - Kernel Memory Tracer
-
-			  by Eduard - Gabriel Munteanu
-			     <eduard.munteanu@linux360.ro>
-
-I. Introduction
-===============
-
-kmemtrace helps kernel developers figure out two things:
-1) how different allocators (SLAB, SLUB etc.) perform
-2) how kernel code allocates memory and how much
-
-To do this, we trace every allocation and export information to the userspace
-through the relay interface. We export things such as the number of requested
-bytes, the number of bytes actually allocated (i.e. including internal
-fragmentation), whether this is a slab allocation or a plain kmalloc() and so
-on.
-
-The actual analysis is performed by a userspace tool (see section III for
-details on where to get it from). It logs the data exported by the kernel,
-processes it and (as of writing this) can provide the following information:
-- the total amount of memory allocated and fragmentation per call-site
-- the amount of memory allocated and fragmentation per allocation
-- total memory allocated and fragmentation in the collected dataset
-- number of cross-CPU allocation and frees (makes sense in NUMA environments)
-
-Moreover, it can potentially find inconsistent and erroneous behavior in
-kernel code, such as using slab free functions on kmalloc'ed memory or
-allocating less memory than requested (but not truly failed allocations).
-
-kmemtrace also makes provisions for tracing on some arch and analysing the
-data on another.
-
-II. Design and goals
-====================
-
-kmemtrace was designed to handle rather large amounts of data. Thus, it uses
-the relay interface to export whatever is logged to userspace, which then
-stores it. Analysis and reporting is done asynchronously, that is, after the
-data is collected and stored. By design, it allows one to log and analyse
-on different machines and different arches.
-
-As of writing this, the ABI is not considered stable, though it might not
-change much. However, no guarantees are made about compatibility yet. When
-deemed stable, the ABI should still allow easy extension while maintaining
-backward compatibility. This is described further in Documentation/ABI.
-
-Summary of design goals:
-	- allow logging and analysis to be done across different machines
-	- be fast and anticipate usage in high-load environments (*)
-	- be reasonably extensible
-	- make it possible for GNU/Linux distributions to have kmemtrace
-	included in their repositories
-
-(*) - one of the reasons Pekka Enberg's original userspace data analysis
-    tool's code was rewritten from Perl to C (although this is more than a
-    simple conversion)
-
-
-III. Quick usage guide
-======================
-
-1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
-CONFIG_KMEMTRACE).
-
-2) Get the userspace tool and build it:
-$ git clone git://repo.or.cz/kmemtrace-user.git		# current repository
-$ cd kmemtrace-user/
-$ ./autogen.sh
-$ ./configure
-$ make
-
-3) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
-'single' runlevel (so that relay buffers don't fill up easily), and run
-kmemtrace:
-# '$' does not mean user, but root here.
-$ mount -t debugfs none /sys/kernel/debug
-$ mount -t proc none /proc
-$ cd path/to/kmemtrace-user/
-$ ./kmemtraced
-Wait a bit, then stop it with CTRL+C.
-$ cat /sys/kernel/debug/kmemtrace/total_overruns	# Check if we didn't
-							# overrun, should
-							# be zero.
-$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
-		check its correctness]
-$ ./kmemtrace-report
-
-Now you should have a nice and short summary of how the allocator performs.
-
-IV. FAQ and known issues
-========================
-
-Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
-this? Should I worry?
-A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
-large the number is. You can fix it by supplying a higher
-'kmemtrace.subbufs=N' kernel parameter.
----
-
-Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
-A: This is a bug and should be reported. It can occur for a variety of
-reasons:
-	- possible bugs in relay code
-	- possible misuse of relay by kmemtrace
-	- timestamps being collected unorderly
-Or you may fix it yourself and send us a patch.
----
-
-Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
-A: This is a known issue and I'm working on it. These might be true errors
-in kernel code, which may have inconsistent behavior (e.g. allocating memory
-with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
-out this behavior may work with SLAB, but may fail with other allocators.
-
-It may also be due to lack of tracing in some unusual allocator functions.
-
-We don't want bug reports regarding this issue yet.
----
-
-V. See also
-===========
-
-Documentation/kernel-parameters.txt
-Documentation/ABI/testing/debugfs-kmemtrace
-
diff --git a/MAINTAINERS b/MAINTAINERS
index 33047a60543..67e6e9d848d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3361,13 +3361,6 @@ F:	include/linux/kmemleak.h
 F:	mm/kmemleak.c
 F:	mm/kmemleak-test.c
 
-KMEMTRACE
-M:	Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
-S:	Maintained
-F:	Documentation/trace/kmemtrace.txt
-F:	include/linux/kmemtrace.h
-F:	kernel/trace/kmemtrace.c
-
 KPROBES
 M:	Ananth N Mavinakayanahalli <ananth@in.ibm.com>
 M:	Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
deleted file mode 100644
index b616d3930c3..00000000000
--- a/include/linux/kmemtrace.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- *
- * This file is released under GPL version 2.
- */
-
-#ifndef _LINUX_KMEMTRACE_H
-#define _LINUX_KMEMTRACE_H
-
-#ifdef __KERNEL__
-
-#include <trace/events/kmem.h>
-
-#ifdef CONFIG_KMEMTRACE
-extern void kmemtrace_init(void);
-#else
-static inline void kmemtrace_init(void)
-{
-}
-#endif
-
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_KMEMTRACE_H */
-
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 1812dac8c49..1acfa73ce2a 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,7 +14,8 @@
 #include <asm/page.h>		/* kmalloc_sizes.h needs PAGE_SIZE */
 #include <asm/cache.h>		/* kmalloc_sizes.h needs L1_CACHE_BYTES */
 #include <linux/compiler.h>
-#include <linux/kmemtrace.h>
+
+#include <trace/events/kmem.h>
 
 #ifndef ARCH_KMALLOC_MINALIGN
 /*
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 55695c8d2f8..2345d3a033e 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,9 +10,10 @@
 #include <linux/gfp.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
-#include <linux/kmemtrace.h>
 #include <linux/kmemleak.h>
 
+#include <trace/events/kmem.h>
+
 enum stat_item {
 	ALLOC_FASTPATH,		/* Allocation from cpu slab */
 	ALLOC_SLOWPATH,		/* Allocation by getting a new cpu slab */
diff --git a/init/main.c b/init/main.c
index 94f65efdc65..e2a2bf3a169 100644
--- a/init/main.c
+++ b/init/main.c
@@ -66,7 +66,6 @@
 #include <linux/ftrace.h>
 #include <linux/async.h>
 #include <linux/kmemcheck.h>
-#include <linux/kmemtrace.h>
 #include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
@@ -652,7 +651,6 @@ asmlinkage void __init start_kernel(void)
 #endif
 	page_cgroup_init();
 	enable_debug_pagealloc();
-	kmemtrace_init();
 	kmemleak_init();
 	debug_objects_mem_init();
 	idr_init_cache();
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 572992abc71..f669092fdea 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -354,26 +354,6 @@ config STACK_TRACER
 
 	  Say N if unsure.
 
-config KMEMTRACE
-	bool "Trace SLAB allocations"
-	select GENERIC_TRACER
-	help
-	  kmemtrace provides tracing for slab allocator functions, such as
-	  kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
-	  data is then fed to the userspace application in order to analyse
-	  allocation hotspots, internal fragmentation and so on, making it
-	  possible to see how well an allocator performs, as well as debug
-	  and profile kernel code.
-
-	  This requires an userspace application to use. See
-	  Documentation/trace/kmemtrace.txt for more information.
-
-	  Saying Y will make the kernel somewhat larger and slower. However,
-	  if you disable kmemtrace at run-time or boot-time, the performance
-	  impact is minimal (depending on the arch the kernel is built for).
-
-	  If unsure, say N.
-
 config WORKQUEUE_TRACER
 	bool "Trace workqueues"
 	select GENERIC_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c3aaeba8237..469a1c7555a 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -40,7 +40,6 @@ obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
 obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
-obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
 obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
 ifeq ($(CONFIG_BLOCK),y)
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
deleted file mode 100644
index bbfc1bb1660..00000000000
--- a/kernel/trace/kmemtrace.c
+++ /dev/null
@@ -1,529 +0,0 @@
-/*
- * Memory allocator tracing
- *
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
- * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
- */
-
-#include <linux/tracepoint.h>
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/dcache.h>
-#include <linux/fs.h>
-
-#include <linux/kmemtrace.h>
-
-#include "trace_output.h"
-#include "trace.h"
-
-/* Select an alternative, minimalistic output than the original one */
-#define TRACE_KMEM_OPT_MINIMAL	0x1
-
-static struct tracer_opt kmem_opts[] = {
-	/* Default disable the minimalistic output */
-	{ TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
-	{ }
-};
-
-static struct tracer_flags kmem_tracer_flags = {
-	.val			= 0,
-	.opts			= kmem_opts
-};
-
-static struct trace_array *kmemtrace_array;
-
-/* Trace allocations */
-static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
-				   unsigned long call_site,
-				   const void *ptr,
-				   size_t bytes_req,
-				   size_t bytes_alloc,
-				   gfp_t gfp_flags,
-				   int node)
-{
-	struct ftrace_event_call *call = &event_kmem_alloc;
-	struct trace_array *tr = kmemtrace_array;
-	struct kmemtrace_alloc_entry *entry;
-	struct ring_buffer_event *event;
-
-	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
-	if (!event)
-		return;
-
-	entry = ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, 0, 0);
-
-	entry->ent.type		= TRACE_KMEM_ALLOC;
-	entry->type_id		= type_id;
-	entry->call_site	= call_site;
-	entry->ptr		= ptr;
-	entry->bytes_req	= bytes_req;
-	entry->bytes_alloc	= bytes_alloc;
-	entry->gfp_flags	= gfp_flags;
-	entry->node		= node;
-
-	if (!filter_check_discard(call, entry, tr->buffer, event))
-		ring_buffer_unlock_commit(tr->buffer, event);
-
-	trace_wake_up();
-}
-
-static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
-				  unsigned long call_site,
-				  const void *ptr)
-{
-	struct ftrace_event_call *call = &event_kmem_free;
-	struct trace_array *tr = kmemtrace_array;
-	struct kmemtrace_free_entry *entry;
-	struct ring_buffer_event *event;
-
-	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
-	if (!event)
-		return;
-	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, 0, 0);
-
-	entry->ent.type		= TRACE_KMEM_FREE;
-	entry->type_id		= type_id;
-	entry->call_site	= call_site;
-	entry->ptr		= ptr;
-
-	if (!filter_check_discard(call, entry, tr->buffer, event))
-		ring_buffer_unlock_commit(tr->buffer, event);
-
-	trace_wake_up();
-}
-
-static void kmemtrace_kmalloc(void *ignore,
-			      unsigned long call_site,
-			      const void *ptr,
-			      size_t bytes_req,
-			      size_t bytes_alloc,
-			      gfp_t gfp_flags)
-{
-	kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
-			bytes_req, bytes_alloc, gfp_flags, -1);
-}
-
-static void kmemtrace_kmem_cache_alloc(void *ignore,
-				       unsigned long call_site,
-				       const void *ptr,
-				       size_t bytes_req,
-				       size_t bytes_alloc,
-				       gfp_t gfp_flags)
-{
-	kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
-			bytes_req, bytes_alloc, gfp_flags, -1);
-}
-
-static void kmemtrace_kmalloc_node(void *ignore,
-				   unsigned long call_site,
-				   const void *ptr,
-				   size_t bytes_req,
-				   size_t bytes_alloc,
-				   gfp_t gfp_flags,
-				   int node)
-{
-	kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
-			bytes_req, bytes_alloc, gfp_flags, node);
-}
-
-static void kmemtrace_kmem_cache_alloc_node(void *ignore,
-					    unsigned long call_site,
-					    const void *ptr,
-					    size_t bytes_req,
-					    size_t bytes_alloc,
-					    gfp_t gfp_flags,
-					    int node)
-{
-	kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
-			bytes_req, bytes_alloc, gfp_flags, node);
-}
-
-static void
-kmemtrace_kfree(void *ignore, unsigned long call_site, const void *ptr)
-{
-	kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
-}
-
-static void kmemtrace_kmem_cache_free(void *ignore,
-				      unsigned long call_site, const void *ptr)
-{
-	kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
-}
-
-static int kmemtrace_start_probes(void)
-{
-	int err;
-
-	err = register_trace_kmalloc(kmemtrace_kmalloc, NULL);
-	if (err)
-		return err;
-	err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
-	if (err)
-		return err;
-	err = register_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
-	if (err)
-		return err;
-	err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
-	if (err)
-		return err;
-	err = register_trace_kfree(kmemtrace_kfree, NULL);
-	if (err)
-		return err;
-	err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
-
-	return err;
-}
-
-static void kmemtrace_stop_probes(void)
-{
-	unregister_trace_kmalloc(kmemtrace_kmalloc, NULL);
-	unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
-	unregister_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
-	unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
-	unregister_trace_kfree(kmemtrace_kfree, NULL);
-	unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
-}
-
-static int kmem_trace_init(struct trace_array *tr)
-{
-	kmemtrace_array = tr;
-
-	tracing_reset_online_cpus(tr);
-
-	kmemtrace_start_probes();
-
-	return 0;
-}
-
-static void kmem_trace_reset(struct trace_array *tr)
-{
-	kmemtrace_stop_probes();
-}
-
-static void kmemtrace_headers(struct seq_file *s)
-{
-	/* Don't need headers for the original kmemtrace output */
-	if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
-		return;
-
-	seq_printf(s, "#\n");
-	seq_printf(s, "# ALLOC  TYPE  REQ   GIVEN  FLAGS     "
-			"      POINTER         NODE    CALLER\n");
-	seq_printf(s, "# FREE   |      |     |       |       "
-			"       |   |            |        |\n");
-	seq_printf(s, "# |\n\n");
-}
-
-/*
- * The following functions give the original output from kmemtrace,
- * plus the origin CPU, since reordering occurs in-kernel now.
- */
-
-#define KMEMTRACE_USER_ALLOC	0
-#define KMEMTRACE_USER_FREE	1
-
-struct kmemtrace_user_event {
-	u8			event_id;
-	u8			type_id;
-	u16			event_size;
-	u32			cpu;
-	u64			timestamp;
-	unsigned long		call_site;
-	unsigned long		ptr;
-};
-
-struct kmemtrace_user_event_alloc {
-	size_t			bytes_req;
-	size_t			bytes_alloc;
-	unsigned		gfp_flags;
-	int			node;
-};
-
-static enum print_line_t
-kmemtrace_print_alloc(struct trace_iterator *iter, int flags,
-		      struct trace_event *event)
-{
-	struct trace_seq *s = &iter->seq;
-	struct kmemtrace_alloc_entry *entry;
-	int ret;
-
-	trace_assign_type(entry, iter->ent);
-
-	ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu "
-	    "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
-	    entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr,
-	    (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc,
-	    (unsigned long)entry->gfp_flags, entry->node);
-
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-	return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free(struct trace_iterator *iter, int flags,
-		     struct trace_event *event)
-{
-	struct trace_seq *s = &iter->seq;
-	struct kmemtrace_free_entry *entry;
-	int ret;
-
-	trace_assign_type(entry, iter->ent);
-
-	ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n",
-			       entry->type_id, (void *)entry->call_site,
-			       (unsigned long)entry->ptr);
-
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-	return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags,
-			   struct trace_event *event)
-{
-	struct trace_seq *s = &iter->seq;
-	struct kmemtrace_alloc_entry *entry;
-	struct kmemtrace_user_event *ev;
-	struct kmemtrace_user_event_alloc *ev_alloc;
-
-	trace_assign_type(entry, iter->ent);
-
-	ev = trace_seq_reserve(s, sizeof(*ev));
-	if (!ev)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	ev->event_id		= KMEMTRACE_USER_ALLOC;
-	ev->type_id		= entry->type_id;
-	ev->event_size		= sizeof(*ev) + sizeof(*ev_alloc);
-	ev->cpu			= iter->cpu;
-	ev->timestamp		= iter->ts;
-	ev->call_site		= entry->call_site;
-	ev->ptr			= (unsigned long)entry->ptr;
-
-	ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
-	if (!ev_alloc)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	ev_alloc->bytes_req	= entry->bytes_req;
-	ev_alloc->bytes_alloc	= entry->bytes_alloc;
-	ev_alloc->gfp_flags	= entry->gfp_flags;
-	ev_alloc->node		= entry->node;
-
-	return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free_user(struct trace_iterator *iter, int flags,
-			  struct trace_event *event)
-{
-	struct trace_seq *s = &iter->seq;
-	struct kmemtrace_free_entry *entry;
-	struct kmemtrace_user_event *ev;
-
-	trace_assign_type(entry, iter->ent);
-
-	ev = trace_seq_reserve(s, sizeof(*ev));
-	if (!ev)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	ev->event_id		= KMEMTRACE_USER_FREE;
-	ev->type_id		= entry->type_id;
-	ev->event_size		= sizeof(*ev);
-	ev->cpu			= iter->cpu;
-	ev->timestamp		= iter->ts;
-	ev->call_site		= entry->call_site;
-	ev->ptr			= (unsigned long)entry->ptr;
-
-	return TRACE_TYPE_HANDLED;
-}
-
-/* The two other following provide a more minimalistic output */
-static enum print_line_t
-kmemtrace_print_alloc_compress(struct trace_iterator *iter)
-{
-	struct kmemtrace_alloc_entry *entry;
-	struct trace_seq *s = &iter->seq;
-	int ret;
-
-	trace_assign_type(entry, iter->ent);
-
-	/* Alloc entry */
-	ret = trace_seq_printf(s, "  +      ");
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Type */
-	switch (entry->type_id) {
-	case KMEMTRACE_TYPE_KMALLOC:
-		ret = trace_seq_printf(s, "K   ");
-		break;
-	case KMEMTRACE_TYPE_CACHE:
-		ret = trace_seq_printf(s, "C   ");
-		break;
-	case KMEMTRACE_TYPE_PAGES:
-		ret = trace_seq_printf(s, "P   ");
-		break;
-	default:
-		ret = trace_seq_printf(s, "?   ");
-	}
-
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Requested */
-	ret = trace_seq_printf(s, "%4zu   ", entry->bytes_req);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Allocated */
-	ret = trace_seq_printf(s, "%4zu   ", entry->bytes_alloc);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Flags
-	 * TODO: would be better to see the name of the GFP flag names
-	 */
-	ret = trace_seq_printf(s, "%08x   ", entry->gfp_flags);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Pointer to allocated */
-	ret = trace_seq_printf(s, "0x%tx   ", (ptrdiff_t)entry->ptr);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Node and call site*/
-	ret = trace_seq_printf(s, "%4d   %pf\n", entry->node,
-						 (void *)entry->call_site);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free_compress(struct trace_iterator *iter)
-{
-	struct kmemtrace_free_entry *entry;
-	struct trace_seq *s = &iter->seq;
-	int ret;
-
-	trace_assign_type(entry, iter->ent);
-
-	/* Free entry */
-	ret = trace_seq_printf(s, "  -      ");
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Type */
-	switch (entry->type_id) {
-	case KMEMTRACE_TYPE_KMALLOC:
-		ret = trace_seq_printf(s, "K     ");
-		break;
-	case KMEMTRACE_TYPE_CACHE:
-		ret = trace_seq_printf(s, "C     ");
-		break;
-	case KMEMTRACE_TYPE_PAGES:
-		ret = trace_seq_printf(s, "P     ");
-		break;
-	default:
-		ret = trace_seq_printf(s, "?     ");
-	}
-
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Skip requested/allocated/flags */
-	ret = trace_seq_printf(s, "                       ");
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Pointer to allocated */
-	ret = trace_seq_printf(s, "0x%tx   ", (ptrdiff_t)entry->ptr);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Skip node and print call site*/
-	ret = trace_seq_printf(s, "       %pf\n", (void *)entry->call_site);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
-{
-	struct trace_entry *entry = iter->ent;
-
-	if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
-		return TRACE_TYPE_UNHANDLED;
-
-	switch (entry->type) {
-	case TRACE_KMEM_ALLOC:
-		return kmemtrace_print_alloc_compress(iter);
-	case TRACE_KMEM_FREE:
-		return kmemtrace_print_free_compress(iter);
-	default:
-		return TRACE_TYPE_UNHANDLED;
-	}
-}
-
-static struct trace_event_functions kmem_trace_alloc_funcs = {
-	.trace			= kmemtrace_print_alloc,
-	.binary			= kmemtrace_print_alloc_user,
-};
-
-static struct trace_event kmem_trace_alloc = {
-	.type			= TRACE_KMEM_ALLOC,
-	.funcs			= &kmem_trace_alloc_funcs,
-};
-
-static struct trace_event_functions kmem_trace_free_funcs = {
-	.trace			= kmemtrace_print_free,
-	.binary			= kmemtrace_print_free_user,
-};
-
-static struct trace_event kmem_trace_free = {
-	.type			= TRACE_KMEM_FREE,
-	.funcs			= &kmem_trace_free_funcs,
-};
-
-static struct tracer kmem_tracer __read_mostly = {
-	.name			= "kmemtrace",
-	.init			= kmem_trace_init,
-	.reset			= kmem_trace_reset,
-	.print_line		= kmemtrace_print_line,
-	.print_header		= kmemtrace_headers,
-	.flags			= &kmem_tracer_flags
-};
-
-void kmemtrace_init(void)
-{
-	/* earliest opportunity to start kmem tracing */
-}
-
-static int __init init_kmem_tracer(void)
-{
-	if (!register_ftrace_event(&kmem_trace_alloc)) {
-		pr_warning("Warning: could not register kmem events\n");
-		return 1;
-	}
-
-	if (!register_ftrace_event(&kmem_trace_free)) {
-		pr_warning("Warning: could not register kmem events\n");
-		return 1;
-	}
-
-	if (register_tracer(&kmem_tracer) != 0) {
-		pr_warning("Warning: could not register the kmem tracer\n");
-		return 1;
-	}
-
-	return 0;
-}
-device_initcall(init_kmem_tracer);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 75a5e800a73..075cd2ea84a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,7 +9,6 @@
 #include <linux/mmiotrace.h>
 #include <linux/tracepoint.h>
 #include <linux/ftrace.h>
-#include <linux/kmemtrace.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/trace_seq.h>
 #include <linux/ftrace_event.h>
@@ -30,19 +29,12 @@ enum trace_type {
 	TRACE_GRAPH_RET,
 	TRACE_GRAPH_ENT,
 	TRACE_USER_STACK,
-	TRACE_KMEM_ALLOC,
-	TRACE_KMEM_FREE,
 	TRACE_BLK,
 	TRACE_KSYM,
 
 	__TRACE_LAST_TYPE,
 };
 
-enum kmemtrace_type_id {
-	KMEMTRACE_TYPE_KMALLOC = 0,	/* kmalloc() or kfree(). */
-	KMEMTRACE_TYPE_CACHE,		/* kmem_cache_*(). */
-	KMEMTRACE_TYPE_PAGES,		/* __get_free_pages() and friends. */
-};
 
 #undef __field
 #define __field(type, item)		type	item;
@@ -208,10 +200,6 @@ extern void __ftrace_bad_type(void);
 			  TRACE_GRAPH_ENT);		\
 		IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry,	\
 			  TRACE_GRAPH_RET);		\
-		IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,	\
-			  TRACE_KMEM_ALLOC);	\
-		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
-			  TRACE_KMEM_FREE);	\
 		IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
 		__ftrace_bad_type();					\
 	} while (0)
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index c293364c984..13abc157dba 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -291,41 +291,6 @@ FTRACE_ENTRY(branch, trace_branch,
 		 __entry->func, __entry->file, __entry->correct)
 );
 
-FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
-
-	TRACE_KMEM_ALLOC,
-
-	F_STRUCT(
-		__field(	enum kmemtrace_type_id,	type_id		)
-		__field(	unsigned long,		call_site	)
-		__field(	const void *,		ptr		)
-		__field(	size_t,			bytes_req	)
-		__field(	size_t,			bytes_alloc	)
-		__field(	gfp_t,			gfp_flags	)
-		__field(	int,			node		)
-	),
-
-	F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi"
-		 " flags:%x node:%d",
-		 __entry->type_id, __entry->call_site, __entry->ptr,
-		 __entry->bytes_req, __entry->bytes_alloc,
-		 __entry->gfp_flags, __entry->node)
-);
-
-FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
-
-	TRACE_KMEM_FREE,
-
-	F_STRUCT(
-		__field(	enum kmemtrace_type_id,	type_id		)
-		__field(	unsigned long,		call_site	)
-		__field(	const void *,		ptr		)
-	),
-
-	F_printk("type:%u call_site:%lx ptr:%p",
-		 __entry->type_id, __entry->call_site, __entry->ptr)
-);
-
 FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
 
 	TRACE_KSYM,
diff --git a/mm/slab.c b/mm/slab.c
index e49f8f46f46..47360c3e5ab 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,7 +102,6 @@
 #include	<linux/cpu.h>
 #include	<linux/sysctl.h>
 #include	<linux/module.h>
-#include	<linux/kmemtrace.h>
 #include	<linux/rcupdate.h>
 #include	<linux/string.h>
 #include	<linux/uaccess.h>
diff --git a/mm/slob.c b/mm/slob.c
index 23631e2bb57..a82ab5811bd 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -66,8 +66,10 @@
 #include <linux/module.h>
 #include <linux/rcupdate.h>
 #include <linux/list.h>
-#include <linux/kmemtrace.h>
 #include <linux/kmemleak.h>
+
+#include <trace/events/kmem.h>
+
 #include <asm/atomic.h>
 
 /*
diff --git a/mm/slub.c b/mm/slub.c
index 26f0cb9cc58..a61f1aad107 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -17,7 +17,6 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <linux/kmemtrace.h>
 #include <linux/kmemcheck.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
-- 
cgit v1.2.3-70-g09d2


From 88e7594a9775e54dcd421cb246406dce62e48bee Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Mon, 7 Jun 2010 03:27:39 +0000
Subject: phonet: use call_rcu for phonet device free

Use call_rcu rather than synchronize_rcu.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/phonet/pn_dev.h |  1 +
 net/phonet/pn_dev.c         | 15 +++++++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h
index d7b989ca3d6..2d16783d5e2 100644
--- a/include/net/phonet/pn_dev.h
+++ b/include/net/phonet/pn_dev.h
@@ -34,6 +34,7 @@ struct phonet_device {
 	struct list_head list;
 	struct net_device *netdev;
 	DECLARE_BITMAP(addrs, 64);
+	struct rcu_head	rcu;
 };
 
 int phonet_device_init(void);
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index c33da657694..b18e48fae97 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -162,6 +162,14 @@ int phonet_address_add(struct net_device *dev, u8 addr)
 	return err;
 }
 
+static void phonet_device_rcu_free(struct rcu_head *head)
+{
+	struct phonet_device *pnd;
+
+	pnd = container_of(head, struct phonet_device, rcu);
+	kfree(pnd);
+}
+
 int phonet_address_del(struct net_device *dev, u8 addr)
 {
 	struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
@@ -179,10 +187,9 @@ int phonet_address_del(struct net_device *dev, u8 addr)
 		pnd = NULL;
 	mutex_unlock(&pndevs->lock);
 
-	if (pnd) {
-		synchronize_rcu();
-		kfree(pnd);
-	}
+	if (pnd)
+		call_rcu(&pnd->rcu, phonet_device_rcu_free);
+
 	return err;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 8e4b50f94e8c1435a3e0ece42b7f97bc857d0145 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Thu, 10 Jun 2010 08:26:28 +0200
Subject: firewire: core: add CSR SPLIT_TIMEOUT support

Implement the SPLIT_TIMEOUT registers.  Besides being required by the
spec, this is desirable for some IIDC devices and necessary for many
audio devices to be able to increase the timeout from userspace.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
---
 drivers/firewire/core-card.c        |  4 ++
 drivers/firewire/core-transaction.c | 76 +++++++++++++++++++++++++++++++------
 include/linux/firewire.h            |  5 +++
 3 files changed, 74 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index 901435cdd5c..d0f15c2f1e1 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -428,6 +428,10 @@ void fw_card_initialize(struct fw_card *card,
 	card->device = device;
 	card->current_tlabel = 0;
 	card->tlabel_mask = 0;
+	card->split_timeout_hi = 0;
+	card->split_timeout_lo = 800 << 19;
+	card->split_timeout_cycles = 800;
+	card->split_timeout_jiffies = DIV_ROUND_UP(HZ, 10);
 	card->color = 0;
 	card->broadcast_channel = BROADCAST_CHANNEL_INITIAL;
 
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index 0034229dfd1..9a7d3ec23f2 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -339,7 +339,8 @@ void fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode,
 	setup_timer(&t->split_timeout_timer,
 		    split_transaction_timeout_callback, (unsigned long)t);
 	/* FIXME: start this timer later, relative to t->timestamp */
-	mod_timer(&t->split_timeout_timer, jiffies + DIV_ROUND_UP(HZ, 10));
+	mod_timer(&t->split_timeout_timer,
+		  jiffies + card->split_timeout_jiffies);
 	t->callback = callback;
 	t->callback_data = callback_data;
 
@@ -673,11 +674,28 @@ void fw_fill_response(struct fw_packet *response, u32 *request_header,
 }
 EXPORT_SYMBOL(fw_fill_response);
 
-static struct fw_request *allocate_request(struct fw_packet *p)
+static u32 compute_split_timeout_timestamp(struct fw_card *card,
+					   u32 request_timestamp)
+{
+	unsigned int cycles;
+	u32 timestamp;
+
+	cycles = card->split_timeout_cycles;
+	cycles += request_timestamp & 0x1fff;
+
+	timestamp = request_timestamp & ~0x1fff;
+	timestamp += (cycles / 8000) << 13;
+	timestamp |= cycles % 8000;
+
+	return timestamp;
+}
+
+static struct fw_request *allocate_request(struct fw_card *card,
+					   struct fw_packet *p)
 {
 	struct fw_request *request;
 	u32 *data, length;
-	int request_tcode, t;
+	int request_tcode;
 
 	request_tcode = HEADER_GET_TCODE(p->header[0]);
 	switch (request_tcode) {
@@ -712,14 +730,9 @@ static struct fw_request *allocate_request(struct fw_packet *p)
 	if (request == NULL)
 		return NULL;
 
-	t = (p->timestamp & 0x1fff) + 4000;
-	if (t >= 8000)
-		t = (p->timestamp & ~0x1fff) + 0x2000 + t - 8000;
-	else
-		t = (p->timestamp & ~0x1fff) + t;
-
 	request->response.speed = p->speed;
-	request->response.timestamp = t;
+	request->response.timestamp =
+			compute_split_timeout_timestamp(card, p->timestamp);
 	request->response.generation = p->generation;
 	request->response.ack = 0;
 	request->response.callback = free_response_callback;
@@ -845,7 +858,7 @@ void fw_core_handle_request(struct fw_card *card, struct fw_packet *p)
 	if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE)
 		return;
 
-	request = allocate_request(p);
+	request = allocate_request(card, p);
 	if (request == NULL) {
 		/* FIXME: send statically allocated busy packet. */
 		return;
@@ -993,6 +1006,19 @@ static u32 read_state_register(struct fw_card *card)
 	return 0;
 }
 
+static void update_split_timeout(struct fw_card *card)
+{
+	unsigned int cycles;
+
+	cycles = card->split_timeout_hi * 8000 + (card->split_timeout_lo >> 19);
+
+	cycles = max(cycles, 800u); /* minimum as per the spec */
+	cycles = min(cycles, 3u * 8000u); /* maximum OHCI timeout */
+
+	card->split_timeout_cycles = cycles;
+	card->split_timeout_jiffies = DIV_ROUND_UP(cycles * HZ, 8000);
+}
+
 static void handle_registers(struct fw_card *card, struct fw_request *request,
 		int tcode, int destination, int source, int generation,
 		int speed, unsigned long long offset,
@@ -1001,6 +1027,7 @@ static void handle_registers(struct fw_card *card, struct fw_request *request,
 	int reg = offset & ~CSR_REGISTER_BASE;
 	__be32 *data = payload;
 	int rcode = RCODE_COMPLETE;
+	unsigned long flags;
 
 	switch (reg) {
 	case CSR_STATE_CLEAR:
@@ -1039,6 +1066,33 @@ static void handle_registers(struct fw_card *card, struct fw_request *request,
 			rcode = RCODE_TYPE_ERROR;
 		break;
 
+	case CSR_SPLIT_TIMEOUT_HI:
+		if (tcode == TCODE_READ_QUADLET_REQUEST) {
+			*data = cpu_to_be32(card->split_timeout_hi);
+		} else if (tcode == TCODE_WRITE_QUADLET_REQUEST) {
+			spin_lock_irqsave(&card->lock, flags);
+			card->split_timeout_hi = be32_to_cpu(*data) & 7;
+			update_split_timeout(card);
+			spin_unlock_irqrestore(&card->lock, flags);
+		} else {
+			rcode = RCODE_TYPE_ERROR;
+		}
+		break;
+
+	case CSR_SPLIT_TIMEOUT_LO:
+		if (tcode == TCODE_READ_QUADLET_REQUEST) {
+			*data = cpu_to_be32(card->split_timeout_lo);
+		} else if (tcode == TCODE_WRITE_QUADLET_REQUEST) {
+			spin_lock_irqsave(&card->lock, flags);
+			card->split_timeout_lo =
+					be32_to_cpu(*data) & 0xfff80000;
+			update_split_timeout(card);
+			spin_unlock_irqrestore(&card->lock, flags);
+		} else {
+			rcode = RCODE_TYPE_ERROR;
+		}
+		break;
+
 	case CSR_CYCLE_TIME:
 		if (TCODE_IS_READ_REQUEST(tcode) && length == 4)
 			*data = cpu_to_be32(card->driver->
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 72e2b8ac2a5..cdf8213c68c 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -89,6 +89,11 @@ struct fw_card {
 	struct list_head transaction_list;
 	unsigned long reset_jiffies;
 
+	u32 split_timeout_hi;
+	u32 split_timeout_lo;
+	unsigned int split_timeout_cycles;
+	unsigned int split_timeout_jiffies;
+
 	unsigned long long guid;
 	unsigned max_receive;
 	int link_speed;
-- 
cgit v1.2.3-70-g09d2


From a1a1132bd83d0aea51d4f19be4b4a58a064a0131 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Thu, 10 Jun 2010 08:35:06 +0200
Subject: firewire: add CSR PRIORITY_BUDGET support

If supported by the OHCI controller, implement the PRIORITY_BUDGET
register, which is required for nodes that can use asynchronous
priority arbitration.

To allow the core to determine what features the lowlevel device
supports, add a new card driver callback.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
---
 drivers/firewire/core-transaction.c | 14 ++++++++++++++
 drivers/firewire/core.h             |  4 ++++
 drivers/firewire/ohci.c             | 27 +++++++++++++++++++++++++++
 include/linux/firewire.h            |  1 +
 4 files changed, 46 insertions(+)

(limited to 'include')

diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index 8146133818d..a61eb3fb957 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -1126,6 +1126,20 @@ static void handle_registers(struct fw_card *card, struct fw_request *request,
 			rcode = RCODE_TYPE_ERROR;
 		break;
 
+	case CSR_PRIORITY_BUDGET:
+		if (!(card->driver->get_features(card) &
+						FEATURE_PRIORITY_BUDGET))
+			rcode = RCODE_ADDRESS_ERROR;
+		else if (tcode == TCODE_READ_QUADLET_REQUEST)
+			*data = cpu_to_be32(card->driver->
+				read_csr_reg(card, CSR_PRIORITY_BUDGET));
+		else if (tcode == TCODE_WRITE_QUADLET_REQUEST)
+			card->driver->write_csr_reg(card, CSR_PRIORITY_BUDGET,
+						    be32_to_cpu(*data));
+		else
+			rcode = RCODE_TYPE_ERROR;
+		break;
+
 	case CSR_BROADCAST_CHANNEL:
 		if (tcode == TCODE_READ_QUADLET_REQUEST)
 			*data = cpu_to_be32(card->broadcast_channel);
diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
index efcdeb2e31e..3b8c0f042f4 100644
--- a/drivers/firewire/core.h
+++ b/drivers/firewire/core.h
@@ -38,6 +38,8 @@ struct fw_packet;
 #define BROADCAST_CHANNEL_INITIAL	(1 << 31 | 31)
 #define BROADCAST_CHANNEL_VALID		(1 << 30)
 
+#define FEATURE_PRIORITY_BUDGET		0x01
+
 struct fw_card_driver {
 	/*
 	 * Enable the given card with the given initial config rom.
@@ -78,6 +80,8 @@ struct fw_card_driver {
 	u32 (*read_csr_reg)(struct fw_card *card, int csr_offset);
 	void (*write_csr_reg)(struct fw_card *card, int csr_offset, u32 value);
 
+	unsigned int (*get_features)(struct fw_card *card);
+
 	struct fw_iso_context *
 	(*allocate_iso_context)(struct fw_card *card,
 				int type, int channel, size_t header_size);
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 9c588fd0125..0e541353178 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -170,6 +170,7 @@ struct fw_ohci {
 	int generation;
 	int request_generation;	/* for timestamping incoming requests */
 	unsigned quirks;
+	unsigned int pri_req_max;
 	u32 bus_time;
 
 	/*
@@ -1738,6 +1739,11 @@ static int ohci_enable(struct fw_card *card,
 	reg_write(ohci, OHCI1394_IsochronousCycleTimer, seconds << 25);
 	ohci->bus_time = seconds & ~0x3f;
 
+	/* Get implemented bits of the priority arbitration request counter. */
+	reg_write(ohci, OHCI1394_FairnessControl, 0x3f);
+	ohci->pri_req_max = reg_read(ohci, OHCI1394_FairnessControl) & 0x3f;
+	reg_write(ohci, OHCI1394_FairnessControl, 0);
+
 	ar_context_run(&ohci->ar_request_ctx);
 	ar_context_run(&ohci->ar_response_ctx);
 
@@ -2028,6 +2034,10 @@ static u32 ohci_read_csr_reg(struct fw_card *card, int csr_offset)
 		value = reg_read(ohci, OHCI1394_ATRetries);
 		return (value >> 4) & 0x0ffff00f;
 
+	case CSR_PRIORITY_BUDGET:
+		return (reg_read(ohci, OHCI1394_FairnessControl) & 0x3f) |
+			(ohci->pri_req_max << 8);
+
 	default:
 		WARN_ON(1);
 		return 0;
@@ -2065,12 +2075,28 @@ static void ohci_write_csr_reg(struct fw_card *card, int csr_offset, u32 value)
 		flush_writes(ohci);
 		break;
 
+	case CSR_PRIORITY_BUDGET:
+		reg_write(ohci, OHCI1394_FairnessControl, value & 0x3f);
+		flush_writes(ohci);
+		break;
+
 	default:
 		WARN_ON(1);
 		break;
 	}
 }
 
+static unsigned int ohci_get_features(struct fw_card *card)
+{
+	struct fw_ohci *ohci = fw_ohci(card);
+	unsigned int features = 0;
+
+	if (ohci->pri_req_max != 0)
+		features |= FEATURE_PRIORITY_BUDGET;
+
+	return features;
+}
+
 static void copy_iso_headers(struct iso_context *ctx, void *p)
 {
 	int i = ctx->header_length;
@@ -2510,6 +2536,7 @@ static const struct fw_card_driver ohci_driver = {
 	.enable_phys_dma	= ohci_enable_phys_dma,
 	.read_csr_reg		= ohci_read_csr_reg,
 	.write_csr_reg		= ohci_write_csr_reg,
+	.get_features		= ohci_get_features,
 
 	.allocate_iso_context	= ohci_allocate_iso_context,
 	.free_iso_context	= ohci_free_iso_context,
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index cdf8213c68c..a50377d9125 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -32,6 +32,7 @@
 #define CSR_CYCLE_TIME			0x200
 #define CSR_BUS_TIME			0x204
 #define CSR_BUSY_TIMEOUT		0x210
+#define CSR_PRIORITY_BUDGET		0x218
 #define CSR_BUS_MANAGER_ID		0x21c
 #define CSR_BANDWIDTH_AVAILABLE		0x220
 #define CSR_CHANNELS_AVAILABLE		0x224
-- 
cgit v1.2.3-70-g09d2


From 3d1f46eb60b155c705e389ecdf313f11b4b91976 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Thu, 10 Jun 2010 08:35:37 +0200
Subject: firewire: core: add CSR MAINT_UTILITY support

Implement the MAINT_UTILITY register, which is utterly optional
but useful as a safe target for diagnostic read/write/broadcast
transactions.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
---
 drivers/firewire/core-transaction.c | 9 +++++++++
 include/linux/firewire.h            | 3 +++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index a61eb3fb957..dd8ef650a7c 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -1140,6 +1140,15 @@ static void handle_registers(struct fw_card *card, struct fw_request *request,
 			rcode = RCODE_TYPE_ERROR;
 		break;
 
+	case CSR_MAINT_UTILITY:
+		if (tcode == TCODE_READ_QUADLET_REQUEST)
+			*data = card->maint_utility_register;
+		else if (tcode == TCODE_WRITE_QUADLET_REQUEST)
+			card->maint_utility_register = *data;
+		else
+			rcode = RCODE_TYPE_ERROR;
+		break;
+
 	case CSR_BROADCAST_CHANNEL:
 		if (tcode == TCODE_READ_QUADLET_REQUEST)
 			*data = cpu_to_be32(card->broadcast_channel);
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index a50377d9125..f1160e831da 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -38,6 +38,7 @@
 #define CSR_CHANNELS_AVAILABLE		0x224
 #define CSR_CHANNELS_AVAILABLE_HI	0x224
 #define CSR_CHANNELS_AVAILABLE_LO	0x228
+#define CSR_MAINT_UTILITY		0x230
 #define CSR_BROADCAST_CHANNEL		0x234
 #define CSR_CONFIG_ROM			0x400
 #define CSR_CONFIG_ROM_END		0x800
@@ -122,6 +123,8 @@ struct fw_card {
 	bool broadcast_channel_allocated;
 	u32 broadcast_channel;
 	__be32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4];
+
+	__be32 maint_utility_register;
 };
 
 struct fw_attribute_group {
-- 
cgit v1.2.3-70-g09d2


From 7e0e314f198d5048b74c8f0ef9f4c1c02e5ecfc9 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Thu, 10 Jun 2010 08:37:15 +0200
Subject: firewire: core: add CSR abdicate support

Implement the abdicate bit, which is required for bus manager
capable nodes and tested by the Base 1394 Test Suite.

Finally, something to do at a command reset!  :-)

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
---
 drivers/firewire/core-card.c        |  3 ++-
 drivers/firewire/core-topology.c    |  2 ++
 drivers/firewire/core-transaction.c | 13 +++++++++++--
 drivers/firewire/core.h             |  1 +
 include/linux/firewire.h            |  2 ++
 5 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index d0f15c2f1e1..7c4cf6cfa74 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -260,7 +260,8 @@ static void fw_card_bm_work(struct work_struct *work)
 
 	grace = time_after(jiffies, card->reset_jiffies + DIV_ROUND_UP(HZ, 8));
 
-	if (is_next_generation(generation, card->bm_generation) ||
+	if ((is_next_generation(generation, card->bm_generation) &&
+	     !card->bm_abdicate) ||
 	    (card->bm_generation != generation && grace)) {
 		/*
 		 * This first step is to figure out who is IRM and
diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c
index 93ec64cdeef..ca3c6531816 100644
--- a/drivers/firewire/core-topology.c
+++ b/drivers/firewire/core-topology.c
@@ -552,6 +552,8 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation,
 	smp_wmb();
 	card->generation = generation;
 	card->reset_jiffies = jiffies;
+	card->bm_abdicate = card->csr_abdicate;
+	card->csr_abdicate = false;
 	fw_schedule_bm_work(card, 0);
 
 	local_node = build_tree(card, self_ids, self_id_count);
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index e0c6cce894c..85a54da243e 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -1008,6 +1008,10 @@ static u32 read_state_register(struct fw_card *card)
 	/* Bit 8 (cmstr): */
 	value |= card->driver->read_csr_reg(card, CSR_STATE_CLEAR);
 
+	/* Bit 10 (abdicate): */
+	if (card->csr_abdicate)
+		value |= CSR_STATE_BIT_ABDICATE;
+
 	return value;
 }
 
@@ -1041,6 +1045,8 @@ static void handle_registers(struct fw_card *card, struct fw_request *request,
 		} else if (tcode == TCODE_WRITE_QUADLET_REQUEST) {
 			card->driver->write_csr_reg(card, CSR_STATE_CLEAR,
 						    be32_to_cpu(*data));
+			if (*data & cpu_to_be32(CSR_STATE_BIT_ABDICATE))
+				card->csr_abdicate = false;
 		} else {
 			rcode = RCODE_TYPE_ERROR;
 		}
@@ -1052,7 +1058,8 @@ static void handle_registers(struct fw_card *card, struct fw_request *request,
 		} else if (tcode == TCODE_WRITE_QUADLET_REQUEST) {
 			card->driver->write_csr_reg(card, CSR_STATE_SET,
 						    be32_to_cpu(*data));
-			/* FIXME: implement abdicate */
+			if (*data & cpu_to_be32(CSR_STATE_BIT_ABDICATE))
+				card->csr_abdicate = true;
 		} else {
 			rcode = RCODE_TYPE_ERROR;
 		}
@@ -1070,7 +1077,9 @@ static void handle_registers(struct fw_card *card, struct fw_request *request,
 		break;
 
 	case CSR_RESET_START:
-		if (tcode != TCODE_WRITE_QUADLET_REQUEST)
+		if (tcode == TCODE_WRITE_QUADLET_REQUEST)
+			card->csr_abdicate = false;
+		else
 			rcode = RCODE_TYPE_ERROR;
 		break;
 
diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
index aaecdd1c176..a9ace1f8dc3 100644
--- a/drivers/firewire/core.h
+++ b/drivers/firewire/core.h
@@ -41,6 +41,7 @@ struct fw_packet;
 #define FEATURE_PRIORITY_BUDGET		0x01
 
 #define CSR_STATE_BIT_CMSTR	(1 << 8)
+#define CSR_STATE_BIT_ABDICATE	(1 << 10)
 
 struct fw_card_driver {
 	/*
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index f1160e831da..4d22643215e 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -119,6 +119,8 @@ struct fw_card {
 	int bm_retries;
 	int bm_generation;
 	__be32 bm_transaction_data[2];
+	bool bm_abdicate; /* value of csr_abdicate before last bus reset */
+	bool csr_abdicate; /* visible in CSR STATE_CLEAR/SET registers */
 
 	bool broadcast_channel_allocated;
 	u32 broadcast_channel;
-- 
cgit v1.2.3-70-g09d2


From fb76dd10b91146e9cefbb3cd4e6812c5a95ee43b Mon Sep 17 00:00:00 2001
From: Luotao Fu <l.fu@pengutronix.de>
Date: Thu, 10 Jun 2010 12:05:23 -0700
Subject: Input: matrix_keypad - add support for clustered irq

This one adds support of a combined irq source for the whole matrix keypad.
This can be useful if all rows and columns of the keypad are e.g. connected
to a GPIO expander, which only has one interrupt line for all events on
every single GPIO.

Signed-off-by: Luotao Fu <l.fu@pengutronix.de>
Acked-by: Eric Miao <eric.y.miao@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/matrix_keypad.c | 108 ++++++++++++++++++++++++---------
 include/linux/input/matrix_keypad.h    |   6 ++
 2 files changed, 86 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c
index b443e088fd3..b02e4268e18 100644
--- a/drivers/input/keyboard/matrix_keypad.c
+++ b/drivers/input/keyboard/matrix_keypad.c
@@ -37,6 +37,7 @@ struct matrix_keypad {
 	spinlock_t lock;
 	bool scan_pending;
 	bool stopped;
+	bool gpio_all_disabled;
 };
 
 /*
@@ -87,8 +88,12 @@ static void enable_row_irqs(struct matrix_keypad *keypad)
 	const struct matrix_keypad_platform_data *pdata = keypad->pdata;
 	int i;
 
-	for (i = 0; i < pdata->num_row_gpios; i++)
-		enable_irq(gpio_to_irq(pdata->row_gpios[i]));
+	if (pdata->clustered_irq > 0)
+		enable_irq(pdata->clustered_irq);
+	else {
+		for (i = 0; i < pdata->num_row_gpios; i++)
+			enable_irq(gpio_to_irq(pdata->row_gpios[i]));
+	}
 }
 
 static void disable_row_irqs(struct matrix_keypad *keypad)
@@ -96,8 +101,12 @@ static void disable_row_irqs(struct matrix_keypad *keypad)
 	const struct matrix_keypad_platform_data *pdata = keypad->pdata;
 	int i;
 
-	for (i = 0; i < pdata->num_row_gpios; i++)
-		disable_irq_nosync(gpio_to_irq(pdata->row_gpios[i]));
+	if (pdata->clustered_irq > 0)
+		disable_irq_nosync(pdata->clustered_irq);
+	else {
+		for (i = 0; i < pdata->num_row_gpios; i++)
+			disable_irq_nosync(gpio_to_irq(pdata->row_gpios[i]));
+	}
 }
 
 /*
@@ -216,45 +225,69 @@ static void matrix_keypad_stop(struct input_dev *dev)
 }
 
 #ifdef CONFIG_PM
-static int matrix_keypad_suspend(struct device *dev)
+static void matrix_keypad_enable_wakeup(struct matrix_keypad *keypad)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct matrix_keypad *keypad = platform_get_drvdata(pdev);
 	const struct matrix_keypad_platform_data *pdata = keypad->pdata;
+	unsigned int gpio;
 	int i;
 
-	matrix_keypad_stop(keypad->input_dev);
+	if (pdata->clustered_irq > 0) {
+		if (enable_irq_wake(pdata->clustered_irq) == 0)
+			keypad->gpio_all_disabled = true;
+	} else {
 
-	if (device_may_wakeup(&pdev->dev)) {
 		for (i = 0; i < pdata->num_row_gpios; i++) {
 			if (!test_bit(i, keypad->disabled_gpios)) {
-				unsigned int gpio = pdata->row_gpios[i];
+				gpio = pdata->row_gpios[i];
 
 				if (enable_irq_wake(gpio_to_irq(gpio)) == 0)
 					__set_bit(i, keypad->disabled_gpios);
 			}
 		}
 	}
-
-	return 0;
 }
 
-static int matrix_keypad_resume(struct device *dev)
+static void matrix_keypad_disable_wakeup(struct matrix_keypad *keypad)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct matrix_keypad *keypad = platform_get_drvdata(pdev);
 	const struct matrix_keypad_platform_data *pdata = keypad->pdata;
+	unsigned int gpio;
 	int i;
 
-	if (device_may_wakeup(&pdev->dev)) {
+	if (pdata->clustered_irq > 0) {
+		if (keypad->gpio_all_disabled) {
+			disable_irq_wake(pdata->clustered_irq);
+			keypad->gpio_all_disabled = false;
+		}
+	} else {
 		for (i = 0; i < pdata->num_row_gpios; i++) {
 			if (test_and_clear_bit(i, keypad->disabled_gpios)) {
-				unsigned int gpio = pdata->row_gpios[i];
-
+				gpio = pdata->row_gpios[i];
 				disable_irq_wake(gpio_to_irq(gpio));
 			}
 		}
 	}
+}
+
+static int matrix_keypad_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct matrix_keypad *keypad = platform_get_drvdata(pdev);
+
+	matrix_keypad_stop(keypad->input_dev);
+
+	if (device_may_wakeup(&pdev->dev))
+		matrix_keypad_enable_wakeup(keypad);
+
+	return 0;
+}
+
+static int matrix_keypad_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct matrix_keypad *keypad = platform_get_drvdata(pdev);
+
+	if (device_may_wakeup(&pdev->dev))
+		matrix_keypad_disable_wakeup(keypad);
 
 	matrix_keypad_start(keypad->input_dev);
 
@@ -296,17 +329,31 @@ static int __devinit init_matrix_gpio(struct platform_device *pdev,
 		gpio_direction_input(pdata->row_gpios[i]);
 	}
 
-	for (i = 0; i < pdata->num_row_gpios; i++) {
-		err = request_irq(gpio_to_irq(pdata->row_gpios[i]),
+	if (pdata->clustered_irq > 0) {
+		err = request_irq(pdata->clustered_irq,
 				matrix_keypad_interrupt,
-				IRQF_DISABLED |
-				IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+				pdata->clustered_irq_flags,
 				"matrix-keypad", keypad);
 		if (err) {
 			dev_err(&pdev->dev,
-				"Unable to acquire interrupt for GPIO line %i\n",
-				pdata->row_gpios[i]);
-			goto err_free_irqs;
+				"Unable to acquire clustered interrupt\n");
+			goto err_free_rows;
+		}
+	} else {
+		for (i = 0; i < pdata->num_row_gpios; i++) {
+			err = request_irq(gpio_to_irq(pdata->row_gpios[i]),
+					matrix_keypad_interrupt,
+					IRQF_DISABLED |
+					IRQF_TRIGGER_RISING |
+					IRQF_TRIGGER_FALLING,
+					"matrix-keypad", keypad);
+			if (err) {
+				dev_err(&pdev->dev,
+					"Unable to acquire interrupt "
+					"for GPIO line %i\n",
+					pdata->row_gpios[i]);
+				goto err_free_irqs;
+			}
 		}
 	}
 
@@ -418,11 +465,16 @@ static int __devexit matrix_keypad_remove(struct platform_device *pdev)
 
 	device_init_wakeup(&pdev->dev, 0);
 
-	for (i = 0; i < pdata->num_row_gpios; i++) {
-		free_irq(gpio_to_irq(pdata->row_gpios[i]), keypad);
-		gpio_free(pdata->row_gpios[i]);
+	if (pdata->clustered_irq > 0) {
+		free_irq(pdata->clustered_irq, keypad);
+	} else {
+		for (i = 0; i < pdata->num_row_gpios; i++)
+			free_irq(gpio_to_irq(pdata->row_gpios[i]), keypad);
 	}
 
+	for (i = 0; i < pdata->num_row_gpios; i++)
+		gpio_free(pdata->row_gpios[i]);
+
 	for (i = 0; i < pdata->num_col_gpios; i++)
 		gpio_free(pdata->col_gpios[i]);
 
diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h
index c964cd7f436..80352ad6581 100644
--- a/include/linux/input/matrix_keypad.h
+++ b/include/linux/input/matrix_keypad.h
@@ -41,6 +41,9 @@ struct matrix_keymap_data {
  * @col_scan_delay_us: delay, measured in microseconds, that is
  *	needed before we can keypad after activating column gpio
  * @debounce_ms: debounce interval in milliseconds
+ * @clustered_irq: may be specified if interrupts of all row/column GPIOs
+ *	are bundled to one single irq
+ * @clustered_irq_flags: flags that are needed for the clustered irq
  * @active_low: gpio polarity
  * @wakeup: controls whether the device should be set up as wakeup
  *	source
@@ -63,6 +66,9 @@ struct matrix_keypad_platform_data {
 	/* key debounce interval in milli-second */
 	unsigned int	debounce_ms;
 
+	unsigned int	clustered_irq;
+	unsigned int	clustered_irq_flags;
+
 	bool		active_low;
 	bool		wakeup;
 	bool		no_autorepeat;
-- 
cgit v1.2.3-70-g09d2


From c6de9f08912311ddc1b3502b90e10fd449acd401 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Mon, 31 May 2010 16:48:09 +0800
Subject: x86, mce: Add HW_ERR printk prefix for hardware error logging

This makes hardware-error-related messages in the printk log more
explicit, so that users can report them to the hardware vendor instead
of LKML or the software vendor.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
LKML-Reference: <1275295689.3444.462.camel@yhuang-dev.sh.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/linux/kernel.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 8317ec4b9f3..3bf740bb069 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -247,6 +247,13 @@ extern struct pid *session_of_pgrp(struct pid *pgrp);
 #define FW_WARN		"[Firmware Warn]: "
 #define FW_INFO		"[Firmware Info]: "
 
+/*
+ * HW_ERR
+ * Add this to a message for hardware errors, so that user can report
+ * it to hardware vendor instead of LKML or software vendor.
+ */
+#define HW_ERR		"[Hardware Error]: "
+
 #ifdef CONFIG_PRINTK
 asmlinkage int vprintk(const char *fmt, va_list args)
 	__attribute__ ((format (printf, 1, 0)));
-- 
cgit v1.2.3-70-g09d2


From 592fcb9dfafaa02dd0edc207bf5d3a0ee7a1f8df Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 9 Jun 2010 16:21:07 +0000
Subject: ip: ip_ra_control() rcu fix

commit 66018506e15b (ip: Router Alert RCU conversion) introduced RCU
lookups to ip_call_ra_chain(). It missed a proper deinit phase:
when ip_ra_control() deletes an ip_ra_chain, it should make sure
ip_call_ra_chain() users cannot start using the socket during the RCU
grace period. It should also delay the sock_put() until after the grace
period, or we risk premature socket freeing and corruption, as
raw sockets are not RCU protected yet.

This delay avoids using expensive atomic_inc_not_zero() in
ip_call_ra_chain().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h       |  5 ++++-
 net/ipv4/ip_sockglue.c | 19 +++++++++++++++----
 2 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 9982c97f0bd..d52f0118036 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -61,7 +61,10 @@ struct ipcm_cookie {
 struct ip_ra_chain {
 	struct ip_ra_chain	*next;
 	struct sock		*sk;
-	void			(*destructor)(struct sock *);
+	union {
+		void			(*destructor)(struct sock *);
+		struct sock		*saved_sk;
+	};
 	struct rcu_head		rcu;
 };
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 08b9519a24f..47fff528ff3 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -241,9 +241,13 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
 struct ip_ra_chain *ip_ra_chain;
 static DEFINE_SPINLOCK(ip_ra_lock);
 
-static void ip_ra_free_rcu(struct rcu_head *head)
+
+static void ip_ra_destroy_rcu(struct rcu_head *head)
 {
-	kfree(container_of(head, struct ip_ra_chain, rcu));
+	struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
+
+	sock_put(ra->saved_sk);
+	kfree(ra);
 }
 
 int ip_ra_control(struct sock *sk, unsigned char on,
@@ -264,13 +268,20 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 				kfree(new_ra);
 				return -EADDRINUSE;
 			}
+			/* dont let ip_call_ra_chain() use sk again */
+			ra->sk = NULL;
 			rcu_assign_pointer(*rap, ra->next);
 			spin_unlock_bh(&ip_ra_lock);
 
 			if (ra->destructor)
 				ra->destructor(sk);
-			sock_put(sk);
-			call_rcu(&ra->rcu, ip_ra_free_rcu);
+			/*
+			 * Delay sock_put(sk) and kfree(ra) after one rcu grace
+			 * period. This guarantee ip_call_ra_chain() dont need
+			 * to mess with socket refcounts.
+			 */
+			ra->saved_sk = sk;
+			call_rcu(&ra->rcu, ip_ra_destroy_rcu);
 			return 0;
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From d8d1f30b95a635dbd610dcc5eb641aca8f4768cf Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Thu, 10 Jun 2010 23:31:35 -0700
Subject: net-next: remove useless union keyword

Remove the useless union keyword in rtable, rt6_info and dn_route.

Since each of these unions has only one member, the union keyword isn't useful.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/core/addr.c          |   2 +-
 drivers/infiniband/hw/cxgb3/iwch_cm.c   |   4 +-
 drivers/infiniband/hw/cxgb4/cm.c        |   4 +-
 drivers/infiniband/hw/nes/nes_cm.c      |   2 +-
 drivers/net/bonding/bond_main.c         |   6 +-
 drivers/net/cnic.c                      |   2 +-
 drivers/scsi/cxgb3i/cxgb3i_offload.c    |   4 +-
 include/net/dn_route.h                  |   4 +-
 include/net/ip6_fib.h                   |  10 +-
 include/net/ipip.h                      |   2 +-
 include/net/route.h                     |   6 +-
 net/atm/clip.c                          |   2 +-
 net/bridge/br_device.c                  |   2 +-
 net/bridge/br_netfilter.c               |  20 +-
 net/dccp/ipv4.c                         |   4 +-
 net/decnet/dn_route.c                   | 158 ++++++------
 net/ethernet/eth.c                      |   5 +-
 net/ipv4/af_inet.c                      |   4 +-
 net/ipv4/arp.c                          |  12 +-
 net/ipv4/datagram.c                     |   2 +-
 net/ipv4/icmp.c                         |  18 +-
 net/ipv4/igmp.c                         |  10 +-
 net/ipv4/inet_connection_sock.c         |   2 +-
 net/ipv4/ip_forward.c                   |  10 +-
 net/ipv4/ip_gre.c                       |  14 +-
 net/ipv4/ip_input.c                     |   4 +-
 net/ipv4/ip_output.c                    |  60 ++---
 net/ipv4/ipip.c                         |   8 +-
 net/ipv4/ipmr.c                         |   8 +-
 net/ipv4/netfilter.c                    |   8 +-
 net/ipv4/raw.c                          |  16 +-
 net/ipv4/route.c                        | 420 ++++++++++++++++----------------
 net/ipv4/syncookies.c                   |   6 +-
 net/ipv4/tcp_ipv4.c                     |   2 +-
 net/ipv4/udp.c                          |   4 +-
 net/ipv4/xfrm4_policy.c                 |   2 +-
 net/ipv6/addrconf.c                     |  10 +-
 net/ipv6/anycast.c                      |   6 +-
 net/ipv6/fib6_rules.c                   |  10 +-
 net/ipv6/ip6_fib.c                      |  30 +--
 net/ipv6/ip6_output.c                   |  38 +--
 net/ipv6/ip6_tunnel.c                   |   8 +-
 net/ipv6/mcast.c                        |   4 +-
 net/ipv6/ndisc.c                        |   8 +-
 net/ipv6/raw.c                          |  12 +-
 net/ipv6/route.c                        | 300 +++++++++++------------
 net/ipv6/sit.c                          |   8 +-
 net/l2tp/l2tp_ip.c                      |   6 +-
 net/netfilter/ipvs/ip_vs_xmit.c         |  86 +++----
 net/netfilter/nf_conntrack_h323_main.c  |  12 +-
 net/netfilter/nf_conntrack_netbios_ns.c |   2 +-
 net/netfilter/xt_TCPMSS.c               |   4 +-
 net/netfilter/xt_TEE.c                  |   4 +-
 net/rxrpc/ar-peer.c                     |   4 +-
 net/sctp/protocol.c                     |   4 +-
 55 files changed, 694 insertions(+), 709 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 0b926e45afe..a5ea1bce968 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -215,7 +215,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
 
 	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
 	if (!neigh || !(neigh->nud_state & NUD_VALID)) {
-		neigh_event_send(rt->u.dst.neighbour, NULL);
+		neigh_event_send(rt->dst.neighbour, NULL);
 		ret = -ENODATA;
 		if (neigh)
 			goto release;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index ebfb117ba68..abd683ea326 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1364,7 +1364,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 		       __func__);
 		goto reject;
 	}
-	dst = &rt->u.dst;
+	dst = &rt->dst;
 	l2t = t3_l2t_get(tdev, dst->neighbour, dst->neighbour->dev);
 	if (!l2t) {
 		printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
@@ -1932,7 +1932,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		err = -EHOSTUNREACH;
 		goto fail3;
 	}
-	ep->dst = &rt->u.dst;
+	ep->dst = &rt->dst;
 
 	/* get a l2t entry */
 	ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst->neighbour,
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 30ce0a8eca0..8b693c8c25e 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1364,7 +1364,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 		       __func__);
 		goto reject;
 	}
-	dst = &rt->u.dst;
+	dst = &rt->dst;
 	if (dst->neighbour->dev->flags & IFF_LOOPBACK) {
 		pdev = ip_dev_find(&init_net, peer_ip);
 		BUG_ON(!pdev);
@@ -1938,7 +1938,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		err = -EHOSTUNREACH;
 		goto fail3;
 	}
-	ep->dst = &rt->u.dst;
+	ep->dst = &rt->dst;
 
 	/* get a l2t entry */
 	if (ep->dst->neighbour->dev->flags & IFF_LOOPBACK) {
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 986d6f32dde..d876d0435cd 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1146,7 +1146,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
 	}
 
 	if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID)))
-		neigh_event_send(rt->u.dst.neighbour, NULL);
+		neigh_event_send(rt->dst.neighbour, NULL);
 
 	ip_rt_put(rt);
 	return rc;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1b19276cff1..ac4f94b7da3 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2584,7 +2584,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 		/*
 		 * This target is not on a VLAN
 		 */
-		if (rt->u.dst.dev == bond->dev) {
+		if (rt->dst.dev == bond->dev) {
 			ip_rt_put(rt);
 			pr_debug("basa: rtdev == bond->dev: arp_send\n");
 			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
@@ -2595,7 +2595,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 		vlan_id = 0;
 		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
 			vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
-			if (vlan_dev == rt->u.dst.dev) {
+			if (vlan_dev == rt->dst.dev) {
 				vlan_id = vlan->vlan_id;
 				pr_debug("basa: vlan match on %s %d\n",
 				       vlan_dev->name, vlan_id);
@@ -2613,7 +2613,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 		if (net_ratelimit()) {
 			pr_warning("%s: no path to arp_ip_target %pI4 via rt.dev %s\n",
 				   bond->dev->name, &fl.fl4_dst,
-				   rt->u.dst.dev ? rt->u.dst.dev->name : "NULL");
+				   rt->dst.dev ? rt->dst.dev->name : "NULL");
 		}
 		ip_rt_put(rt);
 	}
diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c
index fe925663d39..908d89a4fe8 100644
--- a/drivers/net/cnic.c
+++ b/drivers/net/cnic.c
@@ -2824,7 +2824,7 @@ static int cnic_get_v4_route(struct sockaddr_in *dst_addr,
 
 	err = ip_route_output_key(&init_net, &rt, &fl);
 	if (!err)
-		*dst = &rt->u.dst;
+		*dst = &rt->dst;
 	return err;
 #else
 	return -ENETUNREACH;
diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.c b/drivers/scsi/cxgb3i/cxgb3i_offload.c
index a175be9c496..3b6a06eebf7 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_offload.c
+++ b/drivers/scsi/cxgb3i/cxgb3i_offload.c
@@ -1587,7 +1587,7 @@ cxgb3i_find_dev(struct net_device *dev, __be32 ipaddr)
 
 	err = ip_route_output_key(dev ? dev_net(dev) : &init_net, &rt, &fl);
 	if (!err)
-		return (&rt->u.dst)->dev;
+		return (&rt->dst)->dev;
 
 	return NULL;
 }
@@ -1649,7 +1649,7 @@ int cxgb3i_c3cn_connect(struct net_device *dev, struct s3_conn *c3cn,
 		c3cn->saddr.sin_addr.s_addr = rt->rt_src;
 
 	/* now commit destination to connection */
-	c3cn->dst_cache = &rt->u.dst;
+	c3cn->dst_cache = &rt->dst;
 
 	/* try to establish an offloaded connection */
 	dev = cxgb3_egress_dev(c3cn->dst_cache->dev, c3cn, 0);
diff --git a/include/net/dn_route.h b/include/net/dn_route.h
index 60c9f22d869..ccadab3aa3f 100644
--- a/include/net/dn_route.h
+++ b/include/net/dn_route.h
@@ -65,9 +65,7 @@ extern void dn_rt_cache_flush(int delay);
  * packets to the originating host.
  */
 struct dn_route {
-	union {
-		struct dst_entry dst;
-	} u;
+	struct dst_entry dst;
 
 	struct flowi fl;
 
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 4b1dc1161c3..062a823d311 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -84,13 +84,11 @@ struct rt6key {
 struct fib6_table;
 
 struct rt6_info {
-	union {
-		struct dst_entry	dst;
-	} u;
+	struct dst_entry		dst;
 
-#define rt6i_dev			u.dst.dev
-#define rt6i_nexthop			u.dst.neighbour
-#define rt6i_expires			u.dst.expires
+#define rt6i_dev			dst.dev
+#define rt6i_nexthop			dst.neighbour
+#define rt6i_expires			dst.expires
 
 	/*
 	 * Tail elements of dst_entry (__refcnt etc.)
diff --git a/include/net/ipip.h b/include/net/ipip.h
index 11e8513d2d0..65caea8b414 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -50,7 +50,7 @@ struct ip_tunnel_prl_entry {
 	int pkt_len = skb->len - skb_transport_offset(skb);		\
 									\
 	skb->ip_summed = CHECKSUM_NONE;					\
-	ip_select_ident(iph, &rt->u.dst, NULL);				\
+	ip_select_ident(iph, &rt->dst, NULL);				\
 									\
 	err = ip_local_out(skb);					\
 	if (likely(net_xmit_eval(err) == 0)) {				\
diff --git a/include/net/route.h b/include/net/route.h
index af6cf4b4c9d..bd732d62e1c 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -50,9 +50,7 @@
 struct fib_nh;
 struct inet_peer;
 struct rtable {
-	union {
-		struct dst_entry	dst;
-	} u;
+	struct dst_entry	dst;
 
 	/* Cache lookup keys */
 	struct flowi		fl;
@@ -144,7 +142,7 @@ extern void fib_add_ifaddr(struct in_ifaddr *);
 static inline void ip_rt_put(struct rtable * rt)
 {
 	if (rt)
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 }
 
 #define IPTOS_RT_MASK	(IPTOS_TOS_MASK & ~3)
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 313aba11316..95fdd118506 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -522,7 +522,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
 	error = ip_route_output_key(&init_net, &rt, &fl);
 	if (error)
 		return error;
-	neigh = __neigh_lookup(&clip_tbl, &ip, rt->u.dst.dev, 1);
+	neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1);
 	ip_rt_put(rt);
 	if (!neigh)
 		return -ENOMEM;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index eedf2c94820..b898364beaf 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -127,7 +127,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
 
 #ifdef CONFIG_BRIDGE_NETFILTER
 	/* remember the MTU in the rtable for PMTU */
-	br->fake_rtable.u.dst.metrics[RTAX_MTU - 1] = new_mtu;
+	br->fake_rtable.dst.metrics[RTAX_MTU - 1] = new_mtu;
 #endif
 
 	return 0;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 44420992f72..0685b2558ab 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -117,12 +117,12 @@ void br_netfilter_rtable_init(struct net_bridge *br)
 {
 	struct rtable *rt = &br->fake_rtable;
 
-	atomic_set(&rt->u.dst.__refcnt, 1);
-	rt->u.dst.dev = br->dev;
-	rt->u.dst.path = &rt->u.dst;
-	rt->u.dst.metrics[RTAX_MTU - 1] = 1500;
-	rt->u.dst.flags	= DST_NOXFRM;
-	rt->u.dst.ops = &fake_dst_ops;
+	atomic_set(&rt->dst.__refcnt, 1);
+	rt->dst.dev = br->dev;
+	rt->dst.path = &rt->dst;
+	rt->dst.metrics[RTAX_MTU - 1] = 1500;
+	rt->dst.flags	= DST_NOXFRM;
+	rt->dst.ops = &fake_dst_ops;
 }
 
 static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
@@ -244,8 +244,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 		kfree_skb(skb);
 		return 0;
 	}
-	dst_hold(&rt->u.dst);
-	skb_dst_set(skb, &rt->u.dst);
+	dst_hold(&rt->dst);
+	skb_dst_set(skb, &rt->dst);
 
 	skb->dev = nf_bridge->physindev;
 	nf_bridge_update_protocol(skb);
@@ -396,8 +396,8 @@ bridged_dnat:
 			kfree_skb(skb);
 			return 0;
 		}
-		dst_hold(&rt->u.dst);
-		skb_dst_set(skb, &rt->u.dst);
+		dst_hold(&rt->dst);
+		skb_dst_set(skb, &rt->dst);
 	}
 
 	skb->dev = nf_bridge->physindev;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index d9b11ef8694..d4a166f0f39 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -105,7 +105,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		goto failure;
 
 	/* OK, now commit destination to socket.  */
-	sk_setup_caps(sk, &rt->u.dst);
+	sk_setup_caps(sk, &rt->dst);
 
 	dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr,
 						    inet->inet_daddr,
@@ -475,7 +475,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
 		return NULL;
 	}
 
-	return &rt->u.dst;
+	return &rt->dst;
 }
 
 static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 812e6dff606..6585ea6d118 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -146,13 +146,13 @@ static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
 
 static inline void dnrt_free(struct dn_route *rt)
 {
-	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
 static inline void dnrt_drop(struct dn_route *rt)
 {
-	dst_release(&rt->u.dst);
-	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+	dst_release(&rt->dst);
+	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
 static void dn_dst_check_expire(unsigned long dummy)
@@ -167,13 +167,13 @@ static void dn_dst_check_expire(unsigned long dummy)
 
 		spin_lock(&dn_rt_hash_table[i].lock);
 		while((rt=*rtp) != NULL) {
-			if (atomic_read(&rt->u.dst.__refcnt) ||
-					(now - rt->u.dst.lastuse) < expire) {
-				rtp = &rt->u.dst.dn_next;
+			if (atomic_read(&rt->dst.__refcnt) ||
+					(now - rt->dst.lastuse) < expire) {
+				rtp = &rt->dst.dn_next;
 				continue;
 			}
-			*rtp = rt->u.dst.dn_next;
-			rt->u.dst.dn_next = NULL;
+			*rtp = rt->dst.dn_next;
+			rt->dst.dn_next = NULL;
 			dnrt_free(rt);
 		}
 		spin_unlock(&dn_rt_hash_table[i].lock);
@@ -198,13 +198,13 @@ static int dn_dst_gc(struct dst_ops *ops)
 		rtp = &dn_rt_hash_table[i].chain;
 
 		while((rt=*rtp) != NULL) {
-			if (atomic_read(&rt->u.dst.__refcnt) ||
-					(now - rt->u.dst.lastuse) < expire) {
-				rtp = &rt->u.dst.dn_next;
+			if (atomic_read(&rt->dst.__refcnt) ||
+					(now - rt->dst.lastuse) < expire) {
+				rtp = &rt->dst.dn_next;
 				continue;
 			}
-			*rtp = rt->u.dst.dn_next;
-			rt->u.dst.dn_next = NULL;
+			*rtp = rt->dst.dn_next;
+			rt->dst.dn_next = NULL;
 			dnrt_drop(rt);
 			break;
 		}
@@ -287,25 +287,25 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route *
 	while((rth = *rthp) != NULL) {
 		if (compare_keys(&rth->fl, &rt->fl)) {
 			/* Put it first */
-			*rthp = rth->u.dst.dn_next;
-			rcu_assign_pointer(rth->u.dst.dn_next,
+			*rthp = rth->dst.dn_next;
+			rcu_assign_pointer(rth->dst.dn_next,
 					   dn_rt_hash_table[hash].chain);
 			rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
 
-			dst_use(&rth->u.dst, now);
+			dst_use(&rth->dst, now);
 			spin_unlock_bh(&dn_rt_hash_table[hash].lock);
 
 			dnrt_drop(rt);
 			*rp = rth;
 			return 0;
 		}
-		rthp = &rth->u.dst.dn_next;
+		rthp = &rth->dst.dn_next;
 	}
 
-	rcu_assign_pointer(rt->u.dst.dn_next, dn_rt_hash_table[hash].chain);
+	rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
 	rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
 
-	dst_use(&rt->u.dst, now);
+	dst_use(&rt->dst, now);
 	spin_unlock_bh(&dn_rt_hash_table[hash].lock);
 	*rp = rt;
 	return 0;
@@ -323,8 +323,8 @@ static void dn_run_flush(unsigned long dummy)
 			goto nothing_to_declare;
 
 		for(; rt; rt=next) {
-			next = rt->u.dst.dn_next;
-			rt->u.dst.dn_next = NULL;
+			next = rt->dst.dn_next;
+			rt->dst.dn_next = NULL;
 			dst_free((struct dst_entry *)rt);
 		}
 
@@ -743,7 +743,7 @@ static int dn_forward(struct sk_buff *skb)
 	/* Ensure that we have enough space for headers */
 	rt = (struct dn_route *)skb_dst(skb);
 	header_len = dn_db->use_long ? 21 : 6;
-	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+header_len))
+	if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len))
 		goto drop;
 
 	/*
@@ -752,7 +752,7 @@ static int dn_forward(struct sk_buff *skb)
 	if (++cb->hops > 30)
 		goto drop;
 
-	skb->dev = rt->u.dst.dev;
+	skb->dev = rt->dst.dev;
 
 	/*
 	 * If packet goes out same interface it came in on, then set
@@ -792,7 +792,7 @@ static int dn_rt_bug(struct sk_buff *skb)
 static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 {
 	struct dn_fib_info *fi = res->fi;
-	struct net_device *dev = rt->u.dst.dev;
+	struct net_device *dev = rt->dst.dev;
 	struct neighbour *n;
 	unsigned mss;
 
@@ -800,25 +800,25 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 		if (DN_FIB_RES_GW(*res) &&
 		    DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = DN_FIB_RES_GW(*res);
-		memcpy(rt->u.dst.metrics, fi->fib_metrics,
-		       sizeof(rt->u.dst.metrics));
+		memcpy(rt->dst.metrics, fi->fib_metrics,
+		       sizeof(rt->dst.metrics));
 	}
 	rt->rt_type = res->type;
 
-	if (dev != NULL && rt->u.dst.neighbour == NULL) {
+	if (dev != NULL && rt->dst.neighbour == NULL) {
 		n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev);
 		if (IS_ERR(n))
 			return PTR_ERR(n);
-		rt->u.dst.neighbour = n;
+		rt->dst.neighbour = n;
 	}
 
-	if (dst_metric(&rt->u.dst, RTAX_MTU) == 0 ||
-	    dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu)
-		rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
-	mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst));
-	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 ||
-	    dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss)
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = mss;
+	if (dst_metric(&rt->dst, RTAX_MTU) == 0 ||
+	    dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
+		rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
+	mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
+	if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 ||
+	    dst_metric(&rt->dst, RTAX_ADVMSS) > mss)
+		rt->dst.metrics[RTAX_ADVMSS-1] = mss;
 	return 0;
 }
 
@@ -1096,8 +1096,8 @@ make_route:
 	if (rt == NULL)
 		goto e_nobufs;
 
-	atomic_set(&rt->u.dst.__refcnt, 1);
-	rt->u.dst.flags   = DST_HOST;
+	atomic_set(&rt->dst.__refcnt, 1);
+	rt->dst.flags   = DST_HOST;
 
 	rt->fl.fld_src    = oldflp->fld_src;
 	rt->fl.fld_dst    = oldflp->fld_dst;
@@ -1113,17 +1113,17 @@ make_route:
 	rt->rt_dst_map    = fl.fld_dst;
 	rt->rt_src_map    = fl.fld_src;
 
-	rt->u.dst.dev = dev_out;
+	rt->dst.dev = dev_out;
 	dev_hold(dev_out);
-	rt->u.dst.neighbour = neigh;
+	rt->dst.neighbour = neigh;
 	neigh = NULL;
 
-	rt->u.dst.lastuse = jiffies;
-	rt->u.dst.output  = dn_output;
-	rt->u.dst.input   = dn_rt_bug;
+	rt->dst.lastuse = jiffies;
+	rt->dst.output  = dn_output;
+	rt->dst.input   = dn_rt_bug;
 	rt->rt_flags      = flags;
 	if (flags & RTCF_LOCAL)
-		rt->u.dst.input = dn_nsp_rx;
+		rt->dst.input = dn_nsp_rx;
 
 	err = dn_rt_set_next_hop(rt, &res);
 	if (err)
@@ -1152,7 +1152,7 @@ e_nobufs:
 	err = -ENOBUFS;
 	goto done;
 e_neighbour:
-	dst_free(&rt->u.dst);
+	dst_free(&rt->dst);
 	goto e_nobufs;
 }
 
@@ -1168,15 +1168,15 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl
 	if (!(flags & MSG_TRYHARD)) {
 		rcu_read_lock_bh();
 		for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
-			rt = rcu_dereference_bh(rt->u.dst.dn_next)) {
+			rt = rcu_dereference_bh(rt->dst.dn_next)) {
 			if ((flp->fld_dst == rt->fl.fld_dst) &&
 			    (flp->fld_src == rt->fl.fld_src) &&
 			    (flp->mark == rt->fl.mark) &&
 			    (rt->fl.iif == 0) &&
 			    (rt->fl.oif == flp->oif)) {
-				dst_use(&rt->u.dst, jiffies);
+				dst_use(&rt->dst, jiffies);
 				rcu_read_unlock_bh();
-				*pprt = &rt->u.dst;
+				*pprt = &rt->dst;
 				return 0;
 			}
 		}
@@ -1375,29 +1375,29 @@ make_route:
 	rt->fl.iif        = in_dev->ifindex;
 	rt->fl.mark       = fl.mark;
 
-	rt->u.dst.flags = DST_HOST;
-	rt->u.dst.neighbour = neigh;
-	rt->u.dst.dev = out_dev;
-	rt->u.dst.lastuse = jiffies;
-	rt->u.dst.output = dn_rt_bug;
+	rt->dst.flags = DST_HOST;
+	rt->dst.neighbour = neigh;
+	rt->dst.dev = out_dev;
+	rt->dst.lastuse = jiffies;
+	rt->dst.output = dn_rt_bug;
 	switch(res.type) {
 		case RTN_UNICAST:
-			rt->u.dst.input = dn_forward;
+			rt->dst.input = dn_forward;
 			break;
 		case RTN_LOCAL:
-			rt->u.dst.output = dn_output;
-			rt->u.dst.input = dn_nsp_rx;
-			rt->u.dst.dev = in_dev;
+			rt->dst.output = dn_output;
+			rt->dst.input = dn_nsp_rx;
+			rt->dst.dev = in_dev;
 			flags |= RTCF_LOCAL;
 			break;
 		default:
 		case RTN_UNREACHABLE:
 		case RTN_BLACKHOLE:
-			rt->u.dst.input = dst_discard;
+			rt->dst.input = dst_discard;
 	}
 	rt->rt_flags = flags;
-	if (rt->u.dst.dev)
-		dev_hold(rt->u.dst.dev);
+	if (rt->dst.dev)
+		dev_hold(rt->dst.dev);
 
 	err = dn_rt_set_next_hop(rt, &res);
 	if (err)
@@ -1405,7 +1405,7 @@ make_route:
 
 	hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst);
 	dn_insert_route(rt, hash, &rt);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 done:
 	if (neigh)
@@ -1427,7 +1427,7 @@ e_nobufs:
 	goto done;
 
 e_neighbour:
-	dst_free(&rt->u.dst);
+	dst_free(&rt->dst);
 	goto done;
 }
 
@@ -1442,13 +1442,13 @@ static int dn_route_input(struct sk_buff *skb)
 
 	rcu_read_lock();
 	for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
-	    rt = rcu_dereference(rt->u.dst.dn_next)) {
+	    rt = rcu_dereference(rt->dst.dn_next)) {
 		if ((rt->fl.fld_src == cb->src) &&
 		    (rt->fl.fld_dst == cb->dst) &&
 		    (rt->fl.oif == 0) &&
 		    (rt->fl.mark == skb->mark) &&
 		    (rt->fl.iif == cb->iif)) {
-			dst_use(&rt->u.dst, jiffies);
+			dst_use(&rt->dst, jiffies);
 			rcu_read_unlock();
 			skb_dst_set(skb, (struct dst_entry *)rt);
 			return 0;
@@ -1487,8 +1487,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 		r->rtm_src_len = 16;
 		RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src);
 	}
-	if (rt->u.dst.dev)
-		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
+	if (rt->dst.dev)
+		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->dst.dev->ifindex);
 	/*
 	 * Note to self - change this if input routes reverse direction when
 	 * they deal only with inputs and not with replies like they do
@@ -1497,11 +1497,11 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src);
 	if (rt->rt_daddr != rt->rt_gateway)
 		RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway);
-	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
 		goto rtattr_failure;
-	expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0;
-	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires,
-			       rt->u.dst.error) < 0)
+	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
+	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires,
+			       rt->dst.error) < 0)
 		goto rtattr_failure;
 	if (rt->fl.iif)
 		RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
@@ -1568,8 +1568,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 		local_bh_enable();
 		memset(cb, 0, sizeof(struct dn_skb_cb));
 		rt = (struct dn_route *)skb_dst(skb);
-		if (!err && -rt->u.dst.error)
-			err = rt->u.dst.error;
+		if (!err && -rt->dst.error)
+			err = rt->dst.error;
 	} else {
 		int oif = 0;
 		if (rta[RTA_OIF - 1])
@@ -1583,7 +1583,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 	skb->dev = NULL;
 	if (err)
 		goto out_free;
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
@@ -1632,10 +1632,10 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		rcu_read_lock_bh();
 		for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
 			rt;
-			rt = rcu_dereference_bh(rt->u.dst.dn_next), idx++) {
+			rt = rcu_dereference_bh(rt->dst.dn_next), idx++) {
 			if (idx < s_idx)
 				continue;
-			skb_dst_set(skb, dst_clone(&rt->u.dst));
+			skb_dst_set(skb, dst_clone(&rt->dst));
 			if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
 					cb->nlh->nlmsg_seq, RTM_NEWROUTE,
 					1, NLM_F_MULTI) <= 0) {
@@ -1678,7 +1678,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
 {
 	struct dn_rt_cache_iter_state *s = seq->private;
 
-	rt = rt->u.dst.dn_next;
+	rt = rt->dst.dn_next;
 	while(!rt) {
 		rcu_read_unlock_bh();
 		if (--s->bucket < 0)
@@ -1719,12 +1719,12 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
 	char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN];
 
 	seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n",
-			rt->u.dst.dev ? rt->u.dst.dev->name : "*",
+			rt->dst.dev ? rt->dst.dev->name : "*",
 			dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1),
 			dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2),
-			atomic_read(&rt->u.dst.__refcnt),
-			rt->u.dst.__use,
-			(int) dst_metric(&rt->u.dst, RTAX_RTT));
+			atomic_read(&rt->dst.__refcnt),
+			rt->dst.__use,
+			(int) dst_metric(&rt->dst, RTAX_RTT));
 	return 0;
 }
 
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 61ec0329316..215c83986a9 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -158,7 +158,6 @@ EXPORT_SYMBOL(eth_rebuild_header);
 __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ethhdr *eth;
-	unsigned char *rawp;
 
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
@@ -199,15 +198,13 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	if (ntohs(eth->h_proto) >= 1536)
 		return eth->h_proto;
 
-	rawp = skb->data;
-
 	/*
 	 *      This is a magic hack to spot IPX packets. Older Novell breaks
 	 *      the protocol design and runs IPX over 802.3 without an 802.2 LLC
 	 *      layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
 	 *      won't work for fault tolerant netware but does for the rest.
 	 */
-	if (*(unsigned short *)rawp == 0xFFFF)
+	if (skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF)
 		return htons(ETH_P_802_3);
 
 	/*
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 551ce564b03..d99e7e02018 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1100,7 +1100,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 	if (err)
 		return err;
 
-	sk_setup_caps(sk, &rt->u.dst);
+	sk_setup_caps(sk, &rt->dst);
 
 	new_saddr = rt->rt_src;
 
@@ -1166,7 +1166,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 	err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
 }
 	if (!err)
-		sk_setup_caps(sk, &rt->u.dst);
+		sk_setup_caps(sk, &rt->dst);
 	else {
 		/* Routing failed... */
 		sk->sk_route_caps = 0;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 917d2d66162..cf78f41830c 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -427,7 +427,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 
 	if (ip_route_output_key(net, &rt, &fl) < 0)
 		return 1;
-	if (rt->u.dst.dev != dev) {
+	if (rt->dst.dev != dev) {
 		NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
 		flag = 1;
 	}
@@ -532,7 +532,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
 	struct in_device *out_dev;
 	int imi, omi = -1;
 
-	if (rt->u.dst.dev == dev)
+	if (rt->dst.dev == dev)
 		return 0;
 
 	if (!IN_DEV_PROXY_ARP(in_dev))
@@ -545,7 +545,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
 
 	/* place to check for proxy_arp for routes */
 
-	out_dev = __in_dev_get_rcu(rt->u.dst.dev);
+	out_dev = __in_dev_get_rcu(rt->dst.dev);
 	if (out_dev)
 		omi = IN_DEV_MEDIUM_ID(out_dev);
 
@@ -576,7 +576,7 @@ static inline int arp_fwd_pvlan(struct in_device *in_dev,
 				__be32 sip, __be32 tip)
 {
 	/* Private VLAN is only concerned about the same ethernet segment */
-	if (rt->u.dst.dev != dev)
+	if (rt->dst.dev != dev)
 		return 0;
 
 	/* Don't reply on self probes (often done by windowz boxes)*/
@@ -1042,7 +1042,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
 		struct rtable * rt;
 		if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
 			return err;
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
 			return -EINVAL;
@@ -1149,7 +1149,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
 		struct rtable * rt;
 		if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
 			return err;
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
 			return -EINVAL;
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index fb2465811b4..fe3daa7f07a 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -69,7 +69,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	sk->sk_state = TCP_ESTABLISHED;
 	inet->inet_id = jiffies;
 
-	sk_dst_set(sk, &rt->u.dst);
+	sk_dst_set(sk, &rt->dst);
 	return(0);
 }
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index bdb6c71e72a..7569b21a3a2 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -271,7 +271,7 @@ int xrlim_allow(struct dst_entry *dst, int timeout)
 static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 		int type, int code)
 {
-	struct dst_entry *dst = &rt->u.dst;
+	struct dst_entry *dst = &rt->dst;
 	int rc = 1;
 
 	if (type > NR_ICMP_TYPES)
@@ -327,7 +327,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 	struct sock *sk;
 	struct sk_buff *skb;
 
-	sk = icmp_sk(dev_net((*rt)->u.dst.dev));
+	sk = icmp_sk(dev_net((*rt)->dst.dev));
 	if (ip_append_data(sk, icmp_glue_bits, icmp_param,
 			   icmp_param->data_len+icmp_param->head_len,
 			   icmp_param->head_len,
@@ -359,7 +359,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 {
 	struct ipcm_cookie ipc;
 	struct rtable *rt = skb_rtable(skb);
-	struct net *net = dev_net(rt->u.dst.dev);
+	struct net *net = dev_net(rt->dst.dev);
 	struct sock *sk;
 	struct inet_sock *inet;
 	__be32 daddr;
@@ -427,7 +427,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 
 	if (!rt)
 		goto out;
-	net = dev_net(rt->u.dst.dev);
+	net = dev_net(rt->dst.dev);
 
 	/*
 	 *	Find the original header. It is expected to be valid, of course.
@@ -596,9 +596,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 			/* Ugh! */
 			orefdst = skb_in->_skb_refdst; /* save old refdst */
 			err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
-					     RT_TOS(tos), rt2->u.dst.dev);
+					     RT_TOS(tos), rt2->dst.dev);
 
-			dst_release(&rt2->u.dst);
+			dst_release(&rt2->dst);
 			rt2 = skb_rtable(skb_in);
 			skb_in->_skb_refdst = orefdst; /* restore old refdst */
 		}
@@ -610,7 +610,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 				  XFRM_LOOKUP_ICMP);
 		switch (err) {
 		case 0:
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 			rt = rt2;
 			break;
 		case -EPERM:
@@ -629,7 +629,7 @@ route_done:
 
 	/* RFC says return as much as we can without exceeding 576 bytes. */
 
-	room = dst_mtu(&rt->u.dst);
+	room = dst_mtu(&rt->dst);
 	if (room > 576)
 		room = 576;
 	room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
@@ -972,7 +972,7 @@ int icmp_rcv(struct sk_buff *skb)
 {
 	struct icmphdr *icmph;
 	struct rtable *rt = skb_rtable(skb);
-	struct net *net = dev_net(rt->u.dst.dev);
+	struct net *net = dev_net(rt->dst.dev);
 
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 		struct sec_path *sp = skb_sec_path(skb);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 3294f547c48..b5580d42299 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -312,7 +312,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 		return NULL;
 	}
 
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	skb->dev = dev;
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
@@ -330,7 +330,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	pip->saddr    = rt->rt_src;
 	pip->protocol = IPPROTO_IGMP;
 	pip->tot_len  = 0;	/* filled in later */
-	ip_select_ident(pip, &rt->u.dst, NULL);
+	ip_select_ident(pip, &rt->dst, NULL);
 	((u8*)&pip[1])[0] = IPOPT_RA;
 	((u8*)&pip[1])[1] = 4;
 	((u8*)&pip[1])[2] = 0;
@@ -660,7 +660,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 		return -1;
 	}
 
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
@@ -676,7 +676,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	iph->daddr    = dst;
 	iph->saddr    = rt->rt_src;
 	iph->protocol = IPPROTO_IGMP;
-	ip_select_ident(iph, &rt->u.dst, NULL);
+	ip_select_ident(iph, &rt->dst, NULL);
 	((u8*)&iph[1])[0] = IPOPT_RA;
 	((u8*)&iph[1])[1] = 4;
 	((u8*)&iph[1])[2] = 0;
@@ -1425,7 +1425,7 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
 	}
 
 	if (!dev && !ip_route_output_key(net, &rt, &fl)) {
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 	}
 	if (dev) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 70eb3507c40..57c9e4d7b80 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -383,7 +383,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 		goto no_route;
 	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto route_err;
-	return &rt->u.dst;
+	return &rt->dst;
 
 route_err:
 	ip_rt_put(rt);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 56cdf68a074..99461f09320 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -87,16 +87,16 @@ int ip_forward(struct sk_buff *skb)
 	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto sr_failed;
 
-	if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) &&
+	if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
 		     (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
-		IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS);
+		IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-			  htonl(dst_mtu(&rt->u.dst)));
+			  htonl(dst_mtu(&rt->dst)));
 		goto drop;
 	}
 
 	/* We are about to mangle packet. Copy it! */
-	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
+	if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+rt->dst.header_len))
 		goto drop;
 	iph = ip_hdr(skb);
 
@@ -113,7 +113,7 @@ int ip_forward(struct sk_buff *skb)
 	skb->priority = rt_tos2priority(iph->tos);
 
 	return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev,
-		       rt->u.dst.dev, ip_forward_finish);
+		       rt->dst.dev, ip_forward_finish);
 
 sr_failed:
 	/*
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 32618e11076..749e54889e8 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -745,7 +745,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			goto tx_error;
 		}
 	}
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
 	if (tdev == dev) {
 		ip_rt_put(rt);
@@ -755,7 +755,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 
 	df = tiph->frag_off;
 	if (df)
-		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
+		mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
 	else
 		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
@@ -803,7 +803,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			tunnel->err_count = 0;
 	}
 
-	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len;
+	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
 
 	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
 	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
@@ -830,7 +830,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -853,7 +853,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
 #endif
 		else
-			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
+			iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT);
 	}
 
 	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
@@ -915,7 +915,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
 				    .proto = IPPROTO_GRE };
 		struct rtable *rt;
 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
-			tdev = rt->u.dst.dev;
+			tdev = rt->dst.dev;
 			ip_rt_put(rt);
 		}
 
@@ -1174,7 +1174,7 @@ static int ipgre_open(struct net_device *dev)
 		struct rtable *rt;
 		if (ip_route_output_key(dev_net(dev), &rt, &fl))
 			return -EADDRNOTAVAIL;
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (__in_dev_get_rtnl(dev) == NULL)
 			return -EADDRNOTAVAIL;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 08a3b121f90..db47a5a00ed 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -356,10 +356,10 @@ static int ip_rcv_finish(struct sk_buff *skb)
 
 	rt = skb_rtable(skb);
 	if (rt->rt_type == RTN_MULTICAST) {
-		IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST,
+		IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST,
 				skb->len);
 	} else if (rt->rt_type == RTN_BROADCAST)
-		IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST,
+		IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST,
 				skb->len);
 
 	return dst_input(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9a4a6c96cb0..6cbeb2e108d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -151,15 +151,15 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	iph->version  = 4;
 	iph->ihl      = 5;
 	iph->tos      = inet->tos;
-	if (ip_dont_fragment(sk, &rt->u.dst))
+	if (ip_dont_fragment(sk, &rt->dst))
 		iph->frag_off = htons(IP_DF);
 	else
 		iph->frag_off = 0;
-	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
+	iph->ttl      = ip_select_ttl(inet, &rt->dst);
 	iph->daddr    = rt->rt_dst;
 	iph->saddr    = rt->rt_src;
 	iph->protocol = sk->sk_protocol;
-	ip_select_ident(iph, &rt->u.dst, sk);
+	ip_select_ident(iph, &rt->dst, sk);
 
 	if (opt && opt->optlen) {
 		iph->ihl += opt->optlen>>2;
@@ -240,7 +240,7 @@ int ip_mc_output(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 	struct rtable *rt = skb_rtable(skb);
-	struct net_device *dev = rt->u.dst.dev;
+	struct net_device *dev = rt->dst.dev;
 
 	/*
 	 *	If the indicated interface is up and running, send the packet.
@@ -359,9 +359,9 @@ int ip_queue_xmit(struct sk_buff *skb)
 			if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
 				goto no_route;
 		}
-		sk_setup_caps(sk, &rt->u.dst);
+		sk_setup_caps(sk, &rt->dst);
 	}
-	skb_dst_set_noref(skb, &rt->u.dst);
+	skb_dst_set_noref(skb, &rt->dst);
 
 packet_routed:
 	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
@@ -372,11 +372,11 @@ packet_routed:
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
 	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
-	if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df)
+	if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df)
 		iph->frag_off = htons(IP_DF);
 	else
 		iph->frag_off = 0;
-	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
+	iph->ttl      = ip_select_ttl(inet, &rt->dst);
 	iph->protocol = sk->sk_protocol;
 	iph->saddr    = rt->rt_src;
 	iph->daddr    = rt->rt_dst;
@@ -387,7 +387,7 @@ packet_routed:
 		ip_options_build(skb, opt, inet->inet_daddr, rt, 0);
 	}
 
-	ip_select_ident_more(iph, &rt->u.dst, sk,
+	ip_select_ident_more(iph, &rt->dst, sk,
 			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);
 
 	skb->priority = sk->sk_priority;
@@ -452,7 +452,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	struct rtable *rt = skb_rtable(skb);
 	int err = 0;
 
-	dev = rt->u.dst.dev;
+	dev = rt->dst.dev;
 
 	/*
 	 *	Point into the IP datagram header.
@@ -473,7 +473,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	 */
 
 	hlen = iph->ihl * 4;
-	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
+	mtu = dst_mtu(&rt->dst) - hlen;	/* Size of data space */
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge)
 		mtu -= nf_bridge_mtu_reduction(skb);
@@ -586,7 +586,7 @@ slow_path:
 	 * we need to make room for the encapsulating header
 	 */
 	pad = nf_bridge_pad(skb);
-	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
+	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, pad);
 	mtu -= pad;
 
 	/*
@@ -833,13 +833,13 @@ int ip_append_data(struct sock *sk,
 		 */
 		*rtp = NULL;
 		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
-					    rt->u.dst.dev->mtu :
-					    dst_mtu(rt->u.dst.path);
-		inet->cork.dst = &rt->u.dst;
+					    rt->dst.dev->mtu :
+					    dst_mtu(rt->dst.path);
+		inet->cork.dst = &rt->dst;
 		inet->cork.length = 0;
 		sk->sk_sndmsg_page = NULL;
 		sk->sk_sndmsg_off = 0;
-		if ((exthdrlen = rt->u.dst.header_len) != 0) {
+		if ((exthdrlen = rt->dst.header_len) != 0) {
 			length += exthdrlen;
 			transhdrlen += exthdrlen;
 		}
@@ -852,7 +852,7 @@ int ip_append_data(struct sock *sk,
 		exthdrlen = 0;
 		mtu = inet->cork.fragsize;
 	}
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
@@ -869,14 +869,14 @@ int ip_append_data(struct sock *sk,
 	 */
 	if (transhdrlen &&
 	    length + fragheaderlen <= mtu &&
-	    rt->u.dst.dev->features & NETIF_F_V4_CSUM &&
+	    rt->dst.dev->features & NETIF_F_V4_CSUM &&
 	    !exthdrlen)
 		csummode = CHECKSUM_PARTIAL;
 
 	inet->cork.length += length;
 	if (((length> mtu) || !skb_queue_empty(&sk->sk_write_queue)) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+	    (rt->dst.dev->features & NETIF_F_UFO)) {
 		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
 					 fragheaderlen, transhdrlen, mtu,
 					 flags);
@@ -924,7 +924,7 @@ alloc_new_skb:
 			fraglen = datalen + fragheaderlen;
 
 			if ((flags & MSG_MORE) &&
-			    !(rt->u.dst.dev->features&NETIF_F_SG))
+			    !(rt->dst.dev->features&NETIF_F_SG))
 				alloclen = mtu;
 			else
 				alloclen = datalen + fragheaderlen;
@@ -935,7 +935,7 @@ alloc_new_skb:
 			 * the last.
 			 */
 			if (datalen == length + fraggap)
-				alloclen += rt->u.dst.trailer_len;
+				alloclen += rt->dst.trailer_len;
 
 			if (transhdrlen) {
 				skb = sock_alloc_send_skb(sk,
@@ -1008,7 +1008,7 @@ alloc_new_skb:
 		if (copy > length)
 			copy = length;
 
-		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
+		if (!(rt->dst.dev->features&NETIF_F_SG)) {
 			unsigned int off;
 
 			off = skb->len;
@@ -1103,10 +1103,10 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 	if (inet->cork.flags & IPCORK_OPT)
 		opt = inet->cork.opt;
 
-	if (!(rt->u.dst.dev->features&NETIF_F_SG))
+	if (!(rt->dst.dev->features&NETIF_F_SG))
 		return -EOPNOTSUPP;
 
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 	mtu = inet->cork.fragsize;
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
@@ -1122,7 +1122,7 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 
 	inet->cork.length += size;
 	if ((sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+	    (rt->dst.dev->features & NETIF_F_UFO)) {
 		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
 		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 	}
@@ -1274,8 +1274,8 @@ int ip_push_pending_frames(struct sock *sk)
 	 * If local_df is set too, we still allow to fragment this frame
 	 * locally. */
 	if (inet->pmtudisc >= IP_PMTUDISC_DO ||
-	    (skb->len <= dst_mtu(&rt->u.dst) &&
-	     ip_dont_fragment(sk, &rt->u.dst)))
+	    (skb->len <= dst_mtu(&rt->dst) &&
+	     ip_dont_fragment(sk, &rt->dst)))
 		df = htons(IP_DF);
 
 	if (inet->cork.flags & IPCORK_OPT)
@@ -1284,7 +1284,7 @@ int ip_push_pending_frames(struct sock *sk)
 	if (rt->rt_type == RTN_MULTICAST)
 		ttl = inet->mc_ttl;
 	else
-		ttl = ip_select_ttl(inet, &rt->u.dst);
+		ttl = ip_select_ttl(inet, &rt->dst);
 
 	iph = (struct iphdr *)skb->data;
 	iph->version = 4;
@@ -1295,7 +1295,7 @@ int ip_push_pending_frames(struct sock *sk)
 	}
 	iph->tos = inet->tos;
 	iph->frag_off = df;
-	ip_select_ident(iph, &rt->u.dst, sk);
+	ip_select_ident(iph, &rt->dst, sk);
 	iph->ttl = ttl;
 	iph->protocol = sk->sk_protocol;
 	iph->saddr = rt->rt_src;
@@ -1308,7 +1308,7 @@ int ip_push_pending_frames(struct sock *sk)
 	 * on dst refcount
 	 */
 	inet->cork.dst = NULL;
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	if (iph->protocol == IPPROTO_ICMP)
 		icmp_out_count(net, ((struct icmphdr *)
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 7fd63671103..ec036731a70 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -435,7 +435,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			goto tx_error_icmp;
 		}
 	}
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
 	if (tdev == dev) {
 		ip_rt_put(rt);
@@ -446,7 +446,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	df |= old_iph->frag_off & htons(IP_DF);
 
 	if (df) {
-		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 
 		if (mtu < 68) {
 			stats->collisions++;
@@ -503,7 +503,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -552,7 +552,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
 				    .proto = IPPROTO_IPIP };
 		struct rtable *rt;
 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
-			tdev = rt->u.dst.dev;
+			tdev = rt->dst.dev;
 			ip_rt_put(rt);
 		}
 		dev->flags |= IFF_POINTOPOINT;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 856123fe32f..8418afc357e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1551,9 +1551,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 			goto out_free;
 	}
 
-	dev = rt->u.dst.dev;
+	dev = rt->dst.dev;
 
-	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
+	if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
 		/* Do not fragment multicasts. Alas, IPv4 does not
 		   allow to send ICMP, so that packets will disappear
 		   to blackhole.
@@ -1564,7 +1564,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 		goto out_free;
 	}
 
-	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
+	encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
 
 	if (skb_cow(skb, encap)) {
 		ip_rt_put(rt);
@@ -1575,7 +1575,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 	vif->bytes_out += skb->len;
 
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	ip_decrease_ttl(ip_hdr(skb));
 
 	/* FIXME: forward and output firewalls used to be called here.
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 07de855e217..cfbc79af21c 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -43,7 +43,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 
 		/* Drop old route. */
 		skb_dst_drop(skb);
-		skb_dst_set(skb, &rt->u.dst);
+		skb_dst_set(skb, &rt->dst);
 	} else {
 		/* non-local src, find valid iif to satisfy
 		 * rp-filter when calling ip_route_input. */
@@ -53,11 +53,11 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 
 		orefdst = skb->_skb_refdst;
 		if (ip_route_input(skb, iph->daddr, iph->saddr,
-				   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
-			dst_release(&rt->u.dst);
+				   RT_TOS(iph->tos), rt->dst.dev) != 0) {
+			dst_release(&rt->dst);
 			return -1;
 		}
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		refdst_drop(orefdst);
 	}
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 66cc3befcd4..009a7b2aa1e 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -325,24 +325,24 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 	int err;
 	struct rtable *rt = *rtp;
 
-	if (length > rt->u.dst.dev->mtu) {
+	if (length > rt->dst.dev->mtu) {
 		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
-			       rt->u.dst.dev->mtu);
+			       rt->dst.dev->mtu);
 		return -EMSGSIZE;
 	}
 	if (flags&MSG_PROBE)
 		goto out;
 
 	skb = sock_alloc_send_skb(sk,
-				  length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
+				  length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
 				  flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto error;
-	skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
+	skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	*rtp = NULL;
 
 	skb_reset_network_header(skb);
@@ -375,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 		iph->check   = 0;
 		iph->tot_len = htons(length);
 		if (!iph->id)
-			ip_select_ident(iph, &rt->u.dst, NULL);
+			ip_select_ident(iph, &rt->dst, NULL);
 
 		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 	}
@@ -384,7 +384,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 			skb_transport_header(skb))->type);
 
 	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
-		      rt->u.dst.dev, dst_output);
+		      rt->dst.dev, dst_output);
 	if (err > 0)
 		err = net_xmit_errno(err);
 	if (err)
@@ -606,7 +606,7 @@ out:
 	return len;
 
 do_confirm:
-	dst_confirm(&rt->u.dst);
+	dst_confirm(&rt->dst);
 	if (!(msg->msg_flags & MSG_PROBE) || len)
 		goto back_from_confirm;
 	err = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 883b5c7195a..a291edbbc97 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -286,10 +286,10 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
 		rcu_read_lock_bh();
 		r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
 		while (r) {
-			if (dev_net(r->u.dst.dev) == seq_file_net(seq) &&
+			if (dev_net(r->dst.dev) == seq_file_net(seq) &&
 			    r->rt_genid == st->genid)
 				return r;
-			r = rcu_dereference_bh(r->u.dst.rt_next);
+			r = rcu_dereference_bh(r->dst.rt_next);
 		}
 		rcu_read_unlock_bh();
 	}
@@ -301,7 +301,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
 {
 	struct rt_cache_iter_state *st = seq->private;
 
-	r = r->u.dst.rt_next;
+	r = r->dst.rt_next;
 	while (!r) {
 		rcu_read_unlock_bh();
 		do {
@@ -319,7 +319,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq,
 {
 	struct rt_cache_iter_state *st = seq->private;
 	while ((r = __rt_cache_get_next(seq, r)) != NULL) {
-		if (dev_net(r->u.dst.dev) != seq_file_net(seq))
+		if (dev_net(r->dst.dev) != seq_file_net(seq))
 			continue;
 		if (r->rt_genid == st->genid)
 			break;
@@ -377,19 +377,19 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 
 		seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
 			      "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
-			r->u.dst.dev ? r->u.dst.dev->name : "*",
+			r->dst.dev ? r->dst.dev->name : "*",
 			(__force u32)r->rt_dst,
 			(__force u32)r->rt_gateway,
-			r->rt_flags, atomic_read(&r->u.dst.__refcnt),
-			r->u.dst.__use, 0, (__force u32)r->rt_src,
-			(dst_metric(&r->u.dst, RTAX_ADVMSS) ?
-			     (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0),
-			dst_metric(&r->u.dst, RTAX_WINDOW),
-			(int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) +
-			      dst_metric(&r->u.dst, RTAX_RTTVAR)),
+			r->rt_flags, atomic_read(&r->dst.__refcnt),
+			r->dst.__use, 0, (__force u32)r->rt_src,
+			(dst_metric(&r->dst, RTAX_ADVMSS) ?
+			     (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0),
+			dst_metric(&r->dst, RTAX_WINDOW),
+			(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
+			      dst_metric(&r->dst, RTAX_RTTVAR)),
 			r->fl.fl4_tos,
-			r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1,
-			r->u.dst.hh ? (r->u.dst.hh->hh_output ==
+			r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
+			r->dst.hh ? (r->dst.hh->hh_output ==
 				       dev_queue_xmit) : 0,
 			r->rt_spec_dst, &len);
 
@@ -608,13 +608,13 @@ static inline int ip_rt_proc_init(void)
 
 static inline void rt_free(struct rtable *rt)
 {
-	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
 static inline void rt_drop(struct rtable *rt)
 {
 	ip_rt_put(rt);
-	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
 static inline int rt_fast_clean(struct rtable *rth)
@@ -622,13 +622,13 @@ static inline int rt_fast_clean(struct rtable *rth)
 	/* Kill broadcast/multicast entries very aggresively, if they
 	   collide in hash table with more useful entries */
 	return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
-		rth->fl.iif && rth->u.dst.rt_next;
+		rth->fl.iif && rth->dst.rt_next;
 }
 
 static inline int rt_valuable(struct rtable *rth)
 {
 	return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
-		rth->u.dst.expires;
+		rth->dst.expires;
 }
 
 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -636,15 +636,15 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t
 	unsigned long age;
 	int ret = 0;
 
-	if (atomic_read(&rth->u.dst.__refcnt))
+	if (atomic_read(&rth->dst.__refcnt))
 		goto out;
 
 	ret = 1;
-	if (rth->u.dst.expires &&
-	    time_after_eq(jiffies, rth->u.dst.expires))
+	if (rth->dst.expires &&
+	    time_after_eq(jiffies, rth->dst.expires))
 		goto out;
 
-	age = jiffies - rth->u.dst.lastuse;
+	age = jiffies - rth->dst.lastuse;
 	ret = 0;
 	if ((age <= tmo1 && !rt_fast_clean(rth)) ||
 	    (age <= tmo2 && rt_valuable(rth)))
@@ -660,7 +660,7 @@ out:	return ret;
  */
 static inline u32 rt_score(struct rtable *rt)
 {
-	u32 score = jiffies - rt->u.dst.lastuse;
+	u32 score = jiffies - rt->dst.lastuse;
 
 	score = ~score & ~(3<<30);
 
@@ -700,12 +700,12 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 
 static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
 {
-	return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev));
+	return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev));
 }
 
 static inline int rt_is_expired(struct rtable *rth)
 {
-	return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev));
+	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
 }
 
 /*
@@ -734,7 +734,7 @@ static void rt_do_flush(int process_context)
 		rth = rt_hash_table[i].chain;
 
 		/* defer releasing the head of the list after spin_unlock */
-		for (tail = rth; tail; tail = tail->u.dst.rt_next)
+		for (tail = rth; tail; tail = tail->dst.rt_next)
 			if (!rt_is_expired(tail))
 				break;
 		if (rth != tail)
@@ -743,9 +743,9 @@ static void rt_do_flush(int process_context)
 		/* call rt_free on entries after the tail requiring flush */
 		prev = &rt_hash_table[i].chain;
 		for (p = *prev; p; p = next) {
-			next = p->u.dst.rt_next;
+			next = p->dst.rt_next;
 			if (!rt_is_expired(p)) {
-				prev = &p->u.dst.rt_next;
+				prev = &p->dst.rt_next;
 			} else {
 				*prev = next;
 				rt_free(p);
@@ -760,7 +760,7 @@ static void rt_do_flush(int process_context)
 		spin_unlock_bh(rt_hash_lock_addr(i));
 
 		for (; rth != tail; rth = next) {
-			next = rth->u.dst.rt_next;
+			next = rth->dst.rt_next;
 			rt_free(rth);
 		}
 	}
@@ -791,7 +791,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
 	while (aux != rth) {
 		if (compare_hash_inputs(&aux->fl, &rth->fl))
 			return 0;
-		aux = aux->u.dst.rt_next;
+		aux = aux->dst.rt_next;
 	}
 	return ONE;
 }
@@ -831,18 +831,18 @@ static void rt_check_expire(void)
 		length = 0;
 		spin_lock_bh(rt_hash_lock_addr(i));
 		while ((rth = *rthp) != NULL) {
-			prefetch(rth->u.dst.rt_next);
+			prefetch(rth->dst.rt_next);
 			if (rt_is_expired(rth)) {
-				*rthp = rth->u.dst.rt_next;
+				*rthp = rth->dst.rt_next;
 				rt_free(rth);
 				continue;
 			}
-			if (rth->u.dst.expires) {
+			if (rth->dst.expires) {
 				/* Entry is expired even if it is in use */
-				if (time_before_eq(jiffies, rth->u.dst.expires)) {
+				if (time_before_eq(jiffies, rth->dst.expires)) {
 nofree:
 					tmo >>= 1;
-					rthp = &rth->u.dst.rt_next;
+					rthp = &rth->dst.rt_next;
 					/*
 					 * We only count entries on
 					 * a chain with equal hash inputs once
@@ -858,7 +858,7 @@ nofree:
 				goto nofree;
 
 			/* Cleanup aged off entries. */
-			*rthp = rth->u.dst.rt_next;
+			*rthp = rth->dst.rt_next;
 			rt_free(rth);
 		}
 		spin_unlock_bh(rt_hash_lock_addr(i));
@@ -999,10 +999,10 @@ static int rt_garbage_collect(struct dst_ops *ops)
 				if (!rt_is_expired(rth) &&
 					!rt_may_expire(rth, tmo, expire)) {
 					tmo >>= 1;
-					rthp = &rth->u.dst.rt_next;
+					rthp = &rth->dst.rt_next;
 					continue;
 				}
-				*rthp = rth->u.dst.rt_next;
+				*rthp = rth->dst.rt_next;
 				rt_free(rth);
 				goal--;
 			}
@@ -1068,7 +1068,7 @@ static int slow_chain_length(const struct rtable *head)
 
 	while (rth) {
 		length += has_noalias(head, rth);
-		rth = rth->u.dst.rt_next;
+		rth = rth->dst.rt_next;
 	}
 	return length >> FRACT_BITS;
 }
@@ -1090,7 +1090,7 @@ restart:
 	candp = NULL;
 	now = jiffies;
 
-	if (!rt_caching(dev_net(rt->u.dst.dev))) {
+	if (!rt_caching(dev_net(rt->dst.dev))) {
 		/*
 		 * If we're not caching, just tell the caller we
 		 * were successful and don't touch the route.  The
@@ -1108,7 +1108,7 @@ restart:
 		 */
 
 		if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
-			int err = arp_bind_neighbour(&rt->u.dst);
+			int err = arp_bind_neighbour(&rt->dst);
 			if (err) {
 				if (net_ratelimit())
 					printk(KERN_WARNING
@@ -1127,19 +1127,19 @@ restart:
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	while ((rth = *rthp) != NULL) {
 		if (rt_is_expired(rth)) {
-			*rthp = rth->u.dst.rt_next;
+			*rthp = rth->dst.rt_next;
 			rt_free(rth);
 			continue;
 		}
 		if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
 			/* Put it first */
-			*rthp = rth->u.dst.rt_next;
+			*rthp = rth->dst.rt_next;
 			/*
 			 * Since lookup is lockfree, the deletion
 			 * must be visible to another weakly ordered CPU before
 			 * the insertion at the start of the hash chain.
 			 */
-			rcu_assign_pointer(rth->u.dst.rt_next,
+			rcu_assign_pointer(rth->dst.rt_next,
 					   rt_hash_table[hash].chain);
 			/*
 			 * Since lookup is lockfree, the update writes
@@ -1147,18 +1147,18 @@ restart:
 			 */
 			rcu_assign_pointer(rt_hash_table[hash].chain, rth);
 
-			dst_use(&rth->u.dst, now);
+			dst_use(&rth->dst, now);
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
 			rt_drop(rt);
 			if (rp)
 				*rp = rth;
 			else
-				skb_dst_set(skb, &rth->u.dst);
+				skb_dst_set(skb, &rth->dst);
 			return 0;
 		}
 
-		if (!atomic_read(&rth->u.dst.__refcnt)) {
+		if (!atomic_read(&rth->dst.__refcnt)) {
 			u32 score = rt_score(rth);
 
 			if (score <= min_score) {
@@ -1170,7 +1170,7 @@ restart:
 
 		chain_length++;
 
-		rthp = &rth->u.dst.rt_next;
+		rthp = &rth->dst.rt_next;
 	}
 
 	if (cand) {
@@ -1181,17 +1181,17 @@ restart:
 		 * only 2 entries per bucket. We will see.
 		 */
 		if (chain_length > ip_rt_gc_elasticity) {
-			*candp = cand->u.dst.rt_next;
+			*candp = cand->dst.rt_next;
 			rt_free(cand);
 		}
 	} else {
 		if (chain_length > rt_chain_length_max &&
 		    slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
-			struct net *net = dev_net(rt->u.dst.dev);
+			struct net *net = dev_net(rt->dst.dev);
 			int num = ++net->ipv4.current_rt_cache_rebuild_count;
 			if (!rt_caching(net)) {
 				printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
-					rt->u.dst.dev->name, num);
+					rt->dst.dev->name, num);
 			}
 			rt_emergency_hash_rebuild(net);
 			spin_unlock_bh(rt_hash_lock_addr(hash));
@@ -1206,7 +1206,7 @@ restart:
 	   route or unicast forwarding path.
 	 */
 	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
-		int err = arp_bind_neighbour(&rt->u.dst);
+		int err = arp_bind_neighbour(&rt->dst);
 		if (err) {
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
@@ -1237,14 +1237,14 @@ restart:
 		}
 	}
 
-	rt->u.dst.rt_next = rt_hash_table[hash].chain;
+	rt->dst.rt_next = rt_hash_table[hash].chain;
 
 #if RT_CACHE_DEBUG >= 2
-	if (rt->u.dst.rt_next) {
+	if (rt->dst.rt_next) {
 		struct rtable *trt;
 		printk(KERN_DEBUG "rt_cache @%02x: %pI4",
 		       hash, &rt->rt_dst);
-		for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next)
+		for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next)
 			printk(" . %pI4", &trt->rt_dst);
 		printk("\n");
 	}
@@ -1262,7 +1262,7 @@ skip_hashing:
 	if (rp)
 		*rp = rt;
 	else
-		skb_dst_set(skb, &rt->u.dst);
+		skb_dst_set(skb, &rt->dst);
 	return 0;
 }
 
@@ -1334,11 +1334,11 @@ static void rt_del(unsigned hash, struct rtable *rt)
 	ip_rt_put(rt);
 	while ((aux = *rthp) != NULL) {
 		if (aux == rt || rt_is_expired(aux)) {
-			*rthp = aux->u.dst.rt_next;
+			*rthp = aux->dst.rt_next;
 			rt_free(aux);
 			continue;
 		}
-		rthp = &aux->u.dst.rt_next;
+		rthp = &aux->dst.rt_next;
 	}
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 }
@@ -1392,19 +1392,19 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				    rth->fl.oif != ikeys[k] ||
 				    rth->fl.iif != 0 ||
 				    rt_is_expired(rth) ||
-				    !net_eq(dev_net(rth->u.dst.dev), net)) {
-					rthp = &rth->u.dst.rt_next;
+				    !net_eq(dev_net(rth->dst.dev), net)) {
+					rthp = &rth->dst.rt_next;
 					continue;
 				}
 
 				if (rth->rt_dst != daddr ||
 				    rth->rt_src != saddr ||
-				    rth->u.dst.error ||
+				    rth->dst.error ||
 				    rth->rt_gateway != old_gw ||
-				    rth->u.dst.dev != dev)
+				    rth->dst.dev != dev)
 					break;
 
-				dst_hold(&rth->u.dst);
+				dst_hold(&rth->dst);
 
 				rt = dst_alloc(&ipv4_dst_ops);
 				if (rt == NULL) {
@@ -1414,20 +1414,20 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 
 				/* Copy all the information. */
 				*rt = *rth;
-				rt->u.dst.__use		= 1;
-				atomic_set(&rt->u.dst.__refcnt, 1);
-				rt->u.dst.child		= NULL;
-				if (rt->u.dst.dev)
-					dev_hold(rt->u.dst.dev);
+				rt->dst.__use		= 1;
+				atomic_set(&rt->dst.__refcnt, 1);
+				rt->dst.child		= NULL;
+				if (rt->dst.dev)
+					dev_hold(rt->dst.dev);
 				if (rt->idev)
 					in_dev_hold(rt->idev);
-				rt->u.dst.obsolete	= -1;
-				rt->u.dst.lastuse	= jiffies;
-				rt->u.dst.path		= &rt->u.dst;
-				rt->u.dst.neighbour	= NULL;
-				rt->u.dst.hh		= NULL;
+				rt->dst.obsolete	= -1;
+				rt->dst.lastuse	= jiffies;
+				rt->dst.path		= &rt->dst;
+				rt->dst.neighbour	= NULL;
+				rt->dst.hh		= NULL;
 #ifdef CONFIG_XFRM
-				rt->u.dst.xfrm		= NULL;
+				rt->dst.xfrm		= NULL;
 #endif
 				rt->rt_genid		= rt_genid(net);
 				rt->rt_flags		|= RTCF_REDIRECTED;
@@ -1436,23 +1436,23 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				rt->rt_gateway		= new_gw;
 
 				/* Redirect received -> path was valid */
-				dst_confirm(&rth->u.dst);
+				dst_confirm(&rth->dst);
 
 				if (rt->peer)
 					atomic_inc(&rt->peer->refcnt);
 
-				if (arp_bind_neighbour(&rt->u.dst) ||
-				    !(rt->u.dst.neighbour->nud_state &
+				if (arp_bind_neighbour(&rt->dst) ||
+				    !(rt->dst.neighbour->nud_state &
 					    NUD_VALID)) {
-					if (rt->u.dst.neighbour)
-						neigh_event_send(rt->u.dst.neighbour, NULL);
+					if (rt->dst.neighbour)
+						neigh_event_send(rt->dst.neighbour, NULL);
 					ip_rt_put(rth);
 					rt_drop(rt);
 					goto do_next;
 				}
 
-				netevent.old = &rth->u.dst;
-				netevent.new = &rt->u.dst;
+				netevent.old = &rth->dst;
+				netevent.new = &rt->dst;
 				call_netevent_notifiers(NETEVENT_REDIRECT,
 							&netevent);
 
@@ -1488,8 +1488,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 			ip_rt_put(rt);
 			ret = NULL;
 		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
-			   (rt->u.dst.expires &&
-			    time_after_eq(jiffies, rt->u.dst.expires))) {
+			   (rt->dst.expires &&
+			    time_after_eq(jiffies, rt->dst.expires))) {
 			unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
 						rt->fl.oif,
 						rt_genid(dev_net(dst->dev)));
@@ -1527,7 +1527,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	int log_martians;
 
 	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(rt->u.dst.dev);
+	in_dev = __in_dev_get_rcu(rt->dst.dev);
 	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
 		rcu_read_unlock();
 		return;
@@ -1538,30 +1538,30 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	/* No redirected packets during ip_rt_redirect_silence;
 	 * reset the algorithm.
 	 */
-	if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence))
-		rt->u.dst.rate_tokens = 0;
+	if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence))
+		rt->dst.rate_tokens = 0;
 
 	/* Too many ignored redirects; do not send anything
-	 * set u.dst.rate_last to the last seen redirected packet.
+	 * set dst.rate_last to the last seen redirected packet.
 	 */
-	if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
-		rt->u.dst.rate_last = jiffies;
+	if (rt->dst.rate_tokens >= ip_rt_redirect_number) {
+		rt->dst.rate_last = jiffies;
 		return;
 	}
 
 	/* Check for load limit; set rate_last to the latest sent
 	 * redirect.
 	 */
-	if (rt->u.dst.rate_tokens == 0 ||
+	if (rt->dst.rate_tokens == 0 ||
 	    time_after(jiffies,
-		       (rt->u.dst.rate_last +
-			(ip_rt_redirect_load << rt->u.dst.rate_tokens)))) {
+		       (rt->dst.rate_last +
+			(ip_rt_redirect_load << rt->dst.rate_tokens)))) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
-		rt->u.dst.rate_last = jiffies;
-		++rt->u.dst.rate_tokens;
+		rt->dst.rate_last = jiffies;
+		++rt->dst.rate_tokens;
 #ifdef CONFIG_IP_ROUTE_VERBOSE
 		if (log_martians &&
-		    rt->u.dst.rate_tokens == ip_rt_redirect_number &&
+		    rt->dst.rate_tokens == ip_rt_redirect_number &&
 		    net_ratelimit())
 			printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
 				&rt->rt_src, rt->rt_iif,
@@ -1576,7 +1576,7 @@ static int ip_error(struct sk_buff *skb)
 	unsigned long now;
 	int code;
 
-	switch (rt->u.dst.error) {
+	switch (rt->dst.error) {
 		case EINVAL:
 		default:
 			goto out;
@@ -1585,7 +1585,7 @@ static int ip_error(struct sk_buff *skb)
 			break;
 		case ENETUNREACH:
 			code = ICMP_NET_UNREACH;
-			IP_INC_STATS_BH(dev_net(rt->u.dst.dev),
+			IP_INC_STATS_BH(dev_net(rt->dst.dev),
 					IPSTATS_MIB_INNOROUTES);
 			break;
 		case EACCES:
@@ -1594,12 +1594,12 @@ static int ip_error(struct sk_buff *skb)
 	}
 
 	now = jiffies;
-	rt->u.dst.rate_tokens += now - rt->u.dst.rate_last;
-	if (rt->u.dst.rate_tokens > ip_rt_error_burst)
-		rt->u.dst.rate_tokens = ip_rt_error_burst;
-	rt->u.dst.rate_last = now;
-	if (rt->u.dst.rate_tokens >= ip_rt_error_cost) {
-		rt->u.dst.rate_tokens -= ip_rt_error_cost;
+	rt->dst.rate_tokens += now - rt->dst.rate_last;
+	if (rt->dst.rate_tokens > ip_rt_error_burst)
+		rt->dst.rate_tokens = ip_rt_error_burst;
+	rt->dst.rate_last = now;
+	if (rt->dst.rate_tokens >= ip_rt_error_cost) {
+		rt->dst.rate_tokens -= ip_rt_error_cost;
 		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
 	}
 
@@ -1644,7 +1644,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 
 			rcu_read_lock();
 			for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-			     rth = rcu_dereference(rth->u.dst.rt_next)) {
+			     rth = rcu_dereference(rth->dst.rt_next)) {
 				unsigned short mtu = new_mtu;
 
 				if (rth->fl.fl4_dst != daddr ||
@@ -1653,8 +1653,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 				    rth->rt_src != iph->saddr ||
 				    rth->fl.oif != ikeys[k] ||
 				    rth->fl.iif != 0 ||
-				    dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
-				    !net_eq(dev_net(rth->u.dst.dev), net) ||
+				    dst_metric_locked(&rth->dst, RTAX_MTU) ||
+				    !net_eq(dev_net(rth->dst.dev), net) ||
 				    rt_is_expired(rth))
 					continue;
 
@@ -1662,22 +1662,22 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 
 					/* BSD 4.2 compatibility hack :-( */
 					if (mtu == 0 &&
-					    old_mtu >= dst_mtu(&rth->u.dst) &&
+					    old_mtu >= dst_mtu(&rth->dst) &&
 					    old_mtu >= 68 + (iph->ihl << 2))
 						old_mtu -= iph->ihl << 2;
 
 					mtu = guess_mtu(old_mtu);
 				}
-				if (mtu <= dst_mtu(&rth->u.dst)) {
-					if (mtu < dst_mtu(&rth->u.dst)) {
-						dst_confirm(&rth->u.dst);
+				if (mtu <= dst_mtu(&rth->dst)) {
+					if (mtu < dst_mtu(&rth->dst)) {
+						dst_confirm(&rth->dst);
 						if (mtu < ip_rt_min_pmtu) {
 							mtu = ip_rt_min_pmtu;
-							rth->u.dst.metrics[RTAX_LOCK-1] |=
+							rth->dst.metrics[RTAX_LOCK-1] |=
 								(1 << RTAX_MTU);
 						}
-						rth->u.dst.metrics[RTAX_MTU-1] = mtu;
-						dst_set_expires(&rth->u.dst,
+						rth->dst.metrics[RTAX_MTU-1] = mtu;
+						dst_set_expires(&rth->dst,
 							ip_rt_mtu_expires);
 					}
 					est_mtu = mtu;
@@ -1750,7 +1750,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
 
 	rt = skb_rtable(skb);
 	if (rt)
-		dst_set_expires(&rt->u.dst, 0);
+		dst_set_expires(&rt->dst, 0);
 }
 
 static int ip_rt_bug(struct sk_buff *skb)
@@ -1778,11 +1778,11 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 
 	if (rt->fl.iif == 0)
 		src = rt->rt_src;
-	else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) {
+	else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) {
 		src = FIB_RES_PREFSRC(res);
 		fib_res_put(&res);
 	} else
-		src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
+		src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
 					RT_SCOPE_UNIVERSE);
 	memcpy(addr, &src, 4);
 }
@@ -1790,10 +1790,10 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 #ifdef CONFIG_NET_CLS_ROUTE
 static void set_class_tag(struct rtable *rt, u32 tag)
 {
-	if (!(rt->u.dst.tclassid & 0xFFFF))
-		rt->u.dst.tclassid |= tag & 0xFFFF;
-	if (!(rt->u.dst.tclassid & 0xFFFF0000))
-		rt->u.dst.tclassid |= tag & 0xFFFF0000;
+	if (!(rt->dst.tclassid & 0xFFFF))
+		rt->dst.tclassid |= tag & 0xFFFF;
+	if (!(rt->dst.tclassid & 0xFFFF0000))
+		rt->dst.tclassid |= tag & 0xFFFF0000;
 }
 #endif
 
@@ -1805,30 +1805,30 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 		if (FIB_RES_GW(*res) &&
 		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = FIB_RES_GW(*res);
-		memcpy(rt->u.dst.metrics, fi->fib_metrics,
-		       sizeof(rt->u.dst.metrics));
+		memcpy(rt->dst.metrics, fi->fib_metrics,
+		       sizeof(rt->dst.metrics));
 		if (fi->fib_mtu == 0) {
-			rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
-			if (dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
+			rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
+			if (dst_metric_locked(&rt->dst, RTAX_MTU) &&
 			    rt->rt_gateway != rt->rt_dst &&
-			    rt->u.dst.dev->mtu > 576)
-				rt->u.dst.metrics[RTAX_MTU-1] = 576;
+			    rt->dst.dev->mtu > 576)
+				rt->dst.metrics[RTAX_MTU-1] = 576;
 		}
 #ifdef CONFIG_NET_CLS_ROUTE
-		rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
+		rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
 	} else
-		rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;
-
-	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
-		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
-	if (dst_mtu(&rt->u.dst) > IP_MAX_MTU)
-		rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
-	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0)
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
+		rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu;
+
+	if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
+		rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
+	if (dst_mtu(&rt->dst) > IP_MAX_MTU)
+		rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
+	if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0)
+		rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40,
 				       ip_rt_min_advmss);
-	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40)
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
+	if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40)
+		rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
 
 #ifdef CONFIG_NET_CLS_ROUTE
 #ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1873,13 +1873,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (!rth)
 		goto e_nobufs;
 
-	rth->u.dst.output = ip_rt_bug;
-	rth->u.dst.obsolete = -1;
+	rth->dst.output = ip_rt_bug;
+	rth->dst.obsolete = -1;
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
 	rth->fl.fl4_tos	= tos;
@@ -1887,13 +1887,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
-	rth->u.dst.tclassid = itag;
+	rth->dst.tclassid = itag;
 #endif
 	rth->rt_iif	=
 	rth->fl.iif	= dev->ifindex;
-	rth->u.dst.dev	= init_net.loopback_dev;
-	dev_hold(rth->u.dst.dev);
-	rth->idev	= in_dev_get(rth->u.dst.dev);
+	rth->dst.dev	= init_net.loopback_dev;
+	dev_hold(rth->dst.dev);
+	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->fl.oif	= 0;
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
@@ -1901,13 +1901,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->rt_flags	= RTCF_MULTICAST;
 	rth->rt_type	= RTN_MULTICAST;
 	if (our) {
-		rth->u.dst.input= ip_local_deliver;
+		rth->dst.input= ip_local_deliver;
 		rth->rt_flags |= RTCF_LOCAL;
 	}
 
 #ifdef CONFIG_IP_MROUTE
 	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
-		rth->u.dst.input = ip_mr_input;
+		rth->dst.input = ip_mr_input;
 #endif
 	RT_CACHE_STAT_INC(in_slow_mc);
 
@@ -2016,12 +2016,12 @@ static int __mkroute_input(struct sk_buff *skb,
 		goto cleanup;
 	}
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 	if (IN_DEV_CONF_GET(out_dev, NOXFRM))
-		rth->u.dst.flags |= DST_NOXFRM;
+		rth->dst.flags |= DST_NOXFRM;
 	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
 	rth->fl.fl4_tos	= tos;
@@ -2031,16 +2031,16 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_gateway	= daddr;
 	rth->rt_iif 	=
 		rth->fl.iif	= in_dev->dev->ifindex;
-	rth->u.dst.dev	= (out_dev)->dev;
-	dev_hold(rth->u.dst.dev);
-	rth->idev	= in_dev_get(rth->u.dst.dev);
+	rth->dst.dev	= (out_dev)->dev;
+	dev_hold(rth->dst.dev);
+	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->fl.oif 	= 0;
 	rth->rt_spec_dst= spec_dst;
 
-	rth->u.dst.obsolete = -1;
-	rth->u.dst.input = ip_forward;
-	rth->u.dst.output = ip_output;
-	rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));
+	rth->dst.obsolete = -1;
+	rth->dst.input = ip_forward;
+	rth->dst.output = ip_output;
+	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
 
 	rt_set_nexthop(rth, res, itag);
 
@@ -2074,7 +2074,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
 
 	/* put it into the cache */
 	hash = rt_hash(daddr, saddr, fl->iif,
-		       rt_genid(dev_net(rth->u.dst.dev)));
+		       rt_genid(dev_net(rth->dst.dev)));
 	return rt_intern_hash(hash, rth, NULL, skb, fl->iif);
 }
 
@@ -2197,14 +2197,14 @@ local_input:
 	if (!rth)
 		goto e_nobufs;
 
-	rth->u.dst.output= ip_rt_bug;
-	rth->u.dst.obsolete = -1;
+	rth->dst.output= ip_rt_bug;
+	rth->dst.obsolete = -1;
 	rth->rt_genid = rt_genid(net);
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
 	rth->fl.fl4_tos	= tos;
@@ -2212,20 +2212,20 @@ local_input:
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
-	rth->u.dst.tclassid = itag;
+	rth->dst.tclassid = itag;
 #endif
 	rth->rt_iif	=
 	rth->fl.iif	= dev->ifindex;
-	rth->u.dst.dev	= net->loopback_dev;
-	dev_hold(rth->u.dst.dev);
-	rth->idev	= in_dev_get(rth->u.dst.dev);
+	rth->dst.dev	= net->loopback_dev;
+	dev_hold(rth->dst.dev);
+	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
-	rth->u.dst.input= ip_local_deliver;
+	rth->dst.input= ip_local_deliver;
 	rth->rt_flags 	= flags|RTCF_LOCAL;
 	if (res.type == RTN_UNREACHABLE) {
-		rth->u.dst.input= ip_error;
-		rth->u.dst.error= -err;
+		rth->dst.input= ip_error;
+		rth->dst.error= -err;
 		rth->rt_flags 	&= ~RTCF_LOCAL;
 	}
 	rth->rt_type	= res.type;
@@ -2291,21 +2291,21 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	hash = rt_hash(daddr, saddr, iif, rt_genid(net));
 
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-	     rth = rcu_dereference(rth->u.dst.rt_next)) {
+	     rth = rcu_dereference(rth->dst.rt_next)) {
 		if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
 		     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
 		     (rth->fl.iif ^ iif) |
 		     rth->fl.oif |
 		     (rth->fl.fl4_tos ^ tos)) == 0 &&
 		    rth->fl.mark == skb->mark &&
-		    net_eq(dev_net(rth->u.dst.dev), net) &&
+		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
 			if (noref) {
-				dst_use_noref(&rth->u.dst, jiffies);
-				skb_dst_set_noref(skb, &rth->u.dst);
+				dst_use_noref(&rth->dst, jiffies);
+				skb_dst_set_noref(skb, &rth->dst);
 			} else {
-				dst_use(&rth->u.dst, jiffies);
-				skb_dst_set(skb, &rth->u.dst);
+				dst_use(&rth->dst, jiffies);
+				skb_dst_set(skb, &rth->dst);
 			}
 			RT_CACHE_STAT_INC(in_hit);
 			rcu_read_unlock();
@@ -2412,12 +2412,12 @@ static int __mkroute_output(struct rtable **result,
 		goto cleanup;
 	}
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOXFRM))
-		rth->u.dst.flags |= DST_NOXFRM;
+		rth->dst.flags |= DST_NOXFRM;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 
 	rth->fl.fl4_dst	= oldflp->fl4_dst;
 	rth->fl.fl4_tos	= tos;
@@ -2429,35 +2429,35 @@ static int __mkroute_output(struct rtable **result,
 	rth->rt_iif	= oldflp->oif ? : dev_out->ifindex;
 	/* get references to the devices that are to be hold by the routing
 	   cache entry */
-	rth->u.dst.dev	= dev_out;
+	rth->dst.dev	= dev_out;
 	dev_hold(dev_out);
 	rth->idev	= in_dev_get(dev_out);
 	rth->rt_gateway = fl->fl4_dst;
 	rth->rt_spec_dst= fl->fl4_src;
 
-	rth->u.dst.output=ip_output;
-	rth->u.dst.obsolete = -1;
+	rth->dst.output=ip_output;
+	rth->dst.obsolete = -1;
 	rth->rt_genid = rt_genid(dev_net(dev_out));
 
 	RT_CACHE_STAT_INC(out_slow_tot);
 
 	if (flags & RTCF_LOCAL) {
-		rth->u.dst.input = ip_local_deliver;
+		rth->dst.input = ip_local_deliver;
 		rth->rt_spec_dst = fl->fl4_dst;
 	}
 	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
 		rth->rt_spec_dst = fl->fl4_src;
 		if (flags & RTCF_LOCAL &&
 		    !(dev_out->flags & IFF_LOOPBACK)) {
-			rth->u.dst.output = ip_mc_output;
+			rth->dst.output = ip_mc_output;
 			RT_CACHE_STAT_INC(out_slow_mc);
 		}
 #ifdef CONFIG_IP_MROUTE
 		if (res->type == RTN_MULTICAST) {
 			if (IN_DEV_MFORWARD(in_dev) &&
 			    !ipv4_is_local_multicast(oldflp->fl4_dst)) {
-				rth->u.dst.input = ip_mr_input;
-				rth->u.dst.output = ip_mc_output;
+				rth->dst.input = ip_mr_input;
+				rth->dst.output = ip_mc_output;
 			}
 		}
 #endif
@@ -2712,7 +2712,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 
 	rcu_read_lock_bh();
 	for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
-		rth = rcu_dereference_bh(rth->u.dst.rt_next)) {
+		rth = rcu_dereference_bh(rth->dst.rt_next)) {
 		if (rth->fl.fl4_dst == flp->fl4_dst &&
 		    rth->fl.fl4_src == flp->fl4_src &&
 		    rth->fl.iif == 0 &&
@@ -2720,9 +2720,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 		    rth->fl.mark == flp->mark &&
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 			    (IPTOS_RT_MASK | RTO_ONLINK)) &&
-		    net_eq(dev_net(rth->u.dst.dev), net) &&
+		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
-			dst_use(&rth->u.dst, jiffies);
+			dst_use(&rth->dst, jiffies);
 			RT_CACHE_STAT_INC(out_hit);
 			rcu_read_unlock_bh();
 			*rp = rth;
@@ -2759,15 +2759,15 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 		dst_alloc(&ipv4_dst_blackhole_ops);
 
 	if (rt) {
-		struct dst_entry *new = &rt->u.dst;
+		struct dst_entry *new = &rt->dst;
 
 		atomic_set(&new->__refcnt, 1);
 		new->__use = 1;
 		new->input = dst_discard;
 		new->output = dst_discard;
-		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
+		memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
 
-		new->dev = ort->u.dst.dev;
+		new->dev = ort->dst.dev;
 		if (new->dev)
 			dev_hold(new->dev);
 
@@ -2791,7 +2791,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 		dst_free(new);
 	}
 
-	dst_release(&(*rp)->u.dst);
+	dst_release(&(*rp)->dst);
 	*rp = rt;
 	return (rt ? 0 : -ENOMEM);
 }
@@ -2861,11 +2861,11 @@ static int rt_fill_info(struct net *net,
 		r->rtm_src_len = 32;
 		NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src);
 	}
-	if (rt->u.dst.dev)
-		NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex);
+	if (rt->dst.dev)
+		NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
 #ifdef CONFIG_NET_CLS_ROUTE
-	if (rt->u.dst.tclassid)
-		NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
+	if (rt->dst.tclassid)
+		NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
 #endif
 	if (rt->fl.iif)
 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
@@ -2875,11 +2875,11 @@ static int rt_fill_info(struct net *net,
 	if (rt->rt_dst != rt->rt_gateway)
 		NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
 
-	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
 		goto nla_put_failure;
 
-	error = rt->u.dst.error;
-	expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0;
+	error = rt->dst.error;
+	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
 	if (rt->peer) {
 		id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
 		if (rt->peer->tcp_ts_stamp) {
@@ -2911,7 +2911,7 @@ static int rt_fill_info(struct net *net,
 			NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
 	}
 
-	if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage,
+	if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
 			       expires, error) < 0)
 		goto nla_put_failure;
 
@@ -2976,8 +2976,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 		local_bh_enable();
 
 		rt = skb_rtable(skb);
-		if (err == 0 && rt->u.dst.error)
-			err = -rt->u.dst.error;
+		if (err == 0 && rt->dst.error)
+			err = -rt->dst.error;
 	} else {
 		struct flowi fl = {
 			.nl_u = {
@@ -2995,7 +2995,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	if (err)
 		goto errout_free;
 
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
@@ -3031,12 +3031,12 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
 			continue;
 		rcu_read_lock_bh();
 		for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt;
-		     rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) {
-			if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
+		     rt = rcu_dereference_bh(rt->dst.rt_next), idx++) {
+			if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx)
 				continue;
 			if (rt_is_expired(rt))
 				continue;
-			skb_dst_set_noref(skb, &rt->u.dst);
+			skb_dst_set_noref(skb, &rt->dst);
 			if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
 					 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
 					 1, NLM_F_MULTI) <= 0) {
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 5c48124332d..02bef6aa8b3 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -354,15 +354,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	}
 
 	/* Try to redo what tcp_v4_send_synack did. */
-	req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW);
+	req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
 
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
 				  ireq->wscale_ok, &rcv_wscale,
-				  dst_metric(&rt->u.dst, RTAX_INITRWND));
+				  dst_metric(&rt->dst, RTAX_INITRWND));
 
 	ireq->rcv_wscale  = rcv_wscale;
 
-	ret = get_cookie_sock(sk, skb, req, &rt->u.dst);
+	ret = get_cookie_sock(sk, skb, req, &rt->dst);
 out:	return ret;
 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7f976af27bf..7f9515c0379 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -237,7 +237,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	/* OK, now commit destination to socket.  */
 	sk->sk_gso_type = SKB_GSO_TCPV4;
-	sk_setup_caps(sk, &rt->u.dst);
+	sk_setup_caps(sk, &rt->dst);
 
 	if (!tp->write_seq)
 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index eec4ff456e3..32e0bef60d0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -914,7 +914,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		    !sock_flag(sk, SOCK_BROADCAST))
 			goto out;
 		if (connected)
-			sk_dst_set(sk, dst_clone(&rt->u.dst));
+			sk_dst_set(sk, dst_clone(&rt->dst));
 	}
 
 	if (msg->msg_flags&MSG_CONFIRM)
@@ -978,7 +978,7 @@ out:
 	return err;
 
 do_confirm:
-	dst_confirm(&rt->u.dst);
+	dst_confirm(&rt->dst);
 	if (!(msg->msg_flags&MSG_PROBE) || len)
 		goto back_from_confirm;
 	err = 0;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 1705476670e..349327092c9 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -37,7 +37,7 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
 		fl.fl4_src = saddr->a4;
 
 	err = __ip_route_output_key(net, &rt, &fl);
-	dst = &rt->u.dst;
+	dst = &rt->dst;
 	if (err)
 		dst = ERR_PTR(err);
 	return dst;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e1a698df570..b97bb1f3080 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -557,7 +557,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 		pr_warning("Freeing alive inet6 address %p\n", ifp);
 		return;
 	}
-	dst_release(&ifp->rt->u.dst);
+	dst_release(&ifp->rt->dst);
 
 	call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu);
 }
@@ -823,7 +823,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 				rt->rt6i_flags |= RTF_EXPIRES;
 			}
 		}
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 	}
 
 out:
@@ -1863,7 +1863,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 					      dev, expires, flags);
 		}
 		if (rt)
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 	}
 
 	/* Try to figure out our local address for this prefix */
@@ -4093,11 +4093,11 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 		if (ifp->idev->cnf.forwarding)
 			addrconf_leave_anycast(ifp);
 		addrconf_leave_solict(ifp->idev, &ifp->addr);
-		dst_hold(&ifp->rt->u.dst);
+		dst_hold(&ifp->rt->dst);
 
 		if (ifp->state == INET6_IFADDR_STATE_DEAD &&
 		    ip6_del_rt(ifp->rt))
-			dst_free(&ifp->rt->u.dst);
+			dst_free(&ifp->rt->dst);
 		break;
 	}
 }
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index f058fbd808c..0e5e943446f 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -84,7 +84,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 		rt = rt6_lookup(net, addr, NULL, 0, 0);
 		if (rt) {
 			dev = rt->rt6i_dev;
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 		} else if (ishost) {
 			err = -EADDRNOTAVAIL;
 			goto error;
@@ -244,7 +244,7 @@ static void aca_put(struct ifacaddr6 *ac)
 {
 	if (atomic_dec_and_test(&ac->aca_refcnt)) {
 		in6_dev_put(ac->aca_idev);
-		dst_release(&ac->aca_rt->u.dst);
+		dst_release(&ac->aca_rt->dst);
 		kfree(ac);
 	}
 }
@@ -350,7 +350,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
 	write_unlock_bh(&idev->lock);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
-	dst_hold(&aca->aca_rt->u.dst);
+	dst_hold(&aca->aca_rt->dst);
 	ip6_del_rt(aca->aca_rt);
 
 	aca_put(aca);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 8e44f8f9c18..b1108ede18e 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -43,8 +43,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
 	if (arg.result)
 		return arg.result;
 
-	dst_hold(&net->ipv6.ip6_null_entry->u.dst);
-	return &net->ipv6.ip6_null_entry->u.dst;
+	dst_hold(&net->ipv6.ip6_null_entry->dst);
+	return &net->ipv6.ip6_null_entry->dst;
 }
 
 static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
@@ -86,7 +86,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 			struct in6_addr saddr;
 
 			if (ipv6_dev_get_saddr(net,
-					       ip6_dst_idev(&rt->u.dst)->dev,
+					       ip6_dst_idev(&rt->dst)->dev,
 					       &flp->fl6_dst,
 					       rt6_flags2srcprefs(flags),
 					       &saddr))
@@ -99,12 +99,12 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 		goto out;
 	}
 again:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	rt = NULL;
 	goto out;
 
 discard_pkt:
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 out:
 	arg->result = rt;
 	return rt == NULL ? -EAGAIN : 0;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 92a122b7795..b6a585909d3 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -165,7 +165,7 @@ static __inline__ void node_free(struct fib6_node * fn)
 static __inline__ void rt6_release(struct rt6_info *rt)
 {
 	if (atomic_dec_and_test(&rt->rt6i_ref))
-		dst_free(&rt->u.dst);
+		dst_free(&rt->dst);
 }
 
 static void fib6_link_table(struct net *net, struct fib6_table *tb)
@@ -278,7 +278,7 @@ static int fib6_dump_node(struct fib6_walker_t *w)
 	int res;
 	struct rt6_info *rt;
 
-	for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
 		res = rt6_dump_route(rt, w->args);
 		if (res < 0) {
 			/* Frame is full, suspend walking */
@@ -619,7 +619,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 
 	ins = &fn->leaf;
 
-	for (iter = fn->leaf; iter; iter=iter->u.dst.rt6_next) {
+	for (iter = fn->leaf; iter; iter=iter->dst.rt6_next) {
 		/*
 		 *	Search for duplicates
 		 */
@@ -647,7 +647,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 		if (iter->rt6i_metric > rt->rt6i_metric)
 			break;
 
-		ins = &iter->u.dst.rt6_next;
+		ins = &iter->dst.rt6_next;
 	}
 
 	/* Reset round-robin state, if necessary */
@@ -658,7 +658,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 	 *	insert node
 	 */
 
-	rt->u.dst.rt6_next = iter;
+	rt->dst.rt6_next = iter;
 	*ins = rt;
 	rt->rt6i_node = fn;
 	atomic_inc(&rt->rt6i_ref);
@@ -799,7 +799,7 @@ out:
 			atomic_inc(&pn->leaf->rt6i_ref);
 		}
 #endif
-		dst_free(&rt->u.dst);
+		dst_free(&rt->dst);
 	}
 	return err;
 
@@ -810,7 +810,7 @@ out:
 st_failure:
 	if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
 		fib6_repair_tree(info->nl_net, fn);
-	dst_free(&rt->u.dst);
+	dst_free(&rt->dst);
 	return err;
 #endif
 }
@@ -1108,7 +1108,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 	RT6_TRACE("fib6_del_route\n");
 
 	/* Unlink it */
-	*rtp = rt->u.dst.rt6_next;
+	*rtp = rt->dst.rt6_next;
 	rt->rt6i_node = NULL;
 	net->ipv6.rt6_stats->fib_rt_entries--;
 	net->ipv6.rt6_stats->fib_discarded_routes++;
@@ -1122,14 +1122,14 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 	FOR_WALKERS(w) {
 		if (w->state == FWS_C && w->leaf == rt) {
 			RT6_TRACE("walker %p adjusted by delroute\n", w);
-			w->leaf = rt->u.dst.rt6_next;
+			w->leaf = rt->dst.rt6_next;
 			if (w->leaf == NULL)
 				w->state = FWS_U;
 		}
 	}
 	read_unlock(&fib6_walker_lock);
 
-	rt->u.dst.rt6_next = NULL;
+	rt->dst.rt6_next = NULL;
 
 	/* If it was last route, expunge its radix tree node */
 	if (fn->leaf == NULL) {
@@ -1168,7 +1168,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
 	struct rt6_info **rtp;
 
 #if RT6_DEBUG >= 2
-	if (rt->u.dst.obsolete>0) {
+	if (rt->dst.obsolete>0) {
 		WARN_ON(fn != NULL);
 		return -ENOENT;
 	}
@@ -1195,7 +1195,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
 	 *	Walk the leaf entries looking for ourself
 	 */
 
-	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.dst.rt6_next) {
+	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) {
 		if (*rtp == rt) {
 			fib6_del_route(fn, rtp, info);
 			return 0;
@@ -1334,7 +1334,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
 		.nl_net = c->net,
 	};
 
-	for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
 		res = c->func(rt, c->arg);
 		if (res < 0) {
 			w->leaf = rt;
@@ -1448,8 +1448,8 @@ static int fib6_age(struct rt6_info *rt, void *arg)
 		}
 		gc_args.more++;
 	} else if (rt->rt6i_flags & RTF_CACHE) {
-		if (atomic_read(&rt->u.dst.__refcnt) == 0 &&
-		    time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) {
+		if (atomic_read(&rt->dst.__refcnt) == 0 &&
+		    time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) {
 			RT6_TRACE("aging clone %p\n", rt);
 			return -1;
 		} else if ((rt->rt6i_flags & RTF_GATEWAY) &&
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 89425af0684..d40b330c0ee 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -698,7 +698,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		ipv6_hdr(skb)->payload_len = htons(first_len -
 						   sizeof(struct ipv6hdr));
 
-		dst_hold(&rt->u.dst);
+		dst_hold(&rt->dst);
 
 		for (;;) {
 			/* Prepare header of the next frame,
@@ -726,7 +726,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 			err = output(skb);
 			if(!err)
-				IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
+				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 					      IPSTATS_MIB_FRAGCREATES);
 
 			if (err || !frag)
@@ -740,9 +740,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		kfree(tmp_hdr);
 
 		if (err == 0) {
-			IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
+			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 				      IPSTATS_MIB_FRAGOKS);
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 			return 0;
 		}
 
@@ -752,9 +752,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 			frag = skb;
 		}
 
-		IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
+		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 			      IPSTATS_MIB_FRAGFAILS);
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		return err;
 	}
 
@@ -785,7 +785,7 @@ slow_path:
 		 *	Allocate buffer.
 		 */
 
-		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
+		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
 			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
 			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 				      IPSTATS_MIB_FRAGFAILS);
@@ -798,7 +798,7 @@ slow_path:
 		 */
 
 		ip6_copy_metadata(frag, skb);
-		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
+		skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
 		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 		skb_reset_network_header(frag);
 		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
@@ -1156,24 +1156,24 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 
 			/* need source address above miyazawa*/
 		}
-		dst_hold(&rt->u.dst);
-		inet->cork.dst = &rt->u.dst;
+		dst_hold(&rt->dst);
+		inet->cork.dst = &rt->dst;
 		inet->cork.fl = *fl;
 		np->cork.hop_limit = hlimit;
 		np->cork.tclass = tclass;
 		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
-		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
+		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
 		if (np->frag_size < mtu) {
 			if (np->frag_size)
 				mtu = np->frag_size;
 		}
 		inet->cork.fragsize = mtu;
-		if (dst_allfrag(rt->u.dst.path))
+		if (dst_allfrag(rt->dst.path))
 			inet->cork.flags |= IPCORK_ALLFRAG;
 		inet->cork.length = 0;
 		sk->sk_sndmsg_page = NULL;
 		sk->sk_sndmsg_off = 0;
-		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
+		exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
 			    rt->rt6i_nfheader_len;
 		length += exthdrlen;
 		transhdrlen += exthdrlen;
@@ -1186,7 +1186,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		mtu = inet->cork.fragsize;
 	}
 
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 
 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
 			(opt ? opt->opt_nflen : 0);
@@ -1224,7 +1224,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		}
 
 		if (proto == IPPROTO_UDP &&
-		    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+		    (rt->dst.dev->features & NETIF_F_UFO)) {
 
 			err = ip6_ufo_append_data(sk, getfrag, from, length,
 						  hh_len, fragheaderlen,
@@ -1270,7 +1270,7 @@ alloc_new_skb:
 
 			fraglen = datalen + fragheaderlen;
 			if ((flags & MSG_MORE) &&
-			    !(rt->u.dst.dev->features&NETIF_F_SG))
+			    !(rt->dst.dev->features&NETIF_F_SG))
 				alloclen = mtu;
 			else
 				alloclen = datalen + fragheaderlen;
@@ -1281,7 +1281,7 @@ alloc_new_skb:
 			 * because we have no idea if we're the last one.
 			 */
 			if (datalen == length + fraggap)
-				alloclen += rt->u.dst.trailer_len;
+				alloclen += rt->dst.trailer_len;
 
 			/*
 			 * We just reserve space for fragment header.
@@ -1358,7 +1358,7 @@ alloc_new_skb:
 		if (copy > length)
 			copy = length;
 
-		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
+		if (!(rt->dst.dev->features&NETIF_F_SG)) {
 			unsigned int off;
 
 			off = skb->len;
@@ -1503,7 +1503,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
 
-	skb_dst_set(skb, dst_clone(&rt->u.dst));
+	skb_dst_set(skb, dst_clone(&rt->dst));
 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
 	if (proto == IPPROTO_ICMPV6) {
 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 8f39893d808..0fd027f3f47 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -552,7 +552,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (ip_route_output_key(dev_net(skb->dev), &rt, &fl))
 		goto out;
 
-	skb2->dev = rt->u.dst.dev;
+	skb2->dev = rt->dst.dev;
 
 	/* route "incoming" packet */
 	if (rt->rt_flags & RTCF_LOCAL) {
@@ -562,7 +562,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		fl.fl4_src = eiph->saddr;
 		fl.fl4_tos = eiph->tos;
 		if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
-		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
+		    rt->dst.dev->type != ARPHRD_TUNNEL) {
 			ip_rt_put(rt);
 			goto out;
 		}
@@ -626,7 +626,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		icmpv6_send(skb2, rel_type, rel_code, rel_info);
 
 		if (rt)
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 
 		kfree_skb(skb2);
 	}
@@ -1135,7 +1135,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
 			if (dev->mtu < IPV6_MIN_MTU)
 				dev->mtu = IPV6_MIN_MTU;
 		}
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 	}
 }
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 3e36d1538b6..d1444b95ad7 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -158,7 +158,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 		rt = rt6_lookup(net, addr, NULL, 0, 0);
 		if (rt) {
 			dev = rt->rt6i_dev;
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 		}
 	} else
 		dev = dev_get_by_index_rcu(net, ifindex);
@@ -248,7 +248,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
 		if (rt) {
 			dev = rt->rt6i_dev;
 			dev_hold(dev);
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 		}
 	} else
 		dev = dev_get_by_index_rcu(net, ifindex);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0abdc242ddb..1fc46fc60ef 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1229,7 +1229,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			ND_PRINTK0(KERN_ERR
 				   "ICMPv6 RA: %s() got default router without neighbour.\n",
 				   __func__);
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 			in6_dev_put(in6_dev);
 			return;
 		}
@@ -1244,7 +1244,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	if (ra_msg->icmph.icmp6_hop_limit) {
 		in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
 		if (rt)
-			rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
+			rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
 	}
 
 skip_defrtr:
@@ -1363,7 +1363,7 @@ skip_linkparms:
 			in6_dev->cnf.mtu6 = mtu;
 
 			if (rt)
-				rt->u.dst.metrics[RTAX_MTU-1] = mtu;
+				rt->dst.metrics[RTAX_MTU-1] = mtu;
 
 			rt6_mtu_change(skb->dev, mtu);
 		}
@@ -1384,7 +1384,7 @@ skip_linkparms:
 	}
 out:
 	if (rt)
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 	else if (neigh)
 		neigh_release(neigh);
 	in6_dev_put(in6_dev);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 968b9649072..e677937a07f 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -611,23 +611,23 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 	int err;
 	struct rt6_info *rt = (struct rt6_info *)*dstp;
 
-	if (length > rt->u.dst.dev->mtu) {
-		ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu);
+	if (length > rt->dst.dev->mtu) {
+		ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu);
 		return -EMSGSIZE;
 	}
 	if (flags&MSG_PROBE)
 		goto out;
 
 	skb = sock_alloc_send_skb(sk,
-				  length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
+				  length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
 				  flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto error;
-	skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
+	skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	*dstp = NULL;
 
 	skb_put(skb, length);
@@ -643,7 +643,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 
 	IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
 	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
-		      rt->u.dst.dev, dst_output);
+		      rt->dst.dev, dst_output);
 	if (err > 0)
 		err = net_xmit_errno(err);
 	if (err)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 252d76199c4..f7702850d45 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -126,16 +126,14 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 };
 
 static struct rt6_info ip6_null_entry_template = {
-	.u = {
-		.dst = {
-			.__refcnt	= ATOMIC_INIT(1),
-			.__use		= 1,
-			.obsolete	= -1,
-			.error		= -ENETUNREACH,
-			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
-			.input		= ip6_pkt_discard,
-			.output		= ip6_pkt_discard_out,
-		}
+	.dst = {
+		.__refcnt	= ATOMIC_INIT(1),
+		.__use		= 1,
+		.obsolete	= -1,
+		.error		= -ENETUNREACH,
+		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+		.input		= ip6_pkt_discard,
+		.output		= ip6_pkt_discard_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 	.rt6i_protocol  = RTPROT_KERNEL,
@@ -149,16 +147,14 @@ static int ip6_pkt_prohibit(struct sk_buff *skb);
 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 
 static struct rt6_info ip6_prohibit_entry_template = {
-	.u = {
-		.dst = {
-			.__refcnt	= ATOMIC_INIT(1),
-			.__use		= 1,
-			.obsolete	= -1,
-			.error		= -EACCES,
-			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
-			.input		= ip6_pkt_prohibit,
-			.output		= ip6_pkt_prohibit_out,
-		}
+	.dst = {
+		.__refcnt	= ATOMIC_INIT(1),
+		.__use		= 1,
+		.obsolete	= -1,
+		.error		= -EACCES,
+		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+		.input		= ip6_pkt_prohibit,
+		.output		= ip6_pkt_prohibit_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 	.rt6i_protocol  = RTPROT_KERNEL,
@@ -167,16 +163,14 @@ static struct rt6_info ip6_prohibit_entry_template = {
 };
 
 static struct rt6_info ip6_blk_hole_entry_template = {
-	.u = {
-		.dst = {
-			.__refcnt	= ATOMIC_INIT(1),
-			.__use		= 1,
-			.obsolete	= -1,
-			.error		= -EINVAL,
-			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
-			.input		= dst_discard,
-			.output		= dst_discard,
-		}
+	.dst = {
+		.__refcnt	= ATOMIC_INIT(1),
+		.__use		= 1,
+		.obsolete	= -1,
+		.error		= -EINVAL,
+		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+		.input		= dst_discard,
+		.output		= dst_discard,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 	.rt6i_protocol  = RTPROT_KERNEL,
@@ -249,7 +243,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
 	if (!oif && ipv6_addr_any(saddr))
 		goto out;
 
-	for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
+	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 		struct net_device *dev = sprt->rt6i_dev;
 
 		if (oif) {
@@ -407,10 +401,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 
 	match = NULL;
 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
-	     rt = rt->u.dst.rt6_next)
+	     rt = rt->dst.rt6_next)
 		match = find_match(rt, oif, strict, &mpri, match);
 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
-	     rt = rt->u.dst.rt6_next)
+	     rt = rt->dst.rt6_next)
 		match = find_match(rt, oif, strict, &mpri, match);
 
 	return match;
@@ -432,7 +426,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 
 	if (!match &&
 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
-		struct rt6_info *next = rt0->u.dst.rt6_next;
+		struct rt6_info *next = rt0->dst.rt6_next;
 
 		/* no entries matched; do round-robin */
 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
@@ -517,7 +511,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 			rt->rt6i_expires = jiffies + HZ * lifetime;
 			rt->rt6i_flags |= RTF_EXPIRES;
 		}
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 	}
 	return 0;
 }
@@ -555,7 +549,7 @@ restart:
 	rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
 	BACKTRACK(net, &fl->fl6_src);
 out:
-	dst_use(&rt->u.dst, jiffies);
+	dst_use(&rt->dst, jiffies);
 	read_unlock_bh(&table->tb6_lock);
 	return rt;
 
@@ -643,7 +637,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
 		rt->rt6i_dst.plen = 128;
 		rt->rt6i_flags |= RTF_CACHE;
-		rt->u.dst.flags |= DST_HOST;
+		rt->dst.flags |= DST_HOST;
 
 #ifdef CONFIG_IPV6_SUBTREES
 		if (rt->rt6i_src.plen && saddr) {
@@ -677,7 +671,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "Neighbour table overflow.\n");
-			dst_free(&rt->u.dst);
+			dst_free(&rt->dst);
 			return NULL;
 		}
 		rt->rt6i_nexthop = neigh;
@@ -694,7 +688,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
 		rt->rt6i_dst.plen = 128;
 		rt->rt6i_flags |= RTF_CACHE;
-		rt->u.dst.flags |= DST_HOST;
+		rt->dst.flags |= DST_HOST;
 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
 	}
 	return rt;
@@ -726,7 +720,7 @@ restart:
 	    rt->rt6i_flags & RTF_CACHE)
 		goto out;
 
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 	read_unlock_bh(&table->tb6_lock);
 
 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
@@ -739,10 +733,10 @@ restart:
 #endif
 	}
 
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	rt = nrt ? : net->ipv6.ip6_null_entry;
 
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 	if (nrt) {
 		err = ip6_ins_rt(nrt);
 		if (!err)
@@ -756,7 +750,7 @@ restart:
 	 * Race condition! In the gap, when table->tb6_lock was
 	 * released someone could insert this route.  Relookup.
 	 */
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	goto relookup;
 
 out:
@@ -764,11 +758,11 @@ out:
 		reachable = 0;
 		goto restart_2;
 	}
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 	read_unlock_bh(&table->tb6_lock);
 out2:
-	rt->u.dst.lastuse = jiffies;
-	rt->u.dst.__use++;
+	rt->dst.lastuse = jiffies;
+	rt->dst.__use++;
 
 	return rt;
 }
@@ -835,15 +829,15 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
 	struct dst_entry *new = NULL;
 
 	if (rt) {
-		new = &rt->u.dst;
+		new = &rt->dst;
 
 		atomic_set(&new->__refcnt, 1);
 		new->__use = 1;
 		new->input = dst_discard;
 		new->output = dst_discard;
 
-		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
-		new->dev = ort->u.dst.dev;
+		memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+		new->dev = ort->dst.dev;
 		if (new->dev)
 			dev_hold(new->dev);
 		rt->rt6i_idev = ort->rt6i_idev;
@@ -912,7 +906,7 @@ static void ip6_link_failure(struct sk_buff *skb)
 	rt = (struct rt6_info *) skb_dst(skb);
 	if (rt) {
 		if (rt->rt6i_flags&RTF_CACHE) {
-			dst_set_expires(&rt->u.dst, 0);
+			dst_set_expires(&rt->dst, 0);
 			rt->rt6i_flags |= RTF_EXPIRES;
 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
 			rt->rt6i_node->fn_sernum = -1;
@@ -986,14 +980,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	rt->rt6i_dev	  = dev;
 	rt->rt6i_idev     = idev;
 	rt->rt6i_nexthop  = neigh;
-	atomic_set(&rt->u.dst.__refcnt, 1);
-	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
-	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
-	rt->u.dst.output  = ip6_output;
+	atomic_set(&rt->dst.__refcnt, 1);
+	rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
+	rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
+	rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+	rt->dst.output  = ip6_output;
 
 #if 0	/* there's no chance to use these for ndisc */
-	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
+	rt->dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
 				? DST_HOST
 				: 0;
 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
@@ -1001,14 +995,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 #endif
 
 	spin_lock_bh(&icmp6_dst_lock);
-	rt->u.dst.next = icmp6_dst_gc_list;
-	icmp6_dst_gc_list = &rt->u.dst;
+	rt->dst.next = icmp6_dst_gc_list;
+	icmp6_dst_gc_list = &rt->dst;
 	spin_unlock_bh(&icmp6_dst_lock);
 
 	fib6_force_start_gc(net);
 
 out:
-	return &rt->u.dst;
+	return &rt->dst;
 }
 
 int icmp6_dst_gc(void)
@@ -1159,7 +1153,7 @@ int ip6_route_add(struct fib6_config *cfg)
 		goto out;
 	}
 
-	rt->u.dst.obsolete = -1;
+	rt->dst.obsolete = -1;
 	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
 				0;
@@ -1171,16 +1165,16 @@ int ip6_route_add(struct fib6_config *cfg)
 	addr_type = ipv6_addr_type(&cfg->fc_dst);
 
 	if (addr_type & IPV6_ADDR_MULTICAST)
-		rt->u.dst.input = ip6_mc_input;
+		rt->dst.input = ip6_mc_input;
 	else
-		rt->u.dst.input = ip6_forward;
+		rt->dst.input = ip6_forward;
 
-	rt->u.dst.output = ip6_output;
+	rt->dst.output = ip6_output;
 
 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
 	rt->rt6i_dst.plen = cfg->fc_dst_len;
 	if (rt->rt6i_dst.plen == 128)
-	       rt->u.dst.flags = DST_HOST;
+	       rt->dst.flags = DST_HOST;
 
 #ifdef CONFIG_IPV6_SUBTREES
 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
@@ -1208,9 +1202,9 @@ int ip6_route_add(struct fib6_config *cfg)
 				goto out;
 			}
 		}
-		rt->u.dst.output = ip6_pkt_discard_out;
-		rt->u.dst.input = ip6_pkt_discard;
-		rt->u.dst.error = -ENETUNREACH;
+		rt->dst.output = ip6_pkt_discard_out;
+		rt->dst.input = ip6_pkt_discard;
+		rt->dst.error = -ENETUNREACH;
 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
 		goto install_route;
 	}
@@ -1244,7 +1238,7 @@ int ip6_route_add(struct fib6_config *cfg)
 				goto out;
 			if (dev) {
 				if (dev != grt->rt6i_dev) {
-					dst_release(&grt->u.dst);
+					dst_release(&grt->dst);
 					goto out;
 				}
 			} else {
@@ -1255,7 +1249,7 @@ int ip6_route_add(struct fib6_config *cfg)
 			}
 			if (!(grt->rt6i_flags&RTF_GATEWAY))
 				err = 0;
-			dst_release(&grt->u.dst);
+			dst_release(&grt->dst);
 
 			if (err)
 				goto out;
@@ -1294,18 +1288,18 @@ install_route:
 					goto out;
 				}
 
-				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
+				rt->dst.metrics[type - 1] = nla_get_u32(nla);
 			}
 		}
 	}
 
-	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
-		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
-	if (!dst_mtu(&rt->u.dst))
-		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
-	if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
-	rt->u.dst.dev = dev;
+	if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
+		rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
+	if (!dst_mtu(&rt->dst))
+		rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
+	if (!dst_metric(&rt->dst, RTAX_ADVMSS))
+		rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+	rt->dst.dev = dev;
 	rt->rt6i_idev = idev;
 	rt->rt6i_table = table;
 
@@ -1319,7 +1313,7 @@ out:
 	if (idev)
 		in6_dev_put(idev);
 	if (rt)
-		dst_free(&rt->u.dst);
+		dst_free(&rt->dst);
 	return err;
 }
 
@@ -1336,7 +1330,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 	write_lock_bh(&table->tb6_lock);
 
 	err = fib6_del(rt, info);
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 
 	write_unlock_bh(&table->tb6_lock);
 
@@ -1369,7 +1363,7 @@ static int ip6_route_del(struct fib6_config *cfg)
 			 &cfg->fc_src, cfg->fc_src_len);
 
 	if (fn) {
-		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
+		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 			if (cfg->fc_ifindex &&
 			    (rt->rt6i_dev == NULL ||
 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
@@ -1379,7 +1373,7 @@ static int ip6_route_del(struct fib6_config *cfg)
 				continue;
 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
 				continue;
-			dst_hold(&rt->u.dst);
+			dst_hold(&rt->dst);
 			read_unlock_bh(&table->tb6_lock);
 
 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
@@ -1421,7 +1415,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	read_lock_bh(&table->tb6_lock);
 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 restart:
-	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 		/*
 		 * Current route is on-link; redirect is always invalid.
 		 *
@@ -1445,7 +1439,7 @@ restart:
 		rt = net->ipv6.ip6_null_entry;
 	BACKTRACK(net, &fl->fl6_src);
 out:
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 
 	read_unlock_bh(&table->tb6_lock);
 
@@ -1513,10 +1507,10 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 	 * Look, redirects are sent only in response to data packets,
 	 * so that this nexthop apparently is reachable. --ANK
 	 */
-	dst_confirm(&rt->u.dst);
+	dst_confirm(&rt->dst);
 
 	/* Duplicate redirect: silently ignore. */
-	if (neigh == rt->u.dst.neighbour)
+	if (neigh == rt->dst.neighbour)
 		goto out;
 
 	nrt = ip6_rt_copy(rt);
@@ -1529,20 +1523,20 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 
 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
 	nrt->rt6i_dst.plen = 128;
-	nrt->u.dst.flags |= DST_HOST;
+	nrt->dst.flags |= DST_HOST;
 
 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
 	nrt->rt6i_nexthop = neigh_clone(neigh);
 	/* Reset pmtu, it may be better */
-	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
-	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
-							dst_mtu(&nrt->u.dst));
+	nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
+	nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
+							dst_mtu(&nrt->dst));
 
 	if (ip6_ins_rt(nrt))
 		goto out;
 
-	netevent.old = &rt->u.dst;
-	netevent.new = &nrt->u.dst;
+	netevent.old = &rt->dst;
+	netevent.new = &nrt->dst;
 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
 	if (rt->rt6i_flags&RTF_CACHE) {
@@ -1551,7 +1545,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 	}
 
 out:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 }
 
 /*
@@ -1570,7 +1564,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 	if (rt == NULL)
 		return;
 
-	if (pmtu >= dst_mtu(&rt->u.dst))
+	if (pmtu >= dst_mtu(&rt->dst))
 		goto out;
 
 	if (pmtu < IPV6_MIN_MTU) {
@@ -1588,7 +1582,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 	   They are sent only in response to data packets,
 	   so that this nexthop apparently is reachable. --ANK
 	 */
-	dst_confirm(&rt->u.dst);
+	dst_confirm(&rt->dst);
 
 	/* Host route. If it is static, it would be better
 	   not to override it, but add new one, so that
@@ -1596,10 +1590,10 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 	   would return automatically.
 	 */
 	if (rt->rt6i_flags & RTF_CACHE) {
-		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
+		rt->dst.metrics[RTAX_MTU-1] = pmtu;
 		if (allfrag)
-			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
-		dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
+			rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
 		goto out;
 	}
@@ -1615,9 +1609,9 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 		nrt = rt6_alloc_clone(rt, daddr);
 
 	if (nrt) {
-		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
+		nrt->dst.metrics[RTAX_MTU-1] = pmtu;
 		if (allfrag)
-			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+			nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
 
 		/* According to RFC 1981, detecting PMTU increase shouldn't be
 		 * happened within 5 mins, the recommended timer is 10 mins.
@@ -1625,13 +1619,13 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
 		 * and detecting PMTU increase will be automatically happened.
 		 */
-		dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
+		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
 
 		ip6_ins_rt(nrt);
 	}
 out:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 }
 
 /*
@@ -1644,18 +1638,18 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
 
 	if (rt) {
-		rt->u.dst.input = ort->u.dst.input;
-		rt->u.dst.output = ort->u.dst.output;
-
-		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
-		rt->u.dst.error = ort->u.dst.error;
-		rt->u.dst.dev = ort->u.dst.dev;
-		if (rt->u.dst.dev)
-			dev_hold(rt->u.dst.dev);
+		rt->dst.input = ort->dst.input;
+		rt->dst.output = ort->dst.output;
+
+		memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+		rt->dst.error = ort->dst.error;
+		rt->dst.dev = ort->dst.dev;
+		if (rt->dst.dev)
+			dev_hold(rt->dst.dev);
 		rt->rt6i_idev = ort->rt6i_idev;
 		if (rt->rt6i_idev)
 			in6_dev_hold(rt->rt6i_idev);
-		rt->u.dst.lastuse = jiffies;
+		rt->dst.lastuse = jiffies;
 		rt->rt6i_expires = 0;
 
 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
@@ -1689,14 +1683,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 	if (!fn)
 		goto out;
 
-	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 		if (rt->rt6i_dev->ifindex != ifindex)
 			continue;
 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
 			continue;
 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
 			continue;
-		dst_hold(&rt->u.dst);
+		dst_hold(&rt->dst);
 		break;
 	}
 out:
@@ -1744,14 +1738,14 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
 		return NULL;
 
 	write_lock_bh(&table->tb6_lock);
-	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
+	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
 		if (dev == rt->rt6i_dev &&
 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
 			break;
 	}
 	if (rt)
-		dst_hold(&rt->u.dst);
+		dst_hold(&rt->dst);
 	write_unlock_bh(&table->tb6_lock);
 	return rt;
 }
@@ -1790,9 +1784,9 @@ void rt6_purge_dflt_routers(struct net *net)
 
 restart:
 	read_lock_bh(&table->tb6_lock);
-	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
-			dst_hold(&rt->u.dst);
+			dst_hold(&rt->dst);
 			read_unlock_bh(&table->tb6_lock);
 			ip6_del_rt(rt);
 			goto restart;
@@ -1930,15 +1924,15 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	dev_hold(net->loopback_dev);
 	in6_dev_hold(idev);
 
-	rt->u.dst.flags = DST_HOST;
-	rt->u.dst.input = ip6_input;
-	rt->u.dst.output = ip6_output;
+	rt->dst.flags = DST_HOST;
+	rt->dst.input = ip6_input;
+	rt->dst.output = ip6_output;
 	rt->rt6i_dev = net->loopback_dev;
 	rt->rt6i_idev = idev;
-	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
-	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
-	rt->u.dst.obsolete = -1;
+	rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
+	rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+	rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
+	rt->dst.obsolete = -1;
 
 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
 	if (anycast)
@@ -1947,7 +1941,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 		rt->rt6i_flags |= RTF_LOCAL;
 	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 	if (IS_ERR(neigh)) {
-		dst_free(&rt->u.dst);
+		dst_free(&rt->dst);
 
 		/* We are casting this because that is the return
 		 * value type.  But an errno encoded pointer is the
@@ -1962,7 +1956,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	rt->rt6i_dst.plen = 128;
 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
 
-	atomic_set(&rt->u.dst.__refcnt, 1);
+	atomic_set(&rt->dst.__refcnt, 1);
 
 	return rt;
 }
@@ -2033,12 +2027,12 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 	   PMTU discouvery.
 	 */
 	if (rt->rt6i_dev == arg->dev &&
-	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
-	    (dst_mtu(&rt->u.dst) >= arg->mtu ||
-	     (dst_mtu(&rt->u.dst) < arg->mtu &&
-	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
-		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
+	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
+	    (dst_mtu(&rt->dst) >= arg->mtu ||
+	     (dst_mtu(&rt->dst) < arg->mtu &&
+	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
+		rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
+		rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
 	}
 	return 0;
 }
@@ -2252,20 +2246,20 @@ static int rt6_fill_node(struct net *net,
 #endif
 			NLA_PUT_U32(skb, RTA_IIF, iif);
 	} else if (dst) {
-		struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
+		struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
 		struct in6_addr saddr_buf;
 		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
 				       dst, 0, &saddr_buf) == 0)
 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}
 
-	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
 		goto nla_put_failure;
 
-	if (rt->u.dst.neighbour)
-		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
+	if (rt->dst.neighbour)
+		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
 
-	if (rt->u.dst.dev)
+	if (rt->dst.dev)
 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
 
 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
@@ -2277,8 +2271,8 @@ static int rt6_fill_node(struct net *net,
 	else
 		expires = INT_MAX;
 
-	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
-			       expires, rt->u.dst.error) < 0)
+	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
+			       expires, rt->dst.error) < 0)
 		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
@@ -2364,7 +2358,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
 
 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
@@ -2416,12 +2410,12 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 	struct net *net = dev_net(dev);
 
 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
-		net->ipv6.ip6_null_entry->u.dst.dev = dev;
+		net->ipv6.ip6_null_entry->dst.dev = dev;
 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-		net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
+		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
-		net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
+		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
 #endif
 	}
@@ -2464,8 +2458,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
 		seq_puts(m, "00000000000000000000000000000000");
 	}
 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
-		   rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
-		   rt->u.dst.__use, rt->rt6i_flags,
+		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
+		   rt->dst.__use, rt->rt6i_flags,
 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
 	return 0;
 }
@@ -2646,9 +2640,9 @@ static int __net_init ip6_route_net_init(struct net *net)
 					   GFP_KERNEL);
 	if (!net->ipv6.ip6_null_entry)
 		goto out_ip6_dst_ops;
-	net->ipv6.ip6_null_entry->u.dst.path =
+	net->ipv6.ip6_null_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_null_entry;
-	net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
+	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2656,18 +2650,18 @@ static int __net_init ip6_route_net_init(struct net *net)
 					       GFP_KERNEL);
 	if (!net->ipv6.ip6_prohibit_entry)
 		goto out_ip6_null_entry;
-	net->ipv6.ip6_prohibit_entry->u.dst.path =
+	net->ipv6.ip6_prohibit_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
-	net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
+	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 
 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
 					       GFP_KERNEL);
 	if (!net->ipv6.ip6_blk_hole_entry)
 		goto out_ip6_prohibit_entry;
-	net->ipv6.ip6_blk_hole_entry->u.dst.path =
+	net->ipv6.ip6_blk_hole_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
-	net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
+	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 #endif
 
 	net->ipv6.sysctl.flush_delay = 0;
@@ -2742,12 +2736,12 @@ int __init ip6_route_init(void)
 	/* Registering of the loopback is done before this portion of code,
 	 * the loopback reference in rt6_info will not be taken, do it
 	 * manually for init_net */
-	init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-	init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
-	init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
   #endif
 	ret = fib6_init();
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 702c532ec21..4699cd3c311 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -712,7 +712,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		stats->tx_carrier_errors++;
 		goto tx_error_icmp;
 	}
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
 	if (tdev == dev) {
 		ip_rt_put(rt);
@@ -721,7 +721,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	}
 
 	if (df) {
-		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 
 		if (mtu < 68) {
 			stats->collisions++;
@@ -780,7 +780,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags = 0;
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -829,7 +829,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
 				    .proto = IPPROTO_IPV6 };
 		struct rtable *rt;
 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
-			tdev = rt->u.dst.dev;
+			tdev = rt->dst.dev;
 			ip_rt_put(rt);
 		}
 		dev->flags |= IFF_POINTOPOINT;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 0852512d392..226a0ae3bcf 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -348,7 +348,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 	sk->sk_state = TCP_ESTABLISHED;
 	inet->inet_id = jiffies;
 
-	sk_dst_set(sk, &rt->u.dst);
+	sk_dst_set(sk, &rt->dst);
 
 	write_lock_bh(&l2tp_ip_lock);
 	hlist_del_init(&sk->sk_bind_node);
@@ -496,9 +496,9 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 			if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
 				goto no_route;
 		}
-		sk_setup_caps(sk, &rt->u.dst);
+		sk_setup_caps(sk, &rt->dst);
 	}
-	skb_dst_set(skb, dst_clone(&rt->u.dst));
+	skb_dst_set(skb, dst_clone(&rt->dst));
 
 	/* Queue the packet to IP for output */
 	rc = ip_queue_xmit(skb);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 93c15a107b2..02b078e11cf 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -90,10 +90,10 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
 					     &dest->addr.ip);
 				return NULL;
 			}
-			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
+			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst));
 			IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
 				  &dest->addr.ip,
-				  atomic_read(&rt->u.dst.__refcnt), rtos);
+				  atomic_read(&rt->dst.__refcnt), rtos);
 		}
 		spin_unlock(&dest->dst_lock);
 	} else {
@@ -148,10 +148,10 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
 					     &dest->addr.in6);
 				return NULL;
 			}
-			__ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
+			__ip_vs_dst_set(dest, 0, dst_clone(&rt->dst));
 			IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n",
 				  &dest->addr.in6,
-				  atomic_read(&rt->u.dst.__refcnt));
+				  atomic_read(&rt->dst.__refcnt));
 		}
 		spin_unlock(&dest->dst_lock);
 	} else {
@@ -198,7 +198,7 @@ do {							\
 	(skb)->ipvs_property = 1;			\
 	skb_forward_csum(skb);				\
 	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
-		(rt)->u.dst.dev, dst_output);		\
+		(rt)->dst.dev, dst_output);		\
 } while (0)
 
 
@@ -245,7 +245,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	}
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
@@ -265,7 +265,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -309,9 +309,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	}
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
@@ -323,13 +323,13 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	 */
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (unlikely(skb == NULL)) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		return NF_STOLEN;
 	}
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -376,7 +376,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
@@ -388,12 +388,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!skb_make_writable(skb, sizeof(struct iphdr)))
 		goto tx_error_put;
 
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
@@ -452,9 +452,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
 				 "ip_vs_nat_xmit_v6(): frag needed for");
@@ -465,12 +465,12 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
 		goto tx_error_put;
 
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
@@ -498,7 +498,7 @@ tx_error:
 	kfree_skb(skb);
 	return NF_STOLEN;
 tx_error_put:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	goto tx_error;
 }
 #endif
@@ -549,9 +549,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
 		goto tx_error_icmp;
 
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
-	mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+	mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 	if (mtu < 68) {
 		ip_rt_put(rt);
 		IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
@@ -601,7 +601,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -615,7 +615,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	iph->daddr		=	rt->rt_dst;
 	iph->saddr		=	rt->rt_src;
 	iph->ttl		=	old_iph->ttl;
-	ip_select_ident(iph, &rt->u.dst, NULL);
+	ip_select_ident(iph, &rt->dst, NULL);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -660,12 +660,12 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!rt)
 		goto tx_error_icmp;
 
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
-	mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
+	mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
 	/* TODO IPv6: do we need this check in IPv6? */
 	if (mtu < 1280) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__);
 		goto tx_error;
 	}
@@ -674,7 +674,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
 	}
@@ -689,7 +689,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		struct sk_buff *new_skb =
 			skb_realloc_headroom(skb, max_headroom);
 		if (!new_skb) {
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 			kfree_skb(skb);
 			IP_VS_ERR_RL("%s(): no memory\n", __func__);
 			return NF_STOLEN;
@@ -707,7 +707,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -760,7 +760,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		ip_rt_put(rt);
@@ -780,7 +780,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -813,10 +813,10 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
 	}
@@ -827,13 +827,13 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	 */
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (unlikely(skb == NULL)) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		return NF_STOLEN;
 	}
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -888,7 +888,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
@@ -900,12 +900,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!skb_make_writable(skb, offset))
 		goto tx_error_put;
 
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;
 
 	/* drop the old route when skb is not shared */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	ip_vs_nat_icmp(skb, pp, cp, 0);
 
@@ -963,9 +963,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
@@ -975,12 +975,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!skb_make_writable(skb, offset))
 		goto tx_error_put;
 
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;
 
 	/* drop the old route when skb is not shared */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	ip_vs_nat_icmp_v6(skb, pp, cp, 0);
 
@@ -1001,7 +1001,7 @@ out:
 	LeaveFunction(10);
 	return rc;
 tx_error_put:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	goto tx_error;
 }
 #endif
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 6eaee7c8a33..b969025cf82 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -734,11 +734,11 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 		if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) {
 			if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
 				if (rt1->rt_gateway == rt2->rt_gateway &&
-				    rt1->u.dst.dev  == rt2->u.dst.dev)
+				    rt1->dst.dev  == rt2->dst.dev)
 					ret = 1;
-				dst_release(&rt2->u.dst);
+				dst_release(&rt2->dst);
 			}
-			dst_release(&rt1->u.dst);
+			dst_release(&rt1->dst);
 		}
 		break;
 	}
@@ -753,11 +753,11 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 			if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
 				if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
 					    sizeof(rt1->rt6i_gateway)) &&
-				    rt1->u.dst.dev == rt2->u.dst.dev)
+				    rt1->dst.dev == rt2->dst.dev)
 					ret = 1;
-				dst_release(&rt2->u.dst);
+				dst_release(&rt2->dst);
 			}
-			dst_release(&rt1->u.dst);
+			dst_release(&rt1->dst);
 		}
 		break;
 	}
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 497b2224536..aadde018a07 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -61,7 +61,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
 		goto out;
 
 	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(rt->u.dst.dev);
+	in_dev = __in_dev_get_rcu(rt->dst.dev);
 	if (in_dev != NULL) {
 		for_primary_ifa(in_dev) {
 			if (ifa->ifa_broadcast == iph->daddr) {
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 62ec021fbd5..1841388c770 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -165,8 +165,8 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
 	rcu_read_unlock();
 
 	if (rt != NULL) {
-		mtu = dst_mtu(&rt->u.dst);
-		dst_release(&rt->u.dst);
+		mtu = dst_mtu(&rt->dst);
+		dst_release(&rt->dst);
 	}
 	return mtu;
 }
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 859d9fd429c..c77a85bbd9e 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -77,8 +77,8 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 		return false;
 
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
-	skb->dev      = rt->u.dst.dev;
+	skb_dst_set(skb, &rt->dst);
+	skb->dev      = rt->dst.dev;
 	skb->protocol = htons(ETH_P_IP);
 	return true;
 }
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index f0f85b0123f..9f1729bd60d 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -64,8 +64,8 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
 		return;
 	}
 
-	peer->if_mtu = dst_mtu(&rt->u.dst);
-	dst_release(&rt->u.dst);
+	peer->if_mtu = dst_mtu(&rt->dst);
+	dst_release(&rt->dst);
 
 	_leave(" [if_mtu %u]", peer->if_mtu);
 }
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 182749867c7..a0e1a7fdebb 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -490,7 +490,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 			  __func__, &fl.fl4_dst, &fl.fl4_src);
 
 	if (!ip_route_output_key(&init_net, &rt, &fl)) {
-		dst = &rt->u.dst;
+		dst = &rt->dst;
 	}
 
 	/* If there is no association or if a source address is passed, no
@@ -534,7 +534,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 			fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
 			fl.fl_ip_sport = laddr->a.v4.sin_port;
 			if (!ip_route_output_key(&init_net, &rt, &fl)) {
-				dst = &rt->u.dst;
+				dst = &rt->dst;
 				goto out_unlock;
 			}
 		}
-- 
cgit v1.2.3-70-g09d2


From c7de2cf053420d63bac85133469c965d4b1083e1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 9 Jun 2010 02:09:23 +0000
Subject: pkt_sched: gen_kill_estimator() rcu fixes

gen_kill_estimator() API is incomplete or not well documented, since
caller should make sure an RCU grace period is respected before
freeing stats_lock.

This was partially addressed in commit 5d944c640b4
(gen_estimator: deadlock fix), but the same problem exists for all
gen_kill_estimator() users if the lock they use is not already RCU
protected.

A code review shows that xt_RATEEST.c, act_api.c and act_police.c have
this problem. The others are OK because they use the qdisc lock, which
is already RCU protected.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h              |  2 ++
 include/net/netfilter/xt_rateest.h |  1 +
 net/core/gen_estimator.c           |  1 +
 net/netfilter/xt_RATEEST.c         | 12 +++++++++++-
 net/sched/act_api.c                | 11 ++++++++++-
 net/sched/act_police.c             | 12 +++++++++++-
 6 files changed, 36 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index c05fd717c58..bab385f13ac 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -20,6 +20,7 @@ struct tcf_common {
 	struct gnet_stats_queue		tcfc_qstats;
 	struct gnet_stats_rate_est	tcfc_rate_est;
 	spinlock_t			tcfc_lock;
+	struct rcu_head			tcfc_rcu;
 };
 #define tcf_next	common.tcfc_next
 #define tcf_index	common.tcfc_index
@@ -32,6 +33,7 @@ struct tcf_common {
 #define tcf_qstats	common.tcfc_qstats
 #define tcf_rate_est	common.tcfc_rate_est
 #define tcf_lock	common.tcfc_lock
+#define tcf_rcu		common.tcfc_rcu
 
 struct tcf_police {
 	struct tcf_common	common;
diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
index ddbf37e1961..5e142779592 100644
--- a/include/net/netfilter/xt_rateest.h
+++ b/include/net/netfilter/xt_rateest.h
@@ -9,6 +9,7 @@ struct xt_rateest {
 	struct gnet_estimator		params;
 	struct gnet_stats_rate_est	rstats;
 	struct gnet_stats_basic_packed	bstats;
+	struct rcu_head			rcu;
 };
 
 extern struct xt_rateest *xt_rateest_lookup(const char *name);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 785e5276a30..9fbe7f7429b 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -263,6 +263,7 @@ static void __gen_kill_estimator(struct rcu_head *head)
  *
  * Removes the rate estimator specified by &bstats and &rate_est.
  *
+ * Note : Caller should respect an RCU grace period before freeing stats_lock
  */
 void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 			struct gnet_stats_rate_est *rate_est)
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 69c01e10f8a..de079abd5bc 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -60,13 +60,22 @@ struct xt_rateest *xt_rateest_lookup(const char *name)
 }
 EXPORT_SYMBOL_GPL(xt_rateest_lookup);
 
+static void xt_rateest_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct xt_rateest, rcu));
+}
+
 void xt_rateest_put(struct xt_rateest *est)
 {
 	mutex_lock(&xt_rateest_mutex);
 	if (--est->refcnt == 0) {
 		hlist_del(&est->list);
 		gen_kill_estimator(&est->bstats, &est->rstats);
-		kfree(est);
+		/*
+		 * gen_estimator est_timer() might access est->lock or bstats,
+		 * wait a RCU grace period before freeing 'est'
+		 */
+		call_rcu(&est->rcu, xt_rateest_free_rcu);
 	}
 	mutex_unlock(&xt_rateest_mutex);
 }
@@ -179,6 +188,7 @@ static int __init xt_rateest_tg_init(void)
 static void __exit xt_rateest_tg_fini(void)
 {
 	xt_unregister_target(&xt_rateest_tg_reg);
+	rcu_barrier(); /* Wait for completion of call_rcu()'s (xt_rateest_free_rcu) */
 }
 
 
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 972378f47f3..23b25f89e7e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -26,6 +26,11 @@
 #include <net/act_api.h>
 #include <net/netlink.h>
 
+static void tcf_common_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct tcf_common, tcfc_rcu));
+}
+
 void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 {
 	unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
@@ -38,7 +43,11 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 			write_unlock_bh(hinfo->lock);
 			gen_kill_estimator(&p->tcfc_bstats,
 					   &p->tcfc_rate_est);
-			kfree(p);
+			/*
+			 * gen_estimator est_timer() might access p->tcfc_lock
+			 * or bstats, wait a RCU grace period before freeing p
+			 */
+			call_rcu(&p->tcfc_rcu, tcf_common_free_rcu);
 			return;
 		}
 	}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 654f73dff7c..537a48732e9 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -97,6 +97,11 @@ nla_put_failure:
 	goto done;
 }
 
+static void tcf_police_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct tcf_police, tcf_rcu));
+}
+
 static void tcf_police_destroy(struct tcf_police *p)
 {
 	unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
@@ -113,7 +118,11 @@ static void tcf_police_destroy(struct tcf_police *p)
 				qdisc_put_rtab(p->tcfp_R_tab);
 			if (p->tcfp_P_tab)
 				qdisc_put_rtab(p->tcfp_P_tab);
-			kfree(p);
+			/*
+			 * gen_estimator est_timer() might access p->tcf_lock
+			 * or bstats, wait a RCU grace period before freeing p
+			 */
+			call_rcu(&p->tcf_rcu, tcf_police_free_rcu);
 			return;
 		}
 	}
@@ -397,6 +406,7 @@ static void __exit
 police_cleanup_module(void)
 {
 	tcf_unregister_action(&act_police_ops);
+	rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */
 }
 
 module_init(police_init_module);
-- 
cgit v1.2.3-70-g09d2


From be1f3c2c027cc5ad735df6a45a542ed1db7ec48b Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 8 Jun 2010 07:19:54 +0000
Subject: net: Enable 64-bit net device statistics on 32-bit architectures

Use struct rtnl_link_stats64 as the statistics structure.

On 32-bit architectures, insert 32 bits of padding after/before each
field of struct net_device_stats to make its layout compatible with
struct rtnl_link_stats64.  Add an anonymous union in net_device; move
stats into the union and add struct rtnl_link_stats64 stats64.

Add net_device_ops::ndo_get_stats64, implementations of which will
return a pointer to struct rtnl_link_stats64.  Drivers that implement
this operation must not update the structure asynchronously.

Change dev_get_stats() to call ndo_get_stats64 if available, and to
return a pointer to struct rtnl_link_stats64.  Change callers of
dev_get_stats() accordingly.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 13 +++---
 include/linux/if_link.h         |  3 +-
 include/linux/netdevice.h       | 91 ++++++++++++++++++++++++-----------------
 net/8021q/vlanproc.c            | 13 +++---
 net/core/dev.c                  | 19 +++++----
 net/core/net-sysfs.c            | 12 +++---
 net/core/rtnetlink.c            |  6 +--
 7 files changed, 90 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index ac4f94b7da3..a95a41b74b4 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3804,20 +3804,21 @@ static int bond_close(struct net_device *bond_dev)
 	return 0;
 }
 
-static struct net_device_stats *bond_get_stats(struct net_device *bond_dev)
+static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
-	struct net_device_stats *stats = &bond_dev->stats;
-	struct net_device_stats local_stats;
+	struct rtnl_link_stats64 *stats = &bond_dev->stats64;
+	struct rtnl_link_stats64 local_stats;
 	struct slave *slave;
 	int i;
 
-	memset(&local_stats, 0, sizeof(struct net_device_stats));
+	memset(&local_stats, 0, sizeof(local_stats));
 
 	read_lock_bh(&bond->lock);
 
 	bond_for_each_slave(bond, slave, i) {
-		const struct net_device_stats *sstats = dev_get_stats(slave->dev);
+		const struct rtnl_link_stats64 *sstats =
+			dev_get_stats(slave->dev);
 
 		local_stats.rx_packets += sstats->rx_packets;
 		local_stats.rx_bytes += sstats->rx_bytes;
@@ -4569,7 +4570,7 @@ static const struct net_device_ops bond_netdev_ops = {
 	.ndo_stop		= bond_close,
 	.ndo_start_xmit		= bond_start_xmit,
 	.ndo_select_queue	= bond_select_queue,
-	.ndo_get_stats		= bond_get_stats,
+	.ndo_get_stats64	= bond_get_stats,
 	.ndo_do_ioctl		= bond_do_ioctl,
 	.ndo_set_multicast_list	= bond_set_multicast_list,
 	.ndo_change_mtu		= bond_change_mtu,
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 85c812db5a3..7fcad2e1be3 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -4,7 +4,7 @@
 #include <linux/types.h>
 #include <linux/netlink.h>
 
-/* The struct should be in sync with struct net_device_stats */
+/* This struct should be in sync with struct rtnl_link_stats64 */
 struct rtnl_link_stats {
 	__u32	rx_packets;		/* total packets received	*/
 	__u32	tx_packets;		/* total packets transmitted	*/
@@ -37,6 +37,7 @@ struct rtnl_link_stats {
 	__u32	tx_compressed;
 };
 
+/* The main device statistics structure */
 struct rtnl_link_stats64 {
 	__u64	rx_packets;		/* total packets received	*/
 	__u64	tx_packets;		/* total packets transmitted	*/
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c319f28d699..4fbccc5f609 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -159,45 +159,49 @@ static inline bool dev_xmit_complete(int rc)
 #define MAX_HEADER (LL_MAX_HEADER + 48)
 #endif
 
-#endif  /*  __KERNEL__  */
-
 /*
- *	Network device statistics. Akin to the 2.0 ether stats but
- *	with byte counters.
+ *	Old network device statistics. Fields are native words
+ *	(unsigned long) so they can be read and written atomically.
+ *	Each field is padded to 64 bits for compatibility with
+ *	rtnl_link_stats64.
  */
 
+#if BITS_PER_LONG == 64
+#define NET_DEVICE_STATS_DEFINE(name)	unsigned long name
+#elif defined(__LITTLE_ENDIAN)
+#define NET_DEVICE_STATS_DEFINE(name)	unsigned long name, pad_ ## name
+#else
+#define NET_DEVICE_STATS_DEFINE(name)	unsigned long pad_ ## name, name
+#endif
+
 struct net_device_stats {
-	unsigned long	rx_packets;		/* total packets received	*/
-	unsigned long	tx_packets;		/* total packets transmitted	*/
-	unsigned long	rx_bytes;		/* total bytes received 	*/
-	unsigned long	tx_bytes;		/* total bytes transmitted	*/
-	unsigned long	rx_errors;		/* bad packets received		*/
-	unsigned long	tx_errors;		/* packet transmit problems	*/
-	unsigned long	rx_dropped;		/* no space in linux buffers	*/
-	unsigned long	tx_dropped;		/* no space available in linux	*/
-	unsigned long	multicast;		/* multicast packets received	*/
-	unsigned long	collisions;
-
-	/* detailed rx_errors: */
-	unsigned long	rx_length_errors;
-	unsigned long	rx_over_errors;		/* receiver ring buff overflow	*/
-	unsigned long	rx_crc_errors;		/* recved pkt with crc error	*/
-	unsigned long	rx_frame_errors;	/* recv'd frame alignment error */
-	unsigned long	rx_fifo_errors;		/* recv'r fifo overrun		*/
-	unsigned long	rx_missed_errors;	/* receiver missed packet	*/
-
-	/* detailed tx_errors */
-	unsigned long	tx_aborted_errors;
-	unsigned long	tx_carrier_errors;
-	unsigned long	tx_fifo_errors;
-	unsigned long	tx_heartbeat_errors;
-	unsigned long	tx_window_errors;
-	
-	/* for cslip etc */
-	unsigned long	rx_compressed;
-	unsigned long	tx_compressed;
+	NET_DEVICE_STATS_DEFINE(rx_packets);
+	NET_DEVICE_STATS_DEFINE(tx_packets);
+	NET_DEVICE_STATS_DEFINE(rx_bytes);
+	NET_DEVICE_STATS_DEFINE(tx_bytes);
+	NET_DEVICE_STATS_DEFINE(rx_errors);
+	NET_DEVICE_STATS_DEFINE(tx_errors);
+	NET_DEVICE_STATS_DEFINE(rx_dropped);
+	NET_DEVICE_STATS_DEFINE(tx_dropped);
+	NET_DEVICE_STATS_DEFINE(multicast);
+	NET_DEVICE_STATS_DEFINE(collisions);
+	NET_DEVICE_STATS_DEFINE(rx_length_errors);
+	NET_DEVICE_STATS_DEFINE(rx_over_errors);
+	NET_DEVICE_STATS_DEFINE(rx_crc_errors);
+	NET_DEVICE_STATS_DEFINE(rx_frame_errors);
+	NET_DEVICE_STATS_DEFINE(rx_fifo_errors);
+	NET_DEVICE_STATS_DEFINE(rx_missed_errors);
+	NET_DEVICE_STATS_DEFINE(tx_aborted_errors);
+	NET_DEVICE_STATS_DEFINE(tx_carrier_errors);
+	NET_DEVICE_STATS_DEFINE(tx_fifo_errors);
+	NET_DEVICE_STATS_DEFINE(tx_heartbeat_errors);
+	NET_DEVICE_STATS_DEFINE(tx_window_errors);
+	NET_DEVICE_STATS_DEFINE(rx_compressed);
+	NET_DEVICE_STATS_DEFINE(tx_compressed);
 };
 
+#endif  /*  __KERNEL__  */
+
 
 /* Media selection options. */
 enum {
@@ -662,10 +666,19 @@ struct netdev_rx_queue {
  *	Callback uses when the transmitter has not made any progress
  *	for dev->watchdog ticks.
  *
+ * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev);
  * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
  *	Called when a user wants to get the network device usage
- *	statistics. If not defined, the counters in dev->stats will
- *	be used.
+ *	statistics. Drivers must do one of the following:
+ *	1. Define @ndo_get_stats64 to update a rtnl_link_stats64 structure
+ *	   (which should normally be dev->stats64) and return a pointer to
+ *	   it. The structure must not be changed asynchronously.
+ *	2. Define @ndo_get_stats to update a net_device_stats structure
+ *	   (which should normally be dev->stats) and return a pointer to
+ *	   it. The structure may be changed asynchronously only if each
+ *	   field is written atomically.
+ *	3. Update dev->stats asynchronously and atomically, and define
+ *	   neither operation.
  *
  * void (*ndo_vlan_rx_register)(struct net_device *dev, struct vlan_group *grp);
  *	If device support VLAN receive accleration
@@ -720,6 +733,7 @@ struct net_device_ops {
 						   struct neigh_parms *);
 	void			(*ndo_tx_timeout) (struct net_device *dev);
 
+	struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev);
 	struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
 
 	void			(*ndo_vlan_rx_register)(struct net_device *dev,
@@ -869,7 +883,10 @@ struct net_device {
 	int			ifindex;
 	int			iflink;
 
-	struct net_device_stats	stats;
+	union {
+		struct rtnl_link_stats64 stats64;
+		struct net_device_stats stats;
+	};
 
 #ifdef CONFIG_WIRELESS_EXT
 	/* List of functions to handle Wireless Extensions (instead of ioctl).
@@ -2121,7 +2138,7 @@ extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
 extern void		dev_load(struct net *net, const char *name);
 extern void		dev_mcast_init(void);
-extern const struct net_device_stats *dev_get_stats(struct net_device *dev);
+extern const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev);
 extern void		dev_txq_stats_fold(const struct net_device *dev, struct net_device_stats *stats);
 
 extern int		netdev_max_backlog;
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index afead353e21..df56f5ce887 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -278,8 +278,9 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 {
 	struct net_device *vlandev = (struct net_device *) seq->private;
 	const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
-	const struct net_device_stats *stats;
+	const struct rtnl_link_stats64 *stats;
 	static const char fmt[] = "%30s %12lu\n";
+	static const char fmt64[] = "%30s %12llu\n";
 	int i;
 
 	if (!is_vlan_dev(vlandev))
@@ -291,12 +292,12 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 		   vlandev->name, dev_info->vlan_id,
 		   (int)(dev_info->flags & 1), vlandev->priv_flags);
 
-	seq_printf(seq, fmt, "total frames received", stats->rx_packets);
-	seq_printf(seq, fmt, "total bytes received", stats->rx_bytes);
-	seq_printf(seq, fmt, "Broadcast/Multicast Rcvd", stats->multicast);
+	seq_printf(seq, fmt64, "total frames received", stats->rx_packets);
+	seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes);
+	seq_printf(seq, fmt64, "Broadcast/Multicast Rcvd", stats->multicast);
 	seq_puts(seq, "\n");
-	seq_printf(seq, fmt, "total frames transmitted", stats->tx_packets);
-	seq_printf(seq, fmt, "total bytes transmitted", stats->tx_bytes);
+	seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets);
+	seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes);
 	seq_printf(seq, fmt, "total headroom inc",
 		   dev_info->cnt_inc_headroom_on_tx);
 	seq_printf(seq, fmt, "total encap on xmit",
diff --git a/net/core/dev.c b/net/core/dev.c
index 277844901ce..a1abc10db08 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3701,10 +3701,10 @@ void dev_seq_stop(struct seq_file *seq, void *v)
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
-	const struct net_device_stats *stats = dev_get_stats(dev);
+	const struct rtnl_link_stats64 *stats = dev_get_stats(dev);
 
-	seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
-		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
+	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
+		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
 		   dev->name, stats->rx_bytes, stats->rx_packets,
 		   stats->rx_errors,
 		   stats->rx_dropped + stats->rx_missed_errors,
@@ -5281,18 +5281,21 @@ EXPORT_SYMBOL(dev_txq_stats_fold);
  *	@dev: device to get statistics from
  *
  *	Get network statistics from device. The device driver may provide
- *	its own method by setting dev->netdev_ops->get_stats; otherwise
- *	the internal statistics structure is used.
+ *	its own method by setting dev->netdev_ops->ndo_get_stats64 or
+ *	dev->netdev_ops->ndo_get_stats; otherwise the internal statistics
+ *	structure is used.
  */
-const struct net_device_stats *dev_get_stats(struct net_device *dev)
+const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 
+	if (ops->ndo_get_stats64)
+		return ops->ndo_get_stats64(dev);
 	if (ops->ndo_get_stats)
-		return ops->ndo_get_stats(dev);
+		return (struct rtnl_link_stats64 *)ops->ndo_get_stats(dev);
 
 	dev_txq_stats_fold(dev, &dev->stats);
-	return &dev->stats;
+	return &dev->stats64;
 }
 EXPORT_SYMBOL(dev_get_stats);
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 99e7052d732..ea3bb4c3b87 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -29,6 +29,7 @@ static const char fmt_hex[] = "%#x\n";
 static const char fmt_long_hex[] = "%#lx\n";
 static const char fmt_dec[] = "%d\n";
 static const char fmt_ulong[] = "%lu\n";
+static const char fmt_u64[] = "%llu\n";
 
 static inline int dev_isalive(const struct net_device *dev)
 {
@@ -324,14 +325,13 @@ static ssize_t netstat_show(const struct device *d,
 	struct net_device *dev = to_net_dev(d);
 	ssize_t ret = -EINVAL;
 
-	WARN_ON(offset > sizeof(struct net_device_stats) ||
-			offset % sizeof(unsigned long) != 0);
+	WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
+			offset % sizeof(u64) != 0);
 
 	read_lock(&dev_base_lock);
 	if (dev_isalive(dev)) {
-		const struct net_device_stats *stats = dev_get_stats(dev);
-		ret = sprintf(buf, fmt_ulong,
-			      *(unsigned long *)(((u8 *) stats) + offset));
+		const struct rtnl_link_stats64 *stats = dev_get_stats(dev);
+		ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
 	}
 	read_unlock(&dev_base_lock);
 	return ret;
@@ -343,7 +343,7 @@ static ssize_t show_##name(struct device *d,				\
 			   struct device_attribute *attr, char *buf) 	\
 {									\
 	return netstat_show(d, attr, buf,				\
-			    offsetof(struct net_device_stats, name));	\
+			    offsetof(struct rtnl_link_stats64, name));	\
 }									\
 static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1a2af24e9e3..e645778e9b7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -579,7 +579,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev,
 }
 
 static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
-				 const struct net_device_stats *b)
+				 const struct rtnl_link_stats64 *b)
 {
 	a->rx_packets = b->rx_packets;
 	a->tx_packets = b->tx_packets;
@@ -610,7 +610,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
 	a->tx_compressed = b->tx_compressed;
 }
 
-static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b)
+static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
 {
 	struct rtnl_link_stats64 a;
 
@@ -791,7 +791,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
-	const struct net_device_stats *stats;
+	const struct rtnl_link_stats64 *stats;
 	struct nlattr *attr;
 
 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
-- 
cgit v1.2.3-70-g09d2


From f5c5440d40a24c5dc8030cde0a03debe87de4afb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 14 Jun 2010 16:15:23 +0200
Subject: netfilter: nfnetlink_log: RCU conversion, part 2

- must use atomic_inc_not_zero() in instance_lookup_get()

- must use hlist_add_head_rcu() instead of hlist_add_head()

- must use hlist_del_rcu() instead of hlist_del()

- Introduce NFULNL_COPY_DISABLED to stop lockless reader from using an
instance, before we do final instance_put() on it.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nfnetlink_log.h |  1 +
 net/netfilter/nfnetlink_log.c           | 18 ++++++++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h
index d3bab7a2c9b..1d0b84aa1d4 100644
--- a/include/linux/netfilter/nfnetlink_log.h
+++ b/include/linux/netfilter/nfnetlink_log.h
@@ -89,6 +89,7 @@ enum nfulnl_attr_config {
 #define NFULNL_COPY_NONE	0x00
 #define NFULNL_COPY_META	0x01
 #define NFULNL_COPY_PACKET	0x02
+#define NFULNL_COPY_DISABLED	0x03
 
 #define NFULNL_CFG_F_SEQ	0x0001
 #define NFULNL_CFG_F_SEQ_GLOBAL	0x0002
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 8ec23ec568e..fb86a51bb65 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -109,8 +109,8 @@ instance_lookup_get(u_int16_t group_num)
 
 	rcu_read_lock_bh();
 	inst = __instance_lookup(group_num);
-	if (inst)
-		instance_get(inst);
+	if (inst && !atomic_inc_not_zero(&inst->use))
+		inst = NULL;
 	rcu_read_unlock_bh();
 
 	return inst;
@@ -171,7 +171,7 @@ instance_create(u_int16_t group_num, int pid)
 	inst->copy_mode 	= NFULNL_COPY_PACKET;
 	inst->copy_range 	= NFULNL_COPY_RANGE_MAX;
 
-	hlist_add_head(&inst->hlist,
+	hlist_add_head_rcu(&inst->hlist,
 		       &instance_table[instance_hashfn(group_num)]);
 
 	spin_unlock_bh(&instances_lock);
@@ -185,18 +185,23 @@ out_unlock:
 
 static void __nfulnl_flush(struct nfulnl_instance *inst);
 
+/* called with BH disabled */
 static void
 __instance_destroy(struct nfulnl_instance *inst)
 {
 	/* first pull it out of the global list */
-	hlist_del(&inst->hlist);
+	hlist_del_rcu(&inst->hlist);
 
 	/* then flush all pending packets from skb */
 
-	spin_lock_bh(&inst->lock);
+	spin_lock(&inst->lock);
+
+	/* lockless readers won't be able to use us */
+	inst->copy_mode = NFULNL_COPY_DISABLED;
+
 	if (inst->skb)
 		__nfulnl_flush(inst);
-	spin_unlock_bh(&inst->lock);
+	spin_unlock(&inst->lock);
 
 	/* and finally put the refcount */
 	instance_put(inst);
@@ -624,6 +629,7 @@ nfulnl_log_packet(u_int8_t pf,
 		size += nla_total_size(data_len);
 		break;
 
+	case NFULNL_COPY_DISABLED:
 	default:
 		goto unlock_and_release;
 	}
-- 
cgit v1.2.3-70-g09d2


From 5d22c89b9bea17a0e48e7534a9b237885e2c0809 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 10 Jun 2010 10:21:40 +0200
Subject: mac80211: remove non-irqsafe aggregation callbacks

The non-irqsafe aggregation start/stop done
callbacks are currently only used by ath9k_htc,
and can cause callbacks into the driver again.
This might lead to locking issues, which will
only get worse as we modify locking. To avoid
trouble, remove the non-irqsafe versions and
change ath9k_htc to use the irqsafe versions instead.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath9k/htc_drv_main.c |  6 ++---
 include/net/mac80211.h                        | 32 +++++----------------------
 net/mac80211/agg-tx.c                         |  2 --
 net/mac80211/ieee80211_i.h                    |  2 ++
 4 files changed, 10 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
index 7aefbc63877..8c463f5965f 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
@@ -510,13 +510,13 @@ void ath9k_htc_aggr_work(struct work_struct *work)
 		ret = ath9k_htc_aggr_oper(priv, wk->vif, wk->sta_addr,
 					  wk->tid, true);
 		if (!ret)
-			ieee80211_start_tx_ba_cb(wk->vif, wk->sta_addr,
-						 wk->tid);
+			ieee80211_start_tx_ba_cb_irqsafe(wk->vif, wk->sta_addr,
+							 wk->tid);
 		break;
 	case IEEE80211_AMPDU_TX_STOP:
 		ath9k_htc_aggr_oper(priv, wk->vif, wk->sta_addr,
 				    wk->tid, false);
-		ieee80211_stop_tx_ba_cb(wk->vif, wk->sta_addr, wk->tid);
+		ieee80211_stop_tx_ba_cb_irqsafe(wk->vif, wk->sta_addr, wk->tid);
 		break;
 	default:
 		ath_print(ath9k_hw_common(priv->ah), ATH_DBG_FATAL,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index abb3b1a9ddc..7f9401b3d3c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1445,7 +1445,7 @@ enum ieee80211_filter_flags {
  *
  * Note that drivers MUST be able to deal with a TX aggregation
  * session being stopped even before they OK'ed starting it by
- * calling ieee80211_start_tx_ba_cb(_irqsafe), because the peer
+ * calling ieee80211_start_tx_ba_cb_irqsafe, because the peer
  * might receive the addBA frame and send a delBA right away!
  *
  * @IEEE80211_AMPDU_RX_START: start Rx aggregation
@@ -2313,17 +2313,6 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw,
  */
 int ieee80211_start_tx_ba_session(struct ieee80211_sta *sta, u16 tid);
 
-/**
- * ieee80211_start_tx_ba_cb - low level driver ready to aggregate.
- * @vif: &struct ieee80211_vif pointer from the add_interface callback
- * @ra: receiver address of the BA session recipient.
- * @tid: the TID to BA on.
- *
- * This function must be called by low level driver once it has
- * finished with preparations for the BA session.
- */
-void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid);
-
 /**
  * ieee80211_start_tx_ba_cb_irqsafe - low level driver ready to aggregate.
  * @vif: &struct ieee80211_vif pointer from the add_interface callback
@@ -2331,8 +2320,8 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid);
  * @tid: the TID to BA on.
  *
  * This function must be called by low level driver once it has
- * finished with preparations for the BA session.
- * This version of the function is IRQ-safe.
+ * finished with preparations for the BA session. It can be called
+ * from any context.
  */
 void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra,
 				      u16 tid);
@@ -2350,17 +2339,6 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra,
  */
 int ieee80211_stop_tx_ba_session(struct ieee80211_sta *sta, u16 tid);
 
-/**
- * ieee80211_stop_tx_ba_cb - low level driver ready to stop aggregate.
- * @vif: &struct ieee80211_vif pointer from the add_interface callback
- * @ra: receiver address of the BA session recipient.
- * @tid: the desired TID to BA on.
- *
- * This function must be called by low level driver once it has
- * finished with preparations for the BA session tear down.
- */
-void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid);
-
 /**
  * ieee80211_stop_tx_ba_cb_irqsafe - low level driver ready to stop aggregate.
  * @vif: &struct ieee80211_vif pointer from the add_interface callback
@@ -2368,8 +2346,8 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid);
  * @tid: the desired TID to BA on.
  *
  * This function must be called by low level driver once it has
- * finished with preparations for the BA session tear down.
- * This version of the function is IRQ-safe.
+ * finished with preparations for the BA session tear down. It
+ * can be called from any context.
  */
 void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra,
 				     u16 tid);
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 7d8656d51c6..5a7ef51e302 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -479,7 +479,6 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
 	spin_unlock_bh(&sta->lock);
 	rcu_read_unlock();
 }
-EXPORT_SYMBOL(ieee80211_start_tx_ba_cb);
 
 void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
 				      const u8 *ra, u16 tid)
@@ -619,7 +618,6 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
 	spin_unlock_bh(&sta->lock);
 	rcu_read_unlock();
 }
-EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb);
 
 void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
 				     const u8 *ra, u16 tid)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 71bdd8b3c3f..a3ae5130803 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1119,6 +1119,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 
 int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 				   enum ieee80211_back_parties initiator);
+void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid);
+void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid);
 
 /* Spectrum management */
 void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
-- 
cgit v1.2.3-70-g09d2


From 85ad181ea78861f69b007599cec9e6ba33fcdf8a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 10 Jun 2010 10:21:49 +0200
Subject: mac80211: allow drivers to sleep in ampdu_action

Allow drivers to sleep, and indicate this in
the documentation. ath9k has some locking I
don't understand, so keep it safe and disable
BHs in it, all other drivers look fine with
the context change.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath9k/main.c | 4 ++++
 include/net/mac80211.h                | 2 +-
 net/mac80211/driver-ops.h             | 3 +--
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index b8b76dd2c11..e1b8456f3d2 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -1769,6 +1769,8 @@ static int ath9k_ampdu_action(struct ieee80211_hw *hw,
 	struct ath_softc *sc = aphy->sc;
 	int ret = 0;
 
+	local_bh_disable();
+
 	switch (action) {
 	case IEEE80211_AMPDU_RX_START:
 		if (!(sc->sc_flags & SC_OP_RXAGGR))
@@ -1798,6 +1800,8 @@ static int ath9k_ampdu_action(struct ieee80211_hw *hw,
 			  "Unknown AMPDU action\n");
 	}
 
+	local_bh_enable();
+
 	return ret;
 }
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7f9401b3d3c..bbae3d9b117 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1640,7 +1640,7 @@ enum ieee80211_ampdu_mlme_action {
  * 	is the first frame we expect to perform the action on. Notice
  * 	that TX/RX_STOP can pass NULL for this parameter.
  *	Returns a negative error code on failure.
- *	The callback must be atomic.
+ *	The callback can sleep.
  *
  * @get_survey: Return per-channel survey information
  *
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 7e86c6f89be..a4fcbcc4f45 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -352,11 +352,10 @@ static inline int drv_ampdu_action(struct ieee80211_local *local,
 
 	might_sleep();
 
-	local_bh_disable();
 	if (local->ops->ampdu_action)
 		ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action,
 					       sta, tid, ssn);
-	local_bh_enable();
+
 	trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, ret);
 	return ret;
 }
-- 
cgit v1.2.3-70-g09d2


From fbd2c8dcbc69616d2e15b8a269a86b3a05d45aea Mon Sep 17 00:00:00 2001
From: Teemu Paasikivi <ext-teemu.3.paasikivi@nokia.com>
Date: Mon, 14 Jun 2010 12:55:31 +0300
Subject: mac80211: Set basic rates while joining ibss network

This patch adds support to nl80211 and mac80211 to set basic rates when
joining/creating ibss network.

The original patch was posted by Johannes Berg on the linux-wireless mailing list.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Teemu Paasikivi <ext-teemu.3.paasikivi@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h     |  2 ++
 net/mac80211/ibss.c        |  4 +++-
 net/mac80211/ieee80211_i.h |  2 ++
 net/wireless/nl80211.c     | 49 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 56 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 22ab9d88cf4..64374f4cb7c 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -810,6 +810,7 @@ struct cfg80211_disassoc_request {
  * @beacon_interval: beacon interval to use
  * @privacy: this is a protected network, keys will be configured
  *	after joining
+ * @basic_rates: bitmap of basic rates to use when creating the IBSS
  */
 struct cfg80211_ibss_params {
 	u8 *ssid;
@@ -818,6 +819,7 @@ struct cfg80211_ibss_params {
 	u8 *ie;
 	u8 ssid_len, ie_len;
 	u16 beacon_interval;
+	u32 basic_rates;
 	bool channel_fixed;
 	bool privacy;
 };
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index bfd7286488c..9f4e64ed8b8 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -172,6 +172,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	rcu_assign_pointer(ifibss->presp, skb);
 
 	sdata->vif.bss_conf.beacon_int = beacon_int;
+	sdata->vif.bss_conf.basic_rates = basic_rates;
 	bss_change = BSS_CHANGED_BEACON_INT;
 	bss_change |= ieee80211_reset_erp_info(sdata);
 	bss_change |= BSS_CHANGED_BSSID;
@@ -529,7 +530,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
 		sdata->drop_unencrypted = 0;
 
 	__ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int,
-				  ifibss->channel, 3, /* first two are basic */
+				  ifibss->channel, ifibss->basic_rates,
 				  capability, 0);
 }
 
@@ -859,6 +860,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 		sdata->u.ibss.fixed_bssid = false;
 
 	sdata->u.ibss.privacy = params->privacy;
+	sdata->u.ibss.basic_rates = params->basic_rates;
 
 	sdata->vif.bss_conf.beacon_int = params->beacon_interval;
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9d753a02a2e..c3c2be3f8a2 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -387,6 +387,8 @@ struct ieee80211_if_ibss {
 	unsigned long request;
 	unsigned long last_scan_completed;
 
+	u32 basic_rates;
+
 	bool timer_running;
 
 	bool fixed_bssid;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c65e67e9231..41529aca794 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3955,6 +3955,55 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
+	if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) {
+		u8 *rates =
+			nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
+		int n_rates =
+			nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
+		struct ieee80211_supported_band *sband =
+			wiphy->bands[ibss.channel->band];
+		int i, j;
+
+		if (n_rates == 0) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		for (i = 0; i < n_rates; i++) {
+			int rate = (rates[i] & 0x7f) * 5;
+			bool found = false;
+
+			for (j = 0; j < sband->n_bitrates; j++) {
+				if (sband->bitrates[j].bitrate == rate) {
+					found = true;
+					ibss.basic_rates |= BIT(j);
+					break;
+				}
+			}
+			if (!found) {
+				err = -EINVAL;
+				goto out;
+			}
+		}
+	} else {
+		/*
+		* If no rates were explicitly configured,
+		* use the mandatory rate set for 11b or
+		* 11a for maximum compatibility.
+		*/
+		struct ieee80211_supported_band *sband =
+			wiphy->bands[ibss.channel->band];
+		int j;
+		u32 flag = ibss.channel->band == IEEE80211_BAND_5GHZ ?
+			IEEE80211_RATE_MANDATORY_A :
+			IEEE80211_RATE_MANDATORY_B;
+
+		for (j = 0; j < sband->n_bitrates; j++) {
+			if (sband->bitrates[j].flags & flag)
+				ibss.basic_rates |= BIT(j);
+		}
+	}
+
 	err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys);
 
 out:
-- 
cgit v1.2.3-70-g09d2


From 685429623f88d84f98bd5daffc3c427c408740d4 Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Wed, 9 Jun 2010 13:43:26 +0300
Subject: mac80211: Fix circular locking dependency in ARP filter handling

There is a circular locking dependency when configuring the
hardware ARP filters on association, occurring when flushing the mac80211
workqueue. This is what happens:

[   92.026800] =======================================================
[   92.030507] [ INFO: possible circular locking dependency detected ]
[   92.030507] 2.6.34-04781-g2b2c009 #85
[   92.030507] -------------------------------------------------------
[   92.030507] modprobe/5225 is trying to acquire lock:
[   92.030507]  ((wiphy_name(local->hw.wiphy))){+.+.+.}, at: [<ffffffff8105b5c0>] flush_workqueue+0x0/0xb0
[   92.030507]
[   92.030507] but task is already holding lock:
[   92.030507]  (rtnl_mutex){+.+.+.}, at: [<ffffffff812b9ce2>] rtnl_lock+0x12/0x20
[   92.030507]
[   92.030507] which lock already depends on the new lock.
[   92.030507]
[   92.030507]
[   92.030507] the existing dependency chain (in reverse order) is:
[   92.030507]
[   92.030507] -> #2 (rtnl_mutex){+.+.+.}:
[   92.030507]        [<ffffffff810761fb>] lock_acquire+0xdb/0x110
[   92.030507]        [<ffffffff81341754>] mutex_lock_nested+0x44/0x300
[   92.030507]        [<ffffffff812b9ce2>] rtnl_lock+0x12/0x20
[   92.030507]        [<ffffffffa022d47c>] ieee80211_assoc_done+0x6c/0xe0 [mac80211]
[   92.030507]        [<ffffffffa022f2ad>] ieee80211_work_work+0x31d/0x1280 [mac80211]

[   92.030507] -> #1 ((&local->work_work)){+.+.+.}:
[   92.030507]        [<ffffffff810761fb>] lock_acquire+0xdb/0x110
[   92.030507]        [<ffffffff8105a51a>] worker_thread+0x22a/0x370
[   92.030507]        [<ffffffff8105ecc6>] kthread+0x96/0xb0
[   92.030507]        [<ffffffff81003a94>] kernel_thread_helper+0x4/0x10
[   92.030507]
[   92.030507] -> #0 ((wiphy_name(local->hw.wiphy))){+.+.+.}:
[   92.030507]        [<ffffffff81075fdc>] __lock_acquire+0x1c0c/0x1d50
[   92.030507]        [<ffffffff810761fb>] lock_acquire+0xdb/0x110
[   92.030507]        [<ffffffff8105b60e>] flush_workqueue+0x4e/0xb0
[   92.030507]        [<ffffffffa023ff7b>] ieee80211_stop_device+0x2b/0xb0 [mac80211]
[   92.030507]        [<ffffffffa0231635>] ieee80211_stop+0x3e5/0x680 [mac80211]

The locking in this case is quite complex. Fix the problem by rewriting the
way the hardware ARP filter list is handled - i.e. make a copy of the address
list to the bss_conf struct, and provide that list to the hardware driver
when needed.

The current patch will enable filtering also in promiscuous mode. This may need
to be changed in the future.

Reported-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h      | 35 +++++++++++++++++------------
 net/mac80211/driver-ops.h   | 17 --------------
 net/mac80211/driver-trace.h | 22 ------------------
 net/mac80211/ieee80211_i.h  |  2 ++
 net/mac80211/iface.c        |  3 +++
 net/mac80211/main.c         | 54 ++++++++++++++++++++++++++++-----------------
 net/mac80211/mlme.c         | 34 +++++++++++++++-------------
 7 files changed, 79 insertions(+), 88 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index bbae3d9b117..3a47877f496 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -19,7 +19,6 @@
 #include <linux/wireless.h>
 #include <linux/device.h>
 #include <linux/ieee80211.h>
-#include <linux/inetdevice.h>
 #include <net/cfg80211.h>
 
 /**
@@ -147,6 +146,7 @@ struct ieee80211_low_level_stats {
  *	enabled/disabled (beaconing modes)
  * @BSS_CHANGED_CQM: Connection quality monitor config changed
  * @BSS_CHANGED_IBSS: IBSS join status changed
+ * @BSS_CHANGED_ARP_FILTER: Hardware ARP filter address list or state changed.
  */
 enum ieee80211_bss_change {
 	BSS_CHANGED_ASSOC		= 1<<0,
@@ -161,10 +161,18 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_BEACON_ENABLED	= 1<<9,
 	BSS_CHANGED_CQM			= 1<<10,
 	BSS_CHANGED_IBSS		= 1<<11,
+	BSS_CHANGED_ARP_FILTER		= 1<<12,
 
 	/* when adding here, make sure to change ieee80211_reconfig */
 };
 
+/*
+ * The maximum number of IPv4 addresses listed for ARP filtering. If the number
+ * of addresses for an interface increase beyond this value, hardware ARP
+ * filtering will be disabled.
+ */
+#define IEEE80211_BSS_ARP_ADDR_LIST_LEN 4
+
 /**
  * struct ieee80211_bss_conf - holds the BSS's changing parameters
  *
@@ -200,6 +208,15 @@ enum ieee80211_bss_change {
  * @cqm_rssi_thold: Connection quality monitor RSSI threshold, a zero value
  *	implies disabled
  * @cqm_rssi_hyst: Connection quality monitor RSSI hysteresis
+ * @arp_addr_list: List of IPv4 addresses for hardware ARP filtering. The
+ *	hardware may filter ARP queries targeted at addresses not listed here.
+ *	The driver must allow ARP queries targeted for all address listed here
+ *	to pass through. An empty list implies no ARP queries need to pass.
+ * @arp_addr_cnt: Number of addresses currently on the list.
+ * @arp_filter_enabled: Enable ARP filtering - if enabled, the hardware may
+ *	filter ARP queries based on the @arp_addr_list, if disabled, the
+ *	hardware must not perform any ARP filtering. Note, that the filter will
+ *	be enabled also in promiscuous mode.
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
@@ -220,6 +237,9 @@ struct ieee80211_bss_conf {
 	s32 cqm_rssi_thold;
 	u32 cqm_rssi_hyst;
 	enum nl80211_channel_type channel_type;
+	__be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN];
+	u8 arp_addr_cnt;
+	bool arp_filter_enabled;
 };
 
 /**
@@ -1529,16 +1549,6 @@ enum ieee80211_ampdu_mlme_action {
  *	of the bss parameters has changed when a call is made. The callback
  *	can sleep.
  *
- * @configure_arp_filter: Configuration function for hardware ARP query filter.
- *	This function is called with all the IP addresses configured to the
- *	interface as argument - all ARP queries targeted to any of these
- *	addresses must pass through. If the hardware filter does not support
- *	enought addresses, hardware filtering must be disabled. The ifa_list
- *	argument may be NULL, indicating that filtering must be disabled.
- *	This function is called upon association complete with current
- *	address(es), and while associated whenever the IP address(es) change.
- *	The callback can sleep.
- *
  * @prepare_multicast: Prepare for multicast filter configuration.
  *	This callback is optional, and its return value is passed
  *	to configure_filter(). This callback must be atomic.
@@ -1678,9 +1688,6 @@ struct ieee80211_ops {
 				 struct ieee80211_vif *vif,
 				 struct ieee80211_bss_conf *info,
 				 u32 changed);
-	int (*configure_arp_filter)(struct ieee80211_hw *hw,
-				    struct ieee80211_vif *vif,
-				    struct in_ifaddr *ifa_list);
 	u64 (*prepare_multicast)(struct ieee80211_hw *hw,
 				 struct netdev_hw_addr_list *mc_list);
 	void (*configure_filter)(struct ieee80211_hw *hw,
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 965d64f6856..c33317320ee 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -89,23 +89,6 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local,
 	trace_drv_return_void(local);
 }
 
-struct in_ifaddr;
-static inline int drv_configure_arp_filter(struct ieee80211_local *local,
-					   struct ieee80211_vif *vif,
-					   struct in_ifaddr *ifa_list)
-{
-	int ret = 0;
-
-	might_sleep();
-
-	trace_drv_configure_arp_filter(local, vif_to_sdata(vif));
-	if (local->ops->configure_arp_filter)
-		ret = local->ops->configure_arp_filter(&local->hw, vif,
-						       ifa_list);
-	trace_drv_return_int(local, ret);
-	return ret;
-}
-
 static inline u64 drv_prepare_multicast(struct ieee80211_local *local,
 					struct netdev_hw_addr_list *mc_list)
 {
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 06444ea67bc..8da31caff93 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -251,28 +251,6 @@ TRACE_EVENT(drv_bss_info_changed,
 	)
 );
 
-TRACE_EVENT(drv_configure_arp_filter,
-	TP_PROTO(struct ieee80211_local *local,
-		 struct ieee80211_sub_if_data *sdata),
-
-	TP_ARGS(local, sdata),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-		VIF_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-		VIF_ASSIGN;
-	),
-
-	TP_printk(
-		VIF_PR_FMT LOCAL_PR_FMT,
-		VIF_PR_ARG, LOCAL_PR_ARG
-	)
-);
-
 TRACE_EVENT(drv_prepare_multicast,
 	TP_PROTO(struct ieee80211_local *local, int mc_count),
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c3c2be3f8a2..9b3c3f971d2 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -514,6 +514,8 @@ struct ieee80211_sub_if_data {
 	struct work_struct work;
 	struct sk_buff_head skb_queue;
 
+	bool arp_filter_state;
+
 	/*
 	 * AP this belongs to: self in AP mode and
 	 * corresponding AP in VLAN mode, NULL for
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 490be2f3af2..910729fc18c 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1076,6 +1076,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 	sdata->wdev.wiphy = local->hw.wiphy;
 	sdata->local = local;
 	sdata->dev = ndev;
+#ifdef CONFIG_INET
+	sdata->arp_filter_state = true;
+#endif
 
 	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
 		skb_queue_head_init(&sdata->fragments[i].skb_list);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index c2e46e88f3c..a1bf46c64b9 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -20,6 +20,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/bitmap.h>
 #include <linux/pm_qos_params.h>
+#include <linux/inetdevice.h>
 #include <net/net_namespace.h>
 #include <net/cfg80211.h>
 
@@ -317,23 +318,6 @@ static void ieee80211_recalc_smps_work(struct work_struct *work)
 }
 
 #ifdef CONFIG_INET
-int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata)
-{
-	struct in_device *idev;
-	int ret = 0;
-
-	BUG_ON(!sdata);
-	ASSERT_RTNL();
-
-	idev = sdata->dev->ip_ptr;
-	if (!idev)
-		return 0;
-
-	ret = drv_configure_arp_filter(sdata->local, &sdata->vif,
-				       idev->ifa_list);
-	return ret;
-}
-
 static int ieee80211_ifa_changed(struct notifier_block *nb,
 				 unsigned long data, void *arg)
 {
@@ -343,8 +327,11 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
 			     ifa_notifier);
 	struct net_device *ndev = ifa->ifa_dev->dev;
 	struct wireless_dev *wdev = ndev->ieee80211_ptr;
+	struct in_device *idev;
 	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_bss_conf *bss_conf;
 	struct ieee80211_if_managed *ifmgd;
+	int c = 0;
 
 	if (!netif_running(ndev))
 		return NOTIFY_DONE;
@@ -356,17 +343,44 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
 	if (wdev->wiphy != local->hw.wiphy)
 		return NOTIFY_DONE;
 
-	/* We are concerned about IP addresses only when associated */
 	sdata = IEEE80211_DEV_TO_SUB_IF(ndev);
+	bss_conf = &sdata->vif.bss_conf;
 
 	/* ARP filtering is only supported in managed mode */
 	if (sdata->vif.type != NL80211_IFTYPE_STATION)
 		return NOTIFY_DONE;
 
+	idev = sdata->dev->ip_ptr;
+	if (!idev)
+		return NOTIFY_DONE;
+
 	ifmgd = &sdata->u.mgd;
 	mutex_lock(&ifmgd->mtx);
-	if (ifmgd->associated)
-		ieee80211_set_arp_filter(sdata);
+
+	/* Copy the addresses to the bss_conf list */
+	ifa = idev->ifa_list;
+	while (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN && ifa) {
+		bss_conf->arp_addr_list[c] = ifa->ifa_address;
+		ifa = ifa->ifa_next;
+		c++;
+	}
+
+	/* If not all addresses fit the list, disable filtering */
+	if (ifa) {
+		sdata->arp_filter_state = false;
+		c = 0;
+	} else {
+		sdata->arp_filter_state = true;
+	}
+	bss_conf->arp_addr_cnt = c;
+
+	/* Configure driver only if associated */
+	if (ifmgd->associated) {
+		bss_conf->arp_filter_enabled = sdata->arp_filter_state;
+		ieee80211_bss_info_change_notify(sdata,
+						 BSS_CHANGED_ARP_FILTER);
+	}
+
 	mutex_unlock(&ifmgd->mtx);
 
 	return NOTIFY_DONE;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 583b34686a2..74479c2d12d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -806,11 +806,12 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_bss *bss = (void *)cbss->priv;
 	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
 
 	bss_info_changed |= BSS_CHANGED_ASSOC;
 	/* set timing information */
-	sdata->vif.bss_conf.beacon_int = cbss->beacon_interval;
-	sdata->vif.bss_conf.timestamp = cbss->tsf;
+	bss_conf->beacon_int = cbss->beacon_interval;
+	bss_conf->timestamp = cbss->tsf;
 
 	bss_info_changed |= BSS_CHANGED_BEACON_INT;
 	bss_info_changed |= ieee80211_handle_bss_capability(sdata,
@@ -835,7 +836,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_led_assoc(local, 1);
 
-	sdata->vif.bss_conf.assoc = 1;
+	bss_conf->assoc = 1;
 	/*
 	 * For now just always ask the driver to update the basic rateset
 	 * when we have associated, we aren't checking whether it actually
@@ -848,9 +849,15 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 
 	/* Tell the driver to monitor connection quality (if supported) */
 	if ((local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI) &&
-	    sdata->vif.bss_conf.cqm_rssi_thold)
+	    bss_conf->cqm_rssi_thold)
 		bss_info_changed |= BSS_CHANGED_CQM;
 
+	/* Enable ARP filtering */
+	if (bss_conf->arp_filter_enabled != sdata->arp_filter_state) {
+		bss_conf->arp_filter_enabled = sdata->arp_filter_state;
+		bss_info_changed |= BSS_CHANGED_ARP_FILTER;
+	}
+
 	ieee80211_bss_info_change_notify(sdata, bss_info_changed);
 
 	mutex_lock(&local->iflist_mtx);
@@ -932,6 +939,12 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_hw_config(local, config_changed);
 
+	/* Disable ARP filtering */
+	if (sdata->vif.bss_conf.arp_filter_enabled) {
+		sdata->vif.bss_conf.arp_filter_enabled = false;
+		changed |= BSS_CHANGED_ARP_FILTER;
+	}
+
 	/* The BSSID (not really interesting) and HT changed */
 	changed |= BSS_CHANGED_BSSID | BSS_CHANGED_HT;
 	ieee80211_bss_info_change_notify(sdata, changed);
@@ -2018,18 +2031,9 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk,
 			cfg80211_send_assoc_timeout(wk->sdata->dev,
 						    wk->filter_ta);
 			return WORK_DONE_DESTROY;
-		} else {
-			mutex_unlock(&wk->sdata->u.mgd.mtx);
-#ifdef CONFIG_INET
-			/*
-			 * configure ARP filter IP addresses to the driver,
-			 * intentionally outside the mgd mutex.
-			 */
-			rtnl_lock();
-			ieee80211_set_arp_filter(wk->sdata);
-			rtnl_unlock();
-#endif
 		}
+
+		mutex_unlock(&wk->sdata->u.mgd.mtx);
 	}
 
 	cfg80211_send_rx_assoc(wk->sdata->dev, skb->data, skb->len);
-- 
cgit v1.2.3-70-g09d2


From 551d55a944b143ef26fbd482d1c463199d6f65cf Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Sat, 17 Apr 2010 08:48:42 -0400
Subject: tree/tiny rcu: Add debug RCU head objects

Helps find racy users of call_rcu(), which result in hangs because list
entries are overwritten and/or skipped.

Changelog since v4:
- Bisectability is now OK
- Now generate a WARN_ON_ONCE() for non-initialized rcu_head passed to
  call_rcu(). Statically initialized objects are detected with
  object_is_static().
- Rename rcu_head_init_on_stack to init_rcu_head_on_stack.
- Remove init_rcu_head() completely.

Changelog since v3:
- Include comments from Lai Jiangshan

This new patch version is based on the debugobjects with the newly introduced
"active state" tracker.

Non-initialized entries are all considered as "statically initialized". An
activation fixup (triggered by call_rcu()) takes care of performing the debug
object initialization without issuing any warning. Since we cannot increase the
size of struct rcu_head, I don't see much room to put an identifier for
statically initialized rcu_head structures. So for now, we have to live without
"activation without explicit init" detection. But the main purpose of this debug
option is to detect double-activations (double call_rcu() use of a rcu_head
before the callback is executed), which is correctly addressed here.

This also detects potential internal RCU callback corruption, which would cause
the callbacks to be executed twice.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
CC: David S. Miller <davem@davemloft.net>
CC: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
CC: akpm@linux-foundation.org
CC: mingo@elte.hu
CC: laijs@cn.fujitsu.com
CC: dipankar@in.ibm.com
CC: josh@joshtriplett.org
CC: dvhltc@us.ibm.com
CC: niv@us.ibm.com
CC: tglx@linutronix.de
CC: peterz@infradead.org
CC: rostedt@goodmis.org
CC: Valdis.Kletnieks@vt.edu
CC: dhowells@redhat.com
CC: eric.dumazet@gmail.com
CC: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 include/linux/rcupdate.h |  49 +++++++++++++++
 kernel/rcupdate.c        | 160 +++++++++++++++++++++++++++++++++++++++++++++++
 kernel/rcutiny.c         |   2 +
 kernel/rcutree.c         |   2 +
 lib/Kconfig.debug        |   6 ++
 5 files changed, 219 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index b653b4aaa8a..2b7fc506e47 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -40,6 +40,7 @@
 #include <linux/seqlock.h>
 #include <linux/lockdep.h>
 #include <linux/completion.h>
+#include <linux/debugobjects.h>
 
 #ifdef CONFIG_RCU_TORTURE_TEST
 extern int rcutorture_runnable; /* for sysctl */
@@ -79,6 +80,16 @@ extern void rcu_init(void);
        (ptr)->next = NULL; (ptr)->func = NULL; \
 } while (0)
 
+/*
+ * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
+ * initialization and destruction of rcu_head on the stack. rcu_head structures
+ * allocated dynamically in the heap or defined statically don't need any
+ * initialization.
+ */
+#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
+extern void init_rcu_head_on_stack(struct rcu_head *head);
+extern void destroy_rcu_head_on_stack(struct rcu_head *head);
+#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 static inline void init_rcu_head_on_stack(struct rcu_head *head)
 {
 }
@@ -86,6 +97,7 @@ static inline void init_rcu_head_on_stack(struct rcu_head *head)
 static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
 {
 }
+#endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
@@ -517,4 +529,41 @@ extern void call_rcu(struct rcu_head *head,
 extern void call_rcu_bh(struct rcu_head *head,
 			void (*func)(struct rcu_head *head));
 
+/*
+ * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
+ * by call_rcu() and rcu callback execution, and are therefore not part of the
+ * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors.
+ */
+
+#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
+# define STATE_RCU_HEAD_READY	0
+# define STATE_RCU_HEAD_QUEUED	1
+
+extern struct debug_obj_descr rcuhead_debug_descr;
+
+static inline void debug_rcu_head_queue(struct rcu_head *head)
+{
+	debug_object_activate(head, &rcuhead_debug_descr);
+	debug_object_active_state(head, &rcuhead_debug_descr,
+				  STATE_RCU_HEAD_READY,
+				  STATE_RCU_HEAD_QUEUED);
+}
+
+static inline void debug_rcu_head_unqueue(struct rcu_head *head)
+{
+	debug_object_active_state(head, &rcuhead_debug_descr,
+				  STATE_RCU_HEAD_QUEUED,
+				  STATE_RCU_HEAD_READY);
+	debug_object_deactivate(head, &rcuhead_debug_descr);
+}
+#else	/* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
+static inline void debug_rcu_head_queue(struct rcu_head *head)
+{
+}
+
+static inline void debug_rcu_head_unqueue(struct rcu_head *head)
+{
+}
+#endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
+
 #endif /* __LINUX_RCUPDATE_H */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 72a8dc9567f..4d169835fb3 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -114,3 +114,163 @@ int rcu_my_thread_group_empty(void)
 }
 EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty);
 #endif /* #ifdef CONFIG_PROVE_RCU */
+
+#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
+static inline void debug_init_rcu_head(struct rcu_head *head)
+{
+	debug_object_init(head, &rcuhead_debug_descr);
+}
+
+static inline void debug_rcu_head_free(struct rcu_head *head)
+{
+	debug_object_free(head, &rcuhead_debug_descr);
+}
+
+/*
+ * fixup_init is called when:
+ * - an active object is initialized
+ */
+static int rcuhead_fixup_init(void *addr, enum debug_obj_state state)
+{
+	struct rcu_head *head = addr;
+
+	switch (state) {
+	case ODEBUG_STATE_ACTIVE:
+		/*
+		 * Ensure that queued callbacks are all executed.
+		 * If we detect that we are nested in a RCU read-side critical
+		 * section, we should simply fail, otherwise we would deadlock.
+		 */
+		if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
+		    irqs_disabled()) {
+			WARN_ON(1);
+			return 0;
+		}
+		rcu_barrier();
+		rcu_barrier_sched();
+		rcu_barrier_bh();
+		debug_object_init(head, &rcuhead_debug_descr);
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * fixup_activate is called when:
+ * - an active object is activated
+ * - an unknown object is activated (might be a statically initialized object)
+ * Activation is performed internally by call_rcu().
+ */
+static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state)
+{
+	struct rcu_head *head = addr;
+
+	switch (state) {
+
+	case ODEBUG_STATE_NOTAVAILABLE:
+		/*
+		 * This is not really a fixup. We just make sure that it is
+		 * tracked in the object tracker.
+		 */
+		debug_object_init(head, &rcuhead_debug_descr);
+		debug_object_activate(head, &rcuhead_debug_descr);
+		return 0;
+
+	case ODEBUG_STATE_ACTIVE:
+		/*
+		 * Ensure that queued callbacks are all executed.
+		 * If we detect that we are nested in a RCU read-side critical
+		 * section, we should simply fail, otherwise we would deadlock.
+		 */
+		if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
+		    irqs_disabled()) {
+			WARN_ON(1);
+			return 0;
+		}
+		rcu_barrier();
+		rcu_barrier_sched();
+		rcu_barrier_bh();
+		debug_object_activate(head, &rcuhead_debug_descr);
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * fixup_free is called when:
+ * - an active object is freed
+ */
+static int rcuhead_fixup_free(void *addr, enum debug_obj_state state)
+{
+	struct rcu_head *head = addr;
+
+	switch (state) {
+	case ODEBUG_STATE_ACTIVE:
+		/*
+		 * Ensure that queued callbacks are all executed.
+		 * If we detect that we are nested in a RCU read-side critical
+		 * section, we should simply fail, otherwise we would deadlock.
+		 */
+#ifndef CONFIG_PREEMPT
+		WARN_ON(1);
+		return 0;
+#else
+		if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
+		    irqs_disabled()) {
+			WARN_ON(1);
+			return 0;
+		}
+		rcu_barrier();
+		rcu_barrier_sched();
+		rcu_barrier_bh();
+		debug_object_free(head, &rcuhead_debug_descr);
+		return 1;
+#endif
+	default:
+		return 0;
+	}
+}
+
+/**
+ * init_rcu_head_on_stack() - initialize on-stack rcu_head for debugobjects
+ * @head: pointer to rcu_head structure to be initialized
+ *
+ * This function informs debugobjects of a new rcu_head structure that
+ * has been allocated as an auto variable on the stack.  This function
+ * is not required for rcu_head structures that are statically defined or
+ * that are dynamically allocated on the heap.  This function has no
+ * effect for !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
+ */
+void init_rcu_head_on_stack(struct rcu_head *head)
+{
+	debug_object_init_on_stack(head, &rcuhead_debug_descr);
+}
+EXPORT_SYMBOL_GPL(init_rcu_head_on_stack);
+
+/**
+ * destroy_rcu_head_on_stack() - destroy on-stack rcu_head for debugobjects
+ * @head: pointer to rcu_head structure to be initialized
+ *
+ * This function informs debugobjects that an on-stack rcu_head structure
+ * is about to go out of scope.  As with init_rcu_head_on_stack(), this
+ * function is not required for rcu_head structures that are statically
+ * defined or that are dynamically allocated on the heap.  Also as with
+ * init_rcu_head_on_stack(), this function has no effect for
+ * !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
+ */
+void destroy_rcu_head_on_stack(struct rcu_head *head)
+{
+	debug_object_free(head, &rcuhead_debug_descr);
+}
+EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack);
+
+struct debug_obj_descr rcuhead_debug_descr = {
+	.name = "rcu_head",
+	.fixup_init = rcuhead_fixup_init,
+	.fixup_activate = rcuhead_fixup_activate,
+	.fixup_free = rcuhead_fixup_free,
+};
+EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
+#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 38729d3cd23..196ec02f8be 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -169,6 +169,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 	while (list) {
 		next = list->next;
 		prefetch(next);
+		debug_rcu_head_unqueue(list);
 		list->func(list);
 		list = next;
 	}
@@ -211,6 +212,7 @@ static void __call_rcu(struct rcu_head *head,
 {
 	unsigned long flags;
 
+	debug_rcu_head_queue(head);
 	head->func = func;
 	head->next = NULL;
 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d4437345706..d5bc43976c5 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1112,6 +1112,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 	while (list) {
 		next = list->next;
 		prefetch(next);
+		debug_rcu_head_unqueue(list);
 		list->func(list);
 		list = next;
 		if (++count >= rdp->blimit)
@@ -1388,6 +1389,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	unsigned long flags;
 	struct rcu_data *rdp;
 
+	debug_rcu_head_queue(head);
 	head->func = func;
 	head->next = NULL;
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e722e9d6222..142faa2ec66 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -307,6 +307,12 @@ config DEBUG_OBJECTS_WORK
 	  work queue routines to track the life time of work objects and
 	  validate the work operations.
 
+config DEBUG_OBJECTS_RCU_HEAD
+	bool "Debug RCU callbacks objects"
+	depends on DEBUG_OBJECTS && PREEMPT
+	help
+	  Enable this to turn on debugging of RCU list heads (call_rcu() usage).
+
 config DEBUG_OBJECTS_ENABLE_DEFAULT
 	int "debug_objects bootup default value (0-1)"
         range 0 1
-- 
cgit v1.2.3-70-g09d2


From f5155b33277c9678041a27869165619bb34f722f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 30 Apr 2010 06:42:01 -0700
Subject: rcu: add an rcu_dereference_index_check()

The sparse RCU-pointer checking relies on type magic that dereferences
the pointer in question.  This does not work if the pointer is in fact
an array index.  This commit therefore supplies a new RCU API that
omits the sparse checking to continue to support rcu_dereference()
on integers.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 2b7fc506e47..9fbc54a2585 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -566,4 +566,37 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
 }
 #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
+#ifndef CONFIG_PROVE_RCU
+#define __do_rcu_dereference_check(c) do { } while (0)
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
+#define __rcu_dereference_index_check(p, c) \
+	({ \
+		typeof(p) _________p1 = ACCESS_ONCE(p); \
+		__do_rcu_dereference_check(c); \
+		smp_read_barrier_depends(); \
+		(_________p1); \
+	})
+
+/**
+ * rcu_dereference_index_check() - rcu_dereference for indices with debug checking
+ * @p: The pointer to read, prior to dereferencing
+ * @c: The conditions under which the dereference will take place
+ *
+ * Similar to rcu_dereference_check(), but omits the sparse checking.
+ * This allows rcu_dereference_index_check() to be used on integers,
+ * which can then be used as array indices.  Attempting to use
+ * rcu_dereference_check() on an integer will give compiler warnings
+ * because the sparse address-space mechanism relies on dereferencing
+ * the RCU-protected pointer.  Dereferencing integers is not something
+ * that even gcc will put up with.
+ *
+ * Note that this function does not implicitly check for RCU read-side
+ * critical sections.  If this function gains lots of uses, it might
+ * make sense to provide versions for each flavor of RCU, but it does
+ * not make sense as of early 2010.
+ */
+#define rcu_dereference_index_check(p, c) \
+	__rcu_dereference_index_check((p), (c))
+
 #endif /* __LINUX_RCUPDATE_H */
-- 
cgit v1.2.3-70-g09d2


From 71d1d5c722db9ae3b3f9c08ef7ddcd7759fbb1e0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 11 May 2010 16:13:14 -0700
Subject: rcu: add __rcu API for later sparse checking

This commit defines an __rcu API, but provides only vacuous definitions
for it.  This breaks dependencies among most of the subsequent patches,
allowing them to reach mainline asynchronously via whatever trees are
appropriate.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Christopher Li <sparse@chrisli.org>
Cc: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/compiler.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index a5a472b1074..c1a62c56a66 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -16,6 +16,7 @@
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
 # define __percpu	__attribute__((noderef, address_space(3)))
+# define __rcu
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
 #else
@@ -34,6 +35,7 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __release(x) (void)0
 # define __cond_lock(x,c) (c)
 # define __percpu
+# define __rcu
 #endif
 
 #ifdef __KERNEL__
-- 
cgit v1.2.3-70-g09d2


From a25909a4d4a29e272f953e12595bf2f04a292dbd Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 13 May 2010 12:32:28 -0700
Subject: lockdep: Add an in_workqueue_context() lockdep-based test function

Some recent uses of RCU make use of workqueues.  In these uses, execution
within the context of a specific workqueue takes the place of the usual
RCU read-side primitives such as rcu_read_lock(), and flushing of workqueues
takes the place of the usual RCU grace-period primitives.  Checking for
correct use of rcu_dereference() in such cases requires a test of whether
the code is executing in the context of a particular workqueue.  This
commit adds an in_workqueue_context() function that provides this test.
This new function is only defined when lockdep is enabled, which allows
it to be used as the second argument of rcu_dereference_check().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/workqueue.h |  4 ++++
 kernel/workqueue.c        | 15 +++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 9466e860d8c..d0f7c817849 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -297,4 +297,8 @@ static inline long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
 #else
 long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg);
 #endif /* CONFIG_SMP */
+
+#ifdef CONFIG_LOCKDEP
+int in_workqueue_context(struct workqueue_struct *wq);
+#endif
 #endif
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 327d2deb445..59fef1531dd 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -68,6 +68,21 @@ struct workqueue_struct {
 #endif
 };
 
+#ifdef CONFIG_LOCKDEP
+/**
+ * in_workqueue_context() - in context of specified workqueue?
+ * @wq: the workqueue of interest
+ *
+ * Checks lockdep state to see if the current task is executing from
+ * within a workqueue item.  This function exists only if lockdep is
+ * enabled.
+ */
+int in_workqueue_context(struct workqueue_struct *wq)
+{
+	return lock_is_held(&wq->lockdep_map);
+}
+#endif
+
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 
 static struct debug_obj_descr work_debug_descr;
-- 
cgit v1.2.3-70-g09d2


From 2c666df80764389886110c942a7916ba9622583d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 29 Apr 2010 20:48:47 -0700
Subject: vfs: add fs.h to define struct file

The sparse RCU-pointer annotations require definition of the
underlying type of any pointer passed to rcu_dereference() and friends.
So fcheck_files() needs "struct file" to be defined, so include fs.h.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
---
 include/linux/fdtable.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 013dc529e95..551671e8792 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -11,6 +11,7 @@
 #include <linux/rcupdate.h>
 #include <linux/types.h>
 #include <linux/init.h>
+#include <linux/fs.h>
 
 #include <asm/atomic.h>
 
-- 
cgit v1.2.3-70-g09d2


From 81bdf5bd7349bd4523538cbd7878f334bc2bfe14 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 2 May 2010 18:10:06 -0700
Subject: net: Make accesses to ->br_port safe for sparse RCU

The new versions of the rcu_dereference() APIs require that any pointers
passed to one of these APIs be fully defined.  The ->br_port field
in struct net_device points to a struct net_bridge_port, which is an
incomplete type.  This commit therefore changes ->br_port to be a void*,
and introduces a br_port() helper function to convert the type to struct
net_bridge_port, and applies this new helper function where required.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: David Miller <davem@davemloft.net>
Cc: Stephen Hemminger <shemminger@linux-foundation.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/linux/if_bridge.h           | 3 +++
 net/bridge/br_fdb.c                 | 2 +-
 net/bridge/br_private.h             | 5 +++++
 net/bridge/netfilter/ebt_redirect.c | 2 +-
 net/bridge/netfilter/ebt_ulog.c     | 4 ++--
 net/bridge/netfilter/ebtables.c     | 4 ++--
 net/netfilter/nfnetlink_log.c       | 4 ++--
 net/netfilter/nfnetlink_queue.c     | 4 ++--
 8 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 938b7e81df9..d001d782922 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -101,6 +101,9 @@ struct __fdb_entry {
 
 #include <linux/netdevice.h>
 
+/* br_handle_frame_hook() needs the following forward declaration. */
+struct net_bridge_port;
+
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
 extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
 					       struct sk_buff *skb);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 26637439965..845710bca49 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -246,7 +246,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
 		return 0;
 
 	rcu_read_lock();
-	fdb = __br_fdb_get(dev->br_port->br, addr);
+	fdb = __br_fdb_get(br_port(dev)->br, addr);
 	ret = fdb && fdb->dst->dev != dev &&
 		fdb->dst->state == BR_STATE_FORWARDING;
 	rcu_read_unlock();
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0f4a74bc6a9..3255188355b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -268,6 +268,11 @@ static inline int br_is_root_bridge(const struct net_bridge *br)
 	return !memcmp(&br->bridge_id, &br->designated_root, 8);
 }
 
+static inline struct net_bridge_port *br_port(const struct net_device *dev)
+{
+	return rcu_dereference(dev->br_port);
+}
+
 /* br_device.c */
 extern void br_dev_setup(struct net_device *dev);
 extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 9e19166ba45..a39df0ae0f8 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -25,7 +25,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 
 	if (par->hooknum != NF_BR_BROUTING)
 		memcpy(eth_hdr(skb)->h_dest,
-		       par->in->br_port->br->dev->dev_addr, ETH_ALEN);
+		       br_port(par->in)->br->dev->dev_addr, ETH_ALEN);
 	else
 		memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN);
 	skb->pkt_type = PACKET_HOST;
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index ae3c7cef148..5a4996bbb09 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -178,7 +178,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 		strcpy(pm->physindev, in->name);
 		/* If in isn't a bridge, then physindev==indev */
 		if (in->br_port)
-			strcpy(pm->indev, in->br_port->br->dev->name);
+			strcpy(pm->indev, br_port(in)->br->dev->name);
 		else
 			strcpy(pm->indev, in->name);
 	} else
@@ -187,7 +187,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 	if (out) {
 		/* If out exists, then out is a bridge port */
 		strcpy(pm->physoutdev, out->name);
-		strcpy(pm->outdev, out->br_port->br->dev->name);
+		strcpy(pm->outdev, br_port(out)->br->dev->name);
 	} else
 		pm->outdev[0] = pm->physoutdev[0] = '\0';
 
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 59ca00e40de..4c2aab8cbfc 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -141,10 +141,10 @@ ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h,
 	if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT))
 		return 1;
 	if ((!in || !in->br_port) ? 0 : FWINV2(ebt_dev_check(
-	   e->logical_in, in->br_port->br->dev), EBT_ILOGICALIN))
+	   e->logical_in, br_port(in)->br->dev), EBT_ILOGICALIN))
 		return 1;
 	if ((!out || !out->br_port) ? 0 : FWINV2(ebt_dev_check(
-	   e->logical_out, out->br_port->br->dev), EBT_ILOGICALOUT))
+	   e->logical_out, br_port(out)->br->dev), EBT_ILOGICALOUT))
 		return 1;
 
 	if (e->bitmask & EBT_SOURCEMAC) {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index fc9a211e629..78957cfa3bd 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -404,7 +404,7 @@ __build_packet_message(struct nfulnl_instance *inst,
 				     htonl(indev->ifindex));
 			/* this is the bridge group "brX" */
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
-				     htonl(indev->br_port->br->dev->ifindex));
+				     htonl(br_port(indev)->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is bridge group, we need to look for
 			 * physical device (when called from ipv4) */
@@ -431,7 +431,7 @@ __build_packet_message(struct nfulnl_instance *inst,
 				     htonl(outdev->ifindex));
 			/* this is the bridge group "brX" */
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
-				     htonl(outdev->br_port->br->dev->ifindex));
+				     htonl(br_port(outdev)->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is a bridge group, we need to look
 			 * for physical device (when called from ipv4) */
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 12e1ab37fcd..c3c17498298 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -297,7 +297,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 				     htonl(indev->ifindex));
 			/* this is the bridge group "brX" */
 			NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
-				     htonl(indev->br_port->br->dev->ifindex));
+				     htonl(br_port(indev)->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is bridge group, we need to look for
 			 * physical device (when called from ipv4) */
@@ -322,7 +322,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 				     htonl(outdev->ifindex));
 			/* this is the bridge group "brX" */
 			NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
-				     htonl(outdev->br_port->br->dev->ifindex));
+				     htonl(br_port(outdev)->br->dev->ifindex));
 		} else {
 			/* Case 2: outdev is bridge group, we need to look for
 			 * physical output device (when called from ipv4) */
-- 
cgit v1.2.3-70-g09d2


From 1be3b5fe9dffe3300f995584d8f996dd20e29412 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 14 Jun 2010 08:53:26 +0000
Subject: ethtool: Revert incorrect indentation changes

commit 97f8aefbbfb5aa5c9944e5fa8149f1fdaf71c7b6 "net: fix ethtool
coding style errors and warnings" changed the indentation of several
macro definitions in ethtool.h.  These definitions line up in the diff
where there is an extra character at the start of each line, but not
in the resulting file.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 276b40a1683..2c8af093d8b 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -586,29 +586,29 @@ struct ethtool_ops {
 #define ETHTOOL_GREGS		0x00000004 /* Get NIC registers. */
 #define ETHTOOL_GWOL		0x00000005 /* Get wake-on-lan options. */
 #define ETHTOOL_SWOL		0x00000006 /* Set wake-on-lan options. */
-#define ETHTOOL_GMSGLVL	0x00000007 /* Get driver message level */
-#define ETHTOOL_SMSGLVL	0x00000008 /* Set driver msg level. */
+#define ETHTOOL_GMSGLVL		0x00000007 /* Get driver message level */
+#define ETHTOOL_SMSGLVL		0x00000008 /* Set driver msg level. */
 #define ETHTOOL_NWAY_RST	0x00000009 /* Restart autonegotiation. */
 #define ETHTOOL_GLINK		0x0000000a /* Get link status (ethtool_value) */
-#define ETHTOOL_GEEPROM	0x0000000b /* Get EEPROM data */
-#define ETHTOOL_SEEPROM	0x0000000c /* Set EEPROM data. */
+#define ETHTOOL_GEEPROM		0x0000000b /* Get EEPROM data */
+#define ETHTOOL_SEEPROM		0x0000000c /* Set EEPROM data. */
 #define ETHTOOL_GCOALESCE	0x0000000e /* Get coalesce config */
 #define ETHTOOL_SCOALESCE	0x0000000f /* Set coalesce config. */
 #define ETHTOOL_GRINGPARAM	0x00000010 /* Get ring parameters */
 #define ETHTOOL_SRINGPARAM	0x00000011 /* Set ring parameters. */
 #define ETHTOOL_GPAUSEPARAM	0x00000012 /* Get pause parameters */
 #define ETHTOOL_SPAUSEPARAM	0x00000013 /* Set pause parameters. */
-#define ETHTOOL_GRXCSUM	0x00000014 /* Get RX hw csum enable (ethtool_value) */
-#define ETHTOOL_SRXCSUM	0x00000015 /* Set RX hw csum enable (ethtool_value) */
-#define ETHTOOL_GTXCSUM	0x00000016 /* Get TX hw csum enable (ethtool_value) */
-#define ETHTOOL_STXCSUM	0x00000017 /* Set TX hw csum enable (ethtool_value) */
+#define ETHTOOL_GRXCSUM		0x00000014 /* Get RX hw csum enable (ethtool_value) */
+#define ETHTOOL_SRXCSUM		0x00000015 /* Set RX hw csum enable (ethtool_value) */
+#define ETHTOOL_GTXCSUM		0x00000016 /* Get TX hw csum enable (ethtool_value) */
+#define ETHTOOL_STXCSUM		0x00000017 /* Set TX hw csum enable (ethtool_value) */
 #define ETHTOOL_GSG		0x00000018 /* Get scatter-gather enable
 					    * (ethtool_value) */
 #define ETHTOOL_SSG		0x00000019 /* Set scatter-gather enable
 					    * (ethtool_value). */
 #define ETHTOOL_TEST		0x0000001a /* execute NIC self-test. */
 #define ETHTOOL_GSTRINGS	0x0000001b /* get specified string set */
-#define ETHTOOL_PHYS_ID	0x0000001c /* identify the NIC */
+#define ETHTOOL_PHYS_ID		0x0000001c /* identify the NIC */
 #define ETHTOOL_GSTATS		0x0000001d /* get NIC-specific statistics */
 #define ETHTOOL_GTSO		0x0000001e /* Get TSO enable (ethtool_value) */
 #define ETHTOOL_STSO		0x0000001f /* Set TSO enable (ethtool_value) */
@@ -619,8 +619,8 @@ struct ethtool_ops {
 #define ETHTOOL_SGSO		0x00000024 /* Set GSO enable (ethtool_value) */
 #define ETHTOOL_GFLAGS		0x00000025 /* Get flags bitmap(ethtool_value) */
 #define ETHTOOL_SFLAGS		0x00000026 /* Set flags bitmap(ethtool_value) */
-#define ETHTOOL_GPFLAGS	0x00000027 /* Get driver-private flags bitmap */
-#define ETHTOOL_SPFLAGS	0x00000028 /* Set driver-private flags bitmap */
+#define ETHTOOL_GPFLAGS		0x00000027 /* Get driver-private flags bitmap */
+#define ETHTOOL_SPFLAGS		0x00000028 /* Set driver-private flags bitmap */
 
 #define ETHTOOL_GRXFH		0x00000029 /* Get RX flow hash configuration */
 #define ETHTOOL_SRXFH		0x0000002a /* Set RX flow hash configuration */
@@ -645,18 +645,18 @@ struct ethtool_ops {
 /* Indicates what features are supported by the interface. */
 #define SUPPORTED_10baseT_Half		(1 << 0)
 #define SUPPORTED_10baseT_Full		(1 << 1)
-#define SUPPORTED_100baseT_Half	(1 << 2)
-#define SUPPORTED_100baseT_Full	(1 << 3)
+#define SUPPORTED_100baseT_Half		(1 << 2)
+#define SUPPORTED_100baseT_Full		(1 << 3)
 #define SUPPORTED_1000baseT_Half	(1 << 4)
 #define SUPPORTED_1000baseT_Full	(1 << 5)
 #define SUPPORTED_Autoneg		(1 << 6)
 #define SUPPORTED_TP			(1 << 7)
 #define SUPPORTED_AUI			(1 << 8)
 #define SUPPORTED_MII			(1 << 9)
-#define SUPPORTED_FIBRE		(1 << 10)
+#define SUPPORTED_FIBRE			(1 << 10)
 #define SUPPORTED_BNC			(1 << 11)
 #define SUPPORTED_10000baseT_Full	(1 << 12)
-#define SUPPORTED_Pause		(1 << 13)
+#define SUPPORTED_Pause			(1 << 13)
 #define SUPPORTED_Asym_Pause		(1 << 14)
 #define SUPPORTED_2500baseX_Full	(1 << 15)
 #define SUPPORTED_Backplane		(1 << 16)
@@ -666,8 +666,8 @@ struct ethtool_ops {
 #define SUPPORTED_10000baseR_FEC	(1 << 20)
 
 /* Indicates what features are advertised by the interface. */
-#define ADVERTISED_10baseT_Half	(1 << 0)
-#define ADVERTISED_10baseT_Full	(1 << 1)
+#define ADVERTISED_10baseT_Half		(1 << 0)
+#define ADVERTISED_10baseT_Full		(1 << 1)
 #define ADVERTISED_100baseT_Half	(1 << 2)
 #define ADVERTISED_100baseT_Full	(1 << 3)
 #define ADVERTISED_1000baseT_Half	(1 << 4)
@@ -706,12 +706,12 @@ struct ethtool_ops {
 #define DUPLEX_FULL		0x01
 
 /* Which connector port. */
-#define PORT_TP		0x00
+#define PORT_TP			0x00
 #define PORT_AUI		0x01
 #define PORT_MII		0x02
 #define PORT_FIBRE		0x03
 #define PORT_BNC		0x04
-#define PORT_DA		0x05
+#define PORT_DA			0x05
 #define PORT_NONE		0xef
 #define PORT_OTHER		0xff
 
@@ -725,7 +725,7 @@ struct ethtool_ops {
 /* Enable or disable autonegotiation.  If this is set to enable,
  * the forced link modes above are completely ignored.
  */
-#define AUTONEG_DISABLE	0x00
+#define AUTONEG_DISABLE		0x00
 #define AUTONEG_ENABLE		0x01
 
 /* Mode MDI or MDI-X */
-- 
cgit v1.2.3-70-g09d2


From 0902b469bd25065aa0688c3cee6f11744c817e7c Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@nokia.com>
Date: Tue, 15 Jun 2010 15:04:00 +0200
Subject: netfilter: xtables: idletimer target implementation

This patch implements an idletimer Xtables target that can be used to
identify when interfaces have been idle for a certain period of time.

Timers are identified by labels and are created when a rule is set with a new
label.  The rules also take a timeout value (in seconds) as an option.  If
more than one rule uses the same timer label, the timer will be restarted
whenever any of the rules get a hit.

One entry for each timer is created in sysfs.  This attribute contains the
time remaining for the timer to expire.  The attributes are located under
the xt_idletimer class:

/sys/class/xt_idletimer/timers/<label>

When the timer expires, the target module sends a sysfs notification to
userspace, which can then decide what to do (e.g. disconnect to save power).

Cc: Timo Teras <timo.teras@iki.fi>
Signed-off-by: Luciano Coelho <luciano.coelho@nokia.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/Kbuild         |   1 +
 include/linux/netfilter/xt_IDLETIMER.h |  45 +++++
 net/netfilter/Kconfig                  |  12 ++
 net/netfilter/Makefile                 |   1 +
 net/netfilter/xt_IDLETIMER.c           | 314 +++++++++++++++++++++++++++++++++
 5 files changed, 373 insertions(+)
 create mode 100644 include/linux/netfilter/xt_IDLETIMER.h
 create mode 100644 net/netfilter/xt_IDLETIMER.c

(limited to 'include')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 48767cd1645..bb103f43afa 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -8,6 +8,7 @@ header-y += xt_CONNMARK.h
 header-y += xt_CONNSECMARK.h
 header-y += xt_CT.h
 header-y += xt_DSCP.h
+header-y += xt_IDLETIMER.h
 header-y += xt_LED.h
 header-y += xt_MARK.h
 header-y += xt_NFLOG.h
diff --git a/include/linux/netfilter/xt_IDLETIMER.h b/include/linux/netfilter/xt_IDLETIMER.h
new file mode 100644
index 00000000000..3e1aa1be942
--- /dev/null
+++ b/include/linux/netfilter/xt_IDLETIMER.h
@@ -0,0 +1,45 @@
+/*
+ * linux/include/linux/netfilter/xt_IDLETIMER.h
+ *
+ * Header file for Xtables timer target module.
+ *
+ * Copyright (C) 2004, 2010 Nokia Corporation
+ * Written by Timo Teras <ext-timo.teras@nokia.com>
+ *
+ * Converted to x_tables and forward-ported to 2.6.34
+ * by Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * Contact: Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef _XT_IDLETIMER_H
+#define _XT_IDLETIMER_H
+
+#include <linux/types.h>
+
+#define MAX_IDLETIMER_LABEL_SIZE 28
+
+struct idletimer_tg_info {
+	__u32 timeout;
+
+	char label[MAX_IDLETIMER_LABEL_SIZE];
+
+	/* for kernel module internal use only */
+	struct idletimer_tg *timer __attribute((aligned(8)));
+};
+
+#endif
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8593a77cfea..413ed24a968 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -424,6 +424,18 @@ config NETFILTER_XT_TARGET_HL
 	since you can easily create immortal packets that loop
 	forever on the network.
 
+config NETFILTER_XT_TARGET_IDLETIMER
+	tristate  "IDLETIMER target support"
+	depends on NETFILTER_ADVANCED
+	help
+
+	  This option adds the `IDLETIMER' target.  Each matching packet
+	  resets the timer associated with label specified when the rule is
+	  added.  When the timer expires, it triggers a sysfs notification.
+	  The remaining time for expiration can be read via sysfs.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_LED
 	tristate '"LED" target support'
 	depends on LEDS_CLASS && LEDS_TRIGGERS
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 14e3a8fd818..e28420aac5e 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
 
 # matches
 obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
new file mode 100644
index 00000000000..e11090a0675
--- /dev/null
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -0,0 +1,314 @@
+/*
+ * linux/net/netfilter/xt_IDLETIMER.c
+ *
+ * Netfilter module to trigger a timer when packet matches.
+ * After timer expires a kevent will be sent.
+ *
+ * Copyright (C) 2004, 2010 Nokia Corporation
+ * Written by Timo Teras <ext-timo.teras@nokia.com>
+ *
+ * Converted to x_tables and reworked for upstream inclusion
+ * by Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * Contact: Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_IDLETIMER.h>
+#include <linux/kobject.h>
+#include <linux/workqueue.h>
+#include <linux/sysfs.h>
+
+struct idletimer_tg_attr {
+	struct attribute attr;
+	ssize_t	(*show)(struct kobject *kobj,
+			struct attribute *attr, char *buf);
+};
+
+struct idletimer_tg {
+	struct list_head entry;
+	struct timer_list timer;
+	struct work_struct work;
+
+	struct kobject *kobj;
+	struct idletimer_tg_attr attr;
+
+	unsigned int refcnt;
+};
+
+static LIST_HEAD(idletimer_tg_list);
+static DEFINE_MUTEX(list_mutex);
+
+static struct kobject *idletimer_tg_kobj;
+
+static
+struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)
+{
+	struct idletimer_tg *entry;
+
+	BUG_ON(!label);
+
+	list_for_each_entry(entry, &idletimer_tg_list, entry) {
+		if (!strcmp(label, entry->attr.attr.name))
+			return entry;
+	}
+
+	return NULL;
+}
+
+static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr,
+				 char *buf)
+{
+	struct idletimer_tg *timer;
+	unsigned long expires = 0;
+
+	mutex_lock(&list_mutex);
+
+	timer =	__idletimer_tg_find_by_label(attr->name);
+	if (timer)
+		expires = timer->timer.expires;
+
+	mutex_unlock(&list_mutex);
+
+	if (time_after(expires, jiffies))
+		return sprintf(buf, "%u\n",
+			       jiffies_to_msecs(expires - jiffies) / 1000);
+
+	return sprintf(buf, "0\n");
+}
+
+static void idletimer_tg_work(struct work_struct *work)
+{
+	struct idletimer_tg *timer = container_of(work, struct idletimer_tg,
+						  work);
+
+	sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name);
+}
+
+static void idletimer_tg_expired(unsigned long data)
+{
+	struct idletimer_tg *timer = (struct idletimer_tg *) data;
+
+	pr_debug("timer %s expired\n", timer->attr.attr.name);
+
+	schedule_work(&timer->work);
+}
+
+static int idletimer_tg_create(struct idletimer_tg_info *info)
+{
+	int ret;
+
+	info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL);
+	if (!info->timer) {
+		pr_debug("couldn't alloc timer\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
+	if (!info->timer->attr.attr.name) {
+		pr_debug("couldn't alloc attribute name\n");
+		ret = -ENOMEM;
+		goto out_free_timer;
+	}
+	info->timer->attr.attr.mode = S_IRUGO;
+	info->timer->attr.show = idletimer_tg_show;
+
+	ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr);
+	if (ret < 0) {
+		pr_debug("couldn't add file to sysfs");
+		goto out_free_attr;
+	}
+
+	list_add(&info->timer->entry, &idletimer_tg_list);
+
+	setup_timer(&info->timer->timer, idletimer_tg_expired,
+		    (unsigned long) info->timer);
+	info->timer->refcnt = 1;
+
+	mod_timer(&info->timer->timer,
+		  msecs_to_jiffies(info->timeout * 1000) + jiffies);
+
+	INIT_WORK(&info->timer->work, idletimer_tg_work);
+
+	return 0;
+
+out_free_attr:
+	kfree(info->timer->attr.attr.name);
+out_free_timer:
+	kfree(info->timer);
+out:
+	return ret;
+}
+
+/*
+ * The actual xt_tables plugin.
+ */
+static unsigned int idletimer_tg_target(struct sk_buff *skb,
+					 const struct xt_action_param *par)
+{
+	const struct idletimer_tg_info *info = par->targinfo;
+
+	pr_debug("resetting timer %s, timeout period %u\n",
+		 info->label, info->timeout);
+
+	BUG_ON(!info->timer);
+
+	mod_timer(&info->timer->timer,
+		  msecs_to_jiffies(info->timeout * 1000) + jiffies);
+
+	return XT_CONTINUE;
+}
+
+static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
+{
+	struct idletimer_tg_info *info = par->targinfo;
+	int ret;
+
+	pr_debug("checkentry targinfo%s\n", info->label);
+
+	if (info->timeout == 0) {
+		pr_debug("timeout value is zero\n");
+		return -EINVAL;
+	}
+
+	if (info->label[0] == '\0' ||
+	    strnlen(info->label,
+		    MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) {
+		pr_debug("label is empty or not nul-terminated\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&list_mutex);
+
+	info->timer = __idletimer_tg_find_by_label(info->label);
+	if (info->timer) {
+		info->timer->refcnt++;
+		mod_timer(&info->timer->timer,
+			  msecs_to_jiffies(info->timeout * 1000) + jiffies);
+
+		pr_debug("increased refcnt of timer %s to %u\n",
+			 info->label, info->timer->refcnt);
+	} else {
+		ret = idletimer_tg_create(info);
+		if (ret < 0) {
+			pr_debug("failed to create timer\n");
+			mutex_unlock(&list_mutex);
+			return ret;
+		}
+	}
+
+	mutex_unlock(&list_mutex);
+	return 0;
+}
+
+static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	const struct idletimer_tg_info *info = par->targinfo;
+
+	pr_debug("destroy targinfo %s\n", info->label);
+
+	mutex_lock(&list_mutex);
+
+	if (--info->timer->refcnt == 0) {
+		pr_debug("deleting timer %s\n", info->label);
+
+		list_del(&info->timer->entry);
+		del_timer_sync(&info->timer->timer);
+		sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
+		kfree(info->timer->attr.attr.name);
+		kfree(info->timer);
+	} else {
+		pr_debug("decreased refcnt of timer %s to %u\n",
+			 info->label, info->timer->refcnt);
+	}
+
+	mutex_unlock(&list_mutex);
+}
+
+static struct xt_target idletimer_tg __read_mostly = {
+	.name		= "IDLETIMER",
+	.family		= NFPROTO_UNSPEC,
+	.target		= idletimer_tg_target,
+	.targetsize     = sizeof(struct idletimer_tg_info),
+	.checkentry	= idletimer_tg_checkentry,
+	.destroy        = idletimer_tg_destroy,
+	.me		= THIS_MODULE,
+};
+
+static struct class *idletimer_tg_class;
+
+static struct device *idletimer_tg_device;
+
+static int __init idletimer_tg_init(void)
+{
+	int err;
+
+	idletimer_tg_class = class_create(THIS_MODULE, "xt_idletimer");
+	err = PTR_ERR(idletimer_tg_class);
+	if (IS_ERR(idletimer_tg_class)) {
+		pr_debug("couldn't register device class\n");
+		goto out;
+	}
+
+	idletimer_tg_device = device_create(idletimer_tg_class, NULL,
+					    MKDEV(0, 0), NULL, "timers");
+	err = PTR_ERR(idletimer_tg_device);
+	if (IS_ERR(idletimer_tg_device)) {
+		pr_debug("couldn't register system device\n");
+		goto out_class;
+	}
+
+	idletimer_tg_kobj = &idletimer_tg_device->kobj;
+
+	err =  xt_register_target(&idletimer_tg);
+	if (err < 0) {
+		pr_debug("couldn't register xt target\n");
+		goto out_dev;
+	}
+
+	return 0;
+out_dev:
+	device_destroy(idletimer_tg_class, MKDEV(0, 0));
+out_class:
+	class_destroy(idletimer_tg_class);
+out:
+	return err;
+}
+
+static void __exit idletimer_tg_exit(void)
+{
+	xt_unregister_target(&idletimer_tg);
+
+	device_destroy(idletimer_tg_class, MKDEV(0, 0));
+	class_destroy(idletimer_tg_class);
+}
+
+module_init(idletimer_tg_init);
+module_exit(idletimer_tg_exit);
+
+MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
+MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
+MODULE_DESCRIPTION("Xtables: idle time monitor");
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3-70-g09d2


From de85d99eb7b595f6751550184b94c1e2f74a828b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 10 Jun 2010 16:12:44 +0000
Subject: netpoll: Fix RCU usage

The use of RCU in netpoll is incorrect in a number of places:

1) The initial setting is lacking a write barrier.
2) The synchronize_rcu is in the wrong place.
3) Read barriers are missing.
4) Some places are even missing rcu_read_lock.
5) npinfo is zeroed after freeing.

This patch fixes those issues.  As most users are in BH context,
this also converts the RCU usage to the BH variant.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 13 ++++++++-----
 net/core/netpoll.c      | 20 ++++++++++++--------
 2 files changed, 20 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index e9e23121586..95c9f7e1677 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -57,12 +57,15 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
 #ifdef CONFIG_NETPOLL
 static inline bool netpoll_rx(struct sk_buff *skb)
 {
-	struct netpoll_info *npinfo = skb->dev->npinfo;
+	struct netpoll_info *npinfo;
 	unsigned long flags;
 	bool ret = false;
 
+	rcu_read_lock_bh();
+	npinfo = rcu_dereference(skb->dev->npinfo);
+
 	if (!npinfo || (list_empty(&npinfo->rx_np) && !npinfo->rx_flags))
-		return false;
+		goto out;
 
 	spin_lock_irqsave(&npinfo->rx_lock, flags);
 	/* check rx_flags again with the lock held */
@@ -70,12 +73,14 @@ static inline bool netpoll_rx(struct sk_buff *skb)
 		ret = true;
 	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 
+out:
+	rcu_read_unlock_bh();
 	return ret;
 }
 
 static inline int netpoll_rx_on(struct sk_buff *skb)
 {
-	struct netpoll_info *npinfo = skb->dev->npinfo;
+	struct netpoll_info *npinfo = rcu_dereference(skb->dev->npinfo);
 
 	return npinfo && (!list_empty(&npinfo->rx_np) || npinfo->rx_flags);
 }
@@ -91,7 +96,6 @@ static inline void *netpoll_poll_lock(struct napi_struct *napi)
 {
 	struct net_device *dev = napi->dev;
 
-	rcu_read_lock(); /* deal with race on ->npinfo */
 	if (dev && dev->npinfo) {
 		spin_lock(&napi->poll_lock);
 		napi->poll_owner = smp_processor_id();
@@ -108,7 +112,6 @@ static inline void netpoll_poll_unlock(void *have)
 		napi->poll_owner = -1;
 		spin_unlock(&napi->poll_lock);
 	}
-	rcu_read_unlock();
 }
 
 #else
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 19ff66079f7..e9ab4f0c454 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -261,6 +261,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 	unsigned long tries;
 	struct net_device *dev = np->dev;
 	const struct net_device_ops *ops = dev->netdev_ops;
+	/* It is up to the caller to keep npinfo alive. */
 	struct netpoll_info *npinfo = np->dev->npinfo;
 
 	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
@@ -810,10 +811,7 @@ int netpoll_setup(struct netpoll *np)
 	refill_skbs();
 
 	/* last thing to do is link it to the net device structure */
-	ndev->npinfo = npinfo;
-
-	/* avoid racing with NAPI reading npinfo */
-	synchronize_rcu();
+	rcu_assign_pointer(ndev->npinfo, npinfo);
 
 	return 0;
 
@@ -857,6 +855,16 @@ void netpoll_cleanup(struct netpoll *np)
 
 			if (atomic_dec_and_test(&npinfo->refcnt)) {
 				const struct net_device_ops *ops;
+
+				ops = np->dev->netdev_ops;
+				if (ops->ndo_netpoll_cleanup)
+					ops->ndo_netpoll_cleanup(np->dev);
+
+				rcu_assign_pointer(np->dev->npinfo, NULL);
+
+				/* avoid racing with NAPI reading npinfo */
+				synchronize_rcu_bh();
+
 				skb_queue_purge(&npinfo->arp_tx);
 				skb_queue_purge(&npinfo->txq);
 				cancel_rearming_delayed_work(&npinfo->tx_work);
@@ -864,10 +872,6 @@ void netpoll_cleanup(struct netpoll *np)
 				/* clean after last, unfinished work */
 				__skb_queue_purge(&npinfo->txq);
 				kfree(npinfo);
-				ops = np->dev->netdev_ops;
-				if (ops->ndo_netpoll_cleanup)
-					ops->ndo_netpoll_cleanup(np->dev);
-				np->dev->npinfo = NULL;
 			}
 		}
 
-- 
cgit v1.2.3-70-g09d2


From 4247e161b12f8dffb7ee3ee07bc5e61f714ebe2d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 10 Jun 2010 16:12:47 +0000
Subject: netpoll: Add ndo_netpoll_setup

This patch adds ndo_netpoll_setup as the initialisation primitive
to complement ndo_netpoll_cleanup.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/core/netpoll.c        | 10 ++++++++++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4fbccc5f609..fb20cc55ba5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -744,6 +744,8 @@ struct net_device_ops {
 						        unsigned short vid);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*ndo_poll_controller)(struct net_device *dev);
+	int			(*ndo_netpoll_setup)(struct net_device *dev,
+						     struct netpoll_info *info);
 	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
 #endif
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index d10c249bcc8..7de6dcad5d7 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -698,6 +698,7 @@ int netpoll_setup(struct netpoll *np)
 	struct net_device *ndev = NULL;
 	struct in_device *in_dev;
 	struct netpoll_info *npinfo;
+	const struct net_device_ops *ops;
 	unsigned long flags;
 	int err;
 
@@ -797,6 +798,13 @@ int netpoll_setup(struct netpoll *np)
 		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
 
 		atomic_set(&npinfo->refcnt, 1);
+
+		ops = np->dev->netdev_ops;
+		if (ops->ndo_netpoll_setup) {
+			err = ops->ndo_netpoll_setup(ndev, npinfo);
+			if (err)
+				goto free_npinfo;
+		}
 	} else {
 		npinfo = ndev->npinfo;
 		atomic_inc(&npinfo->refcnt);
@@ -817,6 +825,8 @@ int netpoll_setup(struct netpoll *np)
 
 	return 0;
 
+free_npinfo:
+	kfree(npinfo);
 unlock:
 	rtnl_unlock();
 put:
-- 
cgit v1.2.3-70-g09d2


From 8fdd95ec162a8fbac7f41d6f54f90402fe3e8cb1 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 10 Jun 2010 16:12:48 +0000
Subject: netpoll: Allow netpoll_setup/cleanup recursion

This patch adds the functions __netpoll_setup/__netpoll_cleanup,
which are designed to be called recursively through ndo_netpoll_setup.

They must be called with RTNL held, and the caller must initialise
np->dev and ensure that it has a valid reference count.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h |   2 +
 net/core/netpoll.c      | 176 ++++++++++++++++++++++++++----------------------
 2 files changed, 99 insertions(+), 79 deletions(-)

(limited to 'include')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 95c9f7e1677..f3ad74af7e1 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -46,9 +46,11 @@ void netpoll_poll(struct netpoll *np);
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
 void netpoll_print_options(struct netpoll *np);
 int netpoll_parse_options(struct netpoll *np, char *opt);
+int __netpoll_setup(struct netpoll *np);
 int netpoll_setup(struct netpoll *np);
 int netpoll_trap(void);
 void netpoll_set_trap(int trap);
+void __netpoll_cleanup(struct netpoll *np);
 void netpoll_cleanup(struct netpoll *np);
 int __netpoll_rx(struct sk_buff *skb);
 void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 7de6dcad5d7..560297ee55b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -693,15 +693,78 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 	return -1;
 }
 
-int netpoll_setup(struct netpoll *np)
+int __netpoll_setup(struct netpoll *np)
 {
-	struct net_device *ndev = NULL;
-	struct in_device *in_dev;
+	struct net_device *ndev = np->dev;
 	struct netpoll_info *npinfo;
 	const struct net_device_ops *ops;
 	unsigned long flags;
 	int err;
 
+	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
+	    !ndev->netdev_ops->ndo_poll_controller) {
+		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
+		       np->name, np->dev_name);
+		err = -ENOTSUPP;
+		goto out;
+	}
+
+	if (!ndev->npinfo) {
+		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+		if (!npinfo) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		npinfo->rx_flags = 0;
+		INIT_LIST_HEAD(&npinfo->rx_np);
+
+		spin_lock_init(&npinfo->rx_lock);
+		skb_queue_head_init(&npinfo->arp_tx);
+		skb_queue_head_init(&npinfo->txq);
+		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
+
+		atomic_set(&npinfo->refcnt, 1);
+
+		ops = np->dev->netdev_ops;
+		if (ops->ndo_netpoll_setup) {
+			err = ops->ndo_netpoll_setup(ndev, npinfo);
+			if (err)
+				goto free_npinfo;
+		}
+	} else {
+		npinfo = ndev->npinfo;
+		atomic_inc(&npinfo->refcnt);
+	}
+
+	npinfo->netpoll = np;
+
+	if (np->rx_hook) {
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
+		list_add_tail(&np->rx, &npinfo->rx_np);
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+	}
+
+	/* last thing to do is link it to the net device structure */
+	rcu_assign_pointer(ndev->npinfo, npinfo);
+	rtnl_unlock();
+
+	return 0;
+
+free_npinfo:
+	kfree(npinfo);
+out:
+	return err;
+}
+EXPORT_SYMBOL_GPL(__netpoll_setup);
+
+int netpoll_setup(struct netpoll *np)
+{
+	struct net_device *ndev = NULL;
+	struct in_device *in_dev;
+	int err;
+
 	if (np->dev_name)
 		ndev = dev_get_by_name(&init_net, np->dev_name);
 	if (!ndev) {
@@ -774,61 +837,14 @@ int netpoll_setup(struct netpoll *np)
 	refill_skbs();
 
 	rtnl_lock();
-	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
-	    !ndev->netdev_ops->ndo_poll_controller) {
-		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
-		       np->name, np->dev_name);
-		err = -ENOTSUPP;
-		goto unlock;
-	}
-
-	if (!ndev->npinfo) {
-		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
-		if (!npinfo) {
-			err = -ENOMEM;
-			goto unlock;
-		}
-
-		npinfo->rx_flags = 0;
-		INIT_LIST_HEAD(&npinfo->rx_np);
-
-		spin_lock_init(&npinfo->rx_lock);
-		skb_queue_head_init(&npinfo->arp_tx);
-		skb_queue_head_init(&npinfo->txq);
-		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
-
-		atomic_set(&npinfo->refcnt, 1);
-
-		ops = np->dev->netdev_ops;
-		if (ops->ndo_netpoll_setup) {
-			err = ops->ndo_netpoll_setup(ndev, npinfo);
-			if (err)
-				goto free_npinfo;
-		}
-	} else {
-		npinfo = ndev->npinfo;
-		atomic_inc(&npinfo->refcnt);
-	}
-
-	npinfo->netpoll = np;
-
-	if (np->rx_hook) {
-		spin_lock_irqsave(&npinfo->rx_lock, flags);
-		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
-		list_add_tail(&np->rx, &npinfo->rx_np);
-		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-	}
-
-	/* last thing to do is link it to the net device structure */
-	rcu_assign_pointer(ndev->npinfo, npinfo);
+	err = __netpoll_setup(np);
 	rtnl_unlock();
 
+	if (err)
+		goto put;
+
 	return 0;
 
-free_npinfo:
-	kfree(npinfo);
-unlock:
-	rtnl_unlock();
 put:
 	dev_put(ndev);
 	return err;
@@ -841,40 +857,32 @@ static int __init netpoll_init(void)
 }
 core_initcall(netpoll_init);
 
-void netpoll_cleanup(struct netpoll *np)
+void __netpoll_cleanup(struct netpoll *np)
 {
 	struct netpoll_info *npinfo;
 	unsigned long flags;
-	int free = 0;
 
-	if (!np->dev)
+	npinfo = np->dev->npinfo;
+	if (!npinfo)
 		return;
 
-	rtnl_lock();
-	npinfo = np->dev->npinfo;
-	if (npinfo) {
-		if (!list_empty(&npinfo->rx_np)) {
-			spin_lock_irqsave(&npinfo->rx_lock, flags);
-			list_del(&np->rx);
-			if (list_empty(&npinfo->rx_np))
-				npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
-			spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-		}
+	if (!list_empty(&npinfo->rx_np)) {
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		list_del(&np->rx);
+		if (list_empty(&npinfo->rx_np))
+			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+	}
 
-		free = atomic_dec_and_test(&npinfo->refcnt);
-		if (free) {
-			const struct net_device_ops *ops;
+	if (atomic_dec_and_test(&npinfo->refcnt)) {
+		const struct net_device_ops *ops;
 
-			ops = np->dev->netdev_ops;
-			if (ops->ndo_netpoll_cleanup)
-				ops->ndo_netpoll_cleanup(np->dev);
+		ops = np->dev->netdev_ops;
+		if (ops->ndo_netpoll_cleanup)
+			ops->ndo_netpoll_cleanup(np->dev);
 
-			rcu_assign_pointer(np->dev->npinfo, NULL);
-		}
-	}
-	rtnl_unlock();
+		rcu_assign_pointer(np->dev->npinfo, NULL);
 
-	if (free) {
 		/* avoid racing with NAPI reading npinfo */
 		synchronize_rcu_bh();
 
@@ -886,9 +894,19 @@ void netpoll_cleanup(struct netpoll *np)
 		__skb_queue_purge(&npinfo->txq);
 		kfree(npinfo);
 	}
+}
+EXPORT_SYMBOL_GPL(__netpoll_cleanup);
 
-	dev_put(np->dev);
+void netpoll_cleanup(struct netpoll *np)
+{
+	if (!np->dev)
+		return;
 
+	rtnl_lock();
+	__netpoll_cleanup(np);
+	rtnl_unlock();
+
+	dev_put(np->dev);
 	np->dev = NULL;
 }
 
-- 
cgit v1.2.3-70-g09d2


From c18370f5b2949d9cca519355f33690b75e1e7c8b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 10 Jun 2010 16:12:49 +0000
Subject: netpoll: Add netpoll_tx_running

This patch adds the helper netpoll_tx_running for use within
ndo_start_xmit.  It returns non-zero if ndo_start_xmit is being
invoked by netpoll, and zero otherwise.

This is currently implemented by simply checking whether IRQs are
disabled.  This works because for all non-netpoll uses of
ndo_start_xmit, IRQs must be enabled, while netpoll always disables
IRQs before calling ndo_start_xmit.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index f3ad74af7e1..4c77fe78cef 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -116,6 +116,11 @@ static inline void netpoll_poll_unlock(void *have)
 	}
 }
 
+static inline int netpoll_tx_running(struct net_device *dev)
+{
+	return irqs_disabled();
+}
+
 #else
 static inline int netpoll_rx(struct sk_buff *skb)
 {
@@ -139,6 +144,10 @@ static inline void netpoll_poll_unlock(void *have)
 static inline void netpoll_netdev_init(struct net_device *dev)
 {
 }
+static inline int netpoll_tx_running(struct net_device *dev)
+{
+	return 0;
+}
 #endif
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From 93e2c32b5cb2ad92ceb1d7a4684f20a0d25bf530 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 10 Jun 2010 03:34:59 +0000
Subject: net: add rx_handler data pointer

Add the possibility to register an rx_handler data pointer along with an rx_handler.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c     | 2 +-
 include/linux/netdevice.h | 4 +++-
 net/bridge/br_if.c        | 2 +-
 net/core/dev.c            | 6 +++++-
 4 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 59c315556a3..87a3bf69c4a 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -532,7 +532,7 @@ static int macvlan_port_create(struct net_device *dev)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
 	rcu_assign_pointer(dev->macvlan_port, port);
 
-	err = netdev_rx_handler_register(dev, macvlan_handle_frame);
+	err = netdev_rx_handler_register(dev, macvlan_handle_frame, NULL);
 	if (err) {
 		rcu_assign_pointer(dev->macvlan_port, NULL);
 		kfree(port);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fb20cc55ba5..361ff1145cf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -979,6 +979,7 @@ struct net_device {
 
 	struct netdev_queue	rx_queue;
 	rx_handler_func_t	*rx_handler;
+	void			*rx_handler_data;
 
 	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
 
@@ -1712,7 +1713,8 @@ static inline void napi_free_frags(struct napi_struct *napi)
 }
 
 extern int netdev_rx_handler_register(struct net_device *dev,
-				      rx_handler_func_t *rx_handler);
+				      rx_handler_func_t *rx_handler,
+				      void *rx_handler_data);
 extern void netdev_rx_handler_unregister(struct net_device *dev);
 
 extern void		netif_nit_deliver(struct sk_buff *skb);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 97ac9da4d76..0d142ed0bbe 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -433,7 +433,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 
 	rcu_assign_pointer(dev->br_port, p);
 
-	err = netdev_rx_handler_register(dev, br_handle_frame);
+	err = netdev_rx_handler_register(dev, br_handle_frame, NULL);
 	if (err)
 		goto err4;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index a1abc10db08..abdb19e547a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2703,6 +2703,7 @@ void netif_nit_deliver(struct sk_buff *skb)
  *	netdev_rx_handler_register - register receive handler
  *	@dev: device to register a handler for
  *	@rx_handler: receive handler to register
+ *	@rx_handler_data: data pointer that is used by rx handler
  *
  *	Register a receive hander for a device. This handler will then be
  *	called from __netif_receive_skb. A negative errno code is returned
@@ -2711,13 +2712,15 @@ void netif_nit_deliver(struct sk_buff *skb)
  *	The caller must hold the rtnl_mutex.
  */
 int netdev_rx_handler_register(struct net_device *dev,
-			       rx_handler_func_t *rx_handler)
+			       rx_handler_func_t *rx_handler,
+			       void *rx_handler_data)
 {
 	ASSERT_RTNL();
 
 	if (dev->rx_handler)
 		return -EBUSY;
 
+	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
 	rcu_assign_pointer(dev->rx_handler, rx_handler);
 
 	return 0;
@@ -2737,6 +2740,7 @@ void netdev_rx_handler_unregister(struct net_device *dev)
 
 	ASSERT_RTNL();
 	rcu_assign_pointer(dev->rx_handler, NULL);
+	rcu_assign_pointer(dev->rx_handler_data, NULL);
 }
 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
 
-- 
cgit v1.2.3-70-g09d2


From a35e2c1b6d90544b3c688783869817628e5f9607 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 15 Jun 2010 03:27:57 +0000
Subject: macvlan: use rx_handler_data pointer to store macvlan_port pointer V2

Register macvlan_port pointer as rx_handler data pointer. As macvlan_port is
removed from struct net_device, another netdev priv_flag is added to indicate
the device serves as a macvlan port.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c     | 28 ++++++++++++++++------------
 include/linux/if.h        |  1 +
 include/linux/netdevice.h |  2 --
 3 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 87a3bf69c4a..e096875aa05 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -40,6 +40,11 @@ struct macvlan_port {
 	struct rcu_head		rcu;
 };
 
+#define macvlan_port_get_rcu(dev) \
+	((struct macvlan_port *) rcu_dereference(dev->rx_handler_data))
+#define macvlan_port_get(dev) ((struct macvlan_port *) dev->rx_handler_data)
+#define macvlan_port_exists(dev) (dev->priv_flags & IFF_MACVLAN_PORT)
+
 static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port,
 					       const unsigned char *addr)
 {
@@ -155,7 +160,7 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
 	struct net_device *dev;
 	unsigned int len;
 
-	port = rcu_dereference(skb->dev->macvlan_port);
+	port = macvlan_port_get_rcu(skb->dev);
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (!src)
@@ -530,14 +535,12 @@ static int macvlan_port_create(struct net_device *dev)
 	INIT_LIST_HEAD(&port->vlans);
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
-	rcu_assign_pointer(dev->macvlan_port, port);
 
-	err = netdev_rx_handler_register(dev, macvlan_handle_frame, NULL);
-	if (err) {
-		rcu_assign_pointer(dev->macvlan_port, NULL);
+	err = netdev_rx_handler_register(dev, macvlan_handle_frame, port);
+	if (err)
 		kfree(port);
-	}
 
+	dev->priv_flags |= IFF_MACVLAN_PORT;
 	return err;
 }
 
@@ -551,10 +554,10 @@ static void macvlan_port_rcu_free(struct rcu_head *head)
 
 static void macvlan_port_destroy(struct net_device *dev)
 {
-	struct macvlan_port *port = dev->macvlan_port;
+	struct macvlan_port *port = macvlan_port_get(dev);
 
+	dev->priv_flags &= ~IFF_MACVLAN_PORT;
 	netdev_rx_handler_unregister(dev);
-	rcu_assign_pointer(dev->macvlan_port, NULL);
 	call_rcu(&port->rcu, macvlan_port_rcu_free);
 }
 
@@ -633,12 +636,12 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	if (!tb[IFLA_ADDRESS])
 		random_ether_addr(dev->dev_addr);
 
-	if (lowerdev->macvlan_port == NULL) {
+	if (!macvlan_port_exists(lowerdev)) {
 		err = macvlan_port_create(lowerdev);
 		if (err < 0)
 			return err;
 	}
-	port = lowerdev->macvlan_port;
+	port = macvlan_port_get(lowerdev);
 
 	vlan->lowerdev = lowerdev;
 	vlan->dev      = dev;
@@ -748,10 +751,11 @@ static int macvlan_device_event(struct notifier_block *unused,
 	struct macvlan_dev *vlan, *next;
 	struct macvlan_port *port;
 
-	port = dev->macvlan_port;
-	if (port == NULL)
+	if (!macvlan_port_exists(dev))
 		return NOTIFY_DONE;
 
+	port = macvlan_port_get(dev);
+
 	switch (event) {
 	case NETDEV_CHANGE:
 		list_for_each_entry(vlan, &port->vlans, list)
diff --git a/include/linux/if.h b/include/linux/if.h
index be350e62a90..31f2e27ebcd 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -73,6 +73,7 @@
 #define IFF_DONT_BRIDGE 0x800		/* disallow bridging this ether dev */
 #define IFF_IN_NETPOLL	0x1000		/* whether we are processing netpoll */
 #define IFF_DISABLE_NETPOLL	0x2000	/* disable netpoll at run-time */
+#define IFF_MACVLAN_PORT	0x4000	/* device used as macvlan port */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 361ff1145cf..5f231de2032 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1049,8 +1049,6 @@ struct net_device {
 
 	/* bridge stuff */
 	struct net_bridge_port	*br_port;
-	/* macvlan */
-	struct macvlan_port	*macvlan_port;
 	/* GARP */
 	struct garp_port	*garp_port;
 
-- 
cgit v1.2.3-70-g09d2


From f350a0a87374418635689471606454abc7beaa3a Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 15 Jun 2010 06:50:45 +0000
Subject: bridge: use rx_handler_data pointer to store net_bridge_port pointer

Register net_bridge_port pointer as rx_handler data pointer. As br_port is
removed from struct net_device, another netdev priv_flag is added to indicate
the device serves as a bridge port. Also, RCU-protected pointers are now
correctly dereferenced in br_fdb.c and in the netfilter parts.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ksz884x.c                       |  2 +-
 drivers/staging/batman-adv/hard-interface.c |  2 +-
 include/linux/if.h                          |  1 +
 include/linux/netdevice.h                   |  2 --
 net/bridge/br_fdb.c                         |  4 ++--
 net/bridge/br_if.c                          | 23 +++++++++++++----------
 net/bridge/br_input.c                       |  9 ++++-----
 net/bridge/br_netfilter.c                   | 11 ++++++-----
 net/bridge/br_netlink.c                     |  9 +++++----
 net/bridge/br_notify.c                      |  5 +++--
 net/bridge/br_private.h                     |  5 +++++
 net/bridge/br_stp_bpdu.c                    |  5 +++--
 net/bridge/netfilter/ebt_redirect.c         |  3 ++-
 net/bridge/netfilter/ebt_ulog.c             |  8 +++++---
 net/bridge/netfilter/ebtables.c             | 11 +++++++----
 net/core/dev.c                              |  3 ++-
 net/netfilter/nfnetlink_log.c               |  6 ++++--
 net/netfilter/nfnetlink_queue.c             |  6 ++++--
 net/wireless/nl80211.c                      |  2 +-
 net/wireless/util.c                         |  4 ++--
 20 files changed, 71 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ksz884x.c b/drivers/net/ksz884x.c
index 7805bbf1d53..62362b4a8c5 100644
--- a/drivers/net/ksz884x.c
+++ b/drivers/net/ksz884x.c
@@ -5718,7 +5718,7 @@ static void dev_set_promiscuous(struct net_device *dev, struct dev_priv *priv,
 		 * from the bridge.
 		 */
 		if ((hw->features & STP_SUPPORT) && !promiscuous &&
-				dev->br_port) {
+		    (dev->priv_flags & IFF_BRIDGE_PORT)) {
 			struct ksz_switch *sw = hw->ksz_switch;
 			int port = priv->port.first_port;
 
diff --git a/drivers/staging/batman-adv/hard-interface.c b/drivers/staging/batman-adv/hard-interface.c
index 7a582e80de1..5ede9c25509 100644
--- a/drivers/staging/batman-adv/hard-interface.c
+++ b/drivers/staging/batman-adv/hard-interface.c
@@ -71,7 +71,7 @@ static int is_valid_iface(struct net_device *net_dev)
 #endif
 
 	/* Device is being bridged */
-	/* if (net_dev->br_port != NULL)
+	/* if (net_dev->priv_flags & IFF_BRIDGE_PORT)
 		return 0; */
 
 	return 1;
diff --git a/include/linux/if.h b/include/linux/if.h
index 31f2e27ebcd..53558ec59e1 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -74,6 +74,7 @@
 #define IFF_IN_NETPOLL	0x1000		/* whether we are processing netpoll */
 #define IFF_DISABLE_NETPOLL	0x2000	/* disable netpoll at run-time */
 #define IFF_MACVLAN_PORT	0x4000	/* device used as macvlan port */
+#define IFF_BRIDGE_PORT	0x8000		/* device used as bridge port */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5f231de2032..a7e0458029b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1047,8 +1047,6 @@ struct net_device {
 	/* mid-layer private */
 	void			*ml_priv;
 
-	/* bridge stuff */
-	struct net_bridge_port	*br_port;
 	/* GARP */
 	struct garp_port	*garp_port;
 
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 26637439965..6818e609b2c 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -242,11 +242,11 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
 	struct net_bridge_fdb_entry *fdb;
 	int ret;
 
-	if (!dev->br_port)
+	if (!br_port_exists(dev))
 		return 0;
 
 	rcu_read_lock();
-	fdb = __br_fdb_get(dev->br_port->br, addr);
+	fdb = __br_fdb_get(br_port_get_rcu(dev)->br, addr);
 	ret = fdb && fdb->dst->dev != dev &&
 		fdb->dst->state == BR_STATE_FORWARDING;
 	rcu_read_unlock();
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 0d142ed0bbe..c03d2c3ff03 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -147,8 +147,9 @@ static void del_nbp(struct net_bridge_port *p)
 
 	list_del_rcu(&p->list);
 
+	dev->priv_flags &= ~IFF_BRIDGE_PORT;
+
 	netdev_rx_handler_unregister(dev);
-	rcu_assign_pointer(dev->br_port, NULL);
 
 	br_multicast_del_port(p);
 
@@ -400,7 +401,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		return -ELOOP;
 
 	/* Device is already being bridged */
-	if (dev->br_port != NULL)
+	if (br_port_exists(dev))
 		return -EBUSY;
 
 	/* No bridging devices that dislike that (e.g. wireless) */
@@ -431,11 +432,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
 		goto err3;
 
-	rcu_assign_pointer(dev->br_port, p);
-
-	err = netdev_rx_handler_register(dev, br_handle_frame, NULL);
+	err = netdev_rx_handler_register(dev, br_handle_frame, p);
 	if (err)
-		goto err4;
+		goto err3;
+
+	dev->priv_flags |= IFF_BRIDGE_PORT;
 
 	dev_disable_lro(dev);
 
@@ -457,8 +458,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	kobject_uevent(&p->kobj, KOBJ_ADD);
 
 	return 0;
-err4:
-	rcu_assign_pointer(dev->br_port, NULL);
 err3:
 	sysfs_remove_link(br->ifobj, p->dev->name);
 err2:
@@ -477,9 +476,13 @@ put_back:
 /* called with RTNL */
 int br_del_if(struct net_bridge *br, struct net_device *dev)
 {
-	struct net_bridge_port *p = dev->br_port;
+	struct net_bridge_port *p;
+
+	if (!br_port_exists(dev))
+		return -EINVAL;
 
-	if (!p || p->br != br)
+	p = br_port_get(dev);
+	if (p->br != br)
 		return -EINVAL;
 
 	del_nbp(p);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 99647d8f95c..f076c9d79d5 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -41,7 +41,7 @@ static int br_pass_frame_up(struct sk_buff *skb)
 int br_handle_frame_finish(struct sk_buff *skb)
 {
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
-	struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
+	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
 	struct net_bridge *br;
 	struct net_bridge_fdb_entry *dst;
 	struct net_bridge_mdb_entry *mdst;
@@ -111,10 +111,9 @@ drop:
 /* note: already called with rcu_read_lock (preempt_disabled) */
 static int br_handle_local_finish(struct sk_buff *skb)
 {
-	struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
+	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
 
-	if (p)
-		br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
+	br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
 	return 0;	 /* process further */
 }
 
@@ -151,7 +150,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
 	if (!skb)
 		return NULL;
 
-	p = rcu_dereference(skb->dev->br_port);
+	p = br_port_get_rcu(skb->dev);
 
 	if (unlikely(is_link_local(dest))) {
 		/* Pause frames shouldn't be passed up by driver anyway */
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 0685b2558ab..f54404ddee5 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -127,16 +127,17 @@ void br_netfilter_rtable_init(struct net_bridge *br)
 
 static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
 {
-	struct net_bridge_port *port = rcu_dereference(dev->br_port);
-
-	return port ? &port->br->fake_rtable : NULL;
+	if (!br_port_exists(dev))
+		return NULL;
+	return &br_port_get_rcu(dev)->br->fake_rtable;
 }
 
 static inline struct net_device *bridge_parent(const struct net_device *dev)
 {
-	struct net_bridge_port *port = rcu_dereference(dev->br_port);
+	if (!br_port_exists(dev))
+		return NULL;
 
-	return port ? port->br->dev : NULL;
+	return br_port_get_rcu(dev)->br->dev;
 }
 
 static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index fe0a79018ab..4a6a378c84e 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -120,10 +120,11 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	idx = 0;
 	for_each_netdev(net, dev) {
 		/* not a bridge port */
-		if (dev->br_port == NULL || idx < cb->args[0])
+		if (!br_port_exists(dev) || idx < cb->args[0])
 			goto skip;
 
-		if (br_fill_ifinfo(skb, dev->br_port, NETLINK_CB(cb->skb).pid,
+		if (br_fill_ifinfo(skb, br_port_get(dev),
+				   NETLINK_CB(cb->skb).pid,
 				   cb->nlh->nlmsg_seq, RTM_NEWLINK,
 				   NLM_F_MULTI) < 0)
 			break;
@@ -168,9 +169,9 @@ static int br_rtm_setlink(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 	if (!dev)
 		return -ENODEV;
 
-	p = dev->br_port;
-	if (!p)
+	if (!br_port_exists(dev))
 		return -EINVAL;
+	p = br_port_get(dev);
 
 	/* if kernel STP is running, don't allow changes */
 	if (p->br->stp_enabled == BR_KERNEL_STP)
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 717e1fd6133..404d4e14c6a 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -32,14 +32,15 @@ struct notifier_block br_device_notifier = {
 static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
 {
 	struct net_device *dev = ptr;
-	struct net_bridge_port *p = dev->br_port;
+	struct net_bridge_port *p = br_port_get(dev);
 	struct net_bridge *br;
 	int err;
 
 	/* not a port of a bridge */
-	if (p == NULL)
+	if (!br_port_exists(dev))
 		return NOTIFY_DONE;
 
+	p = br_port_get(dev);
 	br = p->br;
 
 	switch (event) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0f5394c4f2f..f6bc979b113 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -150,6 +150,11 @@ struct net_bridge_port
 #endif
 };
 
+#define br_port_get_rcu(dev) \
+	((struct net_bridge_port *) rcu_dereference(dev->rx_handler_data))
+#define br_port_get(dev) ((struct net_bridge_port *) dev->rx_handler_data)
+#define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
+
 struct br_cpu_netstats {
 	unsigned long	rx_packets;
 	unsigned long	rx_bytes;
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 217bd225a42..70aecb48fb6 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -137,12 +137,13 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
 		struct net_device *dev)
 {
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
-	struct net_bridge_port *p = rcu_dereference(dev->br_port);
+	struct net_bridge_port *p;
 	struct net_bridge *br;
 	const unsigned char *buf;
 
-	if (!p)
+	if (!br_port_exists(dev))
 		goto err;
+	p = br_port_get_rcu(dev);
 
 	if (!pskb_may_pull(skb, 4))
 		goto err;
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 9e19166ba45..46624bb6d9b 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -24,8 +24,9 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 		return EBT_DROP;
 
 	if (par->hooknum != NF_BR_BROUTING)
+		/* rcu_read_lock()ed by nf_hook_slow */
 		memcpy(eth_hdr(skb)->h_dest,
-		       par->in->br_port->br->dev->dev_addr, ETH_ALEN);
+		       br_port_get_rcu(par->in)->br->dev->dev_addr, ETH_ALEN);
 	else
 		memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN);
 	skb->pkt_type = PACKET_HOST;
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index ae3c7cef148..26377e96fa1 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -177,8 +177,9 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 	if (in) {
 		strcpy(pm->physindev, in->name);
 		/* If in isn't a bridge, then physindev==indev */
-		if (in->br_port)
-			strcpy(pm->indev, in->br_port->br->dev->name);
+		if (br_port_exists(in))
+			/* rcu_read_lock()ed by nf_hook_slow */
+			strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name);
 		else
 			strcpy(pm->indev, in->name);
 	} else
@@ -187,7 +188,8 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 	if (out) {
 		/* If out exists, then out is a bridge port */
 		strcpy(pm->physoutdev, out->name);
-		strcpy(pm->outdev, out->br_port->br->dev->name);
+		/* rcu_read_lock()ed by nf_hook_slow */
+		strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name);
 	} else
 		pm->outdev[0] = pm->physoutdev[0] = '\0';
 
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 59ca00e40de..bcc102e3be4 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -140,11 +140,14 @@ ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h,
 		return 1;
 	if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT))
 		return 1;
-	if ((!in || !in->br_port) ? 0 : FWINV2(ebt_dev_check(
-	   e->logical_in, in->br_port->br->dev), EBT_ILOGICALIN))
+	/* rcu_read_lock()ed by nf_hook_slow */
+	if (in && br_port_exists(in) &&
+	    FWINV2(ebt_dev_check(e->logical_in, br_port_get_rcu(in)->br->dev),
+		   EBT_ILOGICALIN))
 		return 1;
-	if ((!out || !out->br_port) ? 0 : FWINV2(ebt_dev_check(
-	   e->logical_out, out->br_port->br->dev), EBT_ILOGICALOUT))
+	if (out && br_port_exists(out) &&
+	    FWINV2(ebt_dev_check(e->logical_out, br_port_get_rcu(out)->br->dev),
+		   EBT_ILOGICALOUT))
 		return 1;
 
 	if (e->bitmask & EBT_SOURCEMAC) {
diff --git a/net/core/dev.c b/net/core/dev.c
index abdb19e547a..5902426ef58 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2765,7 +2765,8 @@ int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
 	if (master->priv_flags & IFF_MASTER_ARPMON)
 		dev->last_rx = jiffies;
 
-	if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
+	if ((master->priv_flags & IFF_MASTER_ALB) &&
+	    (master->priv_flags & IFF_BRIDGE_PORT)) {
 		/* Do address unmangle. The local destination address
 		 * will be always the one master has. Provides the right
 		 * functionality in a bridge.
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index fc9a211e629..e0504e90a0f 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -403,8 +403,9 @@ __build_packet_message(struct nfulnl_instance *inst,
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
 				     htonl(indev->ifindex));
 			/* this is the bridge group "brX" */
+			/* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
-				     htonl(indev->br_port->br->dev->ifindex));
+				     htonl(br_port_get_rcu(indev)->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is bridge group, we need to look for
 			 * physical device (when called from ipv4) */
@@ -430,8 +431,9 @@ __build_packet_message(struct nfulnl_instance *inst,
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
 				     htonl(outdev->ifindex));
 			/* this is the bridge group "brX" */
+			/* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
-				     htonl(outdev->br_port->br->dev->ifindex));
+				     htonl(br_port_get_rcu(outdev)->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is a bridge group, we need to look
 			 * for physical device (when called from ipv4) */
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 12e1ab37fcd..cc3ae861e8f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -296,8 +296,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
 				     htonl(indev->ifindex));
 			/* this is the bridge group "brX" */
+			/* rcu_read_lock()ed by __nf_queue */
 			NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
-				     htonl(indev->br_port->br->dev->ifindex));
+				     htonl(br_port_get_rcu(indev)->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is bridge group, we need to look for
 			 * physical device (when called from ipv4) */
@@ -321,8 +322,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
 				     htonl(outdev->ifindex));
 			/* this is the bridge group "brX" */
+			/* rcu_read_lock()ed by __nf_queue */
 			NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
-				     htonl(outdev->br_port->br->dev->ifindex));
+				     htonl(br_port_get_rcu(outdev)->br->dev->ifindex));
 		} else {
 			/* Case 2: outdev is bridge group, we need to look for
 			 * physical output device (when called from ipv4) */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 90ab3c8519b..3a7b8a2f2d5 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1107,7 +1107,7 @@ static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev,
 			       enum nl80211_iftype iftype)
 {
 	if (!use_4addr) {
-		if (netdev && netdev->br_port)
+		if (netdev && (netdev->priv_flags & IFF_BRIDGE_PORT))
 			return -EBUSY;
 		return 0;
 	}
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 3416373a9c0..0c8a1e8b769 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -770,8 +770,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 		return -EOPNOTSUPP;
 
 	/* if it's part of a bridge, reject changing type to station/ibss */
-	if (dev->br_port && (ntype == NL80211_IFTYPE_ADHOC ||
-			     ntype == NL80211_IFTYPE_STATION))
+	if ((dev->priv_flags & IFF_BRIDGE_PORT) &&
+	    (ntype == NL80211_IFTYPE_ADHOC || ntype == NL80211_IFTYPE_STATION))
 		return -EBUSY;
 
 	if (ntype != otype) {
-- 
cgit v1.2.3-70-g09d2


From a3433f35a55f7604742cae620c6dc6edfc70db6a Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sat, 12 Jun 2010 14:01:43 +0000
Subject: tcp: unify tcp flag macros

Unify the TCP flag macros: TCPHDR_FIN, TCPHDR_SYN, TCPHDR_RST, TCPHDR_PSH,
TCPHDR_ACK, TCPHDR_URG, TCPHDR_ECE and TCPHDR_CWR. TCPCB_FLAG_* are replaced
with the corresponding TCPHDR_*.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/tcp.h                      |   24 ++++++-------
 net/ipv4/tcp.c                         |    8 ++--
 net/ipv4/tcp_input.c                   |    2 -
 net/ipv4/tcp_output.c                  |   59 ++++++++++++++++-----------------
 net/netfilter/nf_conntrack_proto_tcp.c |   32 ++++++-----------
 net/netfilter/xt_TCPMSS.c              |    4 --
 6 files changed, 58 insertions(+), 71 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h                      | 24 +++++++-------
 net/ipv4/tcp.c                         |  8 ++---
 net/ipv4/tcp_input.c                   |  2 +-
 net/ipv4/tcp_output.c                  | 59 +++++++++++++++++-----------------
 net/netfilter/nf_conntrack_proto_tcp.c | 32 +++++++-----------
 net/netfilter/xt_TCPMSS.c              |  4 +--
 6 files changed, 58 insertions(+), 71 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 57316648441..9e68e25c8b8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -602,6 +602,17 @@ extern u32	__tcp_select_window(struct sock *sk);
  */
 #define tcp_time_stamp		((__u32)(jiffies))
 
+#define tcp_flag_byte(th) (((u_int8_t *)th)[13])
+
+#define TCPHDR_FIN 0x01
+#define TCPHDR_SYN 0x02
+#define TCPHDR_RST 0x04
+#define TCPHDR_PSH 0x08
+#define TCPHDR_ACK 0x10
+#define TCPHDR_URG 0x20
+#define TCPHDR_ECE 0x40
+#define TCPHDR_CWR 0x80
+
 /* This is what the send packet queuing engine uses to pass
  * TCP per-packet control information to the transmission
  * code.  We also store the host-order sequence numbers in
@@ -620,19 +631,6 @@ struct tcp_skb_cb {
 	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
 	__u32		when;		/* used to compute rtt's	*/
 	__u8		flags;		/* TCP header flags.		*/
-
-	/* NOTE: These must match up to the flags byte in a
-	 *       real TCP header.
-	 */
-#define TCPCB_FLAG_FIN		0x01
-#define TCPCB_FLAG_SYN		0x02
-#define TCPCB_FLAG_RST		0x04
-#define TCPCB_FLAG_PSH		0x08
-#define TCPCB_FLAG_ACK		0x10
-#define TCPCB_FLAG_URG		0x20
-#define TCPCB_FLAG_ECE		0x40
-#define TCPCB_FLAG_CWR		0x80
-
 	__u8		sacked;		/* State flags for SACK/FACK.	*/
 #define TCPCB_SACKED_ACKED	0x01	/* SKB ACK'd by a SACK block	*/
 #define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 49d0d2b8900..779d40c3b96 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -511,7 +511,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 
 static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
 {
-	TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+	TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
 	tp->pushed_seq = tp->write_seq;
 }
 
@@ -527,7 +527,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
 
 	skb->csum    = 0;
 	tcb->seq     = tcb->end_seq = tp->write_seq;
-	tcb->flags   = TCPCB_FLAG_ACK;
+	tcb->flags   = TCPHDR_ACK;
 	tcb->sacked  = 0;
 	skb_header_release(skb);
 	tcp_add_write_queue_tail(sk, skb);
@@ -815,7 +815,7 @@ new_segment:
 		skb_shinfo(skb)->gso_segs = 0;
 
 		if (!copied)
-			TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
+			TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
 
 		copied += copy;
 		poffset += copy;
@@ -1061,7 +1061,7 @@ new_segment:
 			}
 
 			if (!copied)
-				TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
+				TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
 
 			tp->write_seq += copy;
 			TCP_SKB_CB(skb)->end_seq += copy;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 548d575e6cc..04334661fa2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3286,7 +3286,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		 * connection startup slow start one packet too
 		 * quickly.  This is severely frowned upon behavior.
 		 */
-		if (!(scb->flags & TCPCB_FLAG_SYN)) {
+		if (!(scb->flags & TCPHDR_SYN)) {
 			flag |= FLAG_DATA_ACKED;
 		} else {
 			flag |= FLAG_SYN_ACKED;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4ed957f201..51d316dbb05 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -294,9 +294,9 @@ static u16 tcp_select_window(struct sock *sk)
 /* Packet ECN state for a SYN-ACK */
 static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
 {
-	TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR;
+	TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR;
 	if (!(tp->ecn_flags & TCP_ECN_OK))
-		TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
+		TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE;
 }
 
 /* Packet ECN state for a SYN.  */
@@ -306,7 +306,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
 
 	tp->ecn_flags = 0;
 	if (sysctl_tcp_ecn == 1) {
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;
+		TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR;
 		tp->ecn_flags = TCP_ECN_OK;
 	}
 }
@@ -361,7 +361,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
 	skb_shinfo(skb)->gso_type = 0;
 
 	TCP_SKB_CB(skb)->seq = seq;
-	if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN))
+	if (flags & (TCPHDR_SYN | TCPHDR_FIN))
 		seq++;
 	TCP_SKB_CB(skb)->end_seq = seq;
 }
@@ -820,7 +820,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	tcb = TCP_SKB_CB(skb);
 	memset(&opts, 0, sizeof(opts));
 
-	if (unlikely(tcb->flags & TCPCB_FLAG_SYN))
+	if (unlikely(tcb->flags & TCPHDR_SYN))
 		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
 	else
 		tcp_options_size = tcp_established_options(sk, skb, &opts,
@@ -843,7 +843,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	*(((__be16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
 					tcb->flags);
 
-	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+	if (unlikely(tcb->flags & TCPHDR_SYN)) {
 		/* RFC1323: The window in SYN & SYN/ACK segments
 		 * is never scaled.
 		 */
@@ -866,7 +866,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	}
 
 	tcp_options_write((__be32 *)(th + 1), tp, &opts);
-	if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0))
+	if (likely((tcb->flags & TCPHDR_SYN) == 0))
 		TCP_ECN_send(sk, skb, tcp_header_size);
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -880,7 +880,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	icsk->icsk_af_ops->send_check(sk, skb);
 
-	if (likely(tcb->flags & TCPCB_FLAG_ACK))
+	if (likely(tcb->flags & TCPHDR_ACK))
 		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
 
 	if (skb->len != tcp_header_size)
@@ -1023,7 +1023,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->flags;
-	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
+	TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
 	TCP_SKB_CB(buff)->flags = flags;
 	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
 
@@ -1328,8 +1328,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
 	u32 in_flight, cwnd;
 
 	/* Don't be strict about the congestion window for the final FIN.  */
-	if ((TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
-	    tcp_skb_pcount(skb) == 1)
+	if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1)
 		return 1;
 
 	in_flight = tcp_packets_in_flight(tp);
@@ -1398,7 +1397,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
 	 * Nagle can be ignored during F-RTO too (see RFC4138).
 	 */
 	if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
-	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
+	    (TCP_SKB_CB(skb)->flags & TCPHDR_FIN))
 		return 1;
 
 	if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
@@ -1487,7 +1486,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->flags;
-	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
+	TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
 	TCP_SKB_CB(buff)->flags = flags;
 
 	/* This packet was never sent out yet, so no SACK bits. */
@@ -1518,7 +1517,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 send_win, cong_win, limit, in_flight;
 
-	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
+	if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
 		goto send_now;
 
 	if (icsk->icsk_ca_state != TCP_CA_Open)
@@ -1644,7 +1643,7 @@ static int tcp_mtu_probe(struct sock *sk)
 
 	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
-	TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK;
+	TCP_SKB_CB(nskb)->flags = TCPHDR_ACK;
 	TCP_SKB_CB(nskb)->sacked = 0;
 	nskb->csum = 0;
 	nskb->ip_summed = skb->ip_summed;
@@ -1669,7 +1668,7 @@ static int tcp_mtu_probe(struct sock *sk)
 			sk_wmem_free_skb(sk, skb);
 		} else {
 			TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
-						   ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
+						   ~(TCPHDR_FIN|TCPHDR_PSH);
 			if (!skb_shinfo(skb)->nr_frags) {
 				skb_pull(skb, copy);
 				if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -2020,7 +2019,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
 
 	if (!sysctl_tcp_retrans_collapse)
 		return;
-	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)
+	if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN)
 		return;
 
 	tcp_for_write_queue_from_safe(skb, tmp, sk) {
@@ -2112,7 +2111,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 * since it is cheap to do so and saves bytes on the network.
 	 */
 	if (skb->len > 0 &&
-	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
+	    (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) &&
 	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
 		if (!pskb_trim(skb, 0)) {
 			/* Reuse, even though it does some unnecessary work */
@@ -2301,7 +2300,7 @@ void tcp_send_fin(struct sock *sk)
 	mss_now = tcp_current_mss(sk);
 
 	if (tcp_send_head(sk) != NULL) {
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
+		TCP_SKB_CB(skb)->flags |= TCPHDR_FIN;
 		TCP_SKB_CB(skb)->end_seq++;
 		tp->write_seq++;
 	} else {
@@ -2318,7 +2317,7 @@ void tcp_send_fin(struct sock *sk)
 		skb_reserve(skb, MAX_TCP_HEADER);
 		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
 		tcp_init_nondata_skb(skb, tp->write_seq,
-				     TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
+				     TCPHDR_ACK | TCPHDR_FIN);
 		tcp_queue_skb(sk, skb);
 	}
 	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
@@ -2343,7 +2342,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	/* Reserve space for headers and prepare control bits. */
 	skb_reserve(skb, MAX_TCP_HEADER);
 	tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
-			     TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
+			     TCPHDR_ACK | TCPHDR_RST);
 	/* Send it off. */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	if (tcp_transmit_skb(sk, skb, 0, priority))
@@ -2363,11 +2362,11 @@ int tcp_send_synack(struct sock *sk)
 	struct sk_buff *skb;
 
 	skb = tcp_write_queue_head(sk);
-	if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) {
+	if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) {
 		printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
 		return -EFAULT;
 	}
-	if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) {
+	if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) {
 		if (skb_cloned(skb)) {
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
 			if (nskb == NULL)
@@ -2381,7 +2380,7 @@ int tcp_send_synack(struct sock *sk)
 			skb = nskb;
 		}
 
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK;
+		TCP_SKB_CB(skb)->flags |= TCPHDR_ACK;
 		TCP_ECN_send_synack(tcp_sk(sk), skb);
 	}
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
@@ -2460,7 +2459,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	 * not even correctly set)
 	 */
 	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
-			     TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
+			     TCPHDR_SYN | TCPHDR_ACK);
 
 	if (OPTION_COOKIE_EXTENSION & opts.options) {
 		if (s_data_desired) {
@@ -2592,7 +2591,7 @@ int tcp_connect(struct sock *sk)
 	skb_reserve(buff, MAX_TCP_HEADER);
 
 	tp->snd_nxt = tp->write_seq;
-	tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
+	tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
 	TCP_ECN_send_syn(sk, buff);
 
 	/* Send it off. */
@@ -2698,7 +2697,7 @@ void tcp_send_ack(struct sock *sk)
 
 	/* Reserve space for headers and prepare control bits. */
 	skb_reserve(buff, MAX_TCP_HEADER);
-	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK);
+	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
 
 	/* Send it off, this clears delayed acks for us. */
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
@@ -2732,7 +2731,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	 * end to send an ack.  Don't queue or clone SKB, just
 	 * send it.
 	 */
-	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK);
+	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
@@ -2762,13 +2761,13 @@ int tcp_write_wakeup(struct sock *sk)
 		if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
 		    skb->len > mss) {
 			seg_size = min(seg_size, mss);
-			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+			TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
 			if (tcp_fragment(sk, skb, seg_size, mss))
 				return -1;
 		} else if (!tcp_skb_pcount(skb))
 			tcp_set_skb_tso_segs(sk, skb, mss);
 
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+		TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 		if (!err)
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 9dd8cd4fb6e..802dbffae8b 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -736,27 +736,19 @@ static bool tcp_in_window(const struct nf_conn *ct,
 	return res;
 }
 
-#define	TH_FIN	0x01
-#define	TH_SYN	0x02
-#define	TH_RST	0x04
-#define	TH_PUSH	0x08
-#define	TH_ACK	0x10
-#define	TH_URG	0x20
-#define	TH_ECE	0x40
-#define	TH_CWR	0x80
-
 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */
-static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
+static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
+				 TCPHDR_URG) + 1] =
 {
-	[TH_SYN]			= 1,
-	[TH_SYN|TH_URG]			= 1,
-	[TH_SYN|TH_ACK]			= 1,
-	[TH_RST]			= 1,
-	[TH_RST|TH_ACK]			= 1,
-	[TH_FIN|TH_ACK]			= 1,
-	[TH_FIN|TH_ACK|TH_URG]		= 1,
-	[TH_ACK]			= 1,
-	[TH_ACK|TH_URG]			= 1,
+	[TCPHDR_SYN]				= 1,
+	[TCPHDR_SYN|TCPHDR_URG]			= 1,
+	[TCPHDR_SYN|TCPHDR_ACK]			= 1,
+	[TCPHDR_RST]				= 1,
+	[TCPHDR_RST|TCPHDR_ACK]			= 1,
+	[TCPHDR_FIN|TCPHDR_ACK]			= 1,
+	[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]	= 1,
+	[TCPHDR_ACK]				= 1,
+	[TCPHDR_ACK|TCPHDR_URG]			= 1,
 };
 
 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
@@ -803,7 +795,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
 	}
 
 	/* Check TCP flags. */
-	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
+	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
 	if (!tcp_valid_flags[tcpflags]) {
 		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 1841388c770..eb81c380da1 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -220,15 +220,13 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 }
 #endif
 
-#define TH_SYN 0x02
-
 /* Must specify -p tcp --syn */
 static inline bool find_syn_match(const struct xt_entry_match *m)
 {
 	const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data;
 
 	if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
-	    tcpinfo->flg_cmp & TH_SYN &&
+	    tcpinfo->flg_cmp & TCPHDR_SYN &&
 	    !(tcpinfo->invflags & XT_TCP_INV_FLAGS))
 		return true;
 
-- 
cgit v1.2.3-70-g09d2


From ff61638105db6f5832ef8700436ba6aa6d3a2fda Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Wed, 9 Jun 2010 09:51:52 +0300
Subject: mac80211: Fix ps-qos network latency handling

The ps-qos latency handling is broken. It uses predetermined latency values
to select specific dynamic PS timeouts. With common AP configurations, these
values overlap with beacon interval and are therefore essentially useless
(for network latencies less than the beacon interval, PSM is disabled.)

This patch remedies the problem by replacing the predetermined network latency
values with one high value (1900ms) which is used to trigger full PSM. For
backwards compatibility, the value 2000ms is still mapped to a dynamic ps
timeout of 100ms.

Currently also the mac80211 internal value for storing user space configured
dynamic PSM values is incorrectly in the driver visible ieee80211_conf struct.
Move it to the ieee80211_local struct.

Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h     |  5 +----
 net/mac80211/cfg.c         |  5 ++---
 net/mac80211/ieee80211_i.h |  6 ++++++
 net/mac80211/main.c        |  2 +-
 net/mac80211/mlme.c        | 16 ++++++----------
 5 files changed, 16 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 3a47877f496..fe1a3a60337 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -695,9 +695,6 @@ enum ieee80211_smps_mode {
  * @dynamic_ps_timeout: The dynamic powersave timeout (in ms), see the
  *	powersave documentation below. This variable is valid only when
  *	the CONF_PS flag is set.
- * @dynamic_ps_forced_timeout: The dynamic powersave timeout (in ms) configured
- *	by cfg80211 (essentially, wext) If set, this value overrules the value
- *	chosen by mac80211 based on ps qos network latency.
  *
  * @power_level: requested transmit power (in dBm)
  *
@@ -717,7 +714,7 @@ enum ieee80211_smps_mode {
  */
 struct ieee80211_conf {
 	u32 flags;
-	int power_level, dynamic_ps_timeout, dynamic_ps_forced_timeout;
+	int power_level, dynamic_ps_timeout;
 	int max_sleep_period;
 
 	u16 listen_interval;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 59f597d0c6a..003b6addf5f 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1446,7 +1446,6 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_conf *conf = &local->hw.conf;
 
 	if (sdata->vif.type != NL80211_IFTYPE_STATION)
 		return -EOPNOTSUPP;
@@ -1455,11 +1454,11 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
 		return -EOPNOTSUPP;
 
 	if (enabled == sdata->u.mgd.powersave &&
-	    timeout == conf->dynamic_ps_forced_timeout)
+	    timeout == local->dynamic_ps_forced_timeout)
 		return 0;
 
 	sdata->u.mgd.powersave = enabled;
-	conf->dynamic_ps_forced_timeout = timeout;
+	local->dynamic_ps_forced_timeout = timeout;
 
 	/* no change, but if automatic follow powersave */
 	mutex_lock(&sdata->u.mgd.mtx);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9b3c3f971d2..fb5430188e8 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -850,6 +850,12 @@ struct ieee80211_local {
 	struct notifier_block network_latency_notifier;
 	struct notifier_block ifa_notifier;
 
+	/*
+	 * The dynamic ps timeout configured from user space via WEXT -
+	 * this will override whatever chosen by mac80211 internally.
+	 */
+	int dynamic_ps_forced_timeout;
+
 	int user_power_level; /* in dBm */
 	int power_constr_level; /* in dBm */
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a1bf46c64b9..edf7aff9326 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -630,7 +630,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
 	local->hw.conf.listen_interval = local->hw.max_listen_interval;
 
-	local->hw.conf.dynamic_ps_forced_timeout = -1;
+	local->dynamic_ps_forced_timeout = -1;
 
 	result = sta_info_start(local);
 	if (result < 0)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 74479c2d12d..1c0d8fce08d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -561,23 +561,19 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
 		beaconint_us = ieee80211_tu_to_usec(
 					found->vif.bss_conf.beacon_int);
 
-		timeout = local->hw.conf.dynamic_ps_forced_timeout;
+		timeout = local->dynamic_ps_forced_timeout;
 		if (timeout < 0) {
 			/*
+			 * Go to full PSM if the user configures a very low
+			 * latency requirement.
 			 * The 2 second value is there for compatibility until
 			 * the PM_QOS_NETWORK_LATENCY is configured with real
 			 * values.
 			 */
-			if (latency == 2000000000)
-				timeout = 100;
-			else if (latency <= 50000)
-				timeout = 300;
-			else if (latency <= 100000)
-				timeout = 100;
-			else if (latency <= 500000)
-				timeout = 50;
-			else
+			if (latency > 1900000000 && latency != 2000000000)
 				timeout = 0;
+			else
+				timeout = 100;
 		}
 		local->hw.conf.dynamic_ps_timeout = timeout;
 
-- 
cgit v1.2.3-70-g09d2


From aa1039e73cc2cf834e99c09d2033d5d2675357b9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 15 Jun 2010 08:23:14 +0000
Subject: inetpeer: RCU conversion

inetpeer currently uses an AVL tree protected by an rwlock.

It's possible to make most lookups use RCU

1) Add a struct rcu_head to struct inet_peer

2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().

3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.

4) add an smp_wmb() in link_to_pool() right before node insert.

5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.

6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.

7) inet_getpeer() first attempts lockless lookup.
   Note this lookup can fail even if the target is in the AVL tree, because a
concurrent writer can leave the tree in an inconsistent state.
   If this attempt fails, the lock is taken and a regular lookup is performed
again.

8) convert peers.lock from rwlock to a spinlock

9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, it is probably possible to revert this part, if the rcu
field is put in a union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp, these fields being manipulated only with refcnt > 0.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h |   1 +
 net/ipv4/inetpeer.c    | 164 ++++++++++++++++++++++++++++---------------------
 2 files changed, 96 insertions(+), 69 deletions(-)

(limited to 'include')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 87b1df0d4d8..61740473042 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -26,6 +26,7 @@ struct inet_peer {
 	atomic_t		ip_id_count;	/* IP ID for the next packet */
 	__u32			tcp_ts;
 	__u32			tcp_ts_stamp;
+	struct rcu_head		rcu;
 };
 
 void			inet_initpeers(void) __init;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 035673fd42d..58fbc7e2475 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -51,8 +51,8 @@
  *  lookups performed with disabled BHs.
  *
  *  Serialisation issues.
- *  1.  Nodes may appear in the tree only with the pool write lock held.
- *  2.  Nodes may disappear from the tree only with the pool write lock held
+ *  1.  Nodes may appear in the tree only with the pool lock held.
+ *  2.  Nodes may disappear from the tree only with the pool lock held
  *      AND reference count being 0.
  *  3.  Nodes appears and disappears from unused node list only under
  *      "inet_peer_unused_lock".
@@ -80,11 +80,11 @@ static const struct inet_peer peer_fake_node = {
 
 static struct {
 	struct inet_peer *root;
-	rwlock_t	lock;
+	spinlock_t	lock;
 	int		total;
 } peers = {
 	.root		= peer_avl_empty,
-	.lock		= __RW_LOCK_UNLOCKED(peers.lock),
+	.lock		= __SPIN_LOCK_UNLOCKED(peers.lock),
 	.total		= 0,
 };
 #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
@@ -129,7 +129,7 @@ void __init inet_initpeers(void)
 
 	peer_cachep = kmem_cache_create("inet_peer_cache",
 			sizeof(struct inet_peer),
-			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+			0, SLAB_PANIC,
 			NULL);
 
 	/* All the timers, started at system startup tend
@@ -153,16 +153,13 @@ static void unlink_from_unused(struct inet_peer *p)
 
 /*
  * Called with local BH disabled and the pool lock held.
- * _stack is known to be NULL or not at compile time,
- * so compiler will optimize the if (_stack) tests.
  */
 #define lookup(_daddr, _stack) 					\
 ({								\
 	struct inet_peer *u, **v;				\
-	if (_stack != NULL) {					\
-		stackptr = _stack;				\
-		*stackptr++ = &peers.root;			\
-	}							\
+								\
+	stackptr = _stack;					\
+	*stackptr++ = &peers.root;				\
 	for (u = peers.root; u != peer_avl_empty; ) {		\
 		if (_daddr == u->v4daddr)			\
 			break;					\
@@ -170,14 +167,41 @@ static void unlink_from_unused(struct inet_peer *p)
 			v = &u->avl_left;			\
 		else						\
 			v = &u->avl_right;			\
-		if (_stack != NULL)				\
-			*stackptr++ = v;			\
+		*stackptr++ = v;				\
 		u = *v;						\
 	}							\
 	u;							\
 })
 
-/* Called with local BH disabled and the pool write lock held. */
+/*
+ * Called with rcu_read_lock_bh()
+ * Because we hold no lock against a writer, its quite possible we fall
+ * in an endless loop.
+ * But every pointer we follow is guaranteed to be valid thanks to RCU.
+ * We exit from this function if number of links exceeds PEER_MAXDEPTH
+ */
+static struct inet_peer *lookup_rcu_bh(__be32 daddr)
+{
+	struct inet_peer *u = rcu_dereference_bh(peers.root);
+	int count = 0;
+
+	while (u != peer_avl_empty) {
+		if (daddr == u->v4daddr) {
+			if (unlikely(!atomic_inc_not_zero(&u->refcnt)))
+				u = NULL;
+			return u;
+		}
+		if ((__force __u32)daddr < (__force __u32)u->v4daddr)
+			u = rcu_dereference_bh(u->avl_left);
+		else
+			u = rcu_dereference_bh(u->avl_right);
+		if (unlikely(++count == PEER_MAXDEPTH))
+			break;
+	}
+	return NULL;
+}
+
+/* Called with local BH disabled and the pool lock held. */
 #define lookup_rightempty(start)				\
 ({								\
 	struct inet_peer *u, **v;				\
@@ -191,9 +215,10 @@ static void unlink_from_unused(struct inet_peer *p)
 	u;							\
 })
 
-/* Called with local BH disabled and the pool write lock held.
+/* Called with local BH disabled and the pool lock held.
  * Variable names are the proof of operation correctness.
- * Look into mm/map_avl.c for more detail description of the ideas.  */
+ * Look into mm/map_avl.c for more detail description of the ideas.
+ */
 static void peer_avl_rebalance(struct inet_peer **stack[],
 		struct inet_peer ***stackend)
 {
@@ -269,16 +294,22 @@ static void peer_avl_rebalance(struct inet_peer **stack[],
 	}
 }
 
-/* Called with local BH disabled and the pool write lock held. */
+/* Called with local BH disabled and the pool lock held. */
 #define link_to_pool(n)						\
 do {								\
 	n->avl_height = 1;					\
 	n->avl_left = peer_avl_empty;				\
 	n->avl_right = peer_avl_empty;				\
+	smp_wmb(); /* lockless readers can catch us now */	\
 	**--stackptr = n;					\
 	peer_avl_rebalance(stack, stackptr);			\
 } while (0)
 
+static void inetpeer_free_rcu(struct rcu_head *head)
+{
+	kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
+}
+
 /* May be called with local BH enabled. */
 static void unlink_from_pool(struct inet_peer *p)
 {
@@ -286,13 +317,13 @@ static void unlink_from_pool(struct inet_peer *p)
 
 	do_free = 0;
 
-	write_lock_bh(&peers.lock);
+	spin_lock_bh(&peers.lock);
 	/* Check the reference counter.  It was artificially incremented by 1
-	 * in cleanup() function to prevent sudden disappearing.  If the
-	 * reference count is still 1 then the node is referenced only as `p'
-	 * here and from the pool.  So under the exclusive pool lock it's safe
-	 * to remove the node and free it later. */
-	if (atomic_read(&p->refcnt) == 1) {
+	 * in cleanup() function to prevent sudden disappearing.  If we can
+	 * atomically (because of lockless readers) take this last reference,
+	 * it's safe to remove the node and free it later.
+	 */
+	if (atomic_cmpxchg(&p->refcnt, 1, 0) == 1) {
 		struct inet_peer **stack[PEER_MAXDEPTH];
 		struct inet_peer ***stackptr, ***delp;
 		if (lookup(p->v4daddr, stack) != p)
@@ -321,17 +352,18 @@ static void unlink_from_pool(struct inet_peer *p)
 		peers.total--;
 		do_free = 1;
 	}
-	write_unlock_bh(&peers.lock);
+	spin_unlock_bh(&peers.lock);
 
 	if (do_free)
-		kmem_cache_free(peer_cachep, p);
+		call_rcu_bh(&p->rcu, inetpeer_free_rcu);
 	else
 		/* The node is used again.  Decrease the reference counter
 		 * back.  The loop "cleanup -> unlink_from_unused
 		 *   -> unlink_from_pool -> putpeer -> link_to_unused
 		 *   -> cleanup (for the same node)"
 		 * doesn't really exist because the entry will have a
-		 * recent deletion time and will not be cleaned again soon. */
+		 * recent deletion time and will not be cleaned again soon.
+		 */
 		inet_putpeer(p);
 }
 
@@ -375,62 +407,56 @@ static int cleanup_once(unsigned long ttl)
 /* Called with or without local BH being disabled. */
 struct inet_peer *inet_getpeer(__be32 daddr, int create)
 {
-	struct inet_peer *p, *n;
+	struct inet_peer *p;
 	struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
 
-	/* Look up for the address quickly. */
-	read_lock_bh(&peers.lock);
-	p = lookup(daddr, NULL);
-	if (p != peer_avl_empty)
-		atomic_inc(&p->refcnt);
-	read_unlock_bh(&peers.lock);
+	/* Look up for the address quickly, lockless.
+	 * Because of a concurrent writer, we might not find an existing entry.
+	 */
+	rcu_read_lock_bh();
+	p = lookup_rcu_bh(daddr);
+	rcu_read_unlock_bh();
+
+	if (p) {
+		/* The existing node has been found.
+		 * Remove the entry from unused list if it was there.
+		 */
+		unlink_from_unused(p);
+		return p;
+	}
 
+	/* retry an exact lookup, taking the lock before.
+	 * At least, nodes should be hot in our cache.
+	 */
+	spin_lock_bh(&peers.lock);
+	p = lookup(daddr, stack);
 	if (p != peer_avl_empty) {
-		/* The existing node has been found. */
+		atomic_inc(&p->refcnt);
+		spin_unlock_bh(&peers.lock);
 		/* Remove the entry from unused list if it was there. */
 		unlink_from_unused(p);
 		return p;
 	}
-
-	if (!create)
-		return NULL;
-
-	/* Allocate the space outside the locked region. */
-	n = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
-	if (n == NULL)
-		return NULL;
-	n->v4daddr = daddr;
-	atomic_set(&n->refcnt, 1);
-	atomic_set(&n->rid, 0);
-	atomic_set(&n->ip_id_count, secure_ip_id(daddr));
-	n->tcp_ts_stamp = 0;
-
-	write_lock_bh(&peers.lock);
-	/* Check if an entry has suddenly appeared. */
-	p = lookup(daddr, stack);
-	if (p != peer_avl_empty)
-		goto out_free;
-
-	/* Link the node. */
-	link_to_pool(n);
-	INIT_LIST_HEAD(&n->unused);
-	peers.total++;
-	write_unlock_bh(&peers.lock);
+	p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
+	if (p) {
+		p->v4daddr = daddr;
+		atomic_set(&p->refcnt, 1);
+		atomic_set(&p->rid, 0);
+		atomic_set(&p->ip_id_count, secure_ip_id(daddr));
+		p->tcp_ts_stamp = 0;
+		INIT_LIST_HEAD(&p->unused);
+
+
+		/* Link the node. */
+		link_to_pool(p);
+		peers.total++;
+	}
+	spin_unlock_bh(&peers.lock);
 
 	if (peers.total >= inet_peer_threshold)
 		/* Remove one less-recently-used entry. */
 		cleanup_once(0);
 
-	return n;
-
-out_free:
-	/* The appropriate node is already in the pool. */
-	atomic_inc(&p->refcnt);
-	write_unlock_bh(&peers.lock);
-	/* Remove the entry from unused list if it was there. */
-	unlink_from_unused(p);
-	/* Free preallocated the preallocated node. */
-	kmem_cache_free(peer_cachep, n);
 	return p;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 82695d9b186dcefe9bd119b53521deec20858f19 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 15 Jun 2010 15:08:48 -0700
Subject: net: Fix error in comment on net_device_ops::ndo_get_stats

ndo_get_stats still returns struct net_device_stats *; there is
no struct net_device_stats64.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a7e0458029b..398f6c28cf8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -673,7 +673,7 @@ struct netdev_rx_queue {
  *	1. Define @ndo_get_stats64 to update a rtnl_link_stats64 structure
  *	   (which should normally be dev->stats64) and return a ponter to
  *	   it. The structure must not be changed asynchronously.
- *	2. Define @ndo_get_stats to update a net_device_stats64 structure
+ *	2. Define @ndo_get_stats to update a net_device_stats structure
  *	   (which should normally be dev->stats) and return a pointer to
  *	   it. The structure may be changed asynchronously only if each
  *	   field is written atomically.
-- 
cgit v1.2.3-70-g09d2


From 5933dd2f028cdcbb4b3169dca594324704ba10ae Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 15 Jun 2010 18:16:43 -0700
Subject: net: NET_SKB_PAD should depend on L1_CACHE_BYTES

In old kernels, NET_SKB_PAD was defined to 16.

Then commit d6301d3dd1c2 (net: Increase default NET_SKB_PAD to 32), and
commit 18e8c134f4e9 (net: Increase NET_SKB_PAD to 64 bytes) increased it
to 64.

While the first patch was governed by network stack needs, the second was
driven more by performance issues on current hardware. The real intent was to
align data on a cache line boundary.

So use max(32, L1_CACHE_BYTES) instead of 64, to be more generic.

Remove microblaze and powerpc own NET_SKB_PAD definitions.

Thanks to Alexander Duyck and David Miller for their comments.

Suggested-by: David Miller <davem@davemloft.net>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/microblaze/include/asm/system.h | 3 ---
 arch/powerpc/include/asm/system.h    | 3 ---
 include/linux/skbuff.h               | 8 +++++---
 3 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/arch/microblaze/include/asm/system.h b/arch/microblaze/include/asm/system.h
index 48c4f0335e3..81e1f7d5b4c 100644
--- a/arch/microblaze/include/asm/system.h
+++ b/arch/microblaze/include/asm/system.h
@@ -101,10 +101,7 @@ extern struct dentry *of_debugfs_root;
  * MicroBlaze doesn't handle unaligned accesses in hardware.
  *
  * Based on this we force the IP header alignment in network drivers.
- * We also modify NET_SKB_PAD to be a cacheline in size, thus maintaining
- * cacheline alignment of buffers.
  */
 #define NET_IP_ALIGN	2
-#define NET_SKB_PAD	L1_CACHE_BYTES
 
 #endif /* _ASM_MICROBLAZE_SYSTEM_H */
diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h
index a6297c67c3d..6c294acac84 100644
--- a/arch/powerpc/include/asm/system.h
+++ b/arch/powerpc/include/asm/system.h
@@ -515,11 +515,8 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
  * powers of 2 writes until it reaches sufficient alignment).
  *
  * Based on this we disable the IP header alignment in network drivers.
- * We also modify NET_SKB_PAD to be a cacheline in size, thus maintaining
- * cacheline alignment of buffers.
  */
 #define NET_IP_ALIGN	0
-#define NET_SKB_PAD	L1_CACHE_BYTES
 
 #define cmpxchg64(ptr, o, n)						\
   ({									\
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 122d08396e5..ac74ee085d7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1414,12 +1414,14 @@ static inline int skb_network_offset(const struct sk_buff *skb)
  *
  * Various parts of the networking layer expect at least 32 bytes of
  * headroom, you should not reduce this.
- * With RPS, we raised NET_SKB_PAD to 64 so that get_rps_cpus() fetches span
- * a 64 bytes aligned block to fit modern (>= 64 bytes) cache line sizes
+ *
+ * Using max(32, L1_CACHE_BYTES) makes sense (especially with RPS)
+ * to reduce average number of cache lines per packet.
+ * get_rps_cpus() for example only access one 64 bytes aligned block :
  * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8)
  */
 #ifndef NET_SKB_PAD
-#define NET_SKB_PAD	64
+#define NET_SKB_PAD	max(32, L1_CACHE_BYTES)
 #endif
 
 extern int ___pskb_trim(struct sk_buff *skb, unsigned int len);
-- 
cgit v1.2.3-70-g09d2


From d5f31fbfd8fa3836a918592032853c41d1797c3d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 15 Jun 2010 21:44:29 -0700
Subject: netpoll: Use correct primitives for RCU dereferencing

Now that RCU debugging checks for matching rcu_dereference calls
and rcu_read_lock, we need to use the correct primitives or face
nasty warnings.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 4c77fe78cef..413742c92d1 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -64,7 +64,7 @@ static inline bool netpoll_rx(struct sk_buff *skb)
 	bool ret = false;
 
 	rcu_read_lock_bh();
-	npinfo = rcu_dereference(skb->dev->npinfo);
+	npinfo = rcu_dereference_bh(skb->dev->npinfo);
 
 	if (!npinfo || (list_empty(&npinfo->rx_np) && !npinfo->rx_flags))
 		goto out;
@@ -82,7 +82,7 @@ out:
 
 static inline int netpoll_rx_on(struct sk_buff *skb)
 {
-	struct netpoll_info *npinfo = rcu_dereference(skb->dev->npinfo);
+	struct netpoll_info *npinfo = rcu_dereference_bh(skb->dev->npinfo);
 
 	return npinfo && (!list_empty(&npinfo->rx_np) || npinfo->rx_flags);
 }
-- 
cgit v1.2.3-70-g09d2


From 46cd09a7de52cad464d35a75924b79984646288d Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Fri, 11 Jun 2010 12:16:57 +0200
Subject: fix typos concerning "acquire"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 arch/arm/mach-omap2/sleep34xx.S | 2 +-
 include/linux/lru_cache.h       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S
index d522cd70bf5..ba53191ae4c 100644
--- a/arch/arm/mach-omap2/sleep34xx.S
+++ b/arch/arm/mach-omap2/sleep34xx.S
@@ -60,7 +60,7 @@
 #define SDRC_DLLA_CTRL_V	OMAP34XX_SDRC_REGADDR(SDRC_DLLA_CTRL)
 
         .text
-/* Function to aquire the semaphore in scratchpad */
+/* Function to acquire the semaphore in scratchpad */
 ENTRY(lock_scratchpad_sem)
 	stmfd	sp!, {lr}	@ save registers on stack
 wait_sem:
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index de48d167568..78fbf24f357 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -262,7 +262,7 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char
  * @lc: the lru cache to operate on
  *
  * Note that the reference counts and order on the active and lru lists may
- * still change.  Returns true if we aquired the lock.
+ * still change.  Returns true if we acquired the lock.
  */
 static inline int lc_try_lock(struct lru_cache *lc)
 {
-- 
cgit v1.2.3-70-g09d2


From 65155b3708137fabee865dc4da822763c0c41208 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Fri, 11 Jun 2010 12:17:01 +0200
Subject: fix typos concerning "management"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 2 +-
 drivers/media/dvb/siano/smscoreapi.c     | 2 +-
 drivers/net/benet/be_hw.h                | 2 +-
 drivers/scsi/fcoe/fcoe.c                 | 4 ++--
 drivers/scsi/mpt2sas/mpt2sas_base.h      | 2 +-
 drivers/scsi/mpt2sas/mpt2sas_scsih.c     | 4 ++--
 drivers/scsi/pm8001/pm8001_hwi.c         | 2 +-
 drivers/scsi/qla2xxx/qla_iocb.c          | 2 +-
 drivers/scsi/qla2xxx/qla_nx.h            | 2 +-
 include/linux/ide.h                      | 2 +-
 include/linux/if_link.h                  | 2 +-
 11 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index f8fbbc67a40..7745394c3e6 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -1013,7 +1013,7 @@ int vmw_gmr_id_alloc(struct vmw_private *dev_priv, uint32_t *p_id)
 }
 
 /*
- * Stream managment
+ * Stream management
  */
 
 static void vmw_stream_destroy(struct vmw_resource *res)
diff --git a/drivers/media/dvb/siano/smscoreapi.c b/drivers/media/dvb/siano/smscoreapi.c
index 0c87a3c3899..a19f649666d 100644
--- a/drivers/media/dvb/siano/smscoreapi.c
+++ b/drivers/media/dvb/siano/smscoreapi.c
@@ -1297,7 +1297,7 @@ int smsclient_sendrequest(struct smscore_client_t *client,
 EXPORT_SYMBOL_GPL(smsclient_sendrequest);
 
 
-/* old GPIO managments implementation */
+/* old GPIO managements implementation */
 int smscore_configure_gpio(struct smscore_device_t *coredev, u32 pin,
 			   struct smscore_config_gpio *pinconfig)
 {
diff --git a/drivers/net/benet/be_hw.h b/drivers/net/benet/be_hw.h
index 063026de495..3f1b7c3965b 100644
--- a/drivers/net/benet/be_hw.h
+++ b/drivers/net/benet/be_hw.h
@@ -52,7 +52,7 @@
  */
 #define MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK	(1 << 29) /* bit 29 */
 
-/********* Power managment (WOL) **********/
+/********* Power management (WOL) **********/
 #define PCICFG_PM_CONTROL_OFFSET		0x44
 #define PCICFG_PM_CONTROL_MASK			0x108	/* bits 3 & 8 */
 
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 9276121db1e..bc39542481a 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -2452,7 +2452,7 @@ module_exit(fcoe_exit);
  * @fp: response frame, or error encoded in a pointer (timeout)
  * @arg: pointer the the fcoe_ctlr structure
  *
- * This handles MAC address managment for FCoE, then passes control on to
+ * This handles MAC address management for FCoE, then passes control on to
  * the libfc FLOGI response handler.
  */
 static void fcoe_flogi_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg)
@@ -2484,7 +2484,7 @@ done:
  * @fp: response frame, or error encoded in a pointer (timeout)
  * @arg: pointer the the fcoe_ctlr structure
  *
- * This handles MAC address managment for FCoE, then passes control on to
+ * This handles MAC address management for FCoE, then passes control on to
  * the libfc LOGO response handler.
  */
 static void fcoe_logo_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg)
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h
index b4afe431ac1..41c29a86e83 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h
@@ -474,7 +474,7 @@ typedef void (*MPT_ADD_SGE)(void *paddr, u32 flags_length, dma_addr_t dma_addr);
  * @shost_recovery: host reset in progress
  * @ioc_reset_in_progress_lock:
  * @ioc_link_reset_in_progress: phy/hard reset in progress
- * @ignore_loginfos: ignore loginfos during task managment
+ * @ignore_loginfos: ignore loginfos during task management
  * @remove_host: flag for when driver unloads, to avoid sending dev resets
  * @wait_for_port_enable_to_complete:
  * @msix_enable: flag indicating msix is enabled
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index c5ff26a2a51..06d645a36f1 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -2979,7 +2979,7 @@ _scsih_qcmd(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *))
 	/* host recovery or link resets sent via IOCTLs */
 	if (ioc->shost_recovery || ioc->ioc_link_reset_in_progress)
 		return SCSI_MLQUEUE_HOST_BUSY;
-	/* device busy with task managment */
+	/* device busy with task management */
 	else if (sas_device_priv_data->block || sas_target_priv_data->tm_busy)
 		return SCSI_MLQUEUE_DEVICE_BUSY;
 	/* device has been deleted */
@@ -6845,7 +6845,7 @@ _scsih_init(void)
 	 /* queuecommand callback hander */
 	scsi_io_cb_idx = mpt2sas_base_register_callback_handler(_scsih_io_done);
 
-	/* task managment callback handler */
+	/* task management callback handler */
 	tm_cb_idx = mpt2sas_base_register_callback_handler(_scsih_tm_done);
 
 	/* base internal commands callback handler */
diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
index 5ff8261c5d6..0e05e8a2216 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.c
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -4152,7 +4152,7 @@ static int pm8001_chip_abort_task(struct pm8001_hba_info *pm8001_ha,
 }
 
 /**
- * pm8001_chip_ssp_tm_req - built the task managment command.
+ * pm8001_chip_ssp_tm_req - built the task management command.
  * @pm8001_ha: our hba card information.
  * @ccb: the ccb information.
  * @tmf: task management function.
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 8ef94536541..782b30d0eea 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -1129,7 +1129,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
 	cmd_pkt->fcp_cmnd_dseg_address[1] = cpu_to_le32(
 	    MSD(crc_ctx_dma + CRC_CONTEXT_FCPCMND_OFF));
 	fcp_cmnd->task_attribute = 0;
-	fcp_cmnd->task_managment = 0;
+	fcp_cmnd->task_management = 0;
 
 	cmd_pkt->fcp_rsp_dseg_len = 0; /* Let response come in status iocb */
 
diff --git a/drivers/scsi/qla2xxx/qla_nx.h b/drivers/scsi/qla2xxx/qla_nx.h
index f8f99a5ea53..1b44d013f15 100644
--- a/drivers/scsi/qla2xxx/qla_nx.h
+++ b/drivers/scsi/qla2xxx/qla_nx.h
@@ -832,7 +832,7 @@ struct fcp_cmnd {
 	struct scsi_lun lun;
 	uint8_t crn;
 	uint8_t task_attribute;
-	uint8_t task_managment;
+	uint8_t task_management;
 	uint8_t additional_cdb_len;
 	uint8_t cdb[260]; /* 256 for CDB len and 4 for FCP_DL */
 };
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 3239d1c10ac..c2c598ed4ee 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -458,7 +458,7 @@ enum {
 	IDE_DFLAG_DOORLOCKING		= (1 << 15),
 	/* disallow DMA */
 	IDE_DFLAG_NODMA			= (1 << 16),
-	/* powermanagment told us not to do anything, so sleep nicely */
+	/* powermanagement told us not to do anything, so sleep nicely */
 	IDE_DFLAG_BLOCKED		= (1 << 17),
 	/* sleeping & sleep field valid */
 	IDE_DFLAG_SLEEPING		= (1 << 18),
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 85c812db5a3..9d8f0807dae 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -233,7 +233,7 @@ enum macvlan_mode {
 	MACVLAN_MODE_BRIDGE  = 4, /* talk to bridge ports directly */
 };
 
-/* SR-IOV virtual function managment section */
+/* SR-IOV virtual function management section */
 
 enum {
 	IFLA_VF_INFO_UNSPEC,
-- 
cgit v1.2.3-70-g09d2


From 317fe0e6c5dc9448bcef41a2e31fecfd3dba7f55 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 16 Jun 2010 04:52:13 +0000
Subject: inetpeer: restore small inet_peer structures

Addition of rcu_head to struct inet_peer added 16bytes on 64bit arches.

That's a bit unfortunate, since old size was exactly 64 bytes.

This can be solved by using a union between this rcu_head and four fields
that are normally used only when a refcount is taken on inet_peer.
rcu_head is used only when refcnt=-1, right before structure freeing.

Add a inet_peer_refcheck() function to check this assertion for a while.

We can bring back SLAB_HWCACHE_ALIGN qualifier in kmem cache creation.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 31 ++++++++++++++++++++++++++-----
 net/ipv4/inetpeer.c    |  4 ++--
 net/ipv4/route.c       |  1 +
 net/ipv4/tcp_ipv4.c    | 11 +++++++----
 4 files changed, 36 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 61740473042..417d0c894f2 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -22,11 +22,21 @@ struct inet_peer {
 	__u32			dtime;		/* the time of last use of not
 						 * referenced entries */
 	atomic_t		refcnt;
-	atomic_t		rid;		/* Frag reception counter */
-	atomic_t		ip_id_count;	/* IP ID for the next packet */
-	__u32			tcp_ts;
-	__u32			tcp_ts_stamp;
-	struct rcu_head		rcu;
+	/*
+	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
+	 * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
+	 * We can share memory with rcu_head to keep inet_peer small
+	 * (less then 64 bytes)
+	 */
+	union {
+		struct {
+			atomic_t	rid;		/* Frag reception counter */
+			atomic_t	ip_id_count;	/* IP ID for the next packet */
+			__u32		tcp_ts;
+			__u32		tcp_ts_stamp;
+		};
+		struct rcu_head         rcu;
+	};
 };
 
 void			inet_initpeers(void) __init;
@@ -37,10 +47,21 @@ struct inet_peer	*inet_getpeer(__be32 daddr, int create);
 /* can be called from BH context or outside */
 extern void inet_putpeer(struct inet_peer *p);
 
+/*
+ * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
+ * tcp_ts_stamp if no refcount is taken on inet_peer
+ */
+static inline void inet_peer_refcheck(const struct inet_peer *p)
+{
+	WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
+}
+
+
 /* can be called with or without local BH being disabled */
 static inline __u16	inet_getid(struct inet_peer *p, int more)
 {
 	more++;
+	inet_peer_refcheck(p);
 	return atomic_add_return(more, &p->ip_id_count) - more;
 }
 
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 349249fad2d..9ffa24b9a80 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -64,7 +64,7 @@
  *		   usually under some other lock to prevent node disappearing
  *		dtime: unused node list lock
  *		v4daddr: unchangeable
- *		ip_id_count: idlock
+ *		ip_id_count: atomic value (no lock needed)
  */
 
 static struct kmem_cache *peer_cachep __read_mostly;
@@ -129,7 +129,7 @@ void __init inet_initpeers(void)
 
 	peer_cachep = kmem_cache_create("inet_peer_cache",
 			sizeof(struct inet_peer),
-			0, SLAB_PANIC,
+			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
 			NULL);
 
 	/* All the timers, started at system startup tend
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a291edbbc97..03430de4616 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2881,6 +2881,7 @@ static int rt_fill_info(struct net *net,
 	error = rt->dst.error;
 	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
 	if (rt->peer) {
+		inet_peer_refcheck(rt->peer);
 		id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
 		if (rt->peer->tcp_ts_stamp) {
 			ts = rt->peer->tcp_ts;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7f9515c0379..2e41e6f9296 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -204,10 +204,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
 		 * when trying new connection.
 		 */
-		if (peer != NULL &&
-		    (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
-			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
-			tp->rx_opt.ts_recent = peer->tcp_ts;
+		if (peer) {
+			inet_peer_refcheck(peer);
+			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
+				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
+				tp->rx_opt.ts_recent = peer->tcp_ts;
+			}
 		}
 	}
 
@@ -1351,6 +1353,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
 		    peer->v4daddr == saddr) {
+			inet_peer_refcheck(peer);
 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
 			    (s32)(peer->tcp_ts - req->ts_recent) >
 							TCP_PAWS_WINDOW) {
-- 
cgit v1.2.3-70-g09d2


From 8c76368174ed2359739f1b7b8a9c042b1ef839c4 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 16 Jun 2010 14:42:15 -0700
Subject: syncookies: check decoded options against sysctl settings

Discard the ACK if we find options that do not match current sysctl
settings.

Previously it was possible to create a connection with sack, wscale,
etc. enabled even if the feature was disabled via sysctl.

Also remove an unneeded call to tcp_sack_reset() in
cookie_check_timestamp: Both call sites (cookie_v4_check,
cookie_v6_check) zero "struct tcp_options_received", hand it to
tcp_parse_options() (which does not change tcp_opt->num_sacks/dsack)
and then call cookie_check_timestamp().

Even if num_sacks/dsacks were changed, the structure is allocated on
the stack and after cookie_check_timestamp returns only a few selected
members are copied to the inet_request_sock.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  2 +-
 net/ipv4/syncookies.c | 25 +++++++++++++++++++------
 net/ipv6/syncookies.c |  4 ++--
 3 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9e68e25c8b8..18c246c9b00 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -464,7 +464,7 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
 				     __u16 *mss);
 
 extern __u32 cookie_init_timestamp(struct request_sock *req);
-extern void cookie_check_timestamp(struct tcp_options_received *tcp_opt);
+extern bool cookie_check_timestamp(struct tcp_options_received *tcp_opt);
 
 /* From net/ipv6/syncookies.c */
 extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 02bef6aa8b3..51b5662545d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -230,23 +230,36 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
  * The lowest 4 bits are for snd_wscale
  * The next 4 lsb are for rcv_wscale
  * The next lsb is for sack_ok
+ *
+ * return false if we decode an option that should not be.
  */
-void cookie_check_timestamp(struct tcp_options_received *tcp_opt)
+bool cookie_check_timestamp(struct tcp_options_received *tcp_opt)
 {
 	/* echoed timestamp, 9 lowest bits contain options */
 	u32 options = tcp_opt->rcv_tsecr & TSMASK;
 
+	if (!tcp_opt->saw_tstamp)  {
+		tcp_clear_options(tcp_opt);
+		return true;
+	}
+
+	if (!sysctl_tcp_timestamps)
+		return false;
+
 	tcp_opt->snd_wscale = options & 0xf;
 	options >>= 4;
 	tcp_opt->rcv_wscale = options & 0xf;
 
 	tcp_opt->sack_ok = (options >> 4) & 0x1;
 
-	if (tcp_opt->sack_ok)
-		tcp_sack_reset(tcp_opt);
+	if (tcp_opt->sack_ok && !sysctl_tcp_sack)
+		return false;
 
-	if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale)
+	if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale) {
 		tcp_opt->wscale_ok = 1;
+		return sysctl_tcp_window_scaling != 0;
+	}
+	return true;
 }
 EXPORT_SYMBOL(cookie_check_timestamp);
 
@@ -281,8 +294,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
 
-	if (tcp_opt.saw_tstamp)
-		cookie_check_timestamp(&tcp_opt);
+	if (!cookie_check_timestamp(&tcp_opt))
+		goto out;
 
 	ret = NULL;
 	req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 70d330f8c99..c7ee57421ec 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -180,8 +180,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
 
-	if (tcp_opt.saw_tstamp)
-		cookie_check_timestamp(&tcp_opt);
+	if (!cookie_check_timestamp(&tcp_opt))
+		goto out;
 
 	ret = NULL;
 	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
-- 
cgit v1.2.3-70-g09d2


From 812e876e842488221aa54cb4587a8a33445cfa9e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 13 Jun 2010 03:27:04 +0000
Subject: scm: Reorder scm_cookie.

Reorder the fields in scm_cookie so they pack better on 64bit.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/scm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/scm.h b/include/net/scm.h
index 8360e47aa7e..17d9d2e75ff 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -19,8 +19,8 @@ struct scm_fp_list {
 };
 
 struct scm_cookie {
-	struct ucred		creds;		/* Skb credentials	*/
 	struct scm_fp_list	*fp;		/* Passed files		*/
+	struct ucred		creds;		/* Skb credentials	*/
 #ifdef CONFIG_SECURITY_NETWORK
 	u32			secid;		/* Passed security ID 	*/
 #endif
-- 
cgit v1.2.3-70-g09d2


From 5c1469de7545a35a16ff2b902e217044a7d2f8a5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 13 Jun 2010 03:28:03 +0000
Subject: user_ns: Introduce user_nsmap_uid and user_ns_map_gid.

Define what happens when we view a uid from one user_namespace
in another user_namespace.

- If the user namespaces are the same no mapping is necessary.

- For most cases of difference use overflowuid and overflowgid,
  the uid and gid currently used for 16bit apis when we have a 32bit uid
  that does not fit in 16bits.  Effectively the situation is the same,
  we want to return a uid or gid that is not assigned to any user.

- For the case when we happen to be mapping the uid or gid of the
  creator of the target user namespace use uid 0 and gid 0, as confusing
  that user with root is not a problem.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Serge E. Hallyn <serue@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/user_namespace.h | 14 ++++++++++++++
 kernel/user_namespace.c        | 44 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

(limited to 'include')

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index cc4f45361db..8178156711f 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -36,6 +36,9 @@ static inline void put_user_ns(struct user_namespace *ns)
 		kref_put(&ns->kref, free_user_ns);
 }
 
+uid_t user_ns_map_uid(struct user_namespace *to, const struct cred *cred, uid_t uid);
+gid_t user_ns_map_gid(struct user_namespace *to, const struct cred *cred, gid_t gid);
+
 #else
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -52,6 +55,17 @@ static inline void put_user_ns(struct user_namespace *ns)
 {
 }
 
+static inline uid_t user_ns_map_uid(struct user_namespace *to,
+	const struct cred *cred, uid_t uid)
+{
+	return uid;
+}
+static inline gid_t user_ns_map_gid(struct user_namespace *to,
+	const struct cred *cred, gid_t gid)
+{
+	return gid;
+}
+
 #endif
 
 #endif /* _LINUX_USER_H */
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index b2d70d38dff..25915832291 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,6 +9,7 @@
 #include <linux/nsproxy.h>
 #include <linux/slab.h>
 #include <linux/user_namespace.h>
+#include <linux/highuid.h>
 #include <linux/cred.h>
 
 /*
@@ -82,3 +83,46 @@ void free_user_ns(struct kref *kref)
 	schedule_work(&ns->destroyer);
 }
 EXPORT_SYMBOL(free_user_ns);
+
+uid_t user_ns_map_uid(struct user_namespace *to, const struct cred *cred, uid_t uid)
+{
+	struct user_namespace *tmp;
+
+	if (likely(to == cred->user->user_ns))
+		return uid;
+
+
+	/* Is cred->user the creator of the target user_ns
+	 * or the creator of one of it's parents?
+	 */
+	for ( tmp = to; tmp != &init_user_ns;
+	      tmp = tmp->creator->user_ns ) {
+		if (cred->user == tmp->creator) {
+			return (uid_t)0;
+		}
+	}
+
+	/* No useful relationship so no mapping */
+	return overflowuid;
+}
+
+gid_t user_ns_map_gid(struct user_namespace *to, const struct cred *cred, gid_t gid)
+{
+	struct user_namespace *tmp;
+
+	if (likely(to == cred->user->user_ns))
+		return gid;
+
+	/* Is cred->user the creator of the target user_ns
+	 * or the creator of one of it's parents?
+	 */
+	for ( tmp = to; tmp != &init_user_ns;
+	      tmp = tmp->creator->user_ns ) {
+		if (cred->user == tmp->creator) {
+			return (gid_t)0;
+		}
+	}
+
+	/* No useful relationship so no mapping */
+	return overflowgid;
+}
-- 
cgit v1.2.3-70-g09d2


From 3f551f9436c05a3b5eccdd6e94733df5bb98d2a5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 13 Jun 2010 03:28:59 +0000
Subject: sock: Introduce cred_to_ucred

To keep the coming code clear and to allow both the sock
code and the scm code to share the logic, introduce a
function to translate from struct cred to struct ucred.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h |  5 +++++
 net/core/sock.c        | 14 ++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 032a19eb61b..a2fada9becb 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -24,6 +24,9 @@ struct __kernel_sockaddr_storage {
 #include <linux/types.h>		/* pid_t			*/
 #include <linux/compiler.h>		/* __user			*/
 
+struct pid;
+struct cred;
+
 #define __sockaddr_check_size(size)	\
 	BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage)))
 
@@ -309,6 +312,8 @@ struct ucred {
 #define IPX_TYPE	1
 
 #ifdef __KERNEL__
+extern void cred_to_ucred(struct pid *pid, const struct cred *cred, struct ucred *ucred);
+
 extern int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 extern int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
 			       int offset, int len);
diff --git a/net/core/sock.c b/net/core/sock.c
index f9ce0db41cd..db8335ad755 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -110,6 +110,7 @@
 #include <linux/tcp.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
+#include <linux/user_namespace.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -749,6 +750,19 @@ set_rcvbuf:
 EXPORT_SYMBOL(sock_setsockopt);
 
 
+void cred_to_ucred(struct pid *pid, const struct cred *cred,
+		   struct ucred *ucred)
+{
+	ucred->pid = pid_vnr(pid);
+	ucred->uid = ucred->gid = -1;
+	if (cred) {
+		struct user_namespace *current_ns = current_user_ns();
+
+		ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid);
+		ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid);
+	}
+}
+
 int sock_getsockopt(struct socket *sock, int level, int optname,
 		    char __user *optval, int __user *optlen)
 {
-- 
cgit v1.2.3-70-g09d2


From 109f6e39fa07c48f580125f531f46cb7c245b528 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 13 Jun 2010 03:30:14 +0000
Subject: af_unix: Allow SO_PEERCRED to work across namespaces.

Use struct pid and struct cred to store the peer credentials on struct
sock.  This gives enough information to convert the peer credential
information to a value relative to whatever namespace the socket is in
at the time.

This removes nasty surprises when using SO_PEERCRED on socket
connections where the processes on either side are in different pid and
user namespaces.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h |  3 ++-
 net/core/sock.c    | 18 ++++++++++++------
 net/unix/af_unix.c | 37 ++++++++++++++++++++++++++++---------
 3 files changed, 42 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index f8acf38f092..4f26f2f83be 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -295,7 +295,8 @@ struct sock {
 	unsigned short		sk_ack_backlog;
 	unsigned short		sk_max_ack_backlog;
 	__u32			sk_priority;
-	struct ucred		sk_peercred;
+	struct pid		*sk_peer_pid;
+	const struct cred	*sk_peer_cred;
 	long			sk_rcvtimeo;
 	long			sk_sndtimeo;
 	struct sk_filter      	*sk_filter;
diff --git a/net/core/sock.c b/net/core/sock.c
index db8335ad755..0229d5566a4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -915,11 +915,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case SO_PEERCRED:
-		if (len > sizeof(sk->sk_peercred))
-			len = sizeof(sk->sk_peercred);
-		if (copy_to_user(optval, &sk->sk_peercred, len))
+	{
+		struct ucred peercred;
+		if (len > sizeof(peercred))
+			len = sizeof(peercred);
+		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+		if (copy_to_user(optval, &peercred, len))
 			return -EFAULT;
 		goto lenout;
+	}
 
 	case SO_PEERNAME:
 	{
@@ -1133,6 +1137,9 @@ static void __sk_free(struct sock *sk)
 		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
 		       __func__, atomic_read(&sk->sk_omem_alloc));
 
+	if (sk->sk_peer_cred)
+		put_cred(sk->sk_peer_cred);
+	put_pid(sk->sk_peer_pid);
 	put_net(sock_net(sk));
 	sk_prot_free(sk->sk_prot_creator, sk);
 }
@@ -1968,9 +1975,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_sndmsg_page	=	NULL;
 	sk->sk_sndmsg_off	=	0;
 
-	sk->sk_peercred.pid 	=	0;
-	sk->sk_peercred.uid	=	-1;
-	sk->sk_peercred.gid	=	-1;
+	sk->sk_peer_pid 	=	NULL;
+	sk->sk_peer_cred	=	NULL;
 	sk->sk_write_pending	=	0;
 	sk->sk_rcvlowat		=	1;
 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index fef2cc5e9d2..e1f1349fae8 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -450,11 +450,31 @@ static int unix_release_sock(struct sock *sk, int embrion)
 	return 0;
 }
 
+static void init_peercred(struct sock *sk)
+{
+	put_pid(sk->sk_peer_pid);
+	if (sk->sk_peer_cred)
+		put_cred(sk->sk_peer_cred);
+	sk->sk_peer_pid  = get_pid(task_tgid(current));
+	sk->sk_peer_cred = get_current_cred();
+}
+
+static void copy_peercred(struct sock *sk, struct sock *peersk)
+{
+	put_pid(sk->sk_peer_pid);
+	if (sk->sk_peer_cred)
+		put_cred(sk->sk_peer_cred);
+	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
+	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+}
+
 static int unix_listen(struct socket *sock, int backlog)
 {
 	int err;
 	struct sock *sk = sock->sk;
 	struct unix_sock *u = unix_sk(sk);
+	struct pid *old_pid = NULL;
+	const struct cred *old_cred = NULL;
 
 	err = -EOPNOTSUPP;
 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
@@ -470,12 +490,14 @@ static int unix_listen(struct socket *sock, int backlog)
 	sk->sk_max_ack_backlog	= backlog;
 	sk->sk_state		= TCP_LISTEN;
 	/* set credentials so connect can copy them */
-	sk->sk_peercred.pid	= task_tgid_vnr(current);
-	current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
+	init_peercred(sk);
 	err = 0;
 
 out_unlock:
 	unix_state_unlock(sk);
+	put_pid(old_pid);
+	if (old_cred)
+		put_cred(old_cred);
 out:
 	return err;
 }
@@ -1140,8 +1162,7 @@ restart:
 	unix_peer(newsk)	= sk;
 	newsk->sk_state		= TCP_ESTABLISHED;
 	newsk->sk_type		= sk->sk_type;
-	newsk->sk_peercred.pid	= task_tgid_vnr(current);
-	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
+	init_peercred(newsk);
 	newu = unix_sk(newsk);
 	newsk->sk_wq		= &newu->peer_wq;
 	otheru = unix_sk(other);
@@ -1157,7 +1178,7 @@ restart:
 	}
 
 	/* Set credentials */
-	sk->sk_peercred = other->sk_peercred;
+	copy_peercred(sk, other);
 
 	sock->state	= SS_CONNECTED;
 	sk->sk_state	= TCP_ESTABLISHED;
@@ -1199,10 +1220,8 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
 	sock_hold(skb);
 	unix_peer(ska) = skb;
 	unix_peer(skb) = ska;
-	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
-	current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
-	ska->sk_peercred.uid = skb->sk_peercred.uid;
-	ska->sk_peercred.gid = skb->sk_peercred.gid;
+	init_peercred(ska);
+	init_peercred(skb);
 
 	if (ska->sk_type != SOCK_DGRAM) {
 		ska->sk_state = TCP_ESTABLISHED;
-- 
cgit v1.2.3-70-g09d2


From 257b5358b32f17e0603b6ff57b13610b0e02348f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 13 Jun 2010 03:32:34 +0000
Subject: scm: Capture the full credentials of the scm sender.

Start capturing not only the userspace pid, uid and gid values of the
sending process but also the struct pid and struct cred of the sending
process as well.

This is in preparation for properly supporting SCM_CREDENTIALS for
sockets that have different uid and/or pid namespaces at the different
ends.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Serge E. Hallyn <serge@hallyn.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/scm.h | 28 ++++++++++++++++++++++++----
 net/core/scm.c    | 24 ++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/scm.h b/include/net/scm.h
index 17d9d2e75ff..31656506d96 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -19,6 +19,8 @@ struct scm_fp_list {
 };
 
 struct scm_cookie {
+	struct pid		*pid;		/* Skb credentials */
+	const struct cred	*cred;
 	struct scm_fp_list	*fp;		/* Passed files		*/
 	struct ucred		creds;		/* Skb credentials	*/
 #ifdef CONFIG_SECURITY_NETWORK
@@ -42,8 +44,27 @@ static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_co
 { }
 #endif /* CONFIG_SECURITY_NETWORK */
 
+static __inline__ void scm_set_cred(struct scm_cookie *scm,
+				    struct pid *pid, const struct cred *cred)
+{
+	scm->pid  = get_pid(pid);
+	scm->cred = get_cred(cred);
+	cred_to_ucred(pid, cred, &scm->creds);
+}
+
+static __inline__ void scm_destroy_cred(struct scm_cookie *scm)
+{
+	put_pid(scm->pid);
+	scm->pid  = NULL;
+
+	if (scm->cred)
+		put_cred(scm->cred);
+	scm->cred = NULL;
+}
+
 static __inline__ void scm_destroy(struct scm_cookie *scm)
 {
+	scm_destroy_cred(scm);
 	if (scm && scm->fp)
 		__scm_destroy(scm);
 }
@@ -51,10 +72,7 @@ static __inline__ void scm_destroy(struct scm_cookie *scm)
 static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
 			       struct scm_cookie *scm)
 {
-	struct task_struct *p = current;
-	scm->creds.uid = current_uid();
-	scm->creds.gid = current_gid();
-	scm->creds.pid = task_tgid_vnr(p);
+	scm_set_cred(scm, task_tgid(current), current_cred());
 	scm->fp = NULL;
 	unix_get_peersec_dgram(sock, scm);
 	if (msg->msg_controllen <= 0)
@@ -96,6 +114,8 @@ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
 	if (test_bit(SOCK_PASSCRED, &sock->flags))
 		put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(scm->creds), &scm->creds);
 
+	scm_destroy_cred(scm);
+
 	scm_passec(sock, msg, scm);
 
 	if (!scm->fp)
diff --git a/net/core/scm.c b/net/core/scm.c
index b88f6f9d0b9..681c976307b 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -170,6 +170,30 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 			err = scm_check_creds(&p->creds);
 			if (err)
 				goto error;
+
+			if (pid_vnr(p->pid) != p->creds.pid) {
+				struct pid *pid;
+				err = -ESRCH;
+				pid = find_get_pid(p->creds.pid);
+				if (!pid)
+					goto error;
+				put_pid(p->pid);
+				p->pid = pid;
+			}
+
+			if ((p->cred->euid != p->creds.uid) ||
+				(p->cred->egid != p->creds.gid)) {
+				struct cred *cred;
+				err = -ENOMEM;
+				cred = prepare_creds();
+				if (!cred)
+					goto error;
+
+				cred->uid = cred->euid = p->creds.uid;
+				cred->gid = cred->egid = p->creds.gid;
+				put_cred(p->cred);
+				p->cred = cred;
+			}
 			break;
 		default:
 			goto error;
-- 
cgit v1.2.3-70-g09d2


From 7361c36c5224519b258219fe3d0e8abc865d8134 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 13 Jun 2010 03:34:33 +0000
Subject: af_unix: Allow credentials to work across user and pid namespaces.

In unix_skb_parms store pointers to struct pid and struct cred instead
of raw uid, gid, and pid values, then translate the credentials on
reception into values that are meaningful in the receiving process's
namespaces.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_unix.h |  4 ++--
 net/unix/af_unix.c    | 53 ++++++++++++++++++++++++++++++---------------------
 2 files changed, 33 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 20725e213ae..90c9e2872f2 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -23,7 +23,8 @@ struct unix_address {
 };
 
 struct unix_skb_parms {
-	struct ucred		creds;		/* Skb credentials	*/
+	struct pid		*pid;		/* Skb credentials	*/
+	const struct cred	*cred;
 	struct scm_fp_list	*fp;		/* Passed files		*/
 #ifdef CONFIG_SECURITY_NETWORK
 	u32			secid;		/* Security ID		*/
@@ -31,7 +32,6 @@ struct unix_skb_parms {
 };
 
 #define UNIXCB(skb) 	(*(struct unix_skb_parms *)&((skb)->cb))
-#define UNIXCREDS(skb)	(&UNIXCB((skb)).creds)
 #define UNIXSID(skb)	(&UNIXCB((skb)).secid)
 
 #define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e1f1349fae8..5fe9d6fe08b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1316,18 +1316,20 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	int i;
 
 	scm->fp = UNIXCB(skb).fp;
-	skb->destructor = sock_wfree;
 	UNIXCB(skb).fp = NULL;
 
 	for (i = scm->fp->count-1; i >= 0; i--)
 		unix_notinflight(scm->fp->fp[i]);
 }
 
-static void unix_destruct_fds(struct sk_buff *skb)
+static void unix_destruct_scm(struct sk_buff *skb)
 {
 	struct scm_cookie scm;
 	memset(&scm, 0, sizeof(scm));
-	unix_detach_fds(&scm, skb);
+	scm.pid  = UNIXCB(skb).pid;
+	scm.cred = UNIXCB(skb).cred;
+	if (UNIXCB(skb).fp)
+		unix_detach_fds(&scm, skb);
 
 	/* Alas, it calls VFS */
 	/* So fscking what? fput() had been SMP-safe since the last Summer */
@@ -1350,10 +1352,22 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 
 	for (i = scm->fp->count-1; i >= 0; i--)
 		unix_inflight(scm->fp->fp[i]);
-	skb->destructor = unix_destruct_fds;
 	return 0;
 }
 
+static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
+{
+	int err = 0;
+	UNIXCB(skb).pid  = get_pid(scm->pid);
+	UNIXCB(skb).cred = get_cred(scm->cred);
+	UNIXCB(skb).fp = NULL;
+	if (scm->fp && send_fds)
+		err = unix_attach_fds(scm, skb);
+
+	skb->destructor = unix_destruct_scm;
+	return err;
+}
+
 /*
  *	Send AF_UNIX data.
  */
@@ -1410,12 +1424,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (skb == NULL)
 		goto out;
 
-	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-	if (siocb->scm->fp) {
-		err = unix_attach_fds(siocb->scm, skb);
-		if (err)
-			goto out_free;
-	}
+	err = unix_scm_to_skb(siocb->scm, skb, true);
+	if (err)
+		goto out_free;
 	unix_get_secdata(siocb->scm, skb);
 
 	skb_reset_transport_header(skb);
@@ -1585,16 +1596,14 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		 */
 		size = min_t(int, size, skb_tailroom(skb));
 
-		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
+
 		/* Only send the fds in the first buffer */
-		if (siocb->scm->fp && !fds_sent) {
-			err = unix_attach_fds(siocb->scm, skb);
-			if (err) {
-				kfree_skb(skb);
-				goto out_err;
-			}
-			fds_sent = true;
+		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
+		if (err) {
+			kfree_skb(skb);
+			goto out_err;
 		}
+		fds_sent = true;
 
 		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
 		if (err) {
@@ -1711,7 +1720,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
 		siocb->scm = &tmp_scm;
 		memset(&tmp_scm, 0, sizeof(tmp_scm));
 	}
-	siocb->scm->creds = *UNIXCREDS(skb);
+	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
 	unix_set_secdata(siocb->scm, skb);
 
 	if (!(flags & MSG_PEEK)) {
@@ -1860,14 +1869,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 		if (check_creds) {
 			/* Never glue messages from different writers */
-			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
-				   sizeof(siocb->scm->creds)) != 0) {
+			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
+			    (UNIXCB(skb).cred != siocb->scm->cred)) {
 				skb_queue_head(&sk->sk_receive_queue, skb);
 				break;
 			}
 		} else {
 			/* Copy credentials */
-			siocb->scm->creds = *UNIXCREDS(skb);
+			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
 			check_creds = 1;
 		}
 
-- 
cgit v1.2.3-70-g09d2


From d29c0c5c332131f1151cf33995e2f01299b9234f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 14 Jun 2010 20:21:04 +0000
Subject: udp: Add UFO to NETIF_F_SOFTWARE_GSO

This patch adds UFO to the list of GSO features with a software
fallback.  This allows UFO to be used even if the hardware does
not support it.

In particular, this allows us to test the UFO fallback, as it
has been reported to not work in some cases.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 398f6c28cf8..8fa5e5aa879 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -865,7 +865,8 @@ struct net_device {
 #define NETIF_F_FSO		(SKB_GSO_FCOE << NETIF_F_GSO_SHIFT)
 
 	/* List of features with software fallbacks. */
-#define NETIF_F_GSO_SOFTWARE	(NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6)
+#define NETIF_F_GSO_SOFTWARE	(NETIF_F_TSO | NETIF_F_TSO_ECN | \
+				 NETIF_F_TSO6 | NETIF_F_UFO)
 
 
 #define NETIF_F_GEN_CSUM	(NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
-- 
cgit v1.2.3-70-g09d2


From c68cd6cc21eb329c47ff020ff7412bf58176984e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 17 Jun 2010 06:12:26 +0200
Subject: netfilter: nf_nat: support user-specified SNAT rules in LOCAL_IN

2.6.34 introduced 'conntrack zones' to deal with cases where packets
from multiple identical networks are handled by conntrack/NAT. Packets
are looped through veth devices, during which they are NATed to private
addresses, after which they can continue normally through the stack
and possibly have NAT rules applied a second time.

This works well, but is needlessly complicated for cases where only
a single SNAT/DNAT mapping needs to be applied to these packets. In that
case, all that needs to be done is to assign each network to a separate
zone and perform NAT as usual. However this doesn't work for packets
destined for the machine performing NAT itself since it's currently not
possible to configure SNAT mappings for the LOCAL_IN chain.

This patch adds a new INPUT chain to the NAT table and changes the
targets performing SNAT to be usable in that chain.

Example usage with two identical networks (192.168.0.0/24) on eth0/eth1:

iptables -t raw -A PREROUTING -i eth0 -j CT --zone 1
iptables -t raw -A PREROUTING -i eth0 -j MARK --set-mark 1
iptables -t raw -A PREROUTING -i eth1 -j CT --zone 2
iptables -t raw -A PREROUTING -i eth1 -j MARK --set-mark 2

iptables -t nat -A INPUT       -m mark --mark 1 -j NETMAP --to 10.0.0.0/24
iptables -t nat -A POSTROUTING -m mark --mark 1 -j NETMAP --to 10.0.0.0/24
iptables -t nat -A INPUT       -m mark --mark 2 -j NETMAP --to 10.0.1.0/24
iptables -t nat -A POSTROUTING -m mark --mark 2 -j NETMAP --to 10.0.1.0/24

iptables -t raw -A PREROUTING -d 10.0.0.0/24 -j CT --zone 1
iptables -t raw -A OUTPUT     -d 10.0.0.0/24 -j CT --zone 1
iptables -t raw -A PREROUTING -d 10.0.1.0/24 -j CT --zone 2
iptables -t raw -A OUTPUT     -d 10.0.1.0/24 -j CT --zone 2

iptables -t nat -A PREROUTING -d 10.0.0.0/24 -j NETMAP --to 192.168.0.0/24
iptables -t nat -A OUTPUT     -d 10.0.0.0/24 -j NETMAP --to 192.168.0.0/24
iptables -t nat -A PREROUTING -d 10.0.1.0/24 -j NETMAP --to 192.168.0.0/24
iptables -t nat -A OUTPUT     -d 10.0.1.0/24 -j NETMAP --to 192.168.0.0/24

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_nat_rule.h    |  2 --
 net/ipv4/netfilter/ipt_NETMAP.c        |  6 ++++--
 net/ipv4/netfilter/nf_nat_rule.c       | 10 ++++++----
 net/ipv4/netfilter/nf_nat_standalone.c |  8 +-------
 4 files changed, 11 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat_rule.h b/include/net/netfilter/nf_nat_rule.h
index e4a18ae361c..2890bdc4cd9 100644
--- a/include/net/netfilter/nf_nat_rule.h
+++ b/include/net/netfilter/nf_nat_rule.h
@@ -12,6 +12,4 @@ extern int nf_nat_rule_find(struct sk_buff *skb,
 			    const struct net_device *out,
 			    struct nf_conn *ct);
 
-extern unsigned int
-alloc_null_binding(struct nf_conn *ct, unsigned int hooknum);
 #endif /* _NF_NAT_RULE_H */
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index f43867d1697..6cdb298f103 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -48,7 +48,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
 		     par->hooknum == NF_INET_POST_ROUTING ||
-		     par->hooknum == NF_INET_LOCAL_OUT);
+		     par->hooknum == NF_INET_LOCAL_OUT ||
+		     par->hooknum == NF_INET_LOCAL_IN);
 	ct = nf_ct_get(skb, &ctinfo);
 
 	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
@@ -77,7 +78,8 @@ static struct xt_target netmap_tg_reg __read_mostly = {
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) |
 			  (1 << NF_INET_POST_ROUTING) |
-			  (1 << NF_INET_LOCAL_OUT),
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_LOCAL_IN),
 	.checkentry 	= netmap_tg_check,
 	.me 		= THIS_MODULE
 };
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 98ed78281ae..ebbd319f62f 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -28,7 +28,8 @@
 
 #define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
 			 (1 << NF_INET_POST_ROUTING) | \
-			 (1 << NF_INET_LOCAL_OUT))
+			 (1 << NF_INET_LOCAL_OUT) | \
+			 (1 << NF_INET_LOCAL_IN))
 
 static const struct xt_table nat_table = {
 	.name		= "nat",
@@ -45,7 +46,8 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
 	enum ip_conntrack_info ctinfo;
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
-	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
+	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
+		     par->hooknum == NF_INET_LOCAL_IN);
 
 	ct = nf_ct_get(skb, &ctinfo);
 
@@ -99,7 +101,7 @@ static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
 	return 0;
 }
 
-unsigned int
+static unsigned int
 alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 {
 	/* Force range to this IP; let proto decide mapping for
@@ -141,7 +143,7 @@ static struct xt_target ipt_snat_reg __read_mostly = {
 	.target		= ipt_snat_target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
-	.hooks		= 1 << NF_INET_POST_ROUTING,
+	.hooks		= (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
 	.checkentry	= ipt_snat_checkentry,
 	.family		= AF_INET,
 };
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 6723c682250..95481fee8bd 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -131,13 +131,7 @@ nf_nat_fn(unsigned int hooknum,
 		if (!nf_nat_initialized(ct, maniptype)) {
 			unsigned int ret;
 
-			if (hooknum == NF_INET_LOCAL_IN)
-				/* LOCAL_IN hook doesn't have a chain!  */
-				ret = alloc_null_binding(ct, hooknum);
-			else
-				ret = nf_nat_rule_find(skb, hooknum, in, out,
-						       ct);
-
+			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
 			if (ret != NF_ACCEPT)
 				return ret;
 		} else
-- 
cgit v1.2.3-70-g09d2


From db3c9cc105ee844f6cd7a1beb9926fb8e9a093ae Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sat, 12 Jun 2010 20:30:21 +0200
Subject: firewire: replace get_features card driver hook

by feature variables in the fw_card struct.  The hook appeared to be an
unnecessary abstraction in the card driver interface.

Cleaner would be to pass those feature flags as arguments to
fw_card_initialize() or fw_card_add(), but the FairnessControl register
is in the SCLK domain and may therefore not be accessible while Link
Power Status is off, i.e. before the card->driver->enable call from
fw_card_add().

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-topology.c    |  3 +--
 drivers/firewire/core-transaction.c |  3 +--
 drivers/firewire/core.h             |  5 -----
 drivers/firewire/ohci.c             | 14 ++------------
 include/linux/firewire.h            |  3 +++
 5 files changed, 7 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c
index 00a556f3a58..3b9667c37b6 100644
--- a/drivers/firewire/core-topology.c
+++ b/drivers/firewire/core-topology.c
@@ -543,8 +543,7 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation,
 
 	spin_lock_irqsave(&card->lock, flags);
 
-	card->broadcast_channel_allocated = (card->driver->get_features(card) &
-					     FEATURE_CHANNEL_31_ALLOCATED) != 0;
+	card->broadcast_channel_allocated = card->broadcast_channel_auto_allocated;
 	card->node_id = node_id;
 	/*
 	 * Update node_id before generation to prevent anybody from using
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index 5069cfc75b5..62bf30560a3 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -1129,8 +1129,7 @@ static void handle_registers(struct fw_card *card, struct fw_request *request,
 		break;
 
 	case CSR_PRIORITY_BUDGET:
-		if (!(card->driver->get_features(card) &
-						FEATURE_PRIORITY_BUDGET))
+		if (!card->priority_budget_implemented)
 			rcode = RCODE_ADDRESS_ERROR;
 		else if (tcode == TCODE_READ_QUADLET_REQUEST)
 			*data = cpu_to_be32(card->driver->
diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
index 3f9e39b60bc..8dc76d8711a 100644
--- a/drivers/firewire/core.h
+++ b/drivers/firewire/core.h
@@ -38,9 +38,6 @@ struct fw_packet;
 #define BROADCAST_CHANNEL_INITIAL	(1 << 31 | 31)
 #define BROADCAST_CHANNEL_VALID		(1 << 30)
 
-#define FEATURE_PRIORITY_BUDGET		0x01
-#define FEATURE_CHANNEL_31_ALLOCATED	0x02
-
 #define CSR_STATE_BIT_CMSTR	(1 << 8)
 #define CSR_STATE_BIT_ABDICATE	(1 << 10)
 
@@ -84,8 +81,6 @@ struct fw_card_driver {
 	u32 (*read_csr_reg)(struct fw_card *card, int csr_offset);
 	void (*write_csr_reg)(struct fw_card *card, int csr_offset, u32 value);
 
-	unsigned int (*get_features)(struct fw_card *card);
-
 	struct fw_iso_context *
 	(*allocate_iso_context)(struct fw_card *card,
 				int type, int channel, size_t header_size);
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 2abdb3268a1..09bba9315de 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -172,7 +172,6 @@ struct fw_ohci {
 	int request_generation;	/* for timestamping incoming requests */
 	unsigned quirks;
 	unsigned int pri_req_max;
-	unsigned int features;
 	u32 bus_time;
 	bool is_root;
 
@@ -1753,15 +1752,14 @@ static int ohci_enable(struct fw_card *card,
 	if (version >= OHCI_VERSION_1_1) {
 		reg_write(ohci, OHCI1394_InitialChannelsAvailableHi,
 			  0xfffffffe);
-		ohci->features |= FEATURE_CHANNEL_31_ALLOCATED;
+		card->broadcast_channel_auto_allocated = true;
 	}
 
 	/* Get implemented bits of the priority arbitration request counter. */
 	reg_write(ohci, OHCI1394_FairnessControl, 0x3f);
 	ohci->pri_req_max = reg_read(ohci, OHCI1394_FairnessControl) & 0x3f;
 	reg_write(ohci, OHCI1394_FairnessControl, 0);
-	if (ohci->pri_req_max != 0)
-		ohci->features |= FEATURE_PRIORITY_BUDGET;
+	card->priority_budget_implemented = ohci->pri_req_max != 0;
 
 	ar_context_run(&ohci->ar_request_ctx);
 	ar_context_run(&ohci->ar_response_ctx);
@@ -2132,13 +2130,6 @@ static void ohci_write_csr_reg(struct fw_card *card, int csr_offset, u32 value)
 	}
 }
 
-static unsigned int ohci_get_features(struct fw_card *card)
-{
-	struct fw_ohci *ohci = fw_ohci(card);
-
-	return ohci->features;
-}
-
 static void copy_iso_headers(struct iso_context *ctx, void *p)
 {
 	int i = ctx->header_length;
@@ -2578,7 +2569,6 @@ static const struct fw_card_driver ohci_driver = {
 	.enable_phys_dma	= ohci_enable_phys_dma,
 	.read_csr_reg		= ohci_read_csr_reg,
 	.write_csr_reg		= ohci_write_csr_reg,
-	.get_features		= ohci_get_features,
 
 	.allocate_iso_context	= ohci_allocate_iso_context,
 	.free_iso_context	= ohci_free_iso_context,
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 4d22643215e..5acb5fc1918 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -122,6 +122,9 @@ struct fw_card {
 	bool bm_abdicate; /* value of csr_abdicate before last bus reset */
 	bool csr_abdicate; /* visible in CSR STATE_CLEAR/SET registers */
 
+	bool priority_budget_implemented;	/* controller feature */
+	bool broadcast_channel_auto_allocated;	/* controller feature */
+
 	bool broadcast_channel_allocated;
 	u32 broadcast_channel;
 	__be32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4];
-- 
cgit v1.2.3-70-g09d2


From c8a94ded57e9cc2498d401b2f5c856213a3e19fb Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sat, 12 Jun 2010 20:34:50 +0200
Subject: firewire: normalize STATE_CLEAR/SET CSR access interface

Push the maintenance of STATE_CLEAR/SET.abdicate down into the card
driver.  This way, the read/write_csr_reg driver method works uniformly
across all CSR offsets.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-topology.c    |  5 ++---
 drivers/firewire/core-transaction.c | 41 +++++++++++--------------------------
 drivers/firewire/core.h             |  2 +-
 drivers/firewire/ohci.c             | 19 ++++++++++++-----
 include/linux/firewire.h            |  3 +--
 5 files changed, 30 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c
index 3b9667c37b6..56e908ba43f 100644
--- a/drivers/firewire/core-topology.c
+++ b/drivers/firewire/core-topology.c
@@ -524,7 +524,7 @@ static void update_topology_map(struct fw_card *card,
 }
 
 void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation,
-			      int self_id_count, u32 *self_ids)
+			      int self_id_count, u32 *self_ids, bool bm_abdicate)
 {
 	struct fw_node *local_node;
 	unsigned long flags;
@@ -552,8 +552,7 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation,
 	smp_wmb();
 	card->generation = generation;
 	card->reset_jiffies = jiffies;
-	card->bm_abdicate = card->csr_abdicate;
-	card->csr_abdicate = false;
+	card->bm_abdicate = bm_abdicate;
 	fw_schedule_bm_work(card, 0);
 
 	local_node = build_tree(card, self_ids, self_id_count);
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index 62bf30560a3..87d69cddb23 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -982,20 +982,6 @@ static const struct fw_address_region registers_region =
 	{ .start = CSR_REGISTER_BASE,
 	  .end   = CSR_REGISTER_BASE | CSR_CONFIG_ROM, };
 
-static u32 read_state_register(struct fw_card *card)
-{
-	u32 value;
-
-	/* Bit 8 (cmstr): */
-	value = card->driver->read_csr_reg(card, CSR_STATE_CLEAR);
-
-	/* Bit 10 (abdicate): */
-	if (card->csr_abdicate)
-		value |= CSR_STATE_BIT_ABDICATE;
-
-	return value;
-}
-
 static void update_split_timeout(struct fw_card *card)
 {
 	unsigned int cycles;
@@ -1021,29 +1007,25 @@ static void handle_registers(struct fw_card *card, struct fw_request *request,
 
 	switch (reg) {
 	case CSR_STATE_CLEAR:
-		if (tcode == TCODE_READ_QUADLET_REQUEST) {
-			*data = cpu_to_be32(read_state_register(card));
-		} else if (tcode == TCODE_WRITE_QUADLET_REQUEST) {
+		if (tcode == TCODE_READ_QUADLET_REQUEST)
+			*data = cpu_to_be32(card->driver->
+					read_csr_reg(card, CSR_STATE_CLEAR));
+		else if (tcode == TCODE_WRITE_QUADLET_REQUEST)
 			card->driver->write_csr_reg(card, CSR_STATE_CLEAR,
 						    be32_to_cpu(*data));
-			if (*data & cpu_to_be32(CSR_STATE_BIT_ABDICATE))
-				card->csr_abdicate = false;
-		} else {
+		else
 			rcode = RCODE_TYPE_ERROR;
-		}
 		break;
 
 	case CSR_STATE_SET:
-		if (tcode == TCODE_READ_QUADLET_REQUEST) {
-			*data = cpu_to_be32(read_state_register(card));
-		} else if (tcode == TCODE_WRITE_QUADLET_REQUEST) {
+		if (tcode == TCODE_READ_QUADLET_REQUEST)
+			*data = cpu_to_be32(card->driver->
+					read_csr_reg(card, CSR_STATE_SET));
+		else if (tcode == TCODE_WRITE_QUADLET_REQUEST)
 			card->driver->write_csr_reg(card, CSR_STATE_SET,
 						    be32_to_cpu(*data));
-			if (*data & cpu_to_be32(CSR_STATE_BIT_ABDICATE))
-				card->csr_abdicate = true;
-		} else {
+		else
 			rcode = RCODE_TYPE_ERROR;
-		}
 		break;
 
 	case CSR_NODE_IDS:
@@ -1063,7 +1045,8 @@ static void handle_registers(struct fw_card *card, struct fw_request *request,
 
 	case CSR_RESET_START:
 		if (tcode == TCODE_WRITE_QUADLET_REQUEST)
-			card->csr_abdicate = false;
+			card->driver->write_csr_reg(card, CSR_STATE_CLEAR,
+						    CSR_STATE_BIT_ABDICATE);
 		else
 			rcode = RCODE_TYPE_ERROR;
 		break;
diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
index 8dc76d8711a..8280c625170 100644
--- a/drivers/firewire/core.h
+++ b/drivers/firewire/core.h
@@ -196,7 +196,7 @@ static inline void fw_node_put(struct fw_node *node)
 }
 
 void fw_core_handle_bus_reset(struct fw_card *card, int node_id,
-			      int generation, int self_id_count, u32 *self_ids);
+	int generation, int self_id_count, u32 *self_ids, bool bm_abdicate);
 void fw_destroy_nodes(struct fw_card *card);
 
 /*
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 09bba9315de..a55cf0911b7 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -174,6 +174,7 @@ struct fw_ohci {
 	unsigned int pri_req_max;
 	u32 bus_time;
 	bool is_root;
+	bool csr_state_setclear_abdicate;
 
 	/*
 	 * Spinlock for accessing fw_ohci data.  Never call out of
@@ -1529,7 +1530,9 @@ static void bus_reset_tasklet(unsigned long data)
 		    self_id_count, ohci->self_id_buffer);
 
 	fw_core_handle_bus_reset(&ohci->card, ohci->node_id, generation,
-				 self_id_count, ohci->self_id_buffer);
+				 self_id_count, ohci->self_id_buffer,
+				 ohci->csr_state_setclear_abdicate);
+	ohci->csr_state_setclear_abdicate = false;
 }
 
 static irqreturn_t irq_handler(int irq, void *data)
@@ -2032,13 +2035,16 @@ static u32 ohci_read_csr_reg(struct fw_card *card, int csr_offset)
 	switch (csr_offset) {
 	case CSR_STATE_CLEAR:
 	case CSR_STATE_SET:
-		/* the controller driver handles only the cmstr bit */
 		if (ohci->is_root &&
 		    (reg_read(ohci, OHCI1394_LinkControlSet) &
 		     OHCI1394_LinkControl_cycleMaster))
-			return CSR_STATE_BIT_CMSTR;
+			value = CSR_STATE_BIT_CMSTR;
 		else
-			return 0;
+			value = 0;
+		if (ohci->csr_state_setclear_abdicate)
+			value |= CSR_STATE_BIT_ABDICATE;
+
+		return value;
 
 	case CSR_NODE_IDS:
 		return reg_read(ohci, OHCI1394_NodeID) << 16;
@@ -2078,12 +2084,13 @@ static void ohci_write_csr_reg(struct fw_card *card, int csr_offset, u32 value)
 
 	switch (csr_offset) {
 	case CSR_STATE_CLEAR:
-		/* the controller driver handles only the cmstr bit */
 		if ((value & CSR_STATE_BIT_CMSTR) && ohci->is_root) {
 			reg_write(ohci, OHCI1394_LinkControlClear,
 				  OHCI1394_LinkControl_cycleMaster);
 			flush_writes(ohci);
 		}
+		if (value & CSR_STATE_BIT_ABDICATE)
+			ohci->csr_state_setclear_abdicate = false;
 		break;
 
 	case CSR_STATE_SET:
@@ -2092,6 +2099,8 @@ static void ohci_write_csr_reg(struct fw_card *card, int csr_offset, u32 value)
 				  OHCI1394_LinkControl_cycleMaster);
 			flush_writes(ohci);
 		}
+		if (value & CSR_STATE_BIT_ABDICATE)
+			ohci->csr_state_setclear_abdicate = true;
 		break;
 
 	case CSR_NODE_IDS:
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 5acb5fc1918..5553018d45d 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -119,8 +119,7 @@ struct fw_card {
 	int bm_retries;
 	int bm_generation;
 	__be32 bm_transaction_data[2];
-	bool bm_abdicate; /* value of csr_abdicate before last bus reset */
-	bool csr_abdicate; /* visible in CSR STATE_CLEAR/SET registers */
+	bool bm_abdicate;
 
 	bool priority_budget_implemented;	/* controller feature */
 	bool broadcast_channel_auto_allocated;	/* controller feature */
-- 
cgit v1.2.3-70-g09d2


From 33e553fe2b4a983ef34a57ab1440d8d33397bb12 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 20 Jun 2010 22:50:35 +0200
Subject: firewire: remove an unused function argument

void (*fw_address_callback_t)(..., int speed, ...) is the speed that a
remote node chose to transmit a request to us.  In case of split
transactions, firewire-core will transmit the response at that speed.

Upper layer drivers on the other hand (firewire-net, -sbp2, firedtv, and
userspace drivers) cannot do anything useful with that speed datum,
except log it for debug purposes.  But data that is merely potentially
(not even actually) used for debug purposes does not belong in the API.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-cdev.c            |  3 +--
 drivers/firewire/core-transaction.c     | 14 +++++++-------
 drivers/firewire/net.c                  |  4 ++--
 drivers/firewire/sbp2.c                 |  3 +--
 drivers/media/dvb/firewire/firedtv-fw.c |  4 ++--
 include/linux/firewire.h                |  2 +-
 6 files changed, 14 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index ca72cdaa68c..4e0478d70d4 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -632,8 +632,7 @@ static void release_request(struct client *client,
 
 static void handle_request(struct fw_card *card, struct fw_request *request,
 			   int tcode, int destination, int source,
-			   int generation, int speed,
-			   unsigned long long offset,
+			   int generation, unsigned long long offset,
 			   void *payload, size_t length, void *callback_data)
 {
 	struct address_handler_resource *handler = callback_data;
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index cb6390fe368..2f67c8d5ce9 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -802,7 +802,7 @@ static void handle_exclusive_region_request(struct fw_card *card,
 	else
 		handler->address_callback(card, request,
 					  tcode, destination, source,
-					  p->generation, p->speed, offset,
+					  p->generation, offset,
 					  request->data, request->length,
 					  handler->callback_data);
 }
@@ -840,8 +840,8 @@ static void handle_fcp_region_request(struct fw_card *card,
 		if (is_enclosing_handler(handler, offset, request->length))
 			handler->address_callback(card, NULL, tcode,
 						  destination, source,
-						  p->generation, p->speed,
-						  offset, request->data,
+						  p->generation, offset,
+						  request->data,
 						  request->length,
 						  handler->callback_data);
 	}
@@ -951,8 +951,8 @@ static const struct fw_address_region topology_map_region =
 
 static void handle_topology_map(struct fw_card *card, struct fw_request *request,
 		int tcode, int destination, int source, int generation,
-		int speed, unsigned long long offset,
-		void *payload, size_t length, void *callback_data)
+		unsigned long long offset, void *payload, size_t length,
+		void *callback_data)
 {
 	int start;
 
@@ -996,8 +996,8 @@ static void update_split_timeout(struct fw_card *card)
 
 static void handle_registers(struct fw_card *card, struct fw_request *request,
 		int tcode, int destination, int source, int generation,
-		int speed, unsigned long long offset,
-		void *payload, size_t length, void *callback_data)
+		unsigned long long offset, void *payload, size_t length,
+		void *callback_data)
 {
 	int reg = offset & ~CSR_REGISTER_BASE;
 	__be32 *data = payload;
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index 2d3dc7ded0a..4bb3fb882f6 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -805,8 +805,8 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len,
 
 static void fwnet_receive_packet(struct fw_card *card, struct fw_request *r,
 		int tcode, int destination, int source, int generation,
-		int speed, unsigned long long offset, void *payload,
-		size_t length, void *callback_data)
+		unsigned long long offset, void *payload, size_t length,
+		void *callback_data)
 {
 	struct fwnet_device *dev = callback_data;
 	int rcode;
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index ae715c82da2..1931964c4fb 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -410,8 +410,7 @@ static void free_orb(struct kref *kref)
 
 static void sbp2_status_write(struct fw_card *card, struct fw_request *request,
 			      int tcode, int destination, int source,
-			      int generation, int speed,
-			      unsigned long long offset,
+			      int generation, unsigned long long offset,
 			      void *payload, size_t length, void *callback_data)
 {
 	struct sbp2_logical_unit *lu = callback_data;
diff --git a/drivers/media/dvb/firewire/firedtv-fw.c b/drivers/media/dvb/firewire/firedtv-fw.c
index 4253b7ab009..4dcae63f8cf 100644
--- a/drivers/media/dvb/firewire/firedtv-fw.c
+++ b/drivers/media/dvb/firewire/firedtv-fw.c
@@ -194,8 +194,8 @@ static const struct firedtv_backend backend = {
 
 static void handle_fcp(struct fw_card *card, struct fw_request *request,
 		       int tcode, int destination, int source, int generation,
-		       int speed, unsigned long long offset,
-		       void *payload, size_t length, void *callback_data)
+		       unsigned long long offset, void *payload, size_t length,
+		       void *callback_data)
 {
 	struct firedtv *f, *fdtv = NULL;
 	struct fw_device *device;
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 5553018d45d..e44b502c834 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -265,7 +265,7 @@ typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode,
 typedef void (*fw_address_callback_t)(struct fw_card *card,
 				      struct fw_request *request,
 				      int tcode, int destination, int source,
-				      int generation, int speed,
+				      int generation,
 				      unsigned long long offset,
 				      void *data, size_t length,
 				      void *callback_data);
-- 
cgit v1.2.3-70-g09d2


From 604f45167824e18ad5766e51ecf1d4d65f15118d Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 20 Jun 2010 22:52:55 +0200
Subject: firewire: cdev: freeze FW_CDEV_VERSION due to libraw1394 bug

libraw1394 v2.0.0...v2.0.5 takes FW_CDEV_VERSION from an externally
installed header file and uses it to declare its own implementation
level in FW_CDEV_IOC_GET_INFO.  This is wrong; it should set the real
version for which it was actually written.

If we add features to the kernel ABI that require the kernel to check
a client's implementation level, we can not trust the client version if
it was set from FW_CDEV_VERSION.

Hence freeze FW_CDEV_VERSION at the current value (no damage has been
done yet), clearly document FW_CDEV_VERSION as a dummy version and what
clients are expected to do with fw_cdev_get_info.version, and use a new
defined constant (which is not placed into the exported header file) as
kernel implementation level.

Note, in order to check in client program source code which features are
present in an externally installed linux/firewire-cdev.h, use
preprocessor directives like
  #ifdef FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE
or
  #ifdef FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED
instead of a check of FW_CDEV_VERSION.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-cdev.c  |  7 ++++++-
 include/linux/firewire-cdev.h | 22 ++++++++++++++++------
 2 files changed, 22 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 8f8c8eeaf04..0cf86bcbeea 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -46,6 +46,11 @@
 
 #include "core.h"
 
+/*
+ * ABI version history is documented in linux/firewire-cdev.h.
+ */
+#define FW_CDEV_KERNEL_VERSION 3
+
 struct client {
 	u32 version;
 	struct fw_device *device;
@@ -395,7 +400,7 @@ static int ioctl_get_info(struct client *client, union ioctl_arg *arg)
 	unsigned long ret = 0;
 
 	client->version = a->version;
-	a->version = FW_CDEV_VERSION;
+	a->version = FW_CDEV_KERNEL_VERSION;
 	a->card = client->device->card->index;
 
 	down_read(&fw_device_rwsem);
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 6ffb24a1f2f..0d0cc07358a 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -219,7 +219,7 @@ union fw_cdev_event {
 	struct fw_cdev_event_response		response;
 	struct fw_cdev_event_request		request;
 	struct fw_cdev_event_iso_interrupt	iso_interrupt;
-	struct fw_cdev_event_iso_resource	iso_resource;
+	struct fw_cdev_event_iso_resource	iso_resource; /* added in 2.6.30 */
 };
 
 /* available since kernel version 2.6.22 */
@@ -252,22 +252,32 @@ union fw_cdev_event {
 #define FW_CDEV_IOC_GET_CYCLE_TIMER2   _IOWR('#', 0x14, struct fw_cdev_get_cycle_timer2)
 
 /*
- * FW_CDEV_VERSION History
+ * ABI version history
  *  1  (2.6.22)  - initial version
+ *     (2.6.24)  - added %FW_CDEV_IOC_GET_CYCLE_TIMER
  *  2  (2.6.30)  - changed &fw_cdev_event_iso_interrupt.header if
  *                 &fw_cdev_create_iso_context.header_size is 8 or more
+ *               - added %FW_CDEV_IOC_*_ISO_RESOURCE*,
+ *                 %FW_CDEV_IOC_GET_SPEED, %FW_CDEV_IOC_SEND_BROADCAST_REQUEST,
+ *                 %FW_CDEV_IOC_SEND_STREAM_PACKET
  *     (2.6.32)  - added time stamp to xmit &fw_cdev_event_iso_interrupt
  *     (2.6.33)  - IR has always packet-per-buffer semantics now, not one of
  *                 dual-buffer or packet-per-buffer depending on hardware
  *  3  (2.6.34)  - made &fw_cdev_get_cycle_timer reliable
+ *               - added %FW_CDEV_IOC_GET_CYCLE_TIMER2
  */
-#define FW_CDEV_VERSION 3
+#define FW_CDEV_VERSION 3 /* Meaningless; don't use this macro. */
 
 /**
  * struct fw_cdev_get_info - General purpose information ioctl
- * @version:	The version field is just a running serial number.
- *		We never break backwards compatibility, but may add more
- *		structs and ioctls in later revisions.
+ * @version:	The version field is just a running serial number.  Both an
+ *		input parameter (ABI version implemented by the client) and
+ *		output parameter (ABI version implemented by the kernel).
+ *		A client must not fill in an %FW_CDEV_VERSION defined from an
+ *		included kernel header file but the actual version for which
+ *		the client was implemented.  This is necessary for forward
+ *		compatibility.  We never break backwards compatibility, but
+ *		may add more structs, events, and ioctls in later revisions.
  * @rom_length:	If @rom is non-zero, at most rom_length bytes of configuration
  *		ROM will be copied into that user space address.  In either
  *		case, @rom_length is updated with the actual length of the
-- 
cgit v1.2.3-70-g09d2


From e205597d188a9ea69ce43f740a14f07b3f5b996a Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 20 Jun 2010 22:53:55 +0200
Subject: firewire: cdev: fix ABI for FCP and address range mapping, add
 fw_cdev_event_request2

The problem:

A target-like userspace driver, e.g. AV/C target or SBP-2/3 target,
needs to be able to act as responder and requester.  In the latter role,
it needs to send requests to nodes from which it received requests.  This
is currently impossible because fw_cdev_event_request lacks information
about sender node ID.
Reported-by: Jay Fenlason <fenlason@redhat.com>

Libffado + libraw1394 + firewire-core is currently unable to drive two
or more audio devices on the same bus.
Reported-by: Arnold Krille <arnold@arnoldarts.de>

This is because libffado requires destination node ID of FCP requests
and sender node ID of FCP responses to match.  It even prohibits
libffado from working with a bus on which libraw1394 opens a /dev/fw* as
default ioctl device that does not correspond with the audio device.
This is because libraw1394 does not receive the sender node ID from the
kernel.

Moreover, fw_cdev_event_request makes it impossible to tell unicast and
broadcast write requests apart.

The fix:

Add a replacement of struct fw_cdev_event_request request, boringly
called struct fw_cdev_event_request2.  The new event will be sent to a
userspace client instead of the old one if the client claims
compatibility with <linux/firewire-cdev.h> ABI version 4 or later.

libraw1394 needs to be extended to make use of the new event, in order
to properly support libffado and other FCP or address range mapping
users who require correct sender node IDs.

Further notes:

While we are at it, change back the range of possible values of
fw_cdev_event_request.tcode to 0x0...0xb like in ABI version <= 3.
The preceding change "firewire: expose extended tcode of incoming lock
requests to (userspace) drivers" expanded it to 0x0...0x17 which could
catch sloppily coded clients by surprise.  The extended range of codes
is only used in the new fw_cdev_event_request2.tcode.

Jay and I also suggested an alternative approach to fix the ABI for
incoming requests:  Add an FW_CDEV_IOC_GET_REQUEST_INFO ioctl which can
be called after reception of an fw_cdev_event_request, before issuing of
the closing FW_CDEV_IOC_SEND_RESPONSE ioctl.  The new ioctl would reveal
the vital information about a request that fw_cdev_event_request lacks.
Jay showed an implementation of this approach.

The former event approach adds 27 LOC of rather trivial code to
core-cdev.c, the ioctl approach 34 LOC, some of which is nontrivial.
The ioctl approach would certainly also add more LOC to userspace
programs which require the expanded information on inbound requests.
This approach is probably only on the lighter-weight side in case of
clients that want to be compatible with kernels that lack the new
capability, like libraw1394.  However, the code to be added to such
libraw1394-like clients in case of the event approach is a straight-
forward additional switch () case in its event handler.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-cdev.c  | 45 ++++++++++++++++++++-----
 include/linux/firewire-cdev.h | 76 +++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 110 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 0cf86bcbeea..9b8df203915 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -49,7 +49,8 @@
 /*
  * ABI version history is documented in linux/firewire-cdev.h.
  */
-#define FW_CDEV_KERNEL_VERSION 3
+#define FW_CDEV_KERNEL_VERSION		4
+#define FW_CDEV_VERSION_EVENT_REQUEST2	4
 
 struct client {
 	u32 version;
@@ -176,7 +177,10 @@ struct outbound_transaction_event {
 
 struct inbound_transaction_event {
 	struct event event;
-	struct fw_cdev_event_request request;
+	union {
+		struct fw_cdev_event_request request;
+		struct fw_cdev_event_request2 request2;
+	} req;
 };
 
 struct iso_interrupt_event {
@@ -645,6 +649,7 @@ static void handle_request(struct fw_card *card, struct fw_request *request,
 	struct address_handler_resource *handler = callback_data;
 	struct inbound_transaction_resource *r;
 	struct inbound_transaction_event *e;
+	size_t event_size0;
 	void *fcp_frame = NULL;
 	int ret;
 
@@ -678,15 +683,37 @@ static void handle_request(struct fw_card *card, struct fw_request *request,
 	if (ret < 0)
 		goto failed;
 
-	e->request.type    = FW_CDEV_EVENT_REQUEST;
-	e->request.tcode   = tcode;
-	e->request.offset  = offset;
-	e->request.length  = length;
-	e->request.handle  = r->resource.handle;
-	e->request.closure = handler->closure;
+	if (handler->client->version < FW_CDEV_VERSION_EVENT_REQUEST2) {
+		struct fw_cdev_event_request *req = &e->req.request;
+
+		if (tcode & 0x10)
+			tcode = TCODE_LOCK_REQUEST;
+
+		req->type	= FW_CDEV_EVENT_REQUEST;
+		req->tcode	= tcode;
+		req->offset	= offset;
+		req->length	= length;
+		req->handle	= r->resource.handle;
+		req->closure	= handler->closure;
+		event_size0	= sizeof(*req);
+	} else {
+		struct fw_cdev_event_request2 *req = &e->req.request2;
+
+		req->type	= FW_CDEV_EVENT_REQUEST2;
+		req->tcode	= tcode;
+		req->offset	= offset;
+		req->source_node_id = source;
+		req->destination_node_id = destination;
+		req->card	= card->index;
+		req->generation	= generation;
+		req->length	= length;
+		req->handle	= r->resource.handle;
+		req->closure	= handler->closure;
+		event_size0	= sizeof(*req);
+	}
 
 	queue_event(handler->client, &e->event,
-		    &e->request, sizeof(e->request), r->data, length);
+		    &e->req, event_size0, r->data, length);
 	return;
 
  failed:
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 0d0cc07358a..52c7ffe934a 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -32,6 +32,9 @@
 #define FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED	0x04
 #define FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED	0x05
 
+/* available since kernel version 2.6.36 */
+#define FW_CDEV_EVENT_REQUEST2			0x06
+
 /**
  * struct fw_cdev_event_common - Common part of all fw_cdev_event_ types
  * @closure:	For arbitrary use by userspace
@@ -98,11 +101,46 @@ struct fw_cdev_event_response {
 };
 
 /**
- * struct fw_cdev_event_request - Sent on incoming request to an address region
+ * struct fw_cdev_event_request - Old version of &fw_cdev_event_request2
  * @closure:	See &fw_cdev_event_common; set by %FW_CDEV_IOC_ALLOCATE ioctl
  * @type:	See &fw_cdev_event_common; always %FW_CDEV_EVENT_REQUEST
+ * @tcode:	See &fw_cdev_event_request2
+ * @offset:	See &fw_cdev_event_request2
+ * @handle:	See &fw_cdev_event_request2
+ * @length:	See &fw_cdev_event_request2
+ * @data:	See &fw_cdev_event_request2
+ *
+ * This event is sent instead of &fw_cdev_event_request2 if the kernel or
+ * the client implements ABI version <= 3.
+ *
+ * Unlike &fw_cdev_event_request2, the sender identity cannot be established,
+ * broadcast write requests cannot be distinguished from unicast writes, and
+ * @tcode of lock requests is %TCODE_LOCK_REQUEST.
+ *
+ * Requests to the FCP_REQUEST or FCP_RESPONSE register are responded to as
+ * with &fw_cdev_event_request2, except in kernel 2.6.32 and older which send
+ * the response packet of the client's %FW_CDEV_IOC_SEND_RESPONSE ioctl.
+ */
+struct fw_cdev_event_request {
+	__u64 closure;
+	__u32 type;
+	__u32 tcode;
+	__u64 offset;
+	__u32 handle;
+	__u32 length;
+	__u32 data[0];
+};
+
+/**
+ * struct fw_cdev_event_request2 - Sent on incoming request to an address region
+ * @closure:	See &fw_cdev_event_common; set by %FW_CDEV_IOC_ALLOCATE ioctl
+ * @type:	See &fw_cdev_event_common; always %FW_CDEV_EVENT_REQUEST2
  * @tcode:	Transaction code of the incoming request
  * @offset:	The offset into the 48-bit per-node address space
+ * @source_node_id: Sender node ID
+ * @destination_node_id: Destination node ID
+ * @card:	The index of the card from which the request came
+ * @generation:	Bus generation in which the request is valid
  * @handle:	Reference to the kernel-side pending request
  * @length:	Data length, i.e. the request's payload size in bytes
  * @data:	Incoming data, if any
@@ -115,12 +153,42 @@ struct fw_cdev_event_response {
  *
  * The payload data for requests carrying data (write and lock requests)
  * follows immediately and can be accessed through the @data field.
+ *
+ * Unlike &fw_cdev_event_request, @tcode of lock requests is one of the
+ * firewire-core specific %TCODE_LOCK_MASK_SWAP...%TCODE_LOCK_VENDOR_DEPENDENT,
+ * i.e. encodes the extended transaction code.
+ *
+ * @card may differ from &fw_cdev_get_info.card because requests are received
+ * from all cards of the Linux host.  @source_node_id, @destination_node_id, and
+ * @generation pertain to that card.  Destination node ID and bus generation may
+ * therefore differ from the corresponding fields of the last
+ * &fw_cdev_event_bus_reset.
+ *
+ * @destination_node_id may also differ from the current node ID because of a
+ * non-local bus ID part or in case of a broadcast write request.  Note, a
+ * client must call an %FW_CDEV_IOC_SEND_RESPONSE ioctl even in case of a
+ * broadcast write request; the kernel will then release the kernel-side pending
+ * request but will not actually send a response packet.
+ *
+ * In case of a write request to FCP_REQUEST or FCP_RESPONSE, the kernel already
+ * sent a write response immediately after the request was received; in this
+ * case the client must still call an %FW_CDEV_IOC_SEND_RESPONSE ioctl to
+ * release the kernel-side pending request, though another response won't be
+ * sent.
+ *
+ * If the client subsequently needs to initiate requests to the sender node of
+ * an &fw_cdev_event_request2, it needs to use a device file with matching
+ * card index, node ID, and generation for outbound requests.
  */
-struct fw_cdev_event_request {
+struct fw_cdev_event_request2 {
 	__u64 closure;
 	__u32 type;
 	__u32 tcode;
 	__u64 offset;
+	__u32 source_node_id;
+	__u32 destination_node_id;
+	__u32 card;
+	__u32 generation;
 	__u32 handle;
 	__u32 length;
 	__u32 data[0];
@@ -200,6 +268,7 @@ struct fw_cdev_event_iso_resource {
  * @bus_reset:     Valid if @common.type == %FW_CDEV_EVENT_BUS_RESET
  * @response:      Valid if @common.type == %FW_CDEV_EVENT_RESPONSE
  * @request:       Valid if @common.type == %FW_CDEV_EVENT_REQUEST
+ * @request2:      Valid if @common.type == %FW_CDEV_EVENT_REQUEST2
  * @iso_interrupt: Valid if @common.type == %FW_CDEV_EVENT_ISO_INTERRUPT
  * @iso_resource:  Valid if @common.type ==
  *				%FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED or
@@ -218,6 +287,7 @@ union fw_cdev_event {
 	struct fw_cdev_event_bus_reset		bus_reset;
 	struct fw_cdev_event_response		response;
 	struct fw_cdev_event_request		request;
+	struct fw_cdev_event_request2		request2;     /* added in 2.6.36 */
 	struct fw_cdev_event_iso_interrupt	iso_interrupt;
 	struct fw_cdev_event_iso_resource	iso_resource; /* added in 2.6.30 */
 };
@@ -263,8 +333,10 @@ union fw_cdev_event {
  *     (2.6.32)  - added time stamp to xmit &fw_cdev_event_iso_interrupt
  *     (2.6.33)  - IR has always packet-per-buffer semantics now, not one of
  *                 dual-buffer or packet-per-buffer depending on hardware
+ *               - shared use and auto-response for FCP registers
  *  3  (2.6.34)  - made &fw_cdev_get_cycle_timer reliable
  *               - added %FW_CDEV_IOC_GET_CYCLE_TIMER2
+ *  4  (2.6.36)  - added %FW_CDEV_EVENT_REQUEST2
  */
 #define FW_CDEV_VERSION 3 /* Meaningless; don't use this macro. */
 
-- 
cgit v1.2.3-70-g09d2


From 3b2b65d68fc87b02ac393a031a4ebb3de84a8218 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 20 Jun 2010 22:54:22 +0200
Subject: firewire: cdev: extend fw_cdev_event_iso_interrupt documentation

Add information regarding the 2.6.32 update to the xmit variant of
fw_cdev_event_iso_interrupt.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 include/linux/firewire-cdev.h | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 52c7ffe934a..8b9b2737321 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -204,10 +204,21 @@ struct fw_cdev_event_request2 {
  * @header:	Stripped headers, if any
  *
  * This event is sent when the controller has completed an &fw_cdev_iso_packet
- * with the %FW_CDEV_ISO_INTERRUPT bit set.  In the receive case, the headers
- * stripped of all packets up until and including the interrupt packet are
- * returned in the @header field.  The amount of header data per packet is as
- * specified at iso context creation by &fw_cdev_create_iso_context.header_size.
+ * with the %FW_CDEV_ISO_INTERRUPT bit set.
+ *
+ * Isochronous transmit events:
+ *
+ * In version 1 of the ABI, &header_length is 0.  In version 3 and some
+ * implementations of version 2 of the ABI, &header_length is a multiple of 4
+ * and &header contains timestamps of all packets up until the interrupt packet.
+ * The format of the timestamps is as described below for isochronous reception.
+ *
+ * Isochronous receive events:
+ *
+ * The headers stripped of all packets up until and including the interrupt
+ * packet are returned in the @header field.  The amount of header data per
+ * packet is as specified at iso context creation by
+ * &fw_cdev_create_iso_context.header_size.
  *
  * In version 1 of this ABI, header data consisted of the 1394 isochronous
  * packet header, followed by quadlets from the packet payload if
-- 
cgit v1.2.3-70-g09d2


From b1c74247b9e29ae3bfdf133862328c309bc9cf14 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Thu, 17 Jun 2010 06:55:38 +0000
Subject: caif: Bugfix not all services uses flow-ctrl.

Flow control is not used by all CAIF services.
The usage of flow control is now part of the general
initialization function for CAIF Services.

Signed-off-by: Sjur Braendeland@stericsson.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/cfsrvl.h |  6 ++++--
 net/caif/cfctrl.c         |  2 +-
 net/caif/cfdbgl.c         |  2 +-
 net/caif/cfdgml.c         |  2 +-
 net/caif/cfrfml.c         | 12 ++++--------
 net/caif/cfsrvl.c         | 14 ++++++++++++--
 net/caif/cfutill.c        |  2 +-
 net/caif/cfveil.c         |  2 +-
 net/caif/cfvidl.c         |  2 +-
 9 files changed, 26 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h
index 2dc9eb193ec..5d921f5e18c 100644
--- a/include/net/caif/cfsrvl.h
+++ b/include/net/caif/cfsrvl.h
@@ -16,6 +16,7 @@ struct cfsrvl {
 	bool open;
 	bool phy_flow_on;
 	bool modem_flow_on;
+	bool supports_flowctrl;
 	struct dev_info dev_info;
 	struct kref ref;
 };
@@ -30,8 +31,9 @@ struct cflayer *cfdbgl_create(u8 linkid, struct dev_info *dev_info);
 bool cfsrvl_phyid_match(struct cflayer *layer, int phyid);
 void cfservl_destroy(struct cflayer *layer);
 void cfsrvl_init(struct cfsrvl *service,
-		 u8 channel_id,
-		 struct dev_info *dev_info);
+			u8 channel_id,
+			struct dev_info *dev_info,
+			bool supports_flowctrl);
 bool cfsrvl_ready(struct cfsrvl *service, int *err);
 u8 cfsrvl_getphyid(struct cflayer *layer);
 
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index fcfda98a5e6..107c4b2a311 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -43,7 +43,7 @@ struct cflayer *cfctrl_create(void)
 	memset(&dev_info, 0, sizeof(dev_info));
 	dev_info.id = 0xff;
 	memset(this, 0, sizeof(*this));
-	cfsrvl_init(&this->serv, 0, &dev_info);
+	cfsrvl_init(&this->serv, 0, &dev_info, false);
 	atomic_set(&this->req_seq_no, 1);
 	atomic_set(&this->rsp_seq_no, 1);
 	this->serv.layer.receive = cfctrl_recv;
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index ab6b6dc34cf..676648cac8d 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -22,7 +22,7 @@ struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
 	}
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 	memset(dbg, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(dbg, channel_id, dev_info);
+	cfsrvl_init(dbg, channel_id, dev_info, false);
 	dbg->layer.receive = cfdbgl_receive;
 	dbg->layer.transmit = cfdbgl_transmit;
 	snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ - 1, "dbg%d", channel_id);
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index 53194840ecb..32d9f0dc846 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -30,7 +30,7 @@ struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
 	}
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 	memset(dgm, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(dgm, channel_id, dev_info);
+	cfsrvl_init(dgm, channel_id, dev_info, true);
 	dgm->layer.receive = cfdgml_receive;
 	dgm->layer.transmit = cfdgml_transmit;
 	snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ - 1, "dgm%d", channel_id);
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index fd27b172fb5..689cbfd0e43 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -23,30 +23,26 @@
 
 static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt);
-static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl);
 
 struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info)
 {
 	struct cfsrvl *rfm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
+
 	if (!rfm) {
 		pr_warning("CAIF: %s(): Out of memory\n", __func__);
 		return NULL;
 	}
+
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
+
 	memset(rfm, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(rfm, channel_id, dev_info);
-	rfm->layer.modemcmd = cfservl_modemcmd;
+	cfsrvl_init(rfm, channel_id, dev_info, false);
 	rfm->layer.receive = cfrfml_receive;
 	rfm->layer.transmit = cfrfml_transmit;
 	snprintf(rfm->layer.name, CAIF_LAYER_NAME_SZ, "rfm%d", channel_id);
 	return &rfm->layer;
 }
 
-static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
-{
-       return -EPROTO;
-}
-
 static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt)
 {
 	u8 tmp;
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 6e5b7079a68..7aa1f03a015 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -24,8 +24,10 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
 				int phyid)
 {
 	struct cfsrvl *service = container_obj(layr);
+
 	caif_assert(layr->up != NULL);
 	caif_assert(layr->up->ctrlcmd != NULL);
+
 	switch (ctrl) {
 	case CAIF_CTRLCMD_INIT_RSP:
 		service->open = true;
@@ -89,9 +91,14 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
 static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
 {
 	struct cfsrvl *service = container_obj(layr);
+
 	caif_assert(layr != NULL);
 	caif_assert(layr->dn != NULL);
 	caif_assert(layr->dn->transmit != NULL);
+
+	if (!service->supports_flowctrl)
+		return 0;
+
 	switch (ctrl) {
 	case CAIF_MODEMCMD_FLOW_ON_REQ:
 		{
@@ -153,8 +160,10 @@ void cfservl_destroy(struct cflayer *layer)
 }
 
 void cfsrvl_init(struct cfsrvl *service,
-		 u8 channel_id,
-		 struct dev_info *dev_info)
+			u8 channel_id,
+			struct dev_info *dev_info,
+			bool supports_flowctrl
+			)
 {
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 	service->open = false;
@@ -164,6 +173,7 @@ void cfsrvl_init(struct cfsrvl *service,
 	service->layer.ctrlcmd = cfservl_ctrlcmd;
 	service->layer.modemcmd = cfservl_modemcmd;
 	service->dev_info = *dev_info;
+	service->supports_flowctrl = supports_flowctrl;
 	kref_init(&service->ref);
 }
 
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 5fd2c9ea8b4..ce525cac906 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -31,7 +31,7 @@ struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
 	}
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 	memset(util, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(util, channel_id, dev_info);
+	cfsrvl_init(util, channel_id, dev_info, true);
 	util->layer.receive = cfutill_receive;
 	util->layer.transmit = cfutill_transmit;
 	snprintf(util->layer.name, CAIF_LAYER_NAME_SZ - 1, "util1");
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index e04f7d964e8..637cb0eee13 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -30,7 +30,7 @@ struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
 	}
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 	memset(vei, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(vei, channel_id, dev_info);
+	cfsrvl_init(vei, channel_id, dev_info, true);
 	vei->layer.receive = cfvei_receive;
 	vei->layer.transmit = cfvei_transmit;
 	snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ - 1, "vei%d", channel_id);
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index 89ad4ea239f..ada6ee2d48f 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -27,7 +27,7 @@ struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 
 	memset(vid, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(vid, channel_id, dev_info);
+	cfsrvl_init(vid, channel_id, dev_info, false);
 	vid->layer.receive = cfvidl_receive;
 	vid->layer.transmit = cfvidl_transmit;
 	snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ - 1, "vid1");
-- 
cgit v1.2.3-70-g09d2


From a7da1f55a826c621251874e7684c234972fc3216 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Thu, 17 Jun 2010 06:55:39 +0000
Subject: caif: Bugfix - RFM must support segmentation.

CAIF Remote File Manager may send or receive more than 4050 bytes.
Because of this, the CAIF RFM service has to support segmentation.

Signed-off-by: Sjur Braendeland@stericsson.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/cfsrvl.h |   9 +-
 net/caif/caif_socket.c    |   4 -
 net/caif/cfcnfg.c         |   4 +-
 net/caif/cfpkt_skbuff.c   |   1 -
 net/caif/cfrfml.c         | 314 ++++++++++++++++++++++++++++++++++++++--------
 net/caif/cfsrvl.c         |  13 +-
 6 files changed, 278 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h
index 5d921f5e18c..b1fa87ee099 100644
--- a/include/net/caif/cfsrvl.h
+++ b/include/net/caif/cfsrvl.h
@@ -17,6 +17,7 @@ struct cfsrvl {
 	bool phy_flow_on;
 	bool modem_flow_on;
 	bool supports_flowctrl;
+	void (*release)(struct kref *);
 	struct dev_info dev_info;
 	struct kref ref;
 };
@@ -26,7 +27,8 @@ struct cflayer *cfvei_create(u8 linkid, struct dev_info *dev_info);
 struct cflayer *cfdgml_create(u8 linkid, struct dev_info *dev_info);
 struct cflayer *cfutill_create(u8 linkid, struct dev_info *dev_info);
 struct cflayer *cfvidl_create(u8 linkid, struct dev_info *dev_info);
-struct cflayer *cfrfml_create(u8 linkid, struct dev_info *dev_info);
+struct cflayer *cfrfml_create(u8 linkid, struct dev_info *dev_info,
+				int mtu_size);
 struct cflayer *cfdbgl_create(u8 linkid, struct dev_info *dev_info);
 bool cfsrvl_phyid_match(struct cflayer *layer, int phyid);
 void cfservl_destroy(struct cflayer *layer);
@@ -52,7 +54,10 @@ static inline void cfsrvl_put(struct cflayer *layr)
 	if (layr == NULL)
 		return;
 	s = container_of(layr, struct cfsrvl, layer);
-	kref_put(&s->ref, cfsrvl_release);
+
+	WARN_ON(!s->release);
+	if (s->release)
+		kref_put(&s->ref, s->release);
 }
 
 #endif				/* CFSRVL_H_ */
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 791249316ef..848ae755cdd 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -596,10 +596,6 @@ static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
 
 	buffer_size = len + CAIF_NEEDED_HEADROOM + CAIF_NEEDED_TAILROOM;
 
-	ret = -EMSGSIZE;
-	if (buffer_size > CAIF_MAX_PAYLOAD_SIZE)
-		goto err;
-
 	timeo = sock_sndtimeo(sk, noblock);
 	timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk),
 				1, timeo, &ret);
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 7c81974a45c..cff2dcb9efe 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -22,6 +22,7 @@
 #define PHY_NAME_LEN 20
 
 #define container_obj(layr) container_of(layr, struct cfcnfg, layer)
+#define RFM_FRAGMENT_SIZE 4030
 
 /* Information about CAIF physical interfaces held by Config Module in order
  * to manage physical interfaces
@@ -328,7 +329,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 		servicel = cfdgml_create(channel_id, &phyinfo->dev_info);
 		break;
 	case CFCTRL_SRV_RFM:
-		servicel = cfrfml_create(channel_id, &phyinfo->dev_info);
+		servicel = cfrfml_create(channel_id, &phyinfo->dev_info,
+						RFM_FRAGMENT_SIZE);
 		break;
 	case CFCTRL_SRV_UTIL:
 		servicel = cfutill_create(channel_id, &phyinfo->dev_info);
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index a6fdf899741..318b0f4b416 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -338,7 +338,6 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
 	u16 dstlen;
 	u16 createlen;
 	if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) {
-		cfpkt_destroy(addpkt);
 		return dstpkt;
 	}
 	if (expectlen > addlen)
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 689cbfd0e43..4b04d25b6a3 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -7,98 +7,304 @@
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
+#include <linux/unaligned/le_byteshift.h>
 #include <net/caif/caif_layer.h>
 #include <net/caif/cfsrvl.h>
 #include <net/caif/cfpkt.h>
 
-#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
-
+#define container_obj(layr) container_of(layr, struct cfrfml, serv.layer)
 #define RFM_SEGMENTATION_BIT 0x01
-#define RFM_PAYLOAD  0x00
-#define RFM_CMD_BIT  0x80
-#define RFM_FLOW_OFF 0x81
-#define RFM_FLOW_ON  0x80
-#define RFM_SET_PIN  0x82
-#define RFM_CTRL_PKT_SIZE 1
+#define RFM_HEAD_SIZE 7
 
 static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt);
 
-struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info)
+struct cfrfml {
+	struct cfsrvl serv;
+	struct cfpkt *incomplete_frm;
+	int fragment_size;
+	u8  seghead[6];
+	u16 pdu_size;
+	/* Protects serialized processing of packets */
+	spinlock_t sync;
+};
+
+static void cfrfml_release(struct kref *kref)
+{
+	struct cfsrvl *srvl = container_of(kref, struct cfsrvl, ref);
+	struct cfrfml *rfml = container_obj(&srvl->layer);
+
+	if (rfml->incomplete_frm)
+		cfpkt_destroy(rfml->incomplete_frm);
+
+	kfree(srvl);
+}
+
+struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
+					int mtu_size)
 {
-	struct cfsrvl *rfm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
+	int tmp;
+	struct cfrfml *this =
+		kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
 
-	if (!rfm) {
+	if (!this) {
 		pr_warning("CAIF: %s(): Out of memory\n", __func__);
 		return NULL;
 	}
 
-	caif_assert(offsetof(struct cfsrvl, layer) == 0);
+	cfsrvl_init(&this->serv, channel_id, dev_info, false);
+	this->serv.release = cfrfml_release;
+	this->serv.layer.receive = cfrfml_receive;
+	this->serv.layer.transmit = cfrfml_transmit;
+
+	/* Round down to closest multiple of 16 */
+	tmp = (mtu_size - RFM_HEAD_SIZE - 6) / 16;
+	tmp *= 16;
+
+	this->fragment_size = tmp;
+	spin_lock_init(&this->sync);
+	snprintf(this->serv.layer.name, CAIF_LAYER_NAME_SZ,
+		"rfm%d", channel_id);
+
+	return &this->serv.layer;
+}
+
+static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead,
+			struct cfpkt *pkt, int *err)
+{
+	struct cfpkt *tmppkt;
+	*err = -EPROTO;
+	/* n-th but not last segment */
+
+	if (cfpkt_extr_head(pkt, seghead, 6) < 0)
+		return NULL;
+
+	/* Verify correct header */
+	if (memcmp(seghead, rfml->seghead, 6) != 0)
+		return NULL;
+
+	tmppkt = cfpkt_append(rfml->incomplete_frm, pkt,
+			rfml->pdu_size + RFM_HEAD_SIZE);
+
+	/* If cfpkt_append failes input pkts are not freed */
+	*err = -ENOMEM;
+	if (tmppkt == NULL)
+		return NULL;
 
-	memset(rfm, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(rfm, channel_id, dev_info, false);
-	rfm->layer.receive = cfrfml_receive;
-	rfm->layer.transmit = cfrfml_transmit;
-	snprintf(rfm->layer.name, CAIF_LAYER_NAME_SZ, "rfm%d", channel_id);
-	return &rfm->layer;
+	*err = 0;
+	return tmppkt;
 }
 
 static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt)
 {
 	u8 tmp;
 	bool segmented;
-	int ret;
+	int err;
+	u8 seghead[6];
+	struct cfrfml *rfml;
+	struct cfpkt *tmppkt = NULL;
+
 	caif_assert(layr->up != NULL);
 	caif_assert(layr->receive != NULL);
+	rfml = container_obj(layr);
+	spin_lock(&rfml->sync);
+
+	err = -EPROTO;
+	if (cfpkt_extr_head(pkt, &tmp, 1) < 0)
+		goto out;
+	segmented = tmp & RFM_SEGMENTATION_BIT;
+
+	if (segmented) {
+		if (rfml->incomplete_frm == NULL) {
+			/* Initial Segment */
+			if (cfpkt_peek_head(pkt, rfml->seghead, 6) < 0)
+				goto out;
+
+			rfml->pdu_size = get_unaligned_le16(rfml->seghead+4);
+
+			if (cfpkt_erroneous(pkt))
+				goto out;
+			rfml->incomplete_frm = pkt;
+			pkt = NULL;
+		} else {
+
+			tmppkt = rfm_append(rfml, seghead, pkt, &err);
+			if (tmppkt == NULL)
+				goto out;
+
+			if (cfpkt_erroneous(tmppkt))
+				goto out;
+
+			rfml->incomplete_frm = tmppkt;
+
+
+			if (cfpkt_erroneous(tmppkt))
+				goto out;
+		}
+		err = 0;
+		goto out;
+	}
+
+	if (rfml->incomplete_frm) {
+
+		/* Last Segment */
+		tmppkt = rfm_append(rfml, seghead, pkt, &err);
+		if (tmppkt == NULL)
+			goto out;
+
+		if (cfpkt_erroneous(tmppkt))
+			goto out;
+
+		rfml->incomplete_frm = NULL;
+		pkt = tmppkt;
+		tmppkt = NULL;
+
+		/* Verify that length is correct */
+		err = EPROTO;
+		if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1)
+			goto out;
+	}
+
+	err = rfml->serv.layer.up->receive(rfml->serv.layer.up, pkt);
+
+out:
+
+	if (err != 0) {
+		if (tmppkt)
+			cfpkt_destroy(tmppkt);
+		if (pkt)
+			cfpkt_destroy(pkt);
+		if (rfml->incomplete_frm)
+			cfpkt_destroy(rfml->incomplete_frm);
+		rfml->incomplete_frm = NULL;
+
+		pr_info("CAIF: %s(): "
+				"Connection error %d triggered on RFM link\n",
+				__func__, err);
+
+		/* Trigger connection error upon failure.*/
+		layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
+					rfml->serv.dev_info.id);
+	}
+	spin_unlock(&rfml->sync);
+	return err;
+}
+
+
+static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt)
+{
+	caif_assert(!cfpkt_getlen(pkt) < rfml->fragment_size);
+
+	/* Add info for MUX-layer to route the packet out. */
+	cfpkt_info(pkt)->channel_id = rfml->serv.layer.id;
 
 	/*
-	 * RFM is taking care of segmentation and stripping of
-	 * segmentation bit.
+	 * To optimize alignment, we add up the size of CAIF header before
+	 * payload.
 	 */
-	if (cfpkt_extr_head(pkt, &tmp, 1) < 0) {
-		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
-		cfpkt_destroy(pkt);
-		return -EPROTO;
-	}
-	segmented = tmp & RFM_SEGMENTATION_BIT;
-	caif_assert(!segmented);
+	cfpkt_info(pkt)->hdr_len = RFM_HEAD_SIZE;
+	cfpkt_info(pkt)->dev_info = &rfml->serv.dev_info;
 
-	ret = layr->up->receive(layr->up, pkt);
-	return ret;
+	return rfml->serv.layer.dn->transmit(rfml->serv.layer.dn, pkt);
 }
 
 static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
 {
-	u8 tmp = 0;
-	int ret;
-	struct cfsrvl *service = container_obj(layr);
+	int err;
+	u8 seg;
+	u8 head[6];
+	struct cfpkt *rearpkt = NULL;
+	struct cfpkt *frontpkt = pkt;
+	struct cfrfml *rfml = container_obj(layr);
 
 	caif_assert(layr->dn != NULL);
 	caif_assert(layr->dn->transmit != NULL);
 
-	if (!cfsrvl_ready(service, &ret))
-		return ret;
+	if (!cfsrvl_ready(&rfml->serv, &err))
+		return err;
+
+	err = -EPROTO;
+	if (cfpkt_getlen(pkt) <= RFM_HEAD_SIZE-1)
+		goto out;
+
+	err = 0;
+	if (cfpkt_getlen(pkt) > rfml->fragment_size + RFM_HEAD_SIZE)
+		err = cfpkt_peek_head(pkt, head, 6);
+
+	if (err < 0)
+		goto out;
+
+	while (cfpkt_getlen(frontpkt) > rfml->fragment_size + RFM_HEAD_SIZE) {
+
+		seg = 1;
+		err = -EPROTO;
+
+		if (cfpkt_add_head(frontpkt, &seg, 1) < 0)
+			goto out;
+		/*
+		 * On OOM error cfpkt_split returns NULL.
+		 *
+		 * NOTE: Segmented pdu is not correctly aligned.
+		 * This has negative performance impact.
+		 */
+
+		rearpkt = cfpkt_split(frontpkt, rfml->fragment_size);
+		if (rearpkt == NULL)
+			goto out;
+
+		err = cfrfml_transmit_segment(rfml, frontpkt);
+
+		if (err != 0)
+			goto out;
+		frontpkt = rearpkt;
+		rearpkt = NULL;
+
+		err = -ENOMEM;
+		if (frontpkt == NULL)
+			goto out;
+		err = -EPROTO;
+		if (cfpkt_add_head(frontpkt, head, 6) < 0)
+			goto out;
 
-	if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
-		pr_err("CAIF: %s():Packet too large - size=%d\n",
-			__func__, cfpkt_getlen(pkt));
-		return -EOVERFLOW;
 	}
-	if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
-		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
-		return -EPROTO;
+
+	seg = 0;
+	err = -EPROTO;
+
+	if (cfpkt_add_head(frontpkt, &seg, 1) < 0)
+		goto out;
+
+	err = cfrfml_transmit_segment(rfml, frontpkt);
+
+	frontpkt = NULL;
+out:
+
+	if (err != 0) {
+		pr_info("CAIF: %s(): "
+				"Connection error %d triggered on RFM link\n",
+				__func__, err);
+		/* Trigger connection error upon failure.*/
+
+		layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
+					rfml->serv.dev_info.id);
+
+		if (rearpkt)
+			cfpkt_destroy(rearpkt);
+
+		if (frontpkt && frontpkt != pkt) {
+
+			cfpkt_destroy(frontpkt);
+			/*
+			 * Socket layer will free the original packet,
+			 * but this packet may already be sent and
+			 * freed. So we have to return 0 in this case
+			 * to avoid socket layer to re-free this packet.
+			 * The return of shutdown indication will
+			 * cause connection to be invalidated anyhow.
+			 */
+			err = 0;
+		}
 	}
 
-	/* Add info for MUX-layer to route the packet out. */
-	cfpkt_info(pkt)->channel_id = service->layer.id;
-	/*
-	 * To optimize alignment, we add up the size of CAIF header before
-	 * payload.
-	 */
-	cfpkt_info(pkt)->hdr_len = 1;
-	cfpkt_info(pkt)->dev_info = &service->dev_info;
-	ret = layr->dn->transmit(layr->dn, pkt);
-	if (ret < 0)
-		cfpkt_extr_head(pkt, &tmp, 1);
-	return ret;
+	return err;
 }
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 7aa1f03a015..4c9f147c38a 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -159,6 +159,13 @@ void cfservl_destroy(struct cflayer *layer)
 	kfree(layer);
 }
 
+void cfsrvl_release(struct kref *kref)
+{
+	struct cfsrvl *service = container_of(kref, struct cfsrvl, ref);
+	pr_info("CAIF: %s(): enter\n", __func__);
+	kfree(service);
+}
+
 void cfsrvl_init(struct cfsrvl *service,
 			u8 channel_id,
 			struct dev_info *dev_info,
@@ -174,14 +181,10 @@ void cfsrvl_init(struct cfsrvl *service,
 	service->layer.modemcmd = cfservl_modemcmd;
 	service->dev_info = *dev_info;
 	service->supports_flowctrl = supports_flowctrl;
+	service->release = cfsrvl_release;
 	kref_init(&service->ref);
 }
 
-void cfsrvl_release(struct kref *kref)
-{
-	struct cfsrvl *service = container_of(kref, struct cfsrvl, ref);
-	kfree(service);
-}
 
 bool cfsrvl_ready(struct cfsrvl *service, int *err)
 {
-- 
cgit v1.2.3-70-g09d2


From 2aa40aef9debc77d55cc87a50d335b6fe97fbeb0 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Thu, 17 Jun 2010 06:55:40 +0000
Subject: caif: Use link layer MTU instead of fixed MTU

Previously CAIF supported a maximum transfer size of ~4050 bytes.
The transfer size is now calculated dynamically based on the
link layer's MTU size.

Signed-off-by: Sjur Braendeland@stericsson.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/caif/caif_serial.c |  1 -
 include/net/caif/caif_dev.h    |  8 +++--
 include/net/caif/caif_layer.h  |  6 ----
 include/net/caif/cfcnfg.h      | 16 +++++++---
 net/caif/caif_dev.c            | 12 +++++---
 net/caif/caif_socket.c         | 54 ++++++++++++++++++++++------------
 net/caif/cfcnfg.c              | 44 ++++++++++++++++++++++++---
 net/caif/cfctrl.c              |  6 ++--
 net/caif/cfdgml.c              |  5 ++++
 net/caif/cfpkt_skbuff.c        |  4 +--
 net/caif/cfserl.c              |  7 +++--
 net/caif/cfsrvl.c              |  1 -
 net/caif/cfutill.c             |  6 ----
 net/caif/cfveil.c              |  5 ----
 net/caif/chnl_net.c            | 67 +++++++++++++++++++++++++++++++++++-------
 15 files changed, 173 insertions(+), 69 deletions(-)

(limited to 'include')

diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index 3e706f00a0d..3df0c0f8b8b 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -403,7 +403,6 @@ static void caifdev_setup(struct net_device *dev)
 	dev->type = ARPHRD_CAIF;
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
 	dev->mtu = CAIF_MAX_MTU;
-	dev->hard_header_len = CAIF_NEEDED_HEADROOM;
 	dev->tx_queue_len = 0;
 	dev->destructor = free_netdev;
 	skb_queue_head_init(&serdev->head);
diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h
index 318ab9478a4..6da573c75d5 100644
--- a/include/net/caif/caif_dev.h
+++ b/include/net/caif/caif_dev.h
@@ -50,6 +50,9 @@ struct caif_connect_request {
  * @client_layer:	User implementation of client layer. This layer
  *			MUST have receive and control callback functions
  *			implemented.
+ * @ifindex:		Link layer interface index used for this connection.
+ * @headroom:		Head room needed by CAIF protocol.
+ * @tailroom:		Tail room needed by CAIF protocol.
  *
  * This function connects a CAIF channel. The Client must implement
  * the struct cflayer. This layer represents the Client layer and holds
@@ -59,8 +62,9 @@ struct caif_connect_request {
  * E.g. CAIF Socket will call this function for each socket it connects
  * and have one client_layer instance for each socket.
  */
-int caif_connect_client(struct caif_connect_request *config,
-			   struct cflayer *client_layer);
+int caif_connect_client(struct caif_connect_request *conn_req,
+			struct cflayer *client_layer, int *ifindex,
+			int *headroom, int *tailroom);
 
 /**
  * caif_disconnect_client - Disconnects a client from the CAIF stack.
diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h
index 25c472f0e5b..c8b07a904e7 100644
--- a/include/net/caif/caif_layer.h
+++ b/include/net/caif/caif_layer.h
@@ -15,14 +15,8 @@ struct cfpktq;
 struct caif_payload_info;
 struct caif_packet_funcs;
 
-#define CAIF_MAX_FRAMESIZE 4096
-#define CAIF_MAX_PAYLOAD_SIZE (4096 - 64)
-#define CAIF_NEEDED_HEADROOM (10)
-#define CAIF_NEEDED_TAILROOM (2)
 
 #define CAIF_LAYER_NAME_SZ 16
-#define CAIF_SUCCESS	1
-#define CAIF_FAILURE	0
 
 /**
  * caif_assert() - Assert function for CAIF.
diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h
index 9fc2fc20b88..bd646faffa4 100644
--- a/include/net/caif/cfcnfg.h
+++ b/include/net/caif/cfcnfg.h
@@ -7,6 +7,7 @@
 #ifndef CFCNFG_H_
 #define CFCNFG_H_
 #include <linux/spinlock.h>
+#include <linux/netdevice.h>
 #include <net/caif/caif_layer.h>
 #include <net/caif/cfctrl.h>
 
@@ -73,8 +74,8 @@ void cfcnfg_remove(struct cfcnfg *cfg);
 
 void
 cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
-		     void *dev, struct cflayer *phy_layer, u16 *phyid,
-		     enum cfcnfg_phy_preference pref,
+		     struct net_device *dev, struct cflayer *phy_layer,
+		     u16 *phyid, enum cfcnfg_phy_preference pref,
 		     bool fcs, bool stx);
 
 /**
@@ -114,11 +115,18 @@ void cfcnfg_release_adap_layer(struct cflayer *adap_layer);
  * @param:		Link setup parameters.
  * @adap_layer:		Specify the adaptation layer; the receive and
  *			flow-control functions MUST be set in the structure.
- *
+ * @ifindex:		Link layer interface index used for this connection.
+ * @proto_head:		Protocol head-space needed by CAIF protocol,
+ *			excluding link layer.
+ * @proto_tail:		Protocol tail-space needed by CAIF protocol,
+ *			excluding link layer.
  */
 int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
 			    struct cfctrl_link_param *param,
-			    struct cflayer *adap_layer);
+			    struct cflayer *adap_layer,
+			    int *ifindex,
+			    int *proto_head,
+			    int *proto_tail);
 
 /**
  * cfcnfg_get_phyid() - Get physical ID, given type.
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index e2b86f1f5a4..0b586e9d137 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -255,7 +255,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 			pref = CFPHYPREF_HIGH_BW;
 			break;
 		}
-
+		dev_hold(dev);
 		cfcnfg_add_phy_layer(get_caif_conf(),
 				     phy_type,
 				     dev,
@@ -285,6 +285,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 		caifd->layer.up->ctrlcmd(caifd->layer.up,
 					 _CAIF_CTRLCMD_PHYIF_DOWN_IND,
 					 caifd->layer.id);
+		might_sleep();
 		res = wait_event_interruptible_timeout(caifd->event,
 					atomic_read(&caifd->in_use) == 0,
 					TIMEOUT);
@@ -300,6 +301,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 				   "Unregistering an active CAIF device: %s\n",
 				   __func__, dev->name);
 		cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer);
+		dev_put(dev);
 		atomic_set(&caifd->state, what);
 		break;
 
@@ -326,7 +328,8 @@ struct cfcnfg *get_caif_conf(void)
 EXPORT_SYMBOL(get_caif_conf);
 
 int caif_connect_client(struct caif_connect_request *conn_req,
-			   struct cflayer *client_layer)
+			struct cflayer *client_layer, int *ifindex,
+			int *headroom, int *tailroom)
 {
 	struct cfctrl_link_param param;
 	int ret;
@@ -334,8 +337,9 @@ int caif_connect_client(struct caif_connect_request *conn_req,
 	if (ret)
 		return ret;
 	/* Hook up the adaptation layer. */
-	return cfcnfg_add_adaptation_layer(get_caif_conf(),
-						&param, client_layer);
+	return cfcnfg_add_adaptation_layer(get_caif_conf(), &param,
+					client_layer, ifindex,
+					headroom, tailroom);
 }
 EXPORT_SYMBOL(caif_connect_client);
 
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 848ae755cdd..8ce90478611 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -28,8 +28,8 @@
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_NETPROTO(AF_CAIF);
 
-#define CAIF_DEF_SNDBUF (CAIF_MAX_PAYLOAD_SIZE*10)
-#define CAIF_DEF_RCVBUF (CAIF_MAX_PAYLOAD_SIZE*100)
+#define CAIF_DEF_SNDBUF (4096*10)
+#define CAIF_DEF_RCVBUF (4096*100)
 
 /*
  * CAIF state is re-using the TCP socket states.
@@ -76,6 +76,7 @@ struct caifsock {
 	struct caif_connect_request conn_req;
 	struct mutex readlock;
 	struct dentry *debugfs_socket_dir;
+	int headroom, tailroom, maxframe;
 };
 
 static int rx_flow_is_on(struct caifsock *cf_sk)
@@ -594,23 +595,32 @@ static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		goto err;
 	noblock = msg->msg_flags & MSG_DONTWAIT;
 
-	buffer_size = len + CAIF_NEEDED_HEADROOM + CAIF_NEEDED_TAILROOM;
-
 	timeo = sock_sndtimeo(sk, noblock);
 	timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk),
 				1, timeo, &ret);
 
+	if (ret)
+		goto err;
 	ret = -EPIPE;
 	if (cf_sk->sk.sk_state != CAIF_CONNECTED ||
 		sock_flag(sk, SOCK_DEAD) ||
 		(sk->sk_shutdown & RCV_SHUTDOWN))
 		goto err;
 
+	/* Error if trying to write more than maximum frame size. */
+	ret = -EMSGSIZE;
+	if (len > cf_sk->maxframe && cf_sk->sk.sk_protocol != CAIFPROTO_RFM)
+		goto err;
+
+	buffer_size = len + cf_sk->headroom + cf_sk->tailroom;
+
 	ret = -ENOMEM;
 	skb = sock_alloc_send_skb(sk, buffer_size, noblock, &ret);
-	if (!skb)
+
+	if (!skb || skb_tailroom(skb) < buffer_size)
 		goto err;
-	skb_reserve(skb, CAIF_NEEDED_HEADROOM);
+
+	skb_reserve(skb, cf_sk->headroom);
 
 	ret = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
 
@@ -641,7 +651,6 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	long timeo;
 
 	err = -EOPNOTSUPP;
-
 	if (unlikely(msg->msg_flags&MSG_OOB))
 		goto out_err;
 
@@ -658,8 +667,8 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 
 		size = len-sent;
 
-		if (size > CAIF_MAX_PAYLOAD_SIZE)
-			size = CAIF_MAX_PAYLOAD_SIZE;
+		if (size > cf_sk->maxframe)
+			size = cf_sk->maxframe;
 
 		/* If size is more than half of sndbuf, chop up message */
 		if (size > ((sk->sk_sndbuf >> 1) - 64))
@@ -669,14 +678,14 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 			size = SKB_MAX_ALLOC;
 
 		skb = sock_alloc_send_skb(sk,
-					size + CAIF_NEEDED_HEADROOM
-					+ CAIF_NEEDED_TAILROOM,
+					size + cf_sk->headroom +
+					cf_sk->tailroom,
 					msg->msg_flags&MSG_DONTWAIT,
 					&err);
 		if (skb == NULL)
 			goto out_err;
 
-		skb_reserve(skb, CAIF_NEEDED_HEADROOM);
+		skb_reserve(skb, cf_sk->headroom);
 		/*
 		 *	If you pass two values to the sock_alloc_send_skb
 		 *	it tries to grab the large buffer with GFP_NOFS
@@ -817,17 +826,15 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 	long timeo;
 	int err;
+	int ifindex, headroom, tailroom;
+	struct net_device *dev;
+
 	lock_sock(sk);
 
 	err = -EAFNOSUPPORT;
 	if (uaddr->sa_family != AF_CAIF)
 		goto out;
 
-	err = -ESOCKTNOSUPPORT;
-	if (unlikely(!(sk->sk_type == SOCK_STREAM &&
-		       cf_sk->sk.sk_protocol == CAIFPROTO_AT) &&
-		       sk->sk_type != SOCK_SEQPACKET))
-		goto out;
 	switch (sock->state) {
 	case SS_UNCONNECTED:
 		/* Normal case, a fresh connect */
@@ -883,12 +890,23 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 	dbfs_atomic_inc(&cnt.num_connect_req);
 	cf_sk->layer.receive = caif_sktrecv_cb;
 	err = caif_connect_client(&cf_sk->conn_req,
-				&cf_sk->layer);
+				&cf_sk->layer, &ifindex, &headroom, &tailroom);
 	if (err < 0) {
 		cf_sk->sk.sk_socket->state = SS_UNCONNECTED;
 		cf_sk->sk.sk_state = CAIF_DISCONNECTED;
 		goto out;
 	}
+	dev = dev_get_by_index(sock_net(sk), ifindex);
+	cf_sk->headroom = LL_RESERVED_SPACE_EXTRA(dev, headroom);
+	cf_sk->tailroom = tailroom;
+	cf_sk->maxframe = dev->mtu - (headroom + tailroom);
+	dev_put(dev);
+	if (cf_sk->maxframe < 1) {
+		pr_warning("CAIF: %s(): CAIF Interface MTU too small (%d)\n",
+			__func__, dev->mtu);
+		err = -ENODEV;
+		goto out;
+	}
 
 	err = -EINPROGRESS;
 wait_connect:
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index cff2dcb9efe..1c29189b344 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -6,6 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
+#include <linux/netdevice.h>
 #include <net/caif/caif_layer.h>
 #include <net/caif/cfpkt.h>
 #include <net/caif/cfcnfg.h>
@@ -42,6 +43,15 @@ struct cfcnfg_phyinfo {
 
 	/* Information about the physical device */
 	struct dev_info dev_info;
+
+	/* Interface index */
+	int ifindex;
+
+	/* Use Start of frame extension */
+	bool use_stx;
+
+	/* Use Start of frame checksum */
+	bool use_fcs;
 };
 
 struct cfcnfg {
@@ -249,9 +259,20 @@ static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id)
 {
 }
 
+int protohead[CFCTRL_SRV_MASK] = {
+	[CFCTRL_SRV_VEI] = 4,
+	[CFCTRL_SRV_DATAGRAM] = 7,
+	[CFCTRL_SRV_UTIL] = 4,
+	[CFCTRL_SRV_RFM] = 3,
+	[CFCTRL_SRV_DBG] = 3,
+};
+
 int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
 				struct cfctrl_link_param *param,
-				struct cflayer *adap_layer)
+				struct cflayer *adap_layer,
+				int *ifindex,
+				int *proto_head,
+				int *proto_tail)
 {
 	struct cflayer *frml;
 	if (adap_layer == NULL) {
@@ -277,6 +298,14 @@ int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
 		     param->phyid);
 	caif_assert(cnfg->phy_layers[param->phyid].phy_layer->id ==
 		     param->phyid);
+
+	*ifindex = cnfg->phy_layers[param->phyid].ifindex;
+	*proto_head =
+		protohead[param->linktype]+
+		(cnfg->phy_layers[param->phyid].use_stx ? 1 : 0);
+
+	*proto_tail = 2;
+
 	/* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */
 	cfctrl_enum_req(cnfg->ctrl, param->phyid);
 	return cfctrl_linkup_request(cnfg->ctrl, param, adap_layer);
@@ -298,6 +327,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 	struct cfcnfg *cnfg = container_obj(layer);
 	struct cflayer *servicel = NULL;
 	struct cfcnfg_phyinfo *phyinfo;
+	struct net_device *netdev;
+
 	if (adapt_layer == NULL) {
 		pr_debug("CAIF: %s(): link setup response "
 				"but no client exist, send linkdown back\n",
@@ -329,8 +360,9 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 		servicel = cfdgml_create(channel_id, &phyinfo->dev_info);
 		break;
 	case CFCTRL_SRV_RFM:
+		netdev = phyinfo->dev_info.dev;
 		servicel = cfrfml_create(channel_id, &phyinfo->dev_info,
-						RFM_FRAGMENT_SIZE);
+						netdev->mtu);
 		break;
 	case CFCTRL_SRV_UTIL:
 		servicel = cfutill_create(channel_id, &phyinfo->dev_info);
@@ -361,8 +393,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 
 void
 cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
-		     void *dev, struct cflayer *phy_layer, u16 *phyid,
-		     enum cfcnfg_phy_preference pref,
+		     struct net_device *dev, struct cflayer *phy_layer,
+		     u16 *phyid, enum cfcnfg_phy_preference pref,
 		     bool fcs, bool stx)
 {
 	struct cflayer *frml;
@@ -416,6 +448,10 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
 	cnfg->phy_layers[*phyid].dev_info.dev = dev;
 	cnfg->phy_layers[*phyid].phy_layer = phy_layer;
 	cnfg->phy_layers[*phyid].phy_ref_count = 0;
+	cnfg->phy_layers[*phyid].ifindex = dev->ifindex;
+	cnfg->phy_layers[*phyid].use_stx = stx;
+	cnfg->phy_layers[*phyid].use_fcs = fcs;
+
 	phy_layer->type = phy_type;
 	frml = cffrml_create(*phyid, fcs);
 	if (!frml) {
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 107c4b2a311..563145fdc4c 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -19,7 +19,7 @@
 #ifdef CAIF_NO_LOOP
 static int handle_loop(struct cfctrl *ctrl,
 			      int cmd, struct cfpkt *pkt){
-	return CAIF_FAILURE;
+	return -1;
 }
 #else
 static int handle_loop(struct cfctrl *ctrl,
@@ -395,7 +395,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 	cmd = cmdrsp & CFCTRL_CMD_MASK;
 	if (cmd != CFCTRL_CMD_LINK_ERR
 	    && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)) {
-		if (handle_loop(cfctrl, cmd, pkt) == CAIF_FAILURE)
+		if (handle_loop(cfctrl, cmd, pkt) != 0)
 			cmdrsp |= CFCTRL_ERR_BIT;
 	}
 
@@ -647,6 +647,6 @@ found:
 	default:
 		break;
 	}
-	return CAIF_SUCCESS;
+	return 0;
 }
 #endif
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index 32d9f0dc846..ed9d53aff28 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -17,6 +17,7 @@
 #define DGM_FLOW_OFF 0x81
 #define DGM_FLOW_ON  0x80
 #define DGM_CTRL_PKT_SIZE 1
+#define DGM_MTU 1500
 
 static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt);
@@ -89,6 +90,10 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt)
 	if (!cfsrvl_ready(service, &ret))
 		return ret;
 
+	/* STE Modem cannot handle more than 1500 bytes datagrams */
+	if (cfpkt_getlen(pkt) > DGM_MTU)
+		return -EMSGSIZE;
+
 	cfpkt_add_head(pkt, &zero, 4);
 
 	/* Add info for MUX-layer to route the packet out. */
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 318b0f4b416..01f238ff234 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -9,8 +9,8 @@
 #include <linux/hardirq.h>
 #include <net/caif/cfpkt.h>
 
-#define PKT_PREFIX CAIF_NEEDED_HEADROOM
-#define PKT_POSTFIX CAIF_NEEDED_TAILROOM
+#define PKT_PREFIX  16
+#define PKT_POSTFIX 2
 #define PKT_LEN_WHEN_EXTENDING 128
 #define PKT_ERROR(pkt, errmsg) do {	   \
     cfpkt_priv(pkt)->erronous = true;	   \
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 965c5baace4..a11fbd68a13 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -14,7 +14,8 @@
 #define container_obj(layr) ((struct cfserl *) layr)
 
 #define CFSERL_STX 0x02
-#define CAIF_MINIUM_PACKET_SIZE 4
+#define SERIAL_MINIUM_PACKET_SIZE 4
+#define SERIAL_MAX_FRAMESIZE 4096
 struct cfserl {
 	struct cflayer layer;
 	struct cfpkt *incomplete_frm;
@@ -119,8 +120,8 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt)
 		/*
 		 * Frame error handling
 		 */
-		if (expectlen < CAIF_MINIUM_PACKET_SIZE
-		    || expectlen > CAIF_MAX_FRAMESIZE) {
+		if (expectlen < SERIAL_MINIUM_PACKET_SIZE
+		    || expectlen > SERIAL_MAX_FRAMESIZE) {
 			if (!layr->usestx) {
 				if (pkt != NULL)
 					cfpkt_destroy(pkt);
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 4c9f147c38a..f40939a9121 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -162,7 +162,6 @@ void cfservl_destroy(struct cflayer *layer)
 void cfsrvl_release(struct kref *kref)
 {
 	struct cfsrvl *service = container_of(kref, struct cfsrvl, ref);
-	pr_info("CAIF: %s(): enter\n", __func__);
 	kfree(service);
 }
 
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index ce525cac906..02795aff57a 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -90,12 +90,6 @@ static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt)
 	if (!cfsrvl_ready(service, &ret))
 		return ret;
 
-	if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
-		pr_err("CAIF: %s(): packet too large size=%d\n",
-			__func__, cfpkt_getlen(pkt));
-		return -EOVERFLOW;
-	}
-
 	cfpkt_add_head(pkt, &zero, 1);
 	/* Add info for MUX-layer to route the packet out. */
 	info = cfpkt_info(pkt);
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 637cb0eee13..77cc09faac9 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -84,11 +84,6 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
 		return ret;
 	caif_assert(layr->dn != NULL);
 	caif_assert(layr->dn->transmit != NULL);
-	if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
-		pr_warning("CAIF: %s(): Packet too large - size=%d\n",
-			   __func__, cfpkt_getlen(pkt));
-		return -EOVERFLOW;
-	}
 
 	if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
 		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 610966abe2d..4293e190ec5 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -23,7 +23,7 @@
 #include <net/caif/caif_dev.h>
 
 /* GPRS PDP connection has MTU to 1500 */
-#define SIZE_MTU 1500
+#define GPRS_PDP_MTU 1500
 /* 5 sec. connect timeout */
 #define CONNECT_TIMEOUT (5 * HZ)
 #define CAIF_NET_DEFAULT_QUEUE_LEN 500
@@ -232,6 +232,8 @@ static int chnl_net_open(struct net_device *dev)
 {
 	struct chnl_net *priv = NULL;
 	int result = -1;
+	int llifindex, headroom, tailroom, mtu;
+	struct net_device *lldev;
 	ASSERT_RTNL();
 	priv = netdev_priv(dev);
 	if (!priv) {
@@ -241,41 +243,88 @@ static int chnl_net_open(struct net_device *dev)
 
 	if (priv->state != CAIF_CONNECTING) {
 		priv->state = CAIF_CONNECTING;
-		result = caif_connect_client(&priv->conn_req, &priv->chnl);
+		result = caif_connect_client(&priv->conn_req, &priv->chnl,
+					&llifindex, &headroom, &tailroom);
 		if (result != 0) {
-				priv->state = CAIF_DISCONNECTED;
 				pr_debug("CAIF: %s(): err: "
 					"Unable to register and open device,"
 					" Err:%d\n",
 					__func__,
 					result);
-				return result;
+				goto error;
+		}
+
+		lldev = dev_get_by_index(dev_net(dev), llifindex);
+
+		if (lldev == NULL) {
+			pr_debug("CAIF: %s(): no interface?\n", __func__);
+			result = -ENODEV;
+			goto error;
+		}
+
+		dev->needed_tailroom = tailroom + lldev->needed_tailroom;
+		dev->hard_header_len = headroom + lldev->hard_header_len +
+			lldev->needed_tailroom;
+
+		/*
+		 * MTU, head-room etc. are not known before we have a
+		 * CAIF link layer device available. MTU calculation may
+		 * override initial RTNL configuration.
+		 * MTU is the minimum of the current MTU, the link layer MTU
+		 * minus CAIF head and tail, and the GPRS PDP context max MTU.
+		 */
+		mtu = min_t(int, dev->mtu, lldev->mtu - (headroom + tailroom));
+		mtu = min_t(int, GPRS_PDP_MTU, mtu);
+		dev_set_mtu(dev, mtu);
+		dev_put(lldev);
+
+		if (mtu < 100) {
+			pr_warning("CAIF: %s(): "
+				"CAIF Interface MTU too small (%d)\n",
+				__func__, mtu);
+			result = -ENODEV;
+			goto error;
 		}
 	}
 
+	rtnl_unlock();  /* Release RTNL lock during connect wait */
+
 	result = wait_event_interruptible_timeout(priv->netmgmt_wq,
 						priv->state != CAIF_CONNECTING,
 						CONNECT_TIMEOUT);
 
+	rtnl_lock();
+
 	if (result == -ERESTARTSYS) {
 		pr_debug("CAIF: %s(): wait_event_interruptible"
 			 " woken by a signal\n", __func__);
-		return -ERESTARTSYS;
+		result = -ERESTARTSYS;
+		goto error;
 	}
+
 	if (result == 0) {
 		pr_debug("CAIF: %s(): connect timeout\n", __func__);
 		caif_disconnect_client(&priv->chnl);
 		priv->state = CAIF_DISCONNECTED;
 		pr_debug("CAIF: %s(): state disconnected\n", __func__);
-		return -ETIMEDOUT;
+		result = -ETIMEDOUT;
+		goto error;
 	}
 
 	if (priv->state != CAIF_CONNECTED) {
 		pr_debug("CAIF: %s(): connect failed\n", __func__);
-		return -ECONNREFUSED;
+		result = -ECONNREFUSED;
+		goto error;
 	}
 	pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__);
 	return 0;
+
+error:
+	caif_disconnect_client(&priv->chnl);
+	priv->state = CAIF_DISCONNECTED;
+	pr_debug("CAIF: %s(): state disconnected\n", __func__);
+	return result;
+
 }
 
 static int chnl_net_stop(struct net_device *dev)
@@ -321,9 +370,7 @@ static void ipcaif_net_setup(struct net_device *dev)
 	dev->destructor = free_netdev;
 	dev->flags |= IFF_NOARP;
 	dev->flags |= IFF_POINTOPOINT;
-	dev->needed_headroom = CAIF_NEEDED_HEADROOM;
-	dev->needed_tailroom = CAIF_NEEDED_TAILROOM;
-	dev->mtu = SIZE_MTU;
+	dev->mtu = GPRS_PDP_MTU;
 	dev->tx_queue_len = CAIF_NET_DEFAULT_QUEUE_LEN;
 
 	priv = netdev_priv(dev);
-- 
cgit v1.2.3-70-g09d2


From 69ad78208ecf4c392f3d323ed050423847c24104 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Thu, 17 Jun 2010 06:55:41 +0000
Subject: caif: Add debug connection type for CAIF.

Added new CAIF protocol type CAIFPROTO_DEBUG for accessing
CAIF debug on the ST Ericsson modems.

There are two debug servers on the modem, one for radio related
debug (CAIF_RADIO_DEBUG_SERVICE) and the other for
communication/application related debug (CAIF_COM_DEBUG_SERVICE).

The debug connection can contain trace debug printouts or
interactive debug used for debugging and test.

Debug connections can be of type STREAM or SEQPACKET.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/caif/caif_socket.h | 34 ++++++++++++++++++++++++++++++++++
 net/caif/caif_config_util.c      |  5 +++++
 2 files changed, 39 insertions(+)

(limited to 'include')

diff --git a/include/linux/caif/caif_socket.h b/include/linux/caif/caif_socket.h
index 2a61eb1beb8..d9cb19b7cff 100644
--- a/include/linux/caif/caif_socket.h
+++ b/include/linux/caif/caif_socket.h
@@ -62,6 +62,7 @@ enum caif_channel_priority {
  * @CAIFPROTO_DATAGRAM_LOOP:	Datagram loopback channel, used for testing.
  * @CAIFPROTO_UTIL:		Utility (Psock) channel.
  * @CAIFPROTO_RFM:		Remote File Manager
+ * @CAIFPROTO_DEBUG:		Debug link
  *
  * This enum defines the CAIF Channel type to be used. This defines
  * the service to connect to on the modem.
@@ -72,6 +73,7 @@ enum caif_protocol_type {
 	CAIFPROTO_DATAGRAM_LOOP,
 	CAIFPROTO_UTIL,
 	CAIFPROTO_RFM,
+	CAIFPROTO_DEBUG,
 	_CAIFPROTO_MAX
 };
 #define	CAIFPROTO_MAX _CAIFPROTO_MAX
@@ -83,6 +85,28 @@ enum caif_protocol_type {
 enum caif_at_type {
 	CAIF_ATTYPE_PLAIN = 2
 };
+ /**
+ * enum caif_debug_type - Content selection for debug connection
+ * @CAIF_DEBUG_TRACE_INTERACTIVE: Connection will contain
+ *				both trace and interactive debug.
+ * @CAIF_DEBUG_TRACE:		Connection contains trace only.
+ * @CAIF_DEBUG_INTERACTIVE:	Connection to interactive debug.
+ */
+enum caif_debug_type {
+	CAIF_DEBUG_TRACE_INTERACTIVE = 0,
+	CAIF_DEBUG_TRACE,
+	CAIF_DEBUG_INTERACTIVE,
+};
+
+/**
+ * enum caif_debug_service - Debug Service Endpoint
+ * @CAIF_RADIO_DEBUG_SERVICE:	Debug service on the Radio sub-system
+ * @CAIF_APP_DEBUG_SERVICE:	Debug for the applications sub-system
+ */
+enum caif_debug_service {
+	CAIF_RADIO_DEBUG_SERVICE = 1,
+	CAIF_APP_DEBUG_SERVICE
+};
 
 /**
  * struct sockaddr_caif - the sockaddr structure for CAIF sockets.
@@ -109,6 +133,12 @@ enum caif_at_type {
  *
  * @u.rfm.volume:            Volume to mount.
  *
+ * @u.dbg:		      Applies when family = CAIFPROTO_DEBUG.
+ *
+ * @u.dbg.type:			     Type of debug connection to set up
+ *			      (caif_debug_type).
+ *
+ * @u.dbg.service:	      Service sub-system to connect (caif_debug_service).
  * Description:
  * This structure holds the connect parameters used for setting up a
  * CAIF Channel. It defines the service to connect to on the modem.
@@ -130,6 +160,10 @@ struct sockaddr_caif {
 			__u32 connection_id;
 			char	  volume[16];
 		} rfm;				/* CAIFPROTO_RFM */
+		struct {
+			__u8  type;		/* type:enum caif_debug_type */
+			__u8  service;		/* service:caif_debug_service */
+		} dbg;				/* CAIFPROTO_DEBUG */
 	} u;
 };
 
diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c
index 6f36580366f..76ae68303d3 100644
--- a/net/caif/caif_config_util.c
+++ b/net/caif/caif_config_util.c
@@ -79,6 +79,11 @@ int connect_req_to_link_param(struct cfcnfg *cnfg,
 		memcpy(l->u.utility.params, s->param.data,
 		       l->u.utility.paramlen);
 
+		break;
+	case CAIFPROTO_DEBUG:
+		l->linktype = CFCTRL_SRV_DBG;
+		l->endpoint = s->sockaddr.u.dbg.service;
+		l->chtype = s->sockaddr.u.dbg.type;
 		break;
 	default:
 		return -EINVAL;
-- 
cgit v1.2.3-70-g09d2


From f90754c15f47063671aea55268a9dd6a37b51492 Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Mon, 21 Jun 2010 08:59:39 +0300
Subject: mac80211: Add interface for driver to temporarily disable dynamic ps

The mechanism introduced in this patch applies (at least) to hardware
designs using a single shared antenna for both WLAN and BT. In these designs,
the antenna must be toggled between WLAN and BT.

On such hardware, managing WLAN co-existence with Bluetooth requires WLAN
full power save whenever there is Bluetooth activity in order for WLAN to be
able to periodically relinquish the antenna to be used for BT. This is because
BT can only access the shared antenna when WLAN is idle or asleep.

Some hardware, for instance the wl1271, is able to indicate to the host
whenever there is BT traffic. In essence, the hardware will send an indication
to the host whenever there is, for example, SCO traffic or A2DP traffic, and
will send another indication when the traffic is over.

The hardware gets information of Bluetooth traffic via hardware co-existence
control lines - these lines are used to negotiate the shared antenna
ownership. The hardware will give the antenna to BT whenever WLAN is sleeping.

This patch adds the interface to mac80211 to facilitate temporarily disabling
of dynamic power save as per request of the WLAN driver. This interface will
immediately force WLAN to full powersave, hence allowing BT coexistence as
described above.

In these kinds of shared antenna designs, when WLAN powersave is fully disabled,
Bluetooth will not work simultaneously with WLAN at all. This patch does not
address that problem. This interface will not change PSM state, so if PSM is
disabled it will remain so. Solving this problem requires knowledge about BT
state, and is best done in user-space.

Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h     | 39 +++++++++++++++++++++++++++++++++++++++
 net/mac80211/ieee80211_i.h |  2 ++
 net/mac80211/mlme.c        | 39 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 79 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index fe1a3a60337..7f256e23c57 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1271,6 +1271,15 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw,
  * dynamic PS feature in stack and will just keep %IEEE80211_CONF_PS
  * enabled whenever user has enabled powersave.
  *
+ * Some hardware need to toggle a single shared antenna between WLAN and
+ * Bluetooth to facilitate co-existence. These types of hardware set
+ * limitations on the use of host controlled dynamic powersave whenever there
+ * is simultaneous WLAN and Bluetooth traffic. For these types of hardware, the
+ * driver may request temporarily going into full power save, in order to
+ * enable toggling the antenna between BT and WLAN. If the driver requests
+ * disabling dynamic powersave, the @dynamic_ps_timeout value will be
+ * temporarily set to zero until the driver re-enables dynamic powersave.
+ *
  * Driver informs U-APSD client support by enabling
  * %IEEE80211_HW_SUPPORTS_UAPSD flag. The mode is configured through the
  * uapsd paramater in conf_tx() operation. Hardware needs to send the QoS
@@ -2446,6 +2455,36 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif);
  */
 void ieee80211_connection_loss(struct ieee80211_vif *vif);
 
+/**
+ * ieee80211_disable_dyn_ps - force mac80211 to temporarily disable dynamic psm
+ *
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * Some hardware requires full power save to manage simultaneous BT traffic
+ * on the WLAN frequency. Full PSM is required periodically, whenever there are
+ * bursts of BT traffic. The hardware gets information of BT traffic via
+ * hardware co-existence lines, and consequently requests mac80211 to
+ * (temporarily) enter full psm.
+ * This function will only temporarily disable dynamic PS, not enable PSM if
+ * it was not already enabled.
+ * The driver must make sure to re-enable dynamic PS using
+ * ieee80211_enable_dyn_ps() if the driver has disabled it.
+ *
+ */
+void ieee80211_disable_dyn_ps(struct ieee80211_vif *vif);
+
+/**
+ * ieee80211_enable_dyn_ps - restore dynamic psm after being disabled
+ *
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * This function restores dynamic PS after being temporarily disabled via
+ * ieee80211_disable_dyn_ps(). Each ieee80211_disable_dyn_ps() call must
+ * be coupled with an eventual call to this function.
+ *
+ */
+void ieee80211_enable_dyn_ps(struct ieee80211_vif *vif);
+
 /**
  * ieee80211_cqm_rssi_notify - inform a configured connection quality monitoring
  *	rssi threshold triggered
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index fb5430188e8..f9251d50192 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -855,6 +855,8 @@ struct ieee80211_local {
 	 * this will override whatever chosen by mac80211 internally.
 	 */
 	int dynamic_ps_forced_timeout;
+	int dynamic_ps_user_timeout;
+	bool disable_dynamic_ps;
 
 	int user_power_level; /* in dBm */
 	int power_constr_level; /* in dBm */
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 85c3ca33333..d1962650b25 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -478,6 +478,39 @@ static void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
 	}
 }
 
+void ieee80211_enable_dyn_ps(struct ieee80211_vif *vif)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_conf *conf = &local->hw.conf;
+
+	WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION ||
+		!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS) ||
+		(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS));
+
+	local->disable_dynamic_ps = false;
+	conf->dynamic_ps_timeout = local->dynamic_ps_user_timeout;
+}
+EXPORT_SYMBOL(ieee80211_enable_dyn_ps);
+
+void ieee80211_disable_dyn_ps(struct ieee80211_vif *vif)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_conf *conf = &local->hw.conf;
+
+	WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION ||
+		!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS) ||
+		(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS));
+
+	local->disable_dynamic_ps = true;
+	conf->dynamic_ps_timeout = 0;
+	del_timer_sync(&local->dynamic_ps_timer);
+	ieee80211_queue_work(&local->hw,
+			     &local->dynamic_ps_enable_work);
+}
+EXPORT_SYMBOL(ieee80211_disable_dyn_ps);
+
 /* powersave */
 static void ieee80211_enable_ps(struct ieee80211_local *local,
 				struct ieee80211_sub_if_data *sdata)
@@ -553,6 +586,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
 	    found->u.mgd.associated->beacon_ies &&
 	    !(found->u.mgd.flags & (IEEE80211_STA_BEACON_POLL |
 				    IEEE80211_STA_CONNECTION_POLL))) {
+		struct ieee80211_conf *conf = &local->hw.conf;
 		s32 beaconint_us;
 
 		if (latency < 0)
@@ -575,7 +609,10 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
 			else
 				timeout = 100;
 		}
-		local->hw.conf.dynamic_ps_timeout = timeout;
+		local->dynamic_ps_user_timeout = timeout;
+		if (!local->disable_dynamic_ps)
+			conf->dynamic_ps_timeout =
+				local->dynamic_ps_user_timeout;
 
 		if (beaconint_us > latency) {
 			local->ps_sdata = NULL;
-- 
cgit v1.2.3-70-g09d2


From 26ec037f9841e49cc5c615deb8e1e73e5beab2ca Mon Sep 17 00:00:00 2001
From: Nick Chalk <nick@loadbalancer.org>
Date: Tue, 22 Jun 2010 08:07:01 +0200
Subject: IPVS: one-packet scheduling

Allow one-packet scheduling for UDP connections. When the fwmark-based or
normal virtual service is marked with '-o' or '--ops' options all
connections are created only to schedule one packet. Useful to schedule UDP
packets from same client port to different real servers. Recommended with
RR or WRR schedulers (the connections are not visible with ipvsadm -L).

Signed-off-by: Nick Chalk <nick@loadbalancer.org>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/ip_vs.h           |  2 ++
 net/netfilter/ipvs/ip_vs_conn.c | 10 +++++++---
 net/netfilter/ipvs/ip_vs_core.c | 20 ++++++++++++++++----
 net/netfilter/ipvs/ip_vs_ctl.c  | 10 ++++++----
 4 files changed, 31 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h
index dfc17036284..9708de265bb 100644
--- a/include/linux/ip_vs.h
+++ b/include/linux/ip_vs.h
@@ -19,6 +19,7 @@
  */
 #define IP_VS_SVC_F_PERSISTENT	0x0001		/* persistent port */
 #define IP_VS_SVC_F_HASHED	0x0002		/* hashed entry */
+#define IP_VS_SVC_F_ONEPACKET	0x0004		/* one-packet scheduling */
 
 /*
  *      Destination Server Flags
@@ -85,6 +86,7 @@
 #define IP_VS_CONN_F_SEQ_MASK	0x0600		/* in/out sequence mask */
 #define IP_VS_CONN_F_NO_CPORT	0x0800		/* no client port set yet */
 #define IP_VS_CONN_F_TEMPLATE	0x1000		/* template, not connection */
+#define IP_VS_CONN_F_ONE_PACKET	0x2000		/* forward only one packet */
 
 #define IP_VS_SCHEDNAME_MAXLEN	16
 #define IP_VS_IFNAME_MAXLEN	16
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index d8f7e8ef67b..717e6233d50 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -158,6 +158,9 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 	unsigned hash;
 	int ret;
 
+	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+		return 0;
+
 	/* Hash by protocol, client address and port */
 	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
 
@@ -355,8 +358,9 @@ struct ip_vs_conn *ip_vs_conn_out_get
  */
 void ip_vs_conn_put(struct ip_vs_conn *cp)
 {
-	/* reset it expire in its timeout */
-	mod_timer(&cp->timer, jiffies+cp->timeout);
+	unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ?
+		0 : cp->timeout;
+	mod_timer(&cp->timer, jiffies+t);
 
 	__ip_vs_conn_put(cp);
 }
@@ -649,7 +653,7 @@ static void ip_vs_conn_expire(unsigned long data)
 	/*
 	 *	unhash it if it is hashed in the conn table
 	 */
-	if (!ip_vs_conn_unhash(cp))
+	if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET))
 		goto expire_later;
 
 	/*
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 1cd6e3fd058..50907d8472a 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -194,6 +194,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	struct ip_vs_dest *dest;
 	struct ip_vs_conn *ct;
 	__be16  dport;			/* destination port to forward */
+	__be16  flags;
 	union nf_inet_addr snet;	/* source network of the client,
 					   after masking */
 
@@ -340,6 +341,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		dport = ports[1];
 	}
 
+	flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
+		 && iph.protocol == IPPROTO_UDP)?
+		IP_VS_CONN_F_ONE_PACKET : 0;
+
 	/*
 	 *    Create a new connection according to the template
 	 */
@@ -347,7 +352,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 			    &iph.saddr, ports[0],
 			    &iph.daddr, ports[1],
 			    &dest->addr, dport,
-			    0,
+			    flags,
 			    dest);
 	if (cp == NULL) {
 		ip_vs_conn_put(ct);
@@ -377,7 +382,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	struct ip_vs_conn *cp = NULL;
 	struct ip_vs_iphdr iph;
 	struct ip_vs_dest *dest;
-	__be16 _ports[2], *pptr;
+	__be16 _ports[2], *pptr, flags;
 
 	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -407,6 +412,10 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		return NULL;
 	}
 
+	flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
+		 && iph.protocol == IPPROTO_UDP)?
+		IP_VS_CONN_F_ONE_PACKET : 0;
+
 	/*
 	 *    Create a connection entry.
 	 */
@@ -414,7 +423,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 			    &iph.saddr, pptr[0],
 			    &iph.daddr, pptr[1],
 			    &dest->addr, dest->port ? dest->port : pptr[1],
-			    0,
+			    flags,
 			    dest);
 	if (cp == NULL)
 		return NULL;
@@ -464,6 +473,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
 		int ret, cs;
 		struct ip_vs_conn *cp;
+		__u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
+				iph.protocol == IPPROTO_UDP)?
+				IP_VS_CONN_F_ONE_PACKET : 0;
 		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };
 
 		ip_vs_service_put(svc);
@@ -474,7 +486,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 				    &iph.saddr, pptr[0],
 				    &iph.daddr, pptr[1],
 				    &daddr, 0,
-				    IP_VS_CONN_F_BYPASS,
+				    IP_VS_CONN_F_BYPASS | flags,
 				    NULL);
 		if (cp == NULL)
 			return NF_DROP;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 36dc1d88c2f..0f0c079c422 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1864,14 +1864,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 					   svc->scheduler->name);
 			else
 #endif
-				seq_printf(seq, "%s  %08X:%04X %s ",
+				seq_printf(seq, "%s  %08X:%04X %s %s ",
 					   ip_vs_proto_name(svc->protocol),
 					   ntohl(svc->addr.ip),
 					   ntohs(svc->port),
-					   svc->scheduler->name);
+					   svc->scheduler->name,
+					   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
 		} else {
-			seq_printf(seq, "FWM  %08X %s ",
-				   svc->fwmark, svc->scheduler->name);
+			seq_printf(seq, "FWM  %08X %s %s",
+				   svc->fwmark, svc->scheduler->name,
+				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
 		}
 
 		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
-- 
cgit v1.2.3-70-g09d2


From 97dc135947181a6670949a480da56c3ebf8d3715 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 16 Jun 2010 09:52:26 -0400
Subject: NFSv41: Clean up the NFSv4.1 minor version specific operations

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 14 ++++++--------
 fs/nfs/nfs4_fs.h          | 11 +++++++++++
 fs/nfs/nfs4proc.c         | 21 ++++++++++++++++++++-
 include/linux/nfs_fs_sb.h |  7 ++-----
 4 files changed, 39 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index d25b5257b7a..1df708fd420 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -150,6 +150,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	clp->cl_boot_time = CURRENT_TIME;
 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
 	clp->cl_minorversion = cl_init->minorversion;
+	clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
 #endif
 	cred = rpc_lookup_machine_cred();
 	if (!IS_ERR(cred))
@@ -178,7 +179,7 @@ static void nfs4_clear_client_minor_version(struct nfs_client *clp)
 		clp->cl_session = NULL;
 	}
 
-	clp->cl_call_sync = _nfs4_call_sync;
+	clp->cl_mvops = nfs_v4_minor_ops[0];
 #endif /* CONFIG_NFS_V4_1 */
 }
 
@@ -188,7 +189,7 @@ static void nfs4_clear_client_minor_version(struct nfs_client *clp)
 static void nfs4_destroy_callback(struct nfs_client *clp)
 {
 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
-		nfs_callback_down(clp->cl_minorversion);
+		nfs_callback_down(clp->cl_mvops->minor_version);
 }
 
 static void nfs4_shutdown_client(struct nfs_client *clp)
@@ -1126,7 +1127,7 @@ static int nfs4_init_callback(struct nfs_client *clp)
 				return error;
 		}
 
-		error = nfs_callback_up(clp->cl_minorversion,
+		error = nfs_callback_up(clp->cl_mvops->minor_version,
 					clp->cl_rpcclient->cl_xprt);
 		if (error < 0) {
 			dprintk("%s: failed to start callback. Error = %d\n",
@@ -1143,10 +1144,8 @@ static int nfs4_init_callback(struct nfs_client *clp)
  */
 static int nfs4_init_client_minor_version(struct nfs_client *clp)
 {
-	clp->cl_call_sync = _nfs4_call_sync;
-
 #if defined(CONFIG_NFS_V4_1)
-	if (clp->cl_minorversion) {
+	if (clp->cl_mvops->minor_version) {
 		struct nfs4_session *session = NULL;
 		/*
 		 * Create the session and mark it expired.
@@ -1158,7 +1157,6 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
 			return -ENOMEM;
 
 		clp->cl_session = session;
-		clp->cl_call_sync = _nfs4_call_sync_session;
 	}
 #endif /* CONFIG_NFS_V4_1 */
 
@@ -1454,7 +1452,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 				data->authflavor,
 				parent_server->client->cl_xprt->prot,
 				parent_server->client->cl_timeout,
-				parent_client->cl_minorversion);
+				parent_client->cl_mvops->minor_version);
 	if (error < 0)
 		goto error;
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index bb1e9553069..5b01705e30f 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -52,6 +52,16 @@ enum nfs4_session_state {
 	NFS4_SESSION_DRAINING,
 };
 
+struct nfs4_minor_version_ops {
+	u32	minor_version;
+
+	int	(*call_sync)(struct nfs_server *server,
+			struct rpc_message *msg,
+			struct nfs4_sequence_args *args,
+			struct nfs4_sequence_res *res,
+			int cache_reply);
+};
+
 /*
  * struct rpc_sequence ensures that RPC calls are sent in the exact
  * order that they appear on the list.
@@ -260,6 +270,7 @@ static inline int nfs4_init_session(struct nfs_server *server)
 }
 #endif /* CONFIG_NFS_V4_1 */
 
+extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
 extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[];
 
 extern const u32 nfs4_fattr_bitmap[2];
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index fc972c6f1ce..a938daf333d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -667,7 +667,7 @@ int _nfs4_call_sync(struct nfs_server *server,
 }
 
 #define nfs4_call_sync(server, msg, args, res, cache_reply) \
-	(server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \
+	(server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \
 			&(res)->seq_res, (cache_reply))
 
 static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
@@ -5353,6 +5353,18 @@ struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
 };
 #endif
 
+static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
+	.minor_version = 0,
+	.call_sync = _nfs4_call_sync,
+};
+
+#if defined(CONFIG_NFS_V4_1)
+static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
+	.minor_version = 1,
+	.call_sync = _nfs4_call_sync_session,
+};
+#endif
+
 /*
  * Per minor version reboot and network partition recovery ops
  */
@@ -5378,6 +5390,13 @@ struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[] = {
 #endif
 };
 
+const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
+	[0] = &nfs_v4_0_minor_ops,
+#if defined(CONFIG_NFS_V4_1)
+	[1] = &nfs_v4_1_minor_ops,
+#endif
+};
+
 static const struct inode_operations nfs4_file_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index d6e10a4c06e..c82ee7cd628 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -15,6 +15,7 @@ struct nlm_host;
 struct nfs4_sequence_args;
 struct nfs4_sequence_res;
 struct nfs_server;
+struct nfs4_minor_version_ops;
 
 /*
  * The nfs_client identifies our client state to the server.
@@ -70,11 +71,7 @@ struct nfs_client {
 	 */
 	char			cl_ipaddr[48];
 	unsigned char		cl_id_uniquifier;
-	int		     (* cl_call_sync)(struct nfs_server *server,
-					      struct rpc_message *msg,
-					      struct nfs4_sequence_args *args,
-					      struct nfs4_sequence_res *res,
-					      int cache_reply);
+	const struct nfs4_minor_version_ops *cl_mvops;
 #endif /* CONFIG_NFS_V4 */
 
 #ifdef CONFIG_NFS_V4_1
-- 
cgit v1.2.3-70-g09d2


From d77d76ffb638bd013782138cca6d8f4918c5afd6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 16 Jun 2010 09:52:27 -0400
Subject: NFSv41: Clean up exclusive create

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 17 ++++++-----------
 include/linux/nfs_xdr.h |  6 ++++--
 2 files changed, 10 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d1ab0c36e93..5d87563d0c1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -744,19 +744,14 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
 	p->o_arg.server = server;
 	p->o_arg.bitmask = server->attr_bitmask;
 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
-	if (flags & O_EXCL) {
-		if (nfs4_has_persistent_session(server->nfs_client)) {
-			/* GUARDED */
-			p->o_arg.u.attrs = &p->attrs;
-			memcpy(&p->attrs, attrs, sizeof(p->attrs));
-		} else { /* EXCLUSIVE4_1 */
-			u32 *s = (u32 *) p->o_arg.u.verifier.data;
-			s[0] = jiffies;
-			s[1] = current->pid;
-		}
-	} else if (flags & O_CREAT) {
+	if (flags & O_CREAT) {
+		u32 *s;
+
 		p->o_arg.u.attrs = &p->attrs;
 		memcpy(&p->attrs, attrs, sizeof(p->attrs));
+		s = (u32 *) p->o_arg.u.verifier.data;
+		s[0] = jiffies;
+		s[1] = current->pid;
 	}
 	p->c_arg.fh = &p->o_res.fh;
 	p->c_arg.stateid = &p->o_res.stateid;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 51914d7d6cc..a319cb926ab 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -196,8 +196,10 @@ struct nfs_openargs {
 	__u64                   clientid;
 	__u64                   id;
 	union {
-		struct iattr *  attrs;    /* UNCHECKED, GUARDED */
-		nfs4_verifier   verifier; /* EXCLUSIVE */
+		struct {
+			struct iattr *  attrs;    /* UNCHECKED, GUARDED */
+			nfs4_verifier   verifier; /* EXCLUSIVE */
+		};
 		nfs4_stateid	delegation;		/* CLAIM_DELEGATE_CUR */
 		fmode_t		delegation_type;	/* CLAIM_PREVIOUS */
 	} u;
-- 
cgit v1.2.3-70-g09d2


From 69da9bcb98ccbfb5d5f751bc13418f1307332925 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 16 Jun 2010 17:57:28 +0200
Subject: ALSA: usb-audio: unify UAC macros and struct names

Get rid of the last occurrences of _v1 suffixes, and move the version
number right after the "uac" string. Now things are consistent again.

Sorry for the back and forth, but it just looks much nicer this way.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/usb/gadget/f_audio.c |  6 +++---
 drivers/usb/gadget/gmidi.c   |  2 +-
 include/linux/usb/audio-v2.h |  2 +-
 include/linux/usb/audio.h    | 12 ++++++------
 sound/usb/card.c             |  2 +-
 sound/usb/endpoint.c         |  4 ++--
 sound/usb/mixer.c            | 14 +++++++-------
 7 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/gadget/f_audio.c b/drivers/usb/gadget/f_audio.c
index b91115f84b1..1f48ceb55a7 100644
--- a/drivers/usb/gadget/f_audio.c
+++ b/drivers/usb/gadget/f_audio.c
@@ -61,7 +61,7 @@ DECLARE_UAC_AC_HEADER_DESCRIPTOR(2);
 #define UAC_DT_TOTAL_LENGTH (UAC_DT_AC_HEADER_LENGTH + UAC_DT_INPUT_TERMINAL_SIZE \
 	+ UAC_DT_OUTPUT_TERMINAL_SIZE + UAC_DT_FEATURE_UNIT_SIZE(0))
 /* B.3.2  Class-Specific AC Interface Descriptor */
-static struct uac_ac_header_descriptor_v1_2 ac_header_desc = {
+static struct uac1_ac_header_descriptor_2 ac_header_desc = {
 	.bLength =		UAC_DT_AC_HEADER_LENGTH,
 	.bDescriptorType =	USB_DT_CS_INTERFACE,
 	.bDescriptorSubtype =	UAC_HEADER,
@@ -125,7 +125,7 @@ static struct usb_audio_control_selector feature_unit = {
 };
 
 #define OUTPUT_TERMINAL_ID	3
-static struct uac_output_terminal_descriptor_v1 output_terminal_desc = {
+static struct uac1_output_terminal_descriptor output_terminal_desc = {
 	.bLength		= UAC_DT_OUTPUT_TERMINAL_SIZE,
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
 	.bDescriptorSubtype	= UAC_OUTPUT_TERMINAL,
@@ -155,7 +155,7 @@ static struct usb_interface_descriptor as_interface_alt_1_desc = {
 };
 
 /* B.4.2  Class-Specific AS Interface Descriptor */
-static struct uac_as_header_descriptor_v1 as_header_desc = {
+static struct uac1_as_header_descriptor as_header_desc = {
 	.bLength =		UAC_DT_AS_HEADER_SIZE,
 	.bDescriptorType =	USB_DT_CS_INTERFACE,
 	.bDescriptorSubtype =	UAC_AS_GENERAL,
diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c
index 2b56ce62185..b7bf88019b0 100644
--- a/drivers/usb/gadget/gmidi.c
+++ b/drivers/usb/gadget/gmidi.c
@@ -238,7 +238,7 @@ static const struct usb_interface_descriptor ac_interface_desc = {
 };
 
 /* B.3.2  Class-Specific AC Interface Descriptor */
-static const struct uac_ac_header_descriptor_v1_1 ac_header_desc = {
+static const struct uac1_ac_header_descriptor_1 ac_header_desc = {
 	.bLength =		UAC_DT_AC_HEADER_SIZE(1),
 	.bDescriptorType =	USB_DT_CS_INTERFACE,
 	.bDescriptorSubtype =	USB_MS_HEADER,
diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h
index 383b94ba8c2..716aebe339e 100644
--- a/include/linux/usb/audio-v2.h
+++ b/include/linux/usb/audio-v2.h
@@ -121,7 +121,7 @@ struct uac2_feature_unit_descriptor {
 
 /* 4.9.2 Class-Specific AS Interface Descriptor */
 
-struct uac_as_header_descriptor_v2 {
+struct uac2_as_header_descriptor {
 	__u8 bLength;
 	__u8 bDescriptorType;
 	__u8 bDescriptorSubtype;
diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index c51200c715e..a54b8255d75 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -39,8 +39,8 @@
 #define UAC_MIXER_UNIT			0x04
 #define UAC_SELECTOR_UNIT		0x05
 #define UAC_FEATURE_UNIT		0x06
-#define UAC_PROCESSING_UNIT_V1		0x07
-#define UAC_EXTENSION_UNIT_V1		0x08
+#define UAC1_PROCESSING_UNIT		0x07
+#define UAC1_EXTENSION_UNIT		0x08
 
 /* A.6 Audio Class-Specific AS Interface Descriptor Subtypes */
 #define UAC_AS_GENERAL			0x01
@@ -151,7 +151,7 @@
 
 /* Terminal Control Selectors */
 /* 4.3.2  Class-Specific AC Interface Descriptor */
-struct uac_ac_header_descriptor_v1 {
+struct uac1_ac_header_descriptor {
 	__u8  bLength;			/* 8 + n */
 	__u8  bDescriptorType;		/* USB_DT_CS_INTERFACE */
 	__u8  bDescriptorSubtype;	/* UAC_MS_HEADER */
@@ -165,7 +165,7 @@ struct uac_ac_header_descriptor_v1 {
 
 /* As above, but more useful for defining your own descriptors: */
 #define DECLARE_UAC_AC_HEADER_DESCRIPTOR(n)			\
-struct uac_ac_header_descriptor_v1_##n {			\
+struct uac1_ac_header_descriptor_##n {			\
 	__u8  bLength;						\
 	__u8  bDescriptorType;					\
 	__u8  bDescriptorSubtype;				\
@@ -205,7 +205,7 @@ struct uac_input_terminal_descriptor {
 #define UAC_TERMINAL_CS_COPY_PROTECT_CONTROL		0x01
 
 /* 4.3.2.2 Output Terminal Descriptor */
-struct uac_output_terminal_descriptor_v1 {
+struct uac1_output_terminal_descriptor {
 	__u8  bLength;			/* in bytes: 9 */
 	__u8  bDescriptorType;		/* CS_INTERFACE descriptor type */
 	__u8  bDescriptorSubtype;	/* OUTPUT_TERMINAL descriptor subtype */
@@ -395,7 +395,7 @@ static inline __u8 *uac_processing_unit_specific(struct uac_processing_unit_desc
 }
 
 /* 4.5.2 Class-Specific AS Interface Descriptor */
-struct uac_as_header_descriptor_v1 {
+struct uac1_as_header_descriptor {
 	__u8  bLength;			/* in bytes: 7 */
 	__u8  bDescriptorType;		/* USB_DT_CS_INTERFACE */
 	__u8  bDescriptorSubtype;	/* AS_GENERAL */
diff --git a/sound/usb/card.c b/sound/usb/card.c
index 7a8ac1d81be..9feb00c831a 100644
--- a/sound/usb/card.c
+++ b/sound/usb/card.c
@@ -217,7 +217,7 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif)
 
 	switch (protocol) {
 	case UAC_VERSION_1: {
-		struct uac_ac_header_descriptor_v1 *h1 = control_header;
+		struct uac1_ac_header_descriptor *h1 = control_header;
 
 		if (!h1->bInCollection) {
 			snd_printk(KERN_INFO "skipping empty audio interface (v1)\n");
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
index 6f6596cf2b1..2af0f9e3dcd 100644
--- a/sound/usb/endpoint.c
+++ b/sound/usb/endpoint.c
@@ -275,7 +275,7 @@ int snd_usb_parse_audio_endpoints(struct snd_usb_audio *chip, int iface_no)
 		/* get audio formats */
 		switch (protocol) {
 		case UAC_VERSION_1: {
-			struct uac_as_header_descriptor_v1 *as =
+			struct uac1_as_header_descriptor *as =
 				snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_AS_GENERAL);
 
 			if (!as) {
@@ -297,7 +297,7 @@ int snd_usb_parse_audio_endpoints(struct snd_usb_audio *chip, int iface_no)
 		case UAC_VERSION_2: {
 			struct uac2_input_terminal_descriptor *input_term;
 			struct uac2_output_terminal_descriptor *output_term;
-			struct uac_as_header_descriptor_v2 *as =
+			struct uac2_as_header_descriptor *as =
 				snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_AS_GENERAL);
 
 			if (!as) {
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 736d134cc03..ba54eb6bb0c 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -582,9 +582,9 @@ static int get_term_name(struct mixer_build *state, struct usb_audio_term *iterm
 		switch (iterm->type >> 16) {
 		case UAC_SELECTOR_UNIT:
 			strcpy(name, "Selector"); return 8;
-		case UAC_PROCESSING_UNIT_V1:
+		case UAC1_PROCESSING_UNIT:
 			strcpy(name, "Process Unit"); return 12;
-		case UAC_EXTENSION_UNIT_V1:
+		case UAC1_EXTENSION_UNIT:
 			strcpy(name, "Ext Unit"); return 8;
 		case UAC_MIXER_UNIT:
 			strcpy(name, "Mixer"); return 5;
@@ -672,8 +672,8 @@ static int check_input_term(struct mixer_build *state, int id, struct usb_audio_
 			term->name = uac_selector_unit_iSelector(d);
 			return 0;
 		}
-		case UAC_PROCESSING_UNIT_V1:
-		case UAC_EXTENSION_UNIT_V1: {
+		case UAC1_PROCESSING_UNIT:
+		case UAC1_EXTENSION_UNIT: {
 			struct uac_processing_unit_descriptor *d = p1;
 			if (d->bNrInPins) {
 				id = d->baSourceID[0];
@@ -1855,13 +1855,13 @@ static int parse_audio_unit(struct mixer_build *state, int unitid)
 		return parse_audio_selector_unit(state, unitid, p1);
 	case UAC_FEATURE_UNIT:
 		return parse_audio_feature_unit(state, unitid, p1);
-	case UAC_PROCESSING_UNIT_V1:
+	case UAC1_PROCESSING_UNIT:
 	/*   UAC2_EFFECT_UNIT has the same value */
 		if (state->mixer->protocol == UAC_VERSION_1)
 			return parse_audio_processing_unit(state, unitid, p1);
 		else
 			return 0; /* FIXME - effect units not implemented yet */
-	case UAC_EXTENSION_UNIT_V1:
+	case UAC1_EXTENSION_UNIT:
 	/*   UAC2_PROCESSING_UNIT_V2 has the same value */
 		if (state->mixer->protocol == UAC_VERSION_1)
 			return parse_audio_extension_unit(state, unitid, p1);
@@ -1925,7 +1925,7 @@ static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer)
 	p = NULL;
 	while ((p = snd_usb_find_csint_desc(hostif->extra, hostif->extralen, p, UAC_OUTPUT_TERMINAL)) != NULL) {
 		if (mixer->protocol == UAC_VERSION_1) {
-			struct uac_output_terminal_descriptor_v1 *desc = p;
+			struct uac1_output_terminal_descriptor *desc = p;
 
 			if (desc->bLength < sizeof(*desc))
 				continue; /* invalid descriptor? */
-- 
cgit v1.2.3-70-g09d2


From 157a57b6fae7d3c6d24b7623dcc6679c6d244621 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 16 Jun 2010 17:57:30 +0200
Subject: ALSA: usb-audio: move and add some comments

Also add a list of open topics.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/usb/audio-v2.h | 15 +++++++++++++++
 sound/usb/clock.c            | 16 ++++++++++++++--
 sound/usb/mixer.c            | 24 ++++++++++++++++--------
 3 files changed, 45 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h
index 716aebe339e..964cb603f7c 100644
--- a/include/linux/usb/audio-v2.h
+++ b/include/linux/usb/audio-v2.h
@@ -18,6 +18,21 @@
 /* v1.0 and v2.0 of this standard have many things in common. For the rest
  * of the definitions, please refer to audio.h */
 
+/*
+ * bmControl field decoders
+ *
+ * From the USB Audio spec v2.0:
+ *
+ *   bmaControls() is a (ch+1)-element array of 4-byte bitmaps,
+ *   each containing a set of bit pairs. If a Control is present,
+ *   it must be Host readable. If a certain Control is not
+ *   present then the bit pair must be set to 0b00.
+ *   If a Control is present but read-only, the bit pair must be
+ *   set to 0b01. If a Control is also Host programmable, the bit
+ *   pair must be set to 0b11. The value 0b10 is not allowed.
+ *
+ */
+
 static inline bool uac2_control_is_readable(u32 bmControls, u8 control)
 {
 	return (bmControls >> (control * 2)) & 0x1;
diff --git a/sound/usb/clock.c b/sound/usb/clock.c
index 386b09c5ce7..7279d619087 100644
--- a/sound/usb/clock.c
+++ b/sound/usb/clock.c
@@ -120,8 +120,6 @@ static bool uac_clock_source_is_valid(struct snd_usb_audio *chip, int source_id)
 	return !!data;
 }
 
-/* Try to find the clock source ID of a given clock entity */
-
 static int __uac_clock_find_source(struct snd_usb_audio *chip,
 				   struct usb_host_interface *host_iface,
 				   int entity_id, unsigned long *visited)
@@ -154,6 +152,8 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip,
 		if (ret < 0)
 			return ret;
 
+		/* Selector values are one-based */
+
 		if (ret > selector->bNrInPins || ret < 1) {
 			printk(KERN_ERR
 				"%s(): selector reported illegal value, id %d, ret %d\n",
@@ -176,6 +176,17 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip,
 	return -EINVAL;
 }
 
+/*
+ * For all kinds of sample rate settings and other device queries,
+ * the clock source (end-leaf) must be used. However, clock selectors,
+ * clock multipliers and sample rate converters may be specified as
+ * clock source input to terminal. This function walks the clock path
+ * to its end and tries to find the source.
+ *
+ * The 'visited' bitfield is used internally to detect recursive loops.
+ *
+ * Returns the clock source UnitID (>=0) on success, or an error.
+ */
 int snd_usb_clock_find_source(struct snd_usb_audio *chip,
 			      struct usb_host_interface *host_iface,
 			      int entity_id)
@@ -246,6 +257,7 @@ static int set_sample_rate_v2(struct snd_usb_audio *chip, int iface,
 		return clock;
 
 	if (!uac_clock_source_is_valid(chip, clock)) {
+		/* TODO: should we try to find valid clock setups ourselves? */
 		snd_printk(KERN_ERR "%d:%d:%d: clock source %d is not valid, cannot use\n",
 			   dev->devnum, iface, fmt->altsetting, clock);
 		return -ENXIO;
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index ba54eb6bb0c..1163ec3ca8a 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -26,6 +26,22 @@
  *
  */
 
+/*
+ * TODOs, for both the mixer and the streaming interfaces:
+ *
+ *  - support for UAC2 effect units
+ *  - support for graphical equalizers
+ *  - RANGE and MEM set commands (UAC2)
+ *  - RANGE and MEM interrupt dispatchers (UAC2)
+ *  - audio channel clustering (UAC2)
+ *  - audio sample rate converter units (UAC2)
+ *  - proper handling of clock multipliers (UAC2)
+ *  - dispatch clock change notifications (UAC2)
+ *  	- stop PCM streams which use a clock that became invalid
+ *  	- stop PCM streams which use a clock selector that has changed
+ *  	- parse available sample rates again when clock sources changed
+ */
+
 #include <linux/bitops.h>
 #include <linux/init.h>
 #include <linux/list.h>
@@ -1199,14 +1215,6 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, void
 		}
 	} else { /* UAC_VERSION_2 */
 		for (i = 0; i < 30/2; i++) {
-			/* From the USB Audio spec v2.0:
-			   bmaControls() is a (ch+1)-element array of 4-byte bitmaps,
-			   each containing a set of bit pairs. If a Control is present,
-			   it must be Host readable. If a certain Control is not
-			   present then the bit pair must be set to 0b00.
-			   If a Control is present but read-only, the bit pair must be
-			   set to 0b01. If a Control is also Host programmable, the bit
-			   pair must be set to 0b11. The value 0b10 is not allowed. */
 			unsigned int ch_bits = 0;
 			unsigned int ch_read_only = 0;
 
-- 
cgit v1.2.3-70-g09d2


From 1dc8d8c06d4002be4d1373fc06f25cd589be47e1 Mon Sep 17 00:00:00 2001
From: "Justin P. Mattock" <justinmattock@gmail.com>
Date: Mon, 21 Jun 2010 13:45:42 -0700
Subject: net: Fix a typo in netlink.h

Fix a typo in include/net/netlink.h
should be finalize instead of finanlize

Signed-off-by: Justin P. Mattock <justinmattock@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 4fc05b58503..f3b201d335b 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -35,7 +35,7 @@
  *   nlmsg_new()			create a new netlink message
  *   nlmsg_put()			add a netlink message to an skb
  *   nlmsg_put_answer()			callback based nlmsg_put()
- *   nlmsg_end()			finanlize netlink message
+ *   nlmsg_end()			finalize netlink message
  *   nlmsg_get_pos()			return current position in message
  *   nlmsg_trim()			trim part of message
  *   nlmsg_cancel()			cancel message construction
-- 
cgit v1.2.3-70-g09d2


From 16b8a4761cbe5082cd35641c066d7c4b6b83cdca Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 22 Jun 2010 10:22:17 -0700
Subject: net: Introduce u64_stats_sync infrastructure

To properly implement 64bits network statistics on 32bit or 64bit hosts,
we provide one new type and four methods, to ease conversions.

Stats producer should use following template granted it already got an
exclusive access to counters (include/linux/u64_stats_sync.h contains
some documentation about details)

    u64_stats_update_begin(&stats->syncp);
    stats->bytes64 += len;
    stats->packets64++;
    u64_stats_update_end(&stats->syncp);

While a consumer should use following template to get consistent
snapshot :

    u64 tbytes, tpackets;
    unsigned int start;

    do {
        start = u64_stats_fetch_begin(&stats->syncp);
        tbytes = stats->bytes64;
        tpackets = stats->packets64;
    } while (u64_stats_fetch_retry(&stats->syncp, start));

Suggested by David Miller, and comments courtesy of Nick Piggin.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/u64_stats_sync.h | 107 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)
 create mode 100644 include/linux/u64_stats_sync.h

(limited to 'include')

diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
new file mode 100644
index 00000000000..d0505156ed5
--- /dev/null
+++ b/include/linux/u64_stats_sync.h
@@ -0,0 +1,107 @@
+#ifndef _LINUX_U64_STATS_SYNC_H
+#define _LINUX_U64_STATS_SYNC_H
+
+/*
+ * To properly implement 64bits network statistics on 32bit and 64bit hosts,
+ * we provide a synchronization point, that is a noop on 64bit or UP kernels.
+ *
+ * Key points :
+ * 1) Use a seqcount on SMP 32bits, with low overhead.
+ * 2) Whole thing is a noop on 64bit arches or UP kernels.
+ * 3) Write side must ensure mutual exclusion or one seqcount update could
+ *    be lost, thus blocking readers forever.
+ *    If this synchronization point is not a mutex, but a spinlock or
+ *    spinlock_bh() or disable_bh() :
+ * 3.1) Write side should not sleep.
+ * 3.2) Write side should not allow preemption.
+ * 3.3) If applicable, interrupts should be disabled.
+ *
+ * 4) If reader fetches several counters, there is no guarantee the whole values
+ *    are consistent (remember point 1) : this is a noop on 64bit arches anyway)
+ *
+ * 5) readers are allowed to sleep or be preempted/interrupted : They perform
+ *    pure reads. But if they have to fetch many values, it's better to not allow
+ *    preemptions/interruptions to avoid many retries.
+ *
+ * Usage :
+ *
+ * Stats producer (writer) should use following template granted it already got
+ * an exclusive access to counters (a lock is already taken, or per cpu
+ * data is used [in a non preemptable context])
+ *
+ *   spin_lock_bh(...) or other synchronization to get exclusive access
+ *   ...
+ *   u64_stats_update_begin(&stats->syncp);
+ *   stats->bytes64 += len; // non atomic operation
+ *   stats->packets64++;    // non atomic operation
+ *   u64_stats_update_end(&stats->syncp);
+ *
+ * While a consumer (reader) should use following template to get consistent
+ * snapshot for each variable (but no guarantee on several ones)
+ *
+ * u64 tbytes, tpackets;
+ * unsigned int start;
+ *
+ * do {
+ *         start = u64_stats_fetch_begin(&stats->syncp);
+ *         tbytes = stats->bytes64; // non atomic operation
+ *         tpackets = stats->packets64; // non atomic operation
+ * } while (u64_stats_fetch_retry(&stats->syncp, start));
+ *
+ *
+ * Example of use in drivers/net/loopback.c, using per_cpu containers,
+ * in BH disabled context.
+ */
+#include <linux/seqlock.h>
+
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+struct u64_stats_sync {
+	seqcount_t	seq;
+};
+
+static void inline u64_stats_update_begin(struct u64_stats_sync *syncp)
+{
+	write_seqcount_begin(&syncp->seq);
+}
+
+static void inline u64_stats_update_end(struct u64_stats_sync *syncp)
+{
+	write_seqcount_end(&syncp->seq);
+}
+
+static unsigned int inline u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+	return read_seqcount_begin(&syncp->seq);
+}
+
+static bool inline u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+					 unsigned int start)
+{
+	return read_seqcount_retry(&syncp->seq, start);
+}
+
+#else
+struct u64_stats_sync {
+};
+
+static void inline u64_stats_update_begin(struct u64_stats_sync *syncp)
+{
+}
+
+static void inline u64_stats_update_end(struct u64_stats_sync *syncp)
+{
+}
+
+static unsigned int inline u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+	return 0;
+}
+
+static bool inline u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+					 unsigned int start)
+{
+	return false;
+}
+#endif
+
+#endif /* _LINUX_U64_STATS_SYNC_H */
-- 
cgit v1.2.3-70-g09d2


From 63a6404d8ae693e71ab27c4f9c4032aa29113e92 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Thu, 10 Jun 2010 12:05:24 -0700
Subject: Input: evdev - use driver hint to compute size of event buffer

Some devices, in particular MT devices, produce a lot of data.  This
may lead to overflowing of the event queues in evdev driver, which
by default are fairly small. Let the drivers hint the average number
of events per packet generated by the device, and use that information
when computing the buffer size evdev should use for the device.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Acked-by: Chase Douglas <chase.douglas@canonical.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/evdev.c |  9 +++++++--
 include/linux/input.h | 21 +++++++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index cff7bf9351a..30836c05edd 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -10,7 +10,8 @@
 
 #define EVDEV_MINOR_BASE	64
 #define EVDEV_MINORS		32
-#define EVDEV_MIN_BUFFER_SIZE	64
+#define EVDEV_MIN_BUFFER_SIZE	64U
+#define EVDEV_BUF_PACKETS	8
 
 #include <linux/poll.h>
 #include <linux/sched.h>
@@ -245,7 +246,11 @@ static int evdev_release(struct inode *inode, struct file *file)
 
 static unsigned int evdev_compute_buffer_size(struct input_dev *dev)
 {
-	return EVDEV_MIN_BUFFER_SIZE;
+	unsigned int n_events =
+		max(dev->hint_events_per_packet * EVDEV_BUF_PACKETS,
+		    EVDEV_MIN_BUFFER_SIZE);
+
+	return roundup_pow_of_two(n_events);
 }
 
 static int evdev_open(struct inode *inode, struct file *file)
diff --git a/include/linux/input.h b/include/linux/input.h
index 6fcc9101bee..cc524c8b670 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -1063,6 +1063,10 @@ struct ff_effect {
  * @sndbit: bitmap of sound effects supported by the device
  * @ffbit: bitmap of force feedback effects supported by the device
  * @swbit: bitmap of switches present on the device
+ * @hint_events_per_packet: average number of events generated by the
+ *	device in a packet (between EV_SYN/SYN_REPORT events). Used by
+ *	event handlers to estimate size of the buffer needed to hold
+ *	events.
  * @keycodemax: size of keycode table
  * @keycodesize: size of elements in keycode table
  * @keycode: map of scancodes to keycodes for this device
@@ -1140,6 +1144,8 @@ struct input_dev {
 	unsigned long ffbit[BITS_TO_LONGS(FF_CNT)];
 	unsigned long swbit[BITS_TO_LONGS(SW_CNT)];
 
+	unsigned int hint_events_per_packet;
+
 	unsigned int keycodemax;
 	unsigned int keycodesize;
 	void *keycode;
@@ -1408,6 +1414,21 @@ static inline void input_mt_sync(struct input_dev *dev)
 
 void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int code);
 
+/**
+ * input_set_events_per_packet - tell handlers about the driver event rate
+ * @dev: the input device used by the driver
+ * @n_events: the average number of events between calls to input_sync()
+ *
+ * If the event rate sent from a device is unusually large, use this
+ * function to set the expected event rate. This will allow handlers
+ * to set up an appropriate buffer size for the event stream, in order
+ * to minimize information loss.
+ */
+static inline void input_set_events_per_packet(struct input_dev *dev, int n_events)
+{
+	dev->hint_events_per_packet = n_events;
+}
+
 static inline void input_set_abs_params(struct input_dev *dev, int axis, int min, int max, int fuzz, int flat)
 {
 	dev->absmin[axis] = min;
-- 
cgit v1.2.3-70-g09d2


From 7b2ff18ee7b0ec4bc3162f821e221781aaca48bd Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 15 Jun 2010 01:07:31 +0000
Subject: net - IP_NODEFRAG option for IPv4 socket

this patch is implementing IP_NODEFRAG option for IPv4 socket.
The reason is, there's no other way to send out the packet with user
customized header of the reassembly part.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in.h                  | 1 +
 include/net/inet_sock.h             | 3 ++-
 net/ipv4/af_inet.c                  | 2 ++
 net/ipv4/ip_sockglue.c              | 9 ++++++++-
 net/ipv4/netfilter/nf_defrag_ipv4.c | 5 +++++
 5 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/in.h b/include/linux/in.h
index 583c76f9c30..41d88a4689a 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -85,6 +85,7 @@ struct in_addr {
 #define IP_RECVORIGDSTADDR   IP_ORIGDSTADDR
 
 #define IP_MINTTL       21
+#define IP_NODEFRAG     22
 
 /* IP_MTU_DISCOVER values */
 #define IP_PMTUDISC_DONT		0	/* Never send DF frames */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 1653de515ce..1989cfd7405 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -137,7 +137,8 @@ struct inet_sock {
 				hdrincl:1,
 				mc_loop:1,
 				transparent:1,
-				mc_all:1;
+				mc_all:1,
+				nodefrag:1;
 	int			mc_index;
 	__be32			mc_addr;
 	struct ip_mc_socklist	*mc_list;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d99e7e02018..b4c0969137c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -355,6 +355,8 @@ lookup_protocol:
 	inet = inet_sk(sk);
 	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
 
+	inet->nodefrag = 0;
+
 	if (SOCK_RAW == sock->type) {
 		inet->inet_num = protocol;
 		if (IPPROTO_RAW == protocol)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 47fff528ff3..6c40a8c46e7 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -465,7 +465,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			     (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
 			     (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
 			     (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
-			     (1<<IP_MINTTL))) ||
+			     (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
 	    optname == IP_MULTICAST_TTL ||
 	    optname == IP_MULTICAST_ALL ||
 	    optname == IP_MULTICAST_LOOP ||
@@ -588,6 +588,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 		}
 		inet->hdrincl = val ? 1 : 0;
 		break;
+	case IP_NODEFRAG:
+		if (sk->sk_type != SOCK_RAW) {
+			err = -ENOPROTOOPT;
+			break;
+		}
+		inet->nodefrag = val ? 1 : 0;
+		break;
 	case IP_MTU_DISCOVER:
 		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
 			goto e_inval;
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index cb763ae9ed9..eab8de32f20 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -66,6 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 					  const struct net_device *out,
 					  int (*okfn)(struct sk_buff *))
 {
+	struct inet_sock *inet = inet_sk(skb->sk);
+
+	if (inet && inet->nodefrag)
+		return NF_ACCEPT;
+
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
 	/* Previously seen (loopback)?  Ignore.  Do this before
-- 
cgit v1.2.3-70-g09d2


From fa61cf70a6ae1089e459e4b59b2e8d8e90d8535e Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Wed, 23 Jun 2010 12:12:37 +0300
Subject: cfg80211/mac80211: Update set_tx_power to use mBm instead of dBm
 units

In preparation for a TX power setting interface in the nl80211, change the
.set_tx_power function to use mBm units instead of dBm for greater accuracy and
smaller power levels.

Also, already in advance move the tx_power_setting enumeration to nl80211.

This change affects the .tx_set_power function prototype. As a result, the
corresponding changes are needed to modules using it. These are mac80211,
iwmc3200wifi and rndis_wlan.

Cc: Samuel Ortiz <samuel.ortiz@intel.com>
Cc: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Acked-by: Samuel Ortiz <samuel.ortiz@intel.com>
Acked-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwmc3200wifi/cfg80211.c | 12 ++++++++----
 drivers/net/wireless/rndis_wlan.c            | 20 +++++++++++++-------
 include/linux/nl80211.h                      | 13 +++++++++++++
 include/net/cfg80211.h                       | 15 +--------------
 net/mac80211/cfg.c                           | 22 +++++++++++-----------
 net/wireless/wext-compat.c                   | 10 +++++-----
 6 files changed, 51 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/iwmc3200wifi/cfg80211.c b/drivers/net/wireless/iwmc3200wifi/cfg80211.c
index 902e95f70f6..60619678f4e 100644
--- a/drivers/net/wireless/iwmc3200wifi/cfg80211.c
+++ b/drivers/net/wireless/iwmc3200wifi/cfg80211.c
@@ -670,20 +670,24 @@ static int iwm_cfg80211_disconnect(struct wiphy *wiphy, struct net_device *dev,
 }
 
 static int iwm_cfg80211_set_txpower(struct wiphy *wiphy,
-				    enum tx_power_setting type, int dbm)
+				    enum nl80211_tx_power_setting type, int mbm)
 {
 	struct iwm_priv *iwm = wiphy_to_iwm(wiphy);
 	int ret;
 
 	switch (type) {
-	case TX_POWER_AUTOMATIC:
+	case NL80211_TX_POWER_AUTOMATIC:
 		return 0;
-	case TX_POWER_FIXED:
+	case NL80211_TX_POWER_FIXED:
+		if (mbm < 0 || (mbm % 100))
+			return -EOPNOTSUPP;
+
 		if (!test_bit(IWM_STATUS_READY, &iwm->status))
 			return 0;
 
 		ret = iwm_umac_set_config_fix(iwm, UMAC_PARAM_TBL_CFG_FIX,
-					      CFG_TX_PWR_LIMIT_USR, dbm * 2);
+					      CFG_TX_PWR_LIMIT_USR,
+					      MBM_TO_DBM(mbm) * 2);
 		if (ret < 0)
 			return ret;
 
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 4102cca5488..5e26edb57d8 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -520,8 +520,9 @@ static int rndis_scan(struct wiphy *wiphy, struct net_device *dev,
 
 static int rndis_set_wiphy_params(struct wiphy *wiphy, u32 changed);
 
-static int rndis_set_tx_power(struct wiphy *wiphy, enum tx_power_setting type,
-				int dbm);
+static int rndis_set_tx_power(struct wiphy *wiphy,
+			      enum nl80211_tx_power_setting type,
+			      int mbm);
 static int rndis_get_tx_power(struct wiphy *wiphy, int *dbm);
 
 static int rndis_connect(struct wiphy *wiphy, struct net_device *dev,
@@ -1856,20 +1857,25 @@ static int rndis_set_wiphy_params(struct wiphy *wiphy, u32 changed)
 	return 0;
 }
 
-static int rndis_set_tx_power(struct wiphy *wiphy, enum tx_power_setting type,
-				int dbm)
+static int rndis_set_tx_power(struct wiphy *wiphy,
+			      enum nl80211_tx_power_setting type,
+			      int mbm)
 {
 	struct rndis_wlan_private *priv = wiphy_priv(wiphy);
 	struct usbnet *usbdev = priv->usbdev;
 
-	netdev_dbg(usbdev->net, "%s(): type:0x%x dbm:%i\n",
-		   __func__, type, dbm);
+	netdev_dbg(usbdev->net, "%s(): type:0x%x mbm:%i\n",
+		   __func__, type, mbm);
+
+	if (mbm < 0 || (mbm % 100))
+		return -ENOTSUPP;
 
 	/* Device doesn't support changing txpower after initialization, only
 	 * turn off/on radio. Support 'auto' mode and setting same dBm that is
 	 * currently used.
 	 */
-	if (type == TX_POWER_AUTOMATIC || dbm == get_bcm4320_power_dbm(priv)) {
+	if (type == NL80211_TX_POWER_AUTOMATIC ||
+	    MBM_TO_DBM(mbm) == get_bcm4320_power_dbm(priv)) {
 		if (!priv->radio_on)
 			disassociate(usbdev, true); /* turn on radio */
 
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 64fb32b93a2..07aa04693f9 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1659,4 +1659,17 @@ enum nl80211_cqm_rssi_threshold_event {
 	NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH,
 };
 
+
+/**
+ * enum nl80211_tx_power_setting - TX power adjustment
+ * @NL80211_TX_POWER_AUTOMATIC: automatically determine transmit power
+ * @NL80211_TX_POWER_LIMITED: limit TX power by the mBm parameter
+ * @NL80211_TX_POWER_FIXED: fix TX power to the mBm parameter
+ */
+enum nl80211_tx_power_setting {
+	NL80211_TX_POWER_AUTOMATIC,
+	NL80211_TX_POWER_LIMITED,
+	NL80211_TX_POWER_FIXED,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 64374f4cb7c..9b8b3f486ec 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -875,19 +875,6 @@ enum wiphy_params_flags {
 	WIPHY_PARAM_COVERAGE_CLASS	= 1 << 4,
 };
 
-/**
- * enum tx_power_setting - TX power adjustment
- *
- * @TX_POWER_AUTOMATIC: the dbm parameter is ignored
- * @TX_POWER_LIMITED: limit TX power by the dbm parameter
- * @TX_POWER_FIXED: fix TX power to the dbm parameter
- */
-enum tx_power_setting {
-	TX_POWER_AUTOMATIC,
-	TX_POWER_LIMITED,
-	TX_POWER_FIXED,
-};
-
 /*
  * cfg80211_bitrate_mask - masks for bitrate control
  */
@@ -1149,7 +1136,7 @@ struct cfg80211_ops {
 	int	(*set_wiphy_params)(struct wiphy *wiphy, u32 changed);
 
 	int	(*set_tx_power)(struct wiphy *wiphy,
-				enum tx_power_setting type, int dbm);
+				enum nl80211_tx_power_setting type, int mbm);
 	int	(*get_tx_power)(struct wiphy *wiphy, int *dbm);
 
 	int	(*set_wds_peer)(struct wiphy *wiphy, struct net_device *dev,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 003b6addf5f..f4efbfa4f23 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1329,28 +1329,28 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
 }
 
 static int ieee80211_set_tx_power(struct wiphy *wiphy,
-				  enum tx_power_setting type, int dbm)
+				  enum nl80211_tx_power_setting type, int mbm)
 {
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_channel *chan = local->hw.conf.channel;
 	u32 changes = 0;
 
 	switch (type) {
-	case TX_POWER_AUTOMATIC:
+	case NL80211_TX_POWER_AUTOMATIC:
 		local->user_power_level = -1;
 		break;
-	case TX_POWER_LIMITED:
-		if (dbm < 0)
-			return -EINVAL;
-		local->user_power_level = dbm;
+	case NL80211_TX_POWER_LIMITED:
+		if (mbm < 0 || (mbm % 100))
+			return -EOPNOTSUPP;
+		local->user_power_level = MBM_TO_DBM(mbm);
 		break;
-	case TX_POWER_FIXED:
-		if (dbm < 0)
-			return -EINVAL;
+	case NL80211_TX_POWER_FIXED:
+		if (mbm < 0 || (mbm % 100))
+			return -EOPNOTSUPP;
 		/* TODO: move to cfg80211 when it knows the channel */
-		if (dbm > chan->max_power)
+		if (MBM_TO_DBM(mbm) > chan->max_power)
 			return -EINVAL;
-		local->user_power_level = dbm;
+		local->user_power_level = MBM_TO_DBM(mbm);
 		break;
 	}
 
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 96342993cf9..1ff1e9f4913 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -829,7 +829,7 @@ int cfg80211_wext_siwtxpower(struct net_device *dev,
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
-	enum tx_power_setting type;
+	enum nl80211_tx_power_setting type;
 	int dbm = 0;
 
 	if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM)
@@ -852,7 +852,7 @@ int cfg80211_wext_siwtxpower(struct net_device *dev,
 			if (data->txpower.value < 0)
 				return -EINVAL;
 			dbm = data->txpower.value;
-			type = TX_POWER_FIXED;
+			type = NL80211_TX_POWER_FIXED;
 			/* TODO: do regulatory check! */
 		} else {
 			/*
@@ -860,10 +860,10 @@ int cfg80211_wext_siwtxpower(struct net_device *dev,
 			 * passed in from userland.
 			 */
 			if (data->txpower.value < 0) {
-				type = TX_POWER_AUTOMATIC;
+				type = NL80211_TX_POWER_AUTOMATIC;
 			} else {
 				dbm = data->txpower.value;
-				type = TX_POWER_LIMITED;
+				type = NL80211_TX_POWER_LIMITED;
 			}
 		}
 	} else {
@@ -872,7 +872,7 @@ int cfg80211_wext_siwtxpower(struct net_device *dev,
 		return 0;
 	}
 
-	return rdev->ops->set_tx_power(wdev->wiphy, type, dbm);
+	return rdev->ops->set_tx_power(wdev->wiphy, type, DBM_TO_MBM(dbm));
 }
 EXPORT_SYMBOL_GPL(cfg80211_wext_siwtxpower);
 
-- 
cgit v1.2.3-70-g09d2


From 98d2ff8bec82fc35fe2008a187a5fef9241dab10 Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Wed, 23 Jun 2010 12:12:38 +0300
Subject: nl80211: Add option to adjust transmit power

This patch adds transmit power setting type and transmit power level attributes
to NL80211_CMD_SET_WIPHY in order to facilitate adjusting of the transmit power
level of the device.

The added attributes allow selection of automatic, limited or fixed transmit
power level, with the level definable in signed mBm format.

Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  9 +++++++++
 net/wireless/nl80211.c  | 31 +++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 07aa04693f9..2c870168733 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -725,6 +725,12 @@ enum nl80211_commands {
  * @NL80211_ATTR_AP_ISOLATE: (AP mode) Do not forward traffic between stations
  *	connected to this BSS.
  *
+ * @NL80211_ATTR_WIPHY_TX_POWER_SETTING: Transmit power setting type. See
+ *      &enum nl80211_tx_power_setting for possible values.
+ * @NL80211_ATTR_WIPHY_TX_POWER_LEVEL: Transmit power level in signed mBm units.
+ *      This is used in association with @NL80211_ATTR_WIPHY_TX_POWER_SETTING
+ *      for non-automatic settings.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -882,6 +888,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_AP_ISOLATE,
 
+	NL80211_ATTR_WIPHY_TX_POWER_SETTING,
+	NL80211_ATTR_WIPHY_TX_POWER_LEVEL,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 41529aca794..a999fc15462 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -153,6 +153,9 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_CQM] = { .type = NLA_NESTED, },
 	[NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG },
 	[NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 },
+
+	[NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 },
+	[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 },
 };
 
 /* policy for the attributes */
@@ -869,6 +872,34 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 			goto bad_res;
 	}
 
+	if (info->attrs[NL80211_ATTR_WIPHY_TX_POWER_SETTING]) {
+		enum nl80211_tx_power_setting type;
+		int idx, mbm = 0;
+
+		if (!rdev->ops->set_tx_power) {
+			result = -EOPNOTSUPP;
+			goto bad_res;
+		}
+
+		idx = NL80211_ATTR_WIPHY_TX_POWER_SETTING;
+		type = nla_get_u32(info->attrs[idx]);
+
+		if (!info->attrs[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] &&
+		    (type != NL80211_TX_POWER_AUTOMATIC)) {
+			result = -EINVAL;
+			goto bad_res;
+		}
+
+		if (type != NL80211_TX_POWER_AUTOMATIC) {
+			idx = NL80211_ATTR_WIPHY_TX_POWER_LEVEL;
+			mbm = nla_get_u32(info->attrs[idx]);
+		}
+
+		result = rdev->ops->set_tx_power(&rdev->wiphy, type, mbm);
+		if (result)
+			goto bad_res;
+	}
+
 	changed = 0;
 
 	if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) {
-- 
cgit v1.2.3-70-g09d2


From 45a73372efe4a63f44aa2e1125d4a777c2fdc8d8 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 23 Jun 2010 23:00:37 +0200
Subject: hw_breakpoints: Fix per task breakpoint tracking

Freeing a perf event can happen in several ways. A task
calls perf_event_exit_task() right before exiting. This helper
will detach all the events from the task context and queue their
removal through free_event() if they are child tasks. The task
also loses its context reference there.

Releasing the breakpoint slot from the constraint table is made
from free_event() that calls release_bp_slot(). We count the number
of breakpoints this task is running by looking at the task's
perf_event_ctxp and iterating through its attached events.
But at this time, the reference to this context has been cleaned up
already.

So looking at the event->ctx instead of task->perf_event_ctxp
to count the remaining breakpoints should solve the problem.
At least it would for child breakpoints, but not for parent ones.
If the parent exits before the child, it will remove all its
events from the context but free_event() will be called later,
on fd release time. And checking the number of breakpoints the
task has attached to its context at this time is unreliable as all
events have been removed from the context.

To solve this, we keep track of the list of per task breakpoints.
On top of it, we maintain our array of numbers of breakpoints used
by the tasks. We use the context address as a task id.

So, instead of looking at the number of events attached to a context,
we walk through our list of per task breakpoints and count the number
of breakpoints that use the same ctx as the one to be reserved or
released from the constraint table, and update the count on top of this
result.

Besides fixing the bad refcounting, this also fixes a warning
reported by Paul.

Badness at /home/paulus/kernel/perf/kernel/hw_breakpoint.c:114
NIP: c0000000000cb470 LR: c0000000000cb46c CTR: c00000000032d9b8
REGS: c000000118e7b570 TRAP: 0700   Not tainted  (2.6.35-rc3-perf-00008-g76b0f13
)
MSR: 9000000000029032 <EE,ME,CE,IR,DR>  CR: 44004424  XER: 000fffff
TASK = c0000001187dcad0[3143] 'perf' THREAD: c000000118e78000 CPU: 1
GPR00: c0000000000cb46c c000000118e7b7f0 c0000000009866a0 0000000000000020
GPR04: 0000000000000000 000000000000001d 0000000000000000 0000000000000001
GPR08: c0000000009bed68 c00000000086dff8 c000000000a5bf10 0000000000000001
GPR12: 0000000024004422 c00000000ffff200 0000000000000000 0000000000000000
GPR16: 0000000000000000 0000000000000000 0000000000000018 00000000101150f4
GPR20: 0000000010206b40 0000000000000000 0000000000000000 00000000101150f4
GPR24: c0000001199090c0 0000000000000001 0000000000000000 0000000000000001
GPR28: 0000000000000000 0000000000000000 c0000000008ec290 0000000000000000
NIP [c0000000000cb470] .task_bp_pinned+0x5c/0x12c
LR [c0000000000cb46c] .task_bp_pinned+0x58/0x12c
Call Trace:
[c000000118e7b7f0] [c0000000000cb46c] .task_bp_pinned+0x58/0x12c (unreliable)
[c000000118e7b8a0] [c0000000000cb584] .toggle_bp_task_slot+0x44/0xe4
[c000000118e7b940] [c0000000000cb6c8] .toggle_bp_slot+0xa4/0x164
[c000000118e7b9f0] [c0000000000cbafc] .release_bp_slot+0x44/0x6c
[c000000118e7ba80] [c0000000000c4178] .bp_perf_event_destroy+0x10/0x24
[c000000118e7bb00] [c0000000000c4aec] .free_event+0x180/0x1bc
[c000000118e7bbc0] [c0000000000c54c4] .perf_event_release_kernel+0x14c/0x170

Reported-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Jason Wessel <jason.wessel@windriver.com>
---
 include/linux/perf_event.h |  6 ++--
 kernel/hw_breakpoint.c     | 78 ++++++++++++++++++++++++----------------------
 2 files changed, 45 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 63b5aa5dce6..0dd5f8ad77a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -533,8 +533,10 @@ struct hw_perf_event {
 			struct hrtimer	hrtimer;
 		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-		/* breakpoint */
-		struct arch_hw_breakpoint	info;
+		struct { /* breakpoint */
+			struct arch_hw_breakpoint	info;
+			struct list_head		bp_list;
+		};
 #endif
 	};
 	local64_t			prev_count;
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 7a56b22e060..e34d94d5092 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -41,6 +41,7 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/list.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 
@@ -62,6 +63,9 @@ static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
 
 static int nr_slots[TYPE_MAX];
 
+/* Keep track of the breakpoints attached to tasks */
+static LIST_HEAD(bp_task_head);
+
 static int constraints_initialized;
 
 /* Gather the number of total pinned and un-pinned bp in a cpuset */
@@ -103,33 +107,21 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 	return 0;
 }
 
-static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
+/*
+ * Count the number of breakpoints of the same type and same task.
+ * The given event must be not on the list.
+ */
+static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type)
 {
-	struct perf_event_context *ctx = tsk->perf_event_ctxp;
-	struct list_head *list;
-	struct perf_event *bp;
-	unsigned long flags;
+	struct perf_event_context *ctx = bp->ctx;
+	struct perf_event *iter;
 	int count = 0;
 
-	if (WARN_ONCE(!ctx, "No perf context for this task"))
-		return 0;
-
-	list = &ctx->event_list;
-
-	raw_spin_lock_irqsave(&ctx->lock, flags);
-
-	/*
-	 * The current breakpoint counter is not included in the list
-	 * at the open() callback time
-	 */
-	list_for_each_entry(bp, list, event_entry) {
-		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-			if (find_slot_idx(bp) == type)
-				count += hw_breakpoint_weight(bp);
+	list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
+		if (iter->ctx == ctx && find_slot_idx(iter) == type)
+			count += hw_breakpoint_weight(iter);
 	}
 
-	raw_spin_unlock_irqrestore(&ctx->lock, flags);
-
 	return count;
 }
 
@@ -149,7 +141,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
 		if (!tsk)
 			slots->pinned += max_task_bp_pinned(cpu, type);
 		else
-			slots->pinned += task_bp_pinned(tsk, type);
+			slots->pinned += task_bp_pinned(bp, type);
 		slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
 
 		return;
@@ -162,7 +154,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
 		if (!tsk)
 			nr += max_task_bp_pinned(cpu, type);
 		else
-			nr += task_bp_pinned(tsk, type);
+			nr += task_bp_pinned(bp, type);
 
 		if (nr > slots->pinned)
 			slots->pinned = nr;
@@ -188,7 +180,7 @@ fetch_this_slot(struct bp_busy_slots *slots, int weight)
 /*
  * Add a pinned breakpoint for the given task in our constraint table
  */
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
+static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable,
 				enum bp_type_idx type, int weight)
 {
 	unsigned int *tsk_pinned;
@@ -196,10 +188,11 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
 	int old_idx = 0;
 	int idx = 0;
 
-	old_count = task_bp_pinned(tsk, type);
+	old_count = task_bp_pinned(bp, type);
 	old_idx = old_count - 1;
 	idx = old_idx + weight;
 
+	/* tsk_pinned[n] is the number of tasks having n breakpoints */
 	tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 	if (enable) {
 		tsk_pinned[idx]++;
@@ -222,23 +215,30 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
 
+	/* Pinned counter cpu profiling */
+	if (!tsk) {
+
+		if (enable)
+			per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
+		else
+			per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
+		return;
+	}
+
 	/* Pinned counter task profiling */
-	if (tsk) {
-		if (cpu >= 0) {
-			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
-			return;
-		}
 
+	if (!enable)
+		list_del(&bp->hw.bp_list);
+
+	if (cpu >= 0) {
+		toggle_bp_task_slot(bp, cpu, enable, type, weight);
+	} else {
 		for_each_online_cpu(cpu)
-			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
-		return;
+			toggle_bp_task_slot(bp, cpu, enable, type, weight);
 	}
 
-	/* Pinned counter cpu profiling */
 	if (enable)
-		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
-	else
-		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
+		list_add_tail(&bp->hw.bp_list, &bp_task_head);
 }
 
 /*
@@ -301,6 +301,10 @@ static int __reserve_bp_slot(struct perf_event *bp)
 	weight = hw_breakpoint_weight(bp);
 
 	fetch_bp_busy_slots(&slots, bp, type);
+	/*
+	 * Simulate the addition of this breakpoint to the constraints
+	 * and see the result.
+	 */
 	fetch_this_slot(&slots, weight);
 
 	/* Flexible counters need to keep at least one slot */
-- 
cgit v1.2.3-70-g09d2


From 5cfaf214856eb934759ae500a0b812dd06a00bd9 Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
Date: Wed, 23 Jun 2010 09:17:53 +0900
Subject: perf: Fix argument of perf_arch_fetch_caller_regs

The fallback (off-case) definition of perf_arch_fetch_caller_regs took
a "struct regs" argument. It should be "struct pt_regs".

This fixes various build errors in archs that have CONFIG_PERF_EVENTS=y
but no overridden implementation of perf_arch_fetch_caller_regs.

cc1: warnings being treated as errors
In file included from include/linux/ftrace_event.h:8,
                 from include/trace/syscall.h:6,
                 from include/linux/syscalls.h:75,
                 from arch/sh/kernel/sys_sh32.c:9:
include/linux/perf_event.h:937: error: 'struct regs' declared inside parameter list
include/linux/perf_event.h:937: error: its scope is only this definition or declaration, which is probably not what you want
include/linux/perf_event.h: In function 'perf_fetch_caller_regs':
include/linux/perf_event.h:952: error: passing argument 1 of 'perf_arch_fetch_caller_regs' from incompatible pointer type

Signed-off-by: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: David Miller <davem@davemloft.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <AANLkTinKKFKEBQrZ3Hkj-XCaMwaTqulb-XnFzqEYiFRr@mail.gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/perf_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0dd5f8ad77a..937495c2507 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -936,7 +936,7 @@ extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
 
 #ifndef perf_arch_fetch_caller_regs
 static inline void
-perf_arch_fetch_caller_regs(struct regs *regs, unsigned long ip) { }
+perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
 #endif
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 69a4af606ed4836faa2ec69b1d217f384b8235e7 Mon Sep 17 00:00:00 2001
From: Xiaolong CHEN <a21785@motorola.com>
Date: Thu, 24 Jun 2010 19:10:40 -0700
Subject: Input: adp5588-keys - support GPI events for ADP5588 devices

A column or row configured as a GPI can be programmed to be part
of the key event table and therefore also capable of generating a
key event interrupt. A key event interrupt caused by a GPI follows
the same process flow as a key event interrupt caused by a key
press. GPIs configured as part of the key event table allow single
key switches and other GPI interrupts to be monitored. As part of
the event table, GPIs are represented by the decimal value 97 (0x61
or 1100001) through the decimal value 114 (0x72 or 1110010). See
table below for GPI event number assignments for rows and columns.

GPI Event Number Assignments for Rows
Row0 Row1 Row2 Row3 Row4 Row5 Row6 Row7
97   98   99   100  101  102  103  104

GPI Event Number Assignments for Cols
Col0 Col1 Col2 Col3 Col4 Col5 Col6 Col7 Col8 Col9
105  106  107  108  109  110  111  112  113  114

Signed-off-by: Xiaolong Chen <xiao-long.chen@motorola.com>
Signed-off-by: Yuanbo Ye <yuan-bo.ye@motorola.com>
Signed-off-by: Tao Hu <taohu@motorola.com>
Acked-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/adp5588-keys.c | 134 ++++++++++++++++++++++++++++++++--
 include/linux/i2c/adp5588.h           |  36 +++++++++
 2 files changed, 163 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c
index 4771ab172b5..4ef789ef104 100644
--- a/drivers/input/keyboard/adp5588-keys.c
+++ b/drivers/input/keyboard/adp5588-keys.c
@@ -67,6 +67,8 @@ struct adp5588_kpad {
 	struct delayed_work work;
 	unsigned long delay;
 	unsigned short keycode[ADP5588_KEYMAPSIZE];
+	const struct adp5588_gpi_map *gpimap;
+	unsigned short gpimapsize;
 };
 
 static int adp5588_read(struct i2c_client *client, u8 reg)
@@ -84,12 +86,37 @@ static int adp5588_write(struct i2c_client *client, u8 reg, u8 val)
 	return i2c_smbus_write_byte_data(client, reg, val);
 }
 
+static void adp5588_report_events(struct adp5588_kpad *kpad, int ev_cnt)
+{
+	int i, j;
+
+	for (i = 0; i < ev_cnt; i++) {
+		int key = adp5588_read(kpad->client, Key_EVENTA + i);
+		int key_val = key & KEY_EV_MASK;
+
+		if (key_val >= GPI_PIN_BASE && key_val <= GPI_PIN_END) {
+			for (j = 0; j < kpad->gpimapsize; j++) {
+				if (key_val == kpad->gpimap[j].pin) {
+					input_report_switch(kpad->input,
+							kpad->gpimap[j].sw_evt,
+							key & KEY_EV_PRESSED);
+					break;
+				}
+			}
+		} else {
+			input_report_key(kpad->input,
+					 kpad->keycode[key_val - 1],
+					 key & KEY_EV_PRESSED);
+		}
+	}
+}
+
 static void adp5588_work(struct work_struct *work)
 {
 	struct adp5588_kpad *kpad = container_of(work,
 						struct adp5588_kpad, work.work);
 	struct i2c_client *client = kpad->client;
-	int i, key, status, ev_cnt;
+	int status, ev_cnt;
 
 	status = adp5588_read(client, INT_STAT);
 
@@ -99,12 +126,7 @@ static void adp5588_work(struct work_struct *work)
 	if (status & KE_INT) {
 		ev_cnt = adp5588_read(client, KEY_LCK_EC_STAT) & KEC;
 		if (ev_cnt) {
-			for (i = 0; i < ev_cnt; i++) {
-				key = adp5588_read(client, Key_EVENTA + i);
-				input_report_key(kpad->input,
-					kpad->keycode[(key & KEY_EV_MASK) - 1],
-					key & KEY_EV_PRESSED);
-			}
+			adp5588_report_events(kpad, ev_cnt);
 			input_sync(kpad->input);
 		}
 	}
@@ -130,6 +152,7 @@ static int __devinit adp5588_setup(struct i2c_client *client)
 {
 	struct adp5588_kpad_platform_data *pdata = client->dev.platform_data;
 	int i, ret;
+	unsigned char evt_mode1 = 0, evt_mode2 = 0, evt_mode3 = 0;
 
 	ret = adp5588_write(client, KP_GPIO1, KP_SEL(pdata->rows));
 	ret |= adp5588_write(client, KP_GPIO2, KP_SEL(pdata->cols) & 0xFF);
@@ -144,6 +167,23 @@ static int __devinit adp5588_setup(struct i2c_client *client)
 	for (i = 0; i < KEYP_MAX_EVENT; i++)
 		ret |= adp5588_read(client, Key_EVENTA);
 
+	for (i = 0; i < pdata->gpimapsize; i++) {
+		unsigned short pin = pdata->gpimap[i].pin;
+
+		if (pin <= GPI_PIN_ROW_END) {
+			evt_mode1 |= (1 << (pin - GPI_PIN_ROW_BASE));
+		} else {
+			evt_mode2 |= ((1 << (pin - GPI_PIN_COL_BASE)) & 0xFF);
+			evt_mode3 |= ((1 << (pin - GPI_PIN_COL_BASE)) >> 8);
+		}
+	}
+
+	if (pdata->gpimapsize) {
+		ret |= adp5588_write(client, GPI_EM1, evt_mode1);
+		ret |= adp5588_write(client, GPI_EM2, evt_mode2);
+		ret |= adp5588_write(client, GPI_EM3, evt_mode3);
+	}
+
 	ret |= adp5588_write(client, INT_STAT, CMP2_INT | CMP1_INT |
 					OVR_FLOW_INT | K_LCK_INT |
 					GPI_INT | KE_INT); /* Status is W1C */
@@ -158,6 +198,44 @@ static int __devinit adp5588_setup(struct i2c_client *client)
 	return 0;
 }
 
+static void __devinit adp5588_report_switch_state(struct adp5588_kpad *kpad)
+{
+	int gpi_stat1 = adp5588_read(kpad->client, GPIO_DAT_STAT1);
+	int gpi_stat2 = adp5588_read(kpad->client, GPIO_DAT_STAT2);
+	int gpi_stat3 = adp5588_read(kpad->client, GPIO_DAT_STAT3);
+	int gpi_stat_tmp, pin_loc;
+	int i;
+
+	for (i = 0; i < kpad->gpimapsize; i++) {
+		unsigned short pin = kpad->gpimap[i].pin;
+
+		if (pin <= GPI_PIN_ROW_END) {
+			gpi_stat_tmp = gpi_stat1;
+			pin_loc = pin - GPI_PIN_ROW_BASE;
+		} else if ((pin - GPI_PIN_COL_BASE) < 8) {
+			gpi_stat_tmp = gpi_stat2;
+			pin_loc = pin - GPI_PIN_COL_BASE;
+		} else {
+			gpi_stat_tmp = gpi_stat3;
+			pin_loc = pin - GPI_PIN_COL_BASE - 8;
+		}
+
+		if (gpi_stat_tmp < 0) {
+			dev_err(&kpad->client->dev,
+				"Can't read GPIO_DAT_STAT switch %d default to OFF\n",
+				pin);
+			gpi_stat_tmp = 0;
+		}
+
+		input_report_switch(kpad->input,
+				    kpad->gpimap[i].sw_evt,
+				    !(gpi_stat_tmp & (1 << pin_loc)));
+	}
+
+	input_sync(kpad->input);
+}
+
+
 static int __devinit adp5588_probe(struct i2c_client *client,
 					const struct i2c_device_id *id)
 {
@@ -189,6 +267,37 @@ static int __devinit adp5588_probe(struct i2c_client *client,
 		return -EINVAL;
 	}
 
+	if (!pdata->gpimap && pdata->gpimapsize) {
+		dev_err(&client->dev, "invalid gpimap from pdata\n");
+		return -EINVAL;
+	}
+
+	if (pdata->gpimapsize > ADP5588_GPIMAPSIZE_MAX) {
+		dev_err(&client->dev, "invalid gpimapsize\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < pdata->gpimapsize; i++) {
+		unsigned short pin = pdata->gpimap[i].pin;
+
+		if (pin < GPI_PIN_BASE || pin > GPI_PIN_END) {
+			dev_err(&client->dev, "invalid gpi pin data\n");
+			return -EINVAL;
+		}
+
+		if (pin <= GPI_PIN_ROW_END) {
+			if (pin - GPI_PIN_ROW_BASE + 1 <= pdata->rows) {
+				dev_err(&client->dev, "invalid gpi row data\n");
+				return -EINVAL;
+			}
+		} else {
+			if (pin - GPI_PIN_COL_BASE + 1 <= pdata->cols) {
+				dev_err(&client->dev, "invalid gpi col data\n");
+				return -EINVAL;
+			}
+		}
+	}
+
 	if (!client->irq) {
 		dev_err(&client->dev, "no IRQ?\n");
 		return -EINVAL;
@@ -233,6 +342,9 @@ static int __devinit adp5588_probe(struct i2c_client *client,
 	memcpy(kpad->keycode, pdata->keymap,
 		pdata->keymapsize * input->keycodesize);
 
+	kpad->gpimap = pdata->gpimap;
+	kpad->gpimapsize = pdata->gpimapsize;
+
 	/* setup input device */
 	__set_bit(EV_KEY, input->evbit);
 
@@ -243,6 +355,11 @@ static int __devinit adp5588_probe(struct i2c_client *client,
 		__set_bit(kpad->keycode[i] & KEY_MAX, input->keybit);
 	__clear_bit(KEY_RESERVED, input->keybit);
 
+	if (kpad->gpimapsize)
+		__set_bit(EV_SW, input->evbit);
+	for (i = 0; i < kpad->gpimapsize; i++)
+		__set_bit(kpad->gpimap[i].sw_evt, input->swbit);
+
 	error = input_register_device(input);
 	if (error) {
 		dev_err(&client->dev, "unable to register input device\n");
@@ -261,6 +378,9 @@ static int __devinit adp5588_probe(struct i2c_client *client,
 	if (error)
 		goto err_free_irq;
 
+	if (kpad->gpimapsize)
+		adp5588_report_switch_state(kpad);
+
 	device_init_wakeup(&client->dev, 1);
 	i2c_set_clientdata(client, kpad);
 
diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h
index 02c9af37474..b5f57c498e2 100644
--- a/include/linux/i2c/adp5588.h
+++ b/include/linux/i2c/adp5588.h
@@ -78,6 +78,40 @@
 
 #define ADP5588_KEYMAPSIZE	80
 
+#define GPI_PIN_ROW0 97
+#define GPI_PIN_ROW1 98
+#define GPI_PIN_ROW2 99
+#define GPI_PIN_ROW3 100
+#define GPI_PIN_ROW4 101
+#define GPI_PIN_ROW5 102
+#define GPI_PIN_ROW6 103
+#define GPI_PIN_ROW7 104
+#define GPI_PIN_COL0 105
+#define GPI_PIN_COL1 106
+#define GPI_PIN_COL2 107
+#define GPI_PIN_COL3 108
+#define GPI_PIN_COL4 109
+#define GPI_PIN_COL5 110
+#define GPI_PIN_COL6 111
+#define GPI_PIN_COL7 112
+#define GPI_PIN_COL8 113
+#define GPI_PIN_COL9 114
+
+#define GPI_PIN_ROW_BASE GPI_PIN_ROW0
+#define GPI_PIN_ROW_END GPI_PIN_ROW7
+#define GPI_PIN_COL_BASE GPI_PIN_COL0
+#define GPI_PIN_COL_END GPI_PIN_COL9
+
+#define GPI_PIN_BASE GPI_PIN_ROW_BASE
+#define GPI_PIN_END GPI_PIN_COL_END
+
+#define ADP5588_GPIMAPSIZE_MAX (GPI_PIN_END - GPI_PIN_BASE + 1)
+
+struct adp5588_gpi_map {
+	unsigned short pin;
+	unsigned short sw_evt;
+};
+
 struct adp5588_kpad_platform_data {
 	int rows;			/* Number of rows */
 	int cols;			/* Number of columns */
@@ -87,6 +121,8 @@ struct adp5588_kpad_platform_data {
 	unsigned en_keylock:1;		/* Enable Key Lock feature */
 	unsigned short unlock_key1;	/* Unlock Key 1 */
 	unsigned short unlock_key2;	/* Unlock Key 2 */
+	const struct adp5588_gpi_map *gpimap;
+	unsigned short gpimapsize;
 };
 
 struct adp5588_gpio_platform_data {
-- 
cgit v1.2.3-70-g09d2


From fcb26ec5b18d88bb22366799d056dc3630d0e895 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Wed, 16 Jun 2010 23:02:23 +0000
Subject: broadcom: move all PHY_ID's to header

Move all PHY IDs to brcmphy.h header for completeness and unification of code.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 24 ++++++++++++------------
 include/linux/brcmphy.h    |  6 ++++++
 2 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index cecdbbd549e..b743d37532f 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -685,7 +685,7 @@ static int brcm_fet_config_intr(struct phy_device *phydev)
 }
 
 static struct phy_driver bcm5411_driver = {
-	.phy_id		= 0x00206070,
+	.phy_id		= PHY_ID_BCM5411,
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5411",
 	.features	= PHY_GBIT_FEATURES |
@@ -700,7 +700,7 @@ static struct phy_driver bcm5411_driver = {
 };
 
 static struct phy_driver bcm5421_driver = {
-	.phy_id		= 0x002060e0,
+	.phy_id		= PHY_ID_BCM5421,
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5421",
 	.features	= PHY_GBIT_FEATURES |
@@ -715,7 +715,7 @@ static struct phy_driver bcm5421_driver = {
 };
 
 static struct phy_driver bcm5461_driver = {
-	.phy_id		= 0x002060c0,
+	.phy_id		= PHY_ID_BCM5461,
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5461",
 	.features	= PHY_GBIT_FEATURES |
@@ -730,7 +730,7 @@ static struct phy_driver bcm5461_driver = {
 };
 
 static struct phy_driver bcm5464_driver = {
-	.phy_id		= 0x002060b0,
+	.phy_id		= PHY_ID_BCM5464,
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5464",
 	.features	= PHY_GBIT_FEATURES |
@@ -745,7 +745,7 @@ static struct phy_driver bcm5464_driver = {
 };
 
 static struct phy_driver bcm5481_driver = {
-	.phy_id		= 0x0143bca0,
+	.phy_id		= PHY_ID_BCM5481,
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5481",
 	.features	= PHY_GBIT_FEATURES |
@@ -760,7 +760,7 @@ static struct phy_driver bcm5481_driver = {
 };
 
 static struct phy_driver bcm5482_driver = {
-	.phy_id		= 0x0143bcb0,
+	.phy_id		= PHY_ID_BCM5482,
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5482",
 	.features	= PHY_GBIT_FEATURES |
@@ -910,12 +910,12 @@ module_init(broadcom_init);
 module_exit(broadcom_exit);
 
 static struct mdio_device_id broadcom_tbl[] = {
-	{ 0x00206070, 0xfffffff0 },
-	{ 0x002060e0, 0xfffffff0 },
-	{ 0x002060c0, 0xfffffff0 },
-	{ 0x002060b0, 0xfffffff0 },
-	{ 0x0143bca0, 0xfffffff0 },
-	{ 0x0143bcb0, 0xfffffff0 },
+	{ PHY_ID_BCM5411, 0xfffffff0 },
+	{ PHY_ID_BCM5421, 0xfffffff0 },
+	{ PHY_ID_BCM5461, 0xfffffff0 },
+	{ PHY_ID_BCM5464, 0xfffffff0 },
+	{ PHY_ID_BCM5481, 0xfffffff0 },
+	{ PHY_ID_BCM5482, 0xfffffff0 },
 	{ PHY_ID_BCM50610, 0xfffffff0 },
 	{ PHY_ID_BCM50610M, 0xfffffff0 },
 	{ PHY_ID_BCM57780, 0xfffffff0 },
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 7f437ca1ed4..c14c3a1b64d 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -1,6 +1,12 @@
 #define PHY_ID_BCM50610			0x0143bd60
 #define PHY_ID_BCM50610M		0x0143bd70
 #define PHY_ID_BCMAC131			0x0143bc70
+#define PHY_ID_BCM5481			0x0143bca0
+#define PHY_ID_BCM5482			0x0143bcb0
+#define PHY_ID_BCM5411			0x00206070
+#define PHY_ID_BCM5421			0x002060e0
+#define PHY_ID_BCM5464			0x002060b0
+#define PHY_ID_BCM5461			0x002060c0
 #define PHY_ID_BCM57780			0x03625d90
 
 #define PHY_BCM_OUI_MASK		0xfffffc00
-- 
cgit v1.2.3-70-g09d2


From 7a938f80264f2cbfb0c0841b450eab42a8093281 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Wed, 16 Jun 2010 23:02:24 +0000
Subject: broadcom: Add 5241 support

This patch adds the 5241 PHY ID to the broadcom module.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 22 ++++++++++++++++++++++
 include/linux/brcmphy.h    |  1 +
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index b743d37532f..4accd83d3df 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -834,6 +834,21 @@ static struct phy_driver bcmac131_driver = {
 	.driver		= { .owner = THIS_MODULE },
 };
 
+static struct phy_driver bcm5241_driver = {
+	.phy_id		= PHY_ID_BCM5241,
+	.phy_id_mask	= 0xfffffff0,
+	.name		= "Broadcom BCM5241",
+	.features	= PHY_BASIC_FEATURES |
+			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
+	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.config_init	= brcm_fet_config_init,
+	.config_aneg	= genphy_config_aneg,
+	.read_status	= genphy_read_status,
+	.ack_interrupt	= brcm_fet_ack_interrupt,
+	.config_intr	= brcm_fet_config_intr,
+	.driver		= { .owner = THIS_MODULE },
+};
+
 static int __init broadcom_init(void)
 {
 	int ret;
@@ -868,8 +883,13 @@ static int __init broadcom_init(void)
 	ret = phy_driver_register(&bcmac131_driver);
 	if (ret)
 		goto out_ac131;
+	ret = phy_driver_register(&bcm5241_driver);
+	if (ret)
+		goto out_5241;
 	return ret;
 
+out_5241:
+	phy_driver_unregister(&bcmac131_driver);
 out_ac131:
 	phy_driver_unregister(&bcm57780_driver);
 out_57780:
@@ -894,6 +914,7 @@ out_5411:
 
 static void __exit broadcom_exit(void)
 {
+	phy_driver_unregister(&bcm5241_driver);
 	phy_driver_unregister(&bcmac131_driver);
 	phy_driver_unregister(&bcm57780_driver);
 	phy_driver_unregister(&bcm50610m_driver);
@@ -920,6 +941,7 @@ static struct mdio_device_id broadcom_tbl[] = {
 	{ PHY_ID_BCM50610M, 0xfffffff0 },
 	{ PHY_ID_BCM57780, 0xfffffff0 },
 	{ PHY_ID_BCMAC131, 0xfffffff0 },
+	{ PHY_ID_BCM5241, 0xfffffff0 },
 	{ }
 };
 
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index c14c3a1b64d..b840a496028 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -1,5 +1,6 @@
 #define PHY_ID_BCM50610			0x0143bd60
 #define PHY_ID_BCM50610M		0x0143bd70
+#define PHY_ID_BCM5241			0x0143bc30
 #define PHY_ID_BCMAC131			0x0143bc70
 #define PHY_ID_BCM5481			0x0143bca0
 #define PHY_ID_BCM5482			0x0143bcb0
-- 
cgit v1.2.3-70-g09d2


From cc3202f5da3c81a99c5f3a605df527da7a77eed3 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vzapolskiy@gmail.com>
Date: Thu, 24 Jun 2010 17:38:50 +0400
Subject: ASoC: uda134x: replace a macro with a value in platform struct.

This change wipes out a hardcoded macro, which enables codec bias
level control. Now is_powered_on_standby value shall be used instead.

Signed-off-by: Vladimir Zapolskiy <vzapolskiy@gmail.com>
Acked-by: Liam Girdwood <lrg@slimlogic.co.uk>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/uda134x.h    | 12 ++++++++++++
 sound/soc/codecs/uda134x.c | 21 +++++----------------
 2 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/sound/uda134x.h b/include/sound/uda134x.h
index 509efb05017..e475659bd3b 100644
--- a/include/sound/uda134x.h
+++ b/include/sound/uda134x.h
@@ -18,6 +18,18 @@ struct uda134x_platform_data {
 	struct l3_pins l3;
 	void (*power) (int);
 	int model;
+	/*
+	  ALSA SOC usually puts the device in standby mode when it's not used
+	  for some time. If you unset is_powered_on_standby the driver will
+	  turn off the ADC/DAC when its set_bias_level callback is invoked and
+	  turn it back on when needed. Unfortunately this will result in a very
+	  light bump (it can be audible only with good earphones). If this
+	  bothers you, set is_powered_on_standby; you will have slightly higher
+	  power consumption. Please note that sending the L3 command for ADC is
+	  enough to make the bump, so it doesn't make a difference if you
+	  completely take off power from the codec.
+	*/
+	int is_powered_on_standby;
 #define UDA134X_UDA1340 1
 #define UDA134X_UDA1341 2
 #define UDA134X_UDA1344 3
diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c
index 28aac53c97b..30cf2f9d329 100644
--- a/sound/soc/codecs/uda134x.c
+++ b/sound/soc/codecs/uda134x.c
@@ -28,19 +28,6 @@
 #include "uda134x.h"
 
 
-#define POWER_OFF_ON_STANDBY 1
-/*
-  ALSA SOC usually puts the device in standby mode when it's not used
-  for sometime. If you define POWER_OFF_ON_STANDBY the driver will
-  turn off the ADC/DAC when this callback is invoked and turn it back
-  on when needed. Unfortunately this will result in a very light bump
-  (it can be audible only with good earphones). If this bothers you
-  just comment this line, you will have slightly higher power
-  consumption . Please note that sending the L3 command for ADC is
-  enough to make the bump, so it doesn't make difference if you
-  completely take off power from the codec.
- */
-
 #define UDA134X_RATES SNDRV_PCM_RATE_8000_48000
 #define UDA134X_FORMATS (SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_LE | \
 		SNDRV_PCM_FMTBIT_S18_3LE | SNDRV_PCM_FMTBIT_S20_3LE)
@@ -531,9 +518,11 @@ static int uda134x_soc_probe(struct platform_device *pdev)
 	codec->num_dai = 1;
 	codec->read = uda134x_read_reg_cache;
 	codec->write = uda134x_write;
-#ifdef POWER_OFF_ON_STANDBY
-	codec->set_bias_level = uda134x_set_bias_level;
-#endif
+
+	if (!pd->is_powered_on_standby) {
+		codec->set_bias_level = uda134x_set_bias_level;
+	}
+
 	INIT_LIST_HEAD(&codec->dapm_widgets);
 	INIT_LIST_HEAD(&codec->dapm_paths);
 
-- 
cgit v1.2.3-70-g09d2


From a8756201ba4189bca3ee1a6ec4e290f467ee09ab Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Fri, 25 Jun 2010 14:44:07 +0200
Subject: netfilter: xt_connbytes: Force CT accounting to be enabled

Check at rule install time that CT accounting is enabled. Force it
to be enabled if not while also emitting a warning since this is not
the default state.

This is in preparation for deprecating CONFIG_NF_CT_ACCT upon which
CONFIG_NETFILTER_XT_MATCH_CONNBYTES depended being set.

Added 2 CT accounting support functions:

nf_ct_acct_enabled() - Get CT accounting state.
nf_ct_set_acct() - Enable/disable CT accounting.

Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Acked-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_acct.h | 12 ++++++++++++
 net/netfilter/xt_connbytes.c              | 10 ++++++++++
 2 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index 03e218f0be4..4e9c63a20db 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -45,6 +45,18 @@ struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
 extern unsigned int
 seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir);
 
+/* Check if connection tracking accounting is enabled */
+static inline bool nf_ct_acct_enabled(struct net *net)
+{
+	return net->ct.sysctl_acct != 0;
+}
+
+/* Enable/disable connection tracking accounting */
+static inline void nf_ct_set_acct(struct net *net, bool enable)
+{
+	net->ct.sysctl_acct = enable;
+}
+
 extern int nf_conntrack_acct_init(struct net *net);
 extern void nf_conntrack_acct_fini(struct net *net);
 
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 73517835303..5b138506690 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -112,6 +112,16 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
 	if (ret < 0)
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
+
+	/*
+	 * This filter cannot function correctly unless connection tracking
+	 * accounting is enabled, so complain in the hope that someone notices.
+	 */
+	if (!nf_ct_acct_enabled(par->net)) {
+		pr_warning("Forcing CT accounting to be enabled\n");
+		nf_ct_set_acct(par->net, true);
+	}
+
 	return ret;
 }
 
-- 
cgit v1.2.3-70-g09d2


From e27c729219ad24c8ac9a4b34cf192e56917565c5 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Fri, 25 Jun 2010 08:44:10 -0700
Subject: Input: add driver for ADXL345/346 Digital Accelerometers

This is a driver for the ADXL345/346 Three-Axis Digital Accelerometers.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Chris Verges <chrisv@cyberswitching.com>
Signed-off-by: Luotao Fu <l.fu@pengutronix.de>
Signed-off-by: Barry Song <barry.song@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/misc/Kconfig       |  37 ++
 drivers/input/misc/Makefile      |   3 +
 drivers/input/misc/adxl34x-i2c.c | 163 ++++++++
 drivers/input/misc/adxl34x-spi.c | 145 +++++++
 drivers/input/misc/adxl34x.c     | 840 +++++++++++++++++++++++++++++++++++++++
 drivers/input/misc/adxl34x.h     |  30 ++
 include/linux/input/adxl34x.h    | 293 ++++++++++++++
 7 files changed, 1511 insertions(+)
 create mode 100644 drivers/input/misc/adxl34x-i2c.c
 create mode 100644 drivers/input/misc/adxl34x-spi.c
 create mode 100644 drivers/input/misc/adxl34x.c
 create mode 100644 drivers/input/misc/adxl34x.h
 create mode 100644 include/linux/input/adxl34x.h

(limited to 'include')

diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index c44b9eafc55..ede6d52fe95 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -390,4 +390,41 @@ config INPUT_PCAP
 	  To compile this driver as a module, choose M here: the
 	  module will be called pcap_keys.
 
+config INPUT_ADXL34X
+	tristate "Analog Devices ADXL34x Three-Axis Digital Accelerometer"
+	default n
+	help
+	  Say Y here if you have an accelerometer interface using the
+	  ADXL345/6 controller, and your board-specific initialization
+	  code includes that in its table of devices.
+
+	  This driver can use either I2C or SPI communication to the
+	  ADXL345/6 controller.  Select the appropriate method for
+	  your system.
+
+	  If unsure, say N (but it's safe to say "Y").
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called adxl34x.
+
+config INPUT_ADXL34X_I2C
+	tristate "support I2C bus connection"
+	depends on INPUT_ADXL34X && I2C
+	default y
+	help
+	  Say Y here if you have ADXL345/6 hooked to an I2C bus.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called adxl34x-i2c.
+
+config INPUT_ADXL34X_SPI
+	tristate "support SPI bus connection"
+	depends on INPUT_ADXL34X && SPI
+	default y
+	help
+	  Say Y here if you have ADXL345/6 hooked to a SPI bus.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called adxl34x-spi.
+
 endif
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index 71fe57d8023..97b5dc32df1 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -8,6 +8,9 @@ obj-$(CONFIG_INPUT_88PM860X_ONKEY)	+= 88pm860x_onkey.o
 obj-$(CONFIG_INPUT_AD714X)		+= ad714x.o
 obj-$(CONFIG_INPUT_AD714X_I2C)		+= ad714x-i2c.o
 obj-$(CONFIG_INPUT_AD714X_SPI)		+= ad714x-spi.o
+obj-$(CONFIG_INPUT_ADXL34X)		+= adxl34x.o
+obj-$(CONFIG_INPUT_ADXL34X_I2C)		+= adxl34x-i2c.o
+obj-$(CONFIG_INPUT_ADXL34X_SPI)		+= adxl34x-spi.o
 obj-$(CONFIG_INPUT_APANEL)		+= apanel.o
 obj-$(CONFIG_INPUT_ATI_REMOTE)		+= ati_remote.o
 obj-$(CONFIG_INPUT_ATI_REMOTE2)		+= ati_remote2.o
diff --git a/drivers/input/misc/adxl34x-i2c.c b/drivers/input/misc/adxl34x-i2c.c
new file mode 100644
index 00000000000..76194b58bd0
--- /dev/null
+++ b/drivers/input/misc/adxl34x-i2c.c
@@ -0,0 +1,163 @@
+/*
+ * ADXL345/346 Three-Axis Digital Accelerometers (I2C Interface)
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Copyright (C) 2009 Michael Hennerich, Analog Devices Inc.
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/input.h>	/* BUS_I2C */
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include "adxl34x.h"
+
+static int adxl34x_smbus_read(struct device *dev, unsigned char reg)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	return i2c_smbus_read_byte_data(client, reg);
+}
+
+static int adxl34x_smbus_write(struct device *dev,
+			       unsigned char reg, unsigned char val)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	return i2c_smbus_write_byte_data(client, reg, val);
+}
+
+static int adxl34x_smbus_read_block(struct device *dev,
+				    unsigned char reg, int count,
+				    void *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	return i2c_smbus_read_i2c_block_data(client, reg, count, buf);
+}
+
+static int adxl34x_i2c_read_block(struct device *dev,
+				  unsigned char reg, int count,
+				  void *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int ret;
+
+	ret = i2c_master_send(client, &reg, 1);
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_master_recv(client, buf, count);
+	if (ret < 0)
+		return ret;
+
+	if (ret != count)
+		return -EIO;
+
+	return 0;
+}
+
+static const struct adxl34x_bus_ops adx134x_smbus_bops = {
+	.bustype	= BUS_I2C,
+	.write		= adxl34x_smbus_write,
+	.read		= adxl34x_smbus_read,
+	.read_block	= adxl34x_smbus_read_block,
+};
+
+static const struct adxl34x_bus_ops adx134x_i2c_bops = {
+	.bustype	= BUS_I2C,
+	.write		= adxl34x_smbus_write,
+	.read		= adxl34x_smbus_read,
+	.read_block	= adxl34x_i2c_read_block,
+};
+
+static int __devinit adxl34x_i2c_probe(struct i2c_client *client,
+				       const struct i2c_device_id *id)
+{
+	struct adxl34x *ac;
+	int error;
+
+	error = i2c_check_functionality(client->adapter,
+			I2C_FUNC_SMBUS_BYTE_DATA);
+	if (!error) {
+		dev_err(&client->dev, "SMBUS Byte Data not Supported\n");
+		return -EIO;
+	}
+
+	ac = adxl34x_probe(&client->dev, client->irq, false,
+			   i2c_check_functionality(client->adapter,
+						   I2C_FUNC_SMBUS_READ_I2C_BLOCK) ?
+				&adx134x_smbus_bops : &adx134x_i2c_bops);
+	if (IS_ERR(ac))
+		return PTR_ERR(ac);
+
+	i2c_set_clientdata(client, ac);
+
+	return 0;
+}
+
+static int __devexit adxl34x_i2c_remove(struct i2c_client *client)
+{
+	struct adxl34x *ac = i2c_get_clientdata(client);
+
+	return adxl34x_remove(ac);
+}
+
+#ifdef CONFIG_PM
+static int adxl34x_suspend(struct i2c_client *client, pm_message_t message)
+{
+	struct adxl34x *ac = i2c_get_clientdata(client);
+
+	adxl34x_disable(ac);
+
+	return 0;
+}
+
+static int adxl34x_resume(struct i2c_client *client)
+{
+	struct adxl34x *ac = i2c_get_clientdata(client);
+
+	adxl34x_enable(ac);
+
+	return 0;
+}
+#else
+# define adxl34x_suspend NULL
+# define adxl34x_resume  NULL
+#endif
+
+static const struct i2c_device_id adxl34x_id[] = {
+	{ "adxl34x", 0 },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(i2c, adxl34x_id);
+
+static struct i2c_driver adxl34x_driver = {
+	.driver = {
+		.name = "adxl34x",
+		.owner = THIS_MODULE,
+	},
+	.probe    = adxl34x_i2c_probe,
+	.remove   = __devexit_p(adxl34x_i2c_remove),
+	.suspend  = adxl34x_suspend,
+	.resume   = adxl34x_resume,
+	.id_table = adxl34x_id,
+};
+
+static int __init adxl34x_i2c_init(void)
+{
+	return i2c_add_driver(&adxl34x_driver);
+}
+module_init(adxl34x_i2c_init);
+
+static void __exit adxl34x_i2c_exit(void)
+{
+	i2c_del_driver(&adxl34x_driver);
+}
+module_exit(adxl34x_i2c_exit);
+
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("ADXL345/346 Three-Axis Digital Accelerometer I2C Bus Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/adxl34x-spi.c b/drivers/input/misc/adxl34x-spi.c
new file mode 100644
index 00000000000..7f992353ffd
--- /dev/null
+++ b/drivers/input/misc/adxl34x-spi.c
@@ -0,0 +1,145 @@
+/*
+ * ADXL345/346 Three-Axis Digital Accelerometers (SPI Interface)
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Copyright (C) 2009 Michael Hennerich, Analog Devices Inc.
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/input.h>	/* BUS_SPI */
+#include <linux/module.h>
+#include <linux/spi/spi.h>
+#include <linux/types.h>
+#include "adxl34x.h"
+
+#define MAX_SPI_FREQ_HZ		5000000
+#define MAX_FREQ_NO_FIFODELAY	1500000
+#define ADXL34X_CMD_MULTB	(1 << 6)	/* set for block reads */
+#define ADXL34X_CMD_READ	(1 << 7)	/* set for all reads */
+#define ADXL34X_WRITECMD(reg)	((reg) & 0x3F)
+#define ADXL34X_READCMD(reg)	(ADXL34X_CMD_READ | ((reg) & 0x3F))
+#define ADXL34X_READMB_CMD(reg) (ADXL34X_CMD_READ | ADXL34X_CMD_MULTB \
+					| ((reg) & 0x3F))
+
+static int adxl34x_spi_read(struct device *dev, unsigned char reg)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	unsigned char cmd;
+
+	cmd = ADXL34X_READCMD(reg);
+
+	return spi_w8r8(spi, cmd);
+}
+
+static int adxl34x_spi_write(struct device *dev,
+			     unsigned char reg, unsigned char val)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	unsigned char buf[2];
+
+	buf[0] = ADXL34X_WRITECMD(reg);
+	buf[1] = val;
+
+	return spi_write(spi, buf, sizeof(buf));
+}
+
+static int adxl34x_spi_read_block(struct device *dev,
+				  unsigned char reg, int count,
+				  void *buf)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	ssize_t status;
+
+	reg = ADXL34X_READMB_CMD(reg);
+	status = spi_write_then_read(spi, &reg, 1, buf, count);
+
+	return (status < 0) ? status : 0;
+}
+
+static const struct adxl34x_bus_ops adx134x_spi_bops = {
+	.bustype	= BUS_SPI,
+	.write		= adxl34x_spi_write,
+	.read		= adxl34x_spi_read,
+	.read_block	= adxl34x_spi_read_block,
+};
+
+static int __devinit adxl34x_spi_probe(struct spi_device *spi)
+{
+	struct adxl34x *ac;
+
+	/* don't exceed max specified SPI CLK frequency */
+	if (spi->max_speed_hz > MAX_SPI_FREQ_HZ) {
+		dev_err(&spi->dev, "SPI CLK %d Hz too fast\n", spi->max_speed_hz);
+		return -EINVAL;
+	}
+
+	ac = adxl34x_probe(&spi->dev, spi->irq,
+			   spi->max_speed_hz > MAX_FREQ_NO_FIFODELAY,
+			   &adx134x_spi_bops);
+
+	if (IS_ERR(ac))
+		return PTR_ERR(ac);
+
+	spi_set_drvdata(spi, ac);
+
+	return 0;
+}
+
+static int __devexit adxl34x_spi_remove(struct spi_device *spi)
+{
+	struct adxl34x *ac = dev_get_drvdata(&spi->dev);
+
+	return adxl34x_remove(ac);
+}
+
+#ifdef CONFIG_PM
+static int adxl34x_suspend(struct spi_device *spi, pm_message_t message)
+{
+	struct adxl34x *ac = dev_get_drvdata(&spi->dev);
+
+	adxl34x_disable(ac);
+
+	return 0;
+}
+
+static int adxl34x_resume(struct spi_device *spi)
+{
+	struct adxl34x *ac = dev_get_drvdata(&spi->dev);
+
+	adxl34x_enable(ac);
+
+	return 0;
+}
+#else
+# define adxl34x_suspend NULL
+# define adxl34x_resume  NULL
+#endif
+
+static struct spi_driver adxl34x_driver = {
+	.driver = {
+		.name = "adxl34x",
+		.bus = &spi_bus_type,
+		.owner = THIS_MODULE,
+	},
+	.probe   = adxl34x_spi_probe,
+	.remove  = __devexit_p(adxl34x_spi_remove),
+	.suspend = adxl34x_suspend,
+	.resume  = adxl34x_resume,
+};
+
+static int __init adxl34x_spi_init(void)
+{
+	return spi_register_driver(&adxl34x_driver);
+}
+module_init(adxl34x_spi_init);
+
+static void __exit adxl34x_spi_exit(void)
+{
+	spi_unregister_driver(&adxl34x_driver);
+}
+module_exit(adxl34x_spi_exit);
+
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("ADXL345/346 Three-Axis Digital Accelerometer SPI Bus Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/adxl34x.c b/drivers/input/misc/adxl34x.c
new file mode 100644
index 00000000000..07f9ef63154
--- /dev/null
+++ b/drivers/input/misc/adxl34x.c
@@ -0,0 +1,840 @@
+/*
+ * ADXL345/346 Three-Axis Digital Accelerometers
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Copyright (C) 2009 Michael Hennerich, Analog Devices Inc.
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/input/adxl34x.h>
+
+#include "adxl34x.h"
+
+/* ADXL345/6 Register Map */
+#define DEVID		0x00	/* R   Device ID */
+#define THRESH_TAP	0x1D	/* R/W Tap threshold */
+#define OFSX		0x1E	/* R/W X-axis offset */
+#define OFSY		0x1F	/* R/W Y-axis offset */
+#define OFSZ		0x20	/* R/W Z-axis offset */
+#define DUR		0x21	/* R/W Tap duration */
+#define LATENT		0x22	/* R/W Tap latency */
+#define WINDOW		0x23	/* R/W Tap window */
+#define THRESH_ACT	0x24	/* R/W Activity threshold */
+#define THRESH_INACT	0x25	/* R/W Inactivity threshold */
+#define TIME_INACT	0x26	/* R/W Inactivity time */
+#define ACT_INACT_CTL	0x27	/* R/W Axis enable control for activity and */
+				/* inactivity detection */
+#define THRESH_FF	0x28	/* R/W Free-fall threshold */
+#define TIME_FF		0x29	/* R/W Free-fall time */
+#define TAP_AXES	0x2A	/* R/W Axis control for tap/double tap */
+#define ACT_TAP_STATUS	0x2B	/* R   Source of tap/double tap */
+#define BW_RATE		0x2C	/* R/W Data rate and power mode control */
+#define POWER_CTL	0x2D	/* R/W Power saving features control */
+#define INT_ENABLE	0x2E	/* R/W Interrupt enable control */
+#define INT_MAP		0x2F	/* R/W Interrupt mapping control */
+#define INT_SOURCE	0x30	/* R   Source of interrupts */
+#define DATA_FORMAT	0x31	/* R/W Data format control */
+#define DATAX0		0x32	/* R   X-Axis Data 0 */
+#define DATAX1		0x33	/* R   X-Axis Data 1 */
+#define DATAY0		0x34	/* R   Y-Axis Data 0 */
+#define DATAY1		0x35	/* R   Y-Axis Data 1 */
+#define DATAZ0		0x36	/* R   Z-Axis Data 0 */
+#define DATAZ1		0x37	/* R   Z-Axis Data 1 */
+#define FIFO_CTL	0x38	/* R/W FIFO control */
+#define FIFO_STATUS	0x39	/* R   FIFO status */
+#define TAP_SIGN	0x3A	/* R   Sign and source for tap/double tap */
+/* Orientation ADXL346 only */
+#define ORIENT_CONF	0x3B	/* R/W Orientation configuration */
+#define ORIENT		0x3C	/* R   Orientation status */
+
+/* DEVIDs */
+#define ID_ADXL345	0xE5
+#define ID_ADXL346	0xE6
+
+/* INT_ENABLE/INT_MAP/INT_SOURCE Bits */
+#define DATA_READY	(1 << 7)
+#define SINGLE_TAP	(1 << 6)
+#define DOUBLE_TAP	(1 << 5)
+#define ACTIVITY	(1 << 4)
+#define INACTIVITY	(1 << 3)
+#define FREE_FALL	(1 << 2)
+#define WATERMARK	(1 << 1)
+#define OVERRUN		(1 << 0)
+
+/* ACT_INACT_CONTROL Bits */
+#define ACT_ACDC	(1 << 7)
+#define ACT_X_EN	(1 << 6)
+#define ACT_Y_EN	(1 << 5)
+#define ACT_Z_EN	(1 << 4)
+#define INACT_ACDC	(1 << 3)
+#define INACT_X_EN	(1 << 2)
+#define INACT_Y_EN	(1 << 1)
+#define INACT_Z_EN	(1 << 0)
+
+/* TAP_AXES Bits */
+#define SUPPRESS	(1 << 3)
+#define TAP_X_EN	(1 << 2)
+#define TAP_Y_EN	(1 << 1)
+#define TAP_Z_EN	(1 << 0)
+
+/* ACT_TAP_STATUS Bits */
+#define ACT_X_SRC	(1 << 6)
+#define ACT_Y_SRC	(1 << 5)
+#define ACT_Z_SRC	(1 << 4)
+#define ASLEEP		(1 << 3)
+#define TAP_X_SRC	(1 << 2)
+#define TAP_Y_SRC	(1 << 1)
+#define TAP_Z_SRC	(1 << 0)
+
+/* BW_RATE Bits */
+#define LOW_POWER	(1 << 4)
+#define RATE(x)		((x) & 0xF)
+
+/* POWER_CTL Bits */
+#define PCTL_LINK	(1 << 5)
+#define PCTL_AUTO_SLEEP (1 << 4)
+#define PCTL_MEASURE	(1 << 3)
+#define PCTL_SLEEP	(1 << 2)
+#define PCTL_WAKEUP(x)	((x) & 0x3)
+
+/* DATA_FORMAT Bits */
+#define SELF_TEST	(1 << 7)
+#define SPI		(1 << 6)
+#define INT_INVERT	(1 << 5)
+#define FULL_RES	(1 << 3)
+#define JUSTIFY		(1 << 2)
+#define RANGE(x)	((x) & 0x3)
+#define RANGE_PM_2g	0
+#define RANGE_PM_4g	1
+#define RANGE_PM_8g	2
+#define RANGE_PM_16g	3
+
+/*
+ * Maximum value our axis may get in full res mode for the input device
+ * (signed 13 bits)
+ */
+#define ADXL_FULLRES_MAX_VAL 4096
+
+/*
+ * Maximum value our axis may get in fixed res mode for the input device
+ * (signed 10 bits)
+ */
+#define ADXL_FIXEDRES_MAX_VAL 512
+
+/* FIFO_CTL Bits */
+#define FIFO_MODE(x)	(((x) & 0x3) << 6)
+#define FIFO_BYPASS	0
+#define FIFO_FIFO	1
+#define FIFO_STREAM	2
+#define FIFO_TRIGGER	3
+#define TRIGGER		(1 << 5)
+#define SAMPLES(x)	((x) & 0x1F)
+
+/* FIFO_STATUS Bits */
+#define FIFO_TRIG	(1 << 7)
+#define ENTRIES(x)	((x) & 0x3F)
+
+/* TAP_SIGN Bits ADXL346 only */
+#define XSIGN		(1 << 6)
+#define YSIGN		(1 << 5)
+#define ZSIGN		(1 << 4)
+#define XTAP		(1 << 3)
+#define YTAP		(1 << 2)
+#define ZTAP		(1 << 1)
+
+/* ORIENT_CONF ADXL346 only */
+#define ORIENT_DEADZONE(x)	(((x) & 0x7) << 4)
+#define ORIENT_DIVISOR(x)	((x) & 0x7)
+
+/* ORIENT ADXL346 only */
+#define ADXL346_2D_VALID		(1 << 6)
+#define ADXL346_2D_ORIENT(x)		(((x) & 0x30) >> 4)	/* bits 5:4 */
+#define ADXL346_3D_VALID		(1 << 3)
+#define ADXL346_3D_ORIENT(x)		((x) & 0x7)
+#define ADXL346_2D_PORTRAIT_POS		0	/* +X */
+#define ADXL346_2D_PORTRAIT_NEG		1	/* -X */
+#define ADXL346_2D_LANDSCAPE_POS	2	/* +Y */
+#define ADXL346_2D_LANDSCAPE_NEG	3	/* -Y */
+
+#define ADXL346_3D_FRONT		3	/* +X */
+#define ADXL346_3D_BACK			4	/* -X */
+#define ADXL346_3D_RIGHT		2	/* +Y */
+#define ADXL346_3D_LEFT			5	/* -Y */
+#define ADXL346_3D_TOP			1	/* +Z */
+#define ADXL346_3D_BOTTOM		6	/* -Z */
+
+#undef ADXL_DEBUG
+
+#define ADXL_X_AXIS			0
+#define ADXL_Y_AXIS			1
+#define ADXL_Z_AXIS			2
+
+#define AC_READ(ac, reg)	((ac)->bops->read((ac)->dev, reg))
+#define AC_WRITE(ac, reg, val)	((ac)->bops->write((ac)->dev, reg, val))
+
+struct axis_triple {
+	int x;
+	int y;
+	int z;
+};
+
+struct adxl34x {
+	struct device *dev;
+	struct input_dev *input;
+	struct mutex mutex;	/* reentrant protection for struct */
+	struct adxl34x_platform_data pdata;
+	struct axis_triple swcal;
+	struct axis_triple hwcal;
+	struct axis_triple saved;
+	char phys[32];
+	bool disabled;	/* P: mutex */
+	bool opened;	/* P: mutex */
+	bool fifo_delay;
+	int irq;
+	unsigned model;
+	unsigned int_mask;
+
+	const struct adxl34x_bus_ops *bops;
+};
+
+static const struct adxl34x_platform_data adxl34x_default_init = {
+	.tap_threshold = 35,
+	.tap_duration = 3,
+	.tap_latency = 20,
+	.tap_window = 20,
+	.tap_axis_control = ADXL_TAP_X_EN | ADXL_TAP_Y_EN | ADXL_TAP_Z_EN,
+	.act_axis_control = 0xFF,
+	.activity_threshold = 6,
+	.inactivity_threshold = 4,
+	.inactivity_time = 3,
+	.free_fall_threshold = 8,
+	.free_fall_time = 0x20,
+	.data_rate = 8,
+	.data_range = ADXL_FULL_RES,
+
+	.ev_type = EV_ABS,
+	.ev_code_x = ABS_X,	/* EV_REL */
+	.ev_code_y = ABS_Y,	/* EV_REL */
+	.ev_code_z = ABS_Z,	/* EV_REL */
+
+	.ev_code_tap = {BTN_TOUCH, BTN_TOUCH, BTN_TOUCH}, /* EV_KEY {x,y,z} */
+	.power_mode = ADXL_AUTO_SLEEP | ADXL_LINK,
+	.fifo_mode = FIFO_STREAM,
+	.watermark = 0,
+};
+
+static void adxl34x_get_triple(struct adxl34x *ac, struct axis_triple *axis)
+{
+	short buf[3];
+
+	ac->bops->read_block(ac->dev, DATAX0, DATAZ1 - DATAX0 + 1, buf);
+
+	mutex_lock(&ac->mutex);
+	ac->saved.x = (s16) le16_to_cpu(buf[0]);
+	axis->x = ac->saved.x;
+
+	ac->saved.y = (s16) le16_to_cpu(buf[1]);
+	axis->y = ac->saved.y;
+
+	ac->saved.z = (s16) le16_to_cpu(buf[2]);
+	axis->z = ac->saved.z;
+	mutex_unlock(&ac->mutex);
+}
+
+static void adxl34x_service_ev_fifo(struct adxl34x *ac)
+{
+	struct adxl34x_platform_data *pdata = &ac->pdata;
+	struct axis_triple axis;
+
+	adxl34x_get_triple(ac, &axis);
+
+	input_event(ac->input, pdata->ev_type, pdata->ev_code_x,
+		    axis.x - ac->swcal.x);
+	input_event(ac->input, pdata->ev_type, pdata->ev_code_y,
+		    axis.y - ac->swcal.y);
+	input_event(ac->input, pdata->ev_type, pdata->ev_code_z,
+		    axis.z - ac->swcal.z);
+}
+
+static void adxl34x_report_key_single(struct input_dev *input, int key)
+{
+	input_report_key(input, key, true);
+	input_sync(input);
+	input_report_key(input, key, false);
+}
+
+static void adxl34x_send_key_events(struct adxl34x *ac,
+		struct adxl34x_platform_data *pdata, int status, int press)
+{
+	int i;
+
+	for (i = ADXL_X_AXIS; i <= ADXL_Z_AXIS; i++) {
+		if (status & (1 << (ADXL_Z_AXIS - i)))
+			input_report_key(ac->input,
+					 pdata->ev_code_tap[i], press);
+	}
+}
+
+static void adxl34x_do_tap(struct adxl34x *ac,
+		struct adxl34x_platform_data *pdata, int status)
+{
+	adxl34x_send_key_events(ac, pdata, status, true);
+	input_sync(ac->input);
+	adxl34x_send_key_events(ac, pdata, status, false);
+}
+
+static irqreturn_t adxl34x_irq(int irq, void *handle)
+{
+	struct adxl34x *ac = handle;
+	struct adxl34x_platform_data *pdata = &ac->pdata;
+	int int_stat, tap_stat, samples;
+
+	/*
+	 * ACT_TAP_STATUS should be read before clearing the interrupt
+	 * Avoid reading ACT_TAP_STATUS in case TAP detection is disabled
+	 */
+
+	if (pdata->tap_axis_control & (TAP_X_EN | TAP_Y_EN | TAP_Z_EN))
+		tap_stat = AC_READ(ac, ACT_TAP_STATUS);
+	else
+		tap_stat = 0;
+
+	int_stat = AC_READ(ac, INT_SOURCE);
+
+	if (int_stat & FREE_FALL)
+		adxl34x_report_key_single(ac->input, pdata->ev_code_ff);
+
+	if (int_stat & OVERRUN)
+		dev_dbg(ac->dev, "OVERRUN\n");
+
+	if (int_stat & (SINGLE_TAP | DOUBLE_TAP)) {
+		adxl34x_do_tap(ac, pdata, tap_stat);
+
+		if (int_stat & DOUBLE_TAP)
+			adxl34x_do_tap(ac, pdata, tap_stat);
+	}
+
+	if (pdata->ev_code_act_inactivity) {
+		if (int_stat & ACTIVITY)
+			input_report_key(ac->input,
+					 pdata->ev_code_act_inactivity, 1);
+		if (int_stat & INACTIVITY)
+			input_report_key(ac->input,
+					 pdata->ev_code_act_inactivity, 0);
+	}
+
+	if (int_stat & (DATA_READY | WATERMARK)) {
+
+		if (pdata->fifo_mode)
+			samples = ENTRIES(AC_READ(ac, FIFO_STATUS)) + 1;
+		else
+			samples = 1;
+
+		for (; samples > 0; samples--) {
+			adxl34x_service_ev_fifo(ac);
+			/*
+			 * To ensure that the FIFO has
+			 * completely popped, there must be at least 5 us between
+			 * the end of reading the data registers, signified by the
+			 * transition to register 0x38 from 0x37 or the CS pin
+			 * going high, and the start of new reads of the FIFO or
+			 * reading the FIFO_STATUS register. For SPI operation at
+			 * 1.5 MHz or lower, the register addressing portion of the
+			 * transmission is sufficient delay to ensure the FIFO has
+			 * completely popped. It is necessary for SPI operation
+			 * greater than 1.5 MHz to de-assert the CS pin to ensure a
+			 * total of 5 us, which is at most 3.4 us at 5 MHz
+			 * operation.
+			 */
+			if (ac->fifo_delay && (samples > 1))
+				udelay(3);
+		}
+	}
+
+	input_sync(ac->input);
+
+	return IRQ_HANDLED;
+}
+
+static void __adxl34x_disable(struct adxl34x *ac)
+{
+	if (!ac->disabled && ac->opened) {
+		/*
+		 * A '0' places the ADXL34x into standby mode
+		 * with minimum power consumption.
+		 */
+		AC_WRITE(ac, POWER_CTL, 0);
+
+		ac->disabled = true;
+	}
+}
+
+static void __adxl34x_enable(struct adxl34x *ac)
+{
+	if (ac->disabled && ac->opened) {
+		AC_WRITE(ac, POWER_CTL, ac->pdata.power_mode | PCTL_MEASURE);
+		ac->disabled = false;
+	}
+}
+
+void adxl34x_disable(struct adxl34x *ac)
+{
+	mutex_lock(&ac->mutex);
+	__adxl34x_disable(ac);
+	mutex_unlock(&ac->mutex);
+}
+EXPORT_SYMBOL_GPL(adxl34x_disable);
+
+void adxl34x_enable(struct adxl34x *ac)
+{
+	mutex_lock(&ac->mutex);
+	__adxl34x_enable(ac);
+	mutex_unlock(&ac->mutex);
+}
+
+EXPORT_SYMBOL_GPL(adxl34x_enable);
+
+static ssize_t adxl34x_disable_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct adxl34x *ac = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%u\n", ac->disabled);
+}
+
+static ssize_t adxl34x_disable_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct adxl34x *ac = dev_get_drvdata(dev);
+	unsigned long val;
+	int error;
+
+	error = strict_strtoul(buf, 10, &val);
+	if (error)
+		return error;
+
+	if (val)
+		adxl34x_disable(ac);
+	else
+		adxl34x_enable(ac);
+
+	return count;
+}
+
+static DEVICE_ATTR(disable, 0664, adxl34x_disable_show, adxl34x_disable_store);
+
+static ssize_t adxl34x_calibrate_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct adxl34x *ac = dev_get_drvdata(dev);
+	ssize_t count;
+
+	mutex_lock(&ac->mutex);
+	count = sprintf(buf, "%d,%d,%d\n",
+			ac->hwcal.x * 4 + ac->swcal.x,
+			ac->hwcal.y * 4 + ac->swcal.y,
+			ac->hwcal.z * 4 + ac->swcal.z);
+	mutex_unlock(&ac->mutex);
+
+	return count;
+}
+
+static ssize_t adxl34x_calibrate_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t count)
+{
+	struct adxl34x *ac = dev_get_drvdata(dev);
+
+	/*
+	 * Hardware offset calibration has a resolution of 15.6 mg/LSB.
+	 * We use HW calibration and handle the remaining bits in SW. (4mg/LSB)
+	 */
+
+	mutex_lock(&ac->mutex);
+	ac->hwcal.x -= (ac->saved.x / 4);
+	ac->swcal.x = ac->saved.x % 4;
+
+	ac->hwcal.y -= (ac->saved.y / 4);
+	ac->swcal.y = ac->saved.y % 4;
+
+	ac->hwcal.z -= (ac->saved.z / 4);
+	ac->swcal.z = ac->saved.z % 4;
+
+	AC_WRITE(ac, OFSX, (s8) ac->hwcal.x);
+	AC_WRITE(ac, OFSY, (s8) ac->hwcal.y);
+	AC_WRITE(ac, OFSZ, (s8) ac->hwcal.z);
+	mutex_unlock(&ac->mutex);
+
+	return count;
+}
+
+static DEVICE_ATTR(calibrate, 0664,
+		   adxl34x_calibrate_show, adxl34x_calibrate_store);
+
+static ssize_t adxl34x_rate_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct adxl34x *ac = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%u\n", RATE(ac->pdata.data_rate));
+}
+
+static ssize_t adxl34x_rate_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct adxl34x *ac = dev_get_drvdata(dev);
+	unsigned long val;
+	int error;
+
+	error = strict_strtoul(buf, 10, &val);
+	if (error)
+		return error;
+
+	mutex_lock(&ac->mutex);
+
+	ac->pdata.data_rate = RATE(val);
+	AC_WRITE(ac, BW_RATE,
+		 ac->pdata.data_rate |
+			(ac->pdata.low_power_mode ? LOW_POWER : 0));
+
+	mutex_unlock(&ac->mutex);
+
+	return count;
+}
+
+static DEVICE_ATTR(rate, 0664, adxl34x_rate_show, adxl34x_rate_store);
+
+static ssize_t adxl34x_autosleep_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct adxl34x *ac = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%u\n",
+		ac->pdata.power_mode & (PCTL_AUTO_SLEEP | PCTL_LINK) ? 1 : 0);
+}
+
+static ssize_t adxl34x_autosleep_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct adxl34x *ac = dev_get_drvdata(dev);
+	unsigned long val;
+	int error;
+
+	error = strict_strtoul(buf, 10, &val);
+	if (error)
+		return error;
+
+	mutex_lock(&ac->mutex);
+
+	if (val)
+		ac->pdata.power_mode |= (PCTL_AUTO_SLEEP | PCTL_LINK);
+	else
+		ac->pdata.power_mode &= ~(PCTL_AUTO_SLEEP | PCTL_LINK);
+
+	if (!ac->disabled && ac->opened)
+		AC_WRITE(ac, POWER_CTL, ac->pdata.power_mode | PCTL_MEASURE);
+
+	mutex_unlock(&ac->mutex);
+
+	return count;
+}
+
+static DEVICE_ATTR(autosleep, 0664,
+		   adxl34x_autosleep_show, adxl34x_autosleep_store);
+
+static ssize_t adxl34x_position_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct adxl34x *ac = dev_get_drvdata(dev);
+	ssize_t count;
+
+	mutex_lock(&ac->mutex);
+	count = sprintf(buf, "(%d, %d, %d)\n",
+			ac->saved.x, ac->saved.y, ac->saved.z);
+	mutex_unlock(&ac->mutex);
+
+	return count;
+}
+
+static DEVICE_ATTR(position, S_IRUGO, adxl34x_position_show, NULL);
+
+#ifdef ADXL_DEBUG
+static ssize_t adxl34x_write_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct adxl34x *ac = dev_get_drvdata(dev);
+	unsigned long val;
+	int error;
+
+	/*
+	 * This allows basic ADXL register write access for debug purposes.
+	 */
+	error = strict_strtoul(buf, 16, &val);
+	if (error)
+		return error;
+
+	mutex_lock(&ac->mutex);
+	AC_WRITE(ac, val >> 8, val & 0xFF);
+	mutex_unlock(&ac->mutex);
+
+	return count;
+}
+
+static DEVICE_ATTR(write, 0664, NULL, adxl34x_write_store);
+#endif
+
+static struct attribute *adxl34x_attributes[] = {
+	&dev_attr_disable.attr,
+	&dev_attr_calibrate.attr,
+	&dev_attr_rate.attr,
+	&dev_attr_autosleep.attr,
+	&dev_attr_position.attr,
+#ifdef ADXL_DEBUG
+	&dev_attr_write.attr,
+#endif
+	NULL
+};
+
+static const struct attribute_group adxl34x_attr_group = {
+	.attrs = adxl34x_attributes,
+};
+
+static int adxl34x_input_open(struct input_dev *input)
+{
+	struct adxl34x *ac = input_get_drvdata(input);
+
+	mutex_lock(&ac->mutex);
+	ac->opened = true;
+	__adxl34x_enable(ac);
+	mutex_unlock(&ac->mutex);
+
+	return 0;
+}
+
+static void adxl34x_input_close(struct input_dev *input)
+{
+	struct adxl34x *ac = input_get_drvdata(input);
+
+	mutex_lock(&ac->mutex);
+	__adxl34x_disable(ac);
+	ac->opened = false;
+	mutex_unlock(&ac->mutex);
+}
+
+struct adxl34x *adxl34x_probe(struct device *dev, int irq,
+			      bool fifo_delay_default,
+			      const struct adxl34x_bus_ops *bops)
+{
+	struct adxl34x *ac;
+	struct input_dev *input_dev;
+	const struct adxl34x_platform_data *pdata;
+	int err, range;
+	unsigned char revid;
+
+	if (!irq) {
+		dev_err(dev, "no IRQ?\n");
+		err = -ENODEV;
+		goto err_out;
+	}
+
+	ac = kzalloc(sizeof(*ac), GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!ac || !input_dev) {
+		err = -ENOMEM;
+		goto err_free_mem;	/* release whichever allocation succeeded */
+	}
+
+	ac->fifo_delay = fifo_delay_default;
+
+	pdata = dev->platform_data;
+	if (!pdata) {
+		dev_dbg(dev,
+			"No platfrom data: Using default initialization\n");
+		pdata = &adxl34x_default_init;
+	}
+
+	ac->pdata = *pdata;
+	pdata = &ac->pdata;
+
+	ac->input = input_dev;
+	ac->disabled = true;
+	ac->dev = dev;
+	ac->irq = irq;
+	ac->bops = bops;
+
+	mutex_init(&ac->mutex);
+
+	input_dev->name = "ADXL34x accelerometer";
+	revid = ac->bops->read(dev, DEVID);
+
+	switch (revid) {
+	case ID_ADXL345:
+		ac->model = 345;
+		break;
+	case ID_ADXL346:
+		ac->model = 346;
+		break;
+	default:
+		dev_err(dev, "Failed to probe %s\n", input_dev->name);
+		err = -ENODEV;
+		goto err_free_mem;
+	}
+
+	snprintf(ac->phys, sizeof(ac->phys), "%s/input0", dev_name(dev));
+
+	input_dev->phys = ac->phys;
+	input_dev->dev.parent = dev;
+	input_dev->id.product = ac->model;
+	input_dev->id.bustype = bops->bustype;
+	input_dev->open = adxl34x_input_open;
+	input_dev->close = adxl34x_input_close;
+
+	input_set_drvdata(input_dev, ac);
+
+	__set_bit(ac->pdata.ev_type, input_dev->evbit);
+
+	if (ac->pdata.ev_type == EV_REL) {
+		__set_bit(REL_X, input_dev->relbit);
+		__set_bit(REL_Y, input_dev->relbit);
+		__set_bit(REL_Z, input_dev->relbit);
+	} else {
+		/* EV_ABS */
+		__set_bit(ABS_X, input_dev->absbit);
+		__set_bit(ABS_Y, input_dev->absbit);
+		__set_bit(ABS_Z, input_dev->absbit);
+
+		if (pdata->data_range & FULL_RES)
+			range = ADXL_FULLRES_MAX_VAL;	/* Signed 13-bit */
+		else
+			range = ADXL_FIXEDRES_MAX_VAL;	/* Signed 10-bit */
+
+		input_set_abs_params(input_dev, ABS_X, -range, range, 3, 3);
+		input_set_abs_params(input_dev, ABS_Y, -range, range, 3, 3);
+		input_set_abs_params(input_dev, ABS_Z, -range, range, 3, 3);
+	}
+
+	__set_bit(EV_KEY, input_dev->evbit);
+	__set_bit(pdata->ev_code_tap[ADXL_X_AXIS], input_dev->keybit);
+	__set_bit(pdata->ev_code_tap[ADXL_Y_AXIS], input_dev->keybit);
+	__set_bit(pdata->ev_code_tap[ADXL_Z_AXIS], input_dev->keybit);
+
+	if (pdata->ev_code_ff) {
+		ac->int_mask = FREE_FALL;
+		__set_bit(pdata->ev_code_ff, input_dev->keybit);
+	}
+
+	if (pdata->ev_code_act_inactivity)
+		__set_bit(pdata->ev_code_act_inactivity, input_dev->keybit);
+
+	ac->int_mask |= ACTIVITY | INACTIVITY;
+
+	if (pdata->watermark) {
+		ac->int_mask |= WATERMARK;
+		if (!FIFO_MODE(pdata->fifo_mode))
+			ac->pdata.fifo_mode |= FIFO_STREAM;
+	} else {
+		ac->int_mask |= DATA_READY;
+	}
+
+	if (pdata->tap_axis_control & (TAP_X_EN | TAP_Y_EN | TAP_Z_EN))
+		ac->int_mask |= SINGLE_TAP | DOUBLE_TAP;
+
+	if (FIFO_MODE(pdata->fifo_mode) == FIFO_BYPASS)
+		ac->fifo_delay = false;
+
+	ac->bops->write(dev, POWER_CTL, 0);
+
+	err = request_threaded_irq(ac->irq, NULL, adxl34x_irq,
+				   IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+				   dev_name(dev), ac);
+	if (err) {
+		dev_err(dev, "irq %d busy?\n", ac->irq);
+		goto err_free_mem;
+	}
+
+	err = sysfs_create_group(&dev->kobj, &adxl34x_attr_group);
+	if (err)
+		goto err_free_irq;
+
+	err = input_register_device(input_dev);
+	if (err)
+		goto err_remove_attr;
+
+	/* Load platform data (THRESH_TAP is written with the tap group) */
+	AC_WRITE(ac, OFSX, pdata->x_axis_offset);
+	ac->hwcal.x = pdata->x_axis_offset;
+	AC_WRITE(ac, OFSY, pdata->y_axis_offset);
+	ac->hwcal.y = pdata->y_axis_offset;
+	AC_WRITE(ac, OFSZ, pdata->z_axis_offset);
+	ac->hwcal.z = pdata->z_axis_offset;
+	AC_WRITE(ac, THRESH_TAP, pdata->tap_threshold);
+	AC_WRITE(ac, DUR, pdata->tap_duration);
+	AC_WRITE(ac, LATENT, pdata->tap_latency);
+	AC_WRITE(ac, WINDOW, pdata->tap_window);
+	AC_WRITE(ac, THRESH_ACT, pdata->activity_threshold);
+	AC_WRITE(ac, THRESH_INACT, pdata->inactivity_threshold);
+	AC_WRITE(ac, TIME_INACT, pdata->inactivity_time);
+	AC_WRITE(ac, THRESH_FF, pdata->free_fall_threshold);
+	AC_WRITE(ac, TIME_FF, pdata->free_fall_time);
+	AC_WRITE(ac, TAP_AXES, pdata->tap_axis_control);
+	AC_WRITE(ac, ACT_INACT_CTL, pdata->act_axis_control);
+	AC_WRITE(ac, BW_RATE, RATE(ac->pdata.data_rate) |
+		 (pdata->low_power_mode ? LOW_POWER : 0));
+	AC_WRITE(ac, DATA_FORMAT, pdata->data_range);
+	AC_WRITE(ac, FIFO_CTL, FIFO_MODE(pdata->fifo_mode) |
+			SAMPLES(pdata->watermark));
+
+	if (pdata->use_int2)
+		/* Map all INTs to INT2 */
+		AC_WRITE(ac, INT_MAP, ac->int_mask | OVERRUN);
+	else
+		/* Map all INTs to INT1 */
+		AC_WRITE(ac, INT_MAP, 0);
+
+	AC_WRITE(ac, INT_ENABLE, ac->int_mask | OVERRUN);
+
+	ac->pdata.power_mode &= (PCTL_AUTO_SLEEP | PCTL_LINK);
+
+	return ac;
+
+ err_remove_attr:
+	sysfs_remove_group(&dev->kobj, &adxl34x_attr_group);
+ err_free_irq:
+	free_irq(ac->irq, ac);
+ err_free_mem:
+	input_free_device(input_dev);
+	kfree(ac);
+ err_out:
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(adxl34x_probe);
+
+int adxl34x_remove(struct adxl34x *ac)
+{
+	adxl34x_disable(ac);
+	sysfs_remove_group(&ac->dev->kobj, &adxl34x_attr_group);
+	free_irq(ac->irq, ac);
+	input_unregister_device(ac->input);
+	/* Log while ac is still valid; it must not be touched after kfree(). */
+	dev_dbg(ac->dev, "unregistered accelerometer\n");
+	kfree(ac);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(adxl34x_remove);
+
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("ADXL345/346 Three-Axis Digital Accelerometer Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/adxl34x.h b/drivers/input/misc/adxl34x.h
new file mode 100644
index 00000000000..ea9093c15c8
--- /dev/null
+++ b/drivers/input/misc/adxl34x.h
@@ -0,0 +1,30 @@
+/*
+ * ADXL345/346 Three-Axis Digital Accelerometers (I2C/SPI Interface)
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Copyright (C) 2009 Michael Hennerich, Analog Devices Inc.
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef _ADXL34X_H_
+#define _ADXL34X_H_
+
+struct device;
+struct adxl34x;
+
+struct adxl34x_bus_ops {
+	u16 bustype;
+	int (*read)(struct device *, unsigned char);
+	int (*read_block)(struct device *, unsigned char, int, void *);
+	int (*write)(struct device *, unsigned char, unsigned char);
+};
+
+void adxl34x_disable(struct adxl34x *ac);
+void adxl34x_enable(struct adxl34x *ac);
+struct adxl34x *adxl34x_probe(struct device *dev, int irq,
+			      bool fifo_delay_default,
+			      const struct adxl34x_bus_ops *bops);
+int adxl34x_remove(struct adxl34x *ac);
+
+#endif
diff --git a/include/linux/input/adxl34x.h b/include/linux/input/adxl34x.h
new file mode 100644
index 00000000000..71211823803
--- /dev/null
+++ b/include/linux/input/adxl34x.h
@@ -0,0 +1,293 @@
+/*
+ * include/linux/input/adxl34x.h
+ *
+ * Digital Accelerometer characteristics are highly application specific
+ * and may vary between boards and models. The platform_data for the
+ * device's "struct device" holds this information.
+ *
+ * Copyright 2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef __LINUX_INPUT_ADXL34X_H__
+#define __LINUX_INPUT_ADXL34X_H__
+
+struct adxl34x_platform_data {
+
+	/*
+	 * X,Y,Z Axis Offset:
+	 * offer user offset adjustments in two's complement
+	 * form with a scale factor of 15.6 mg/LSB (i.e. 0x7F = +2 g)
+	 */
+
+	s8 x_axis_offset;
+	s8 y_axis_offset;
+	s8 z_axis_offset;
+
+	/*
+	 * TAP_X/Y/Z Enable: Setting TAP_X, Y, or Z Enable enables X,
+	 * Y, or Z participation in Tap detection. A '0' excludes the
+	 * selected axis from participation in Tap detection.
+	 * Setting the SUPPRESS bit suppresses Double Tap detection if
+	 * acceleration greater than tap_threshold is present between
+	 * taps.
+	 */
+
+#define ADXL_SUPPRESS	(1 << 3)
+#define ADXL_TAP_X_EN	(1 << 2)
+#define ADXL_TAP_Y_EN	(1 << 1)
+#define ADXL_TAP_Z_EN	(1 << 0)
+
+	u8 tap_axis_control;
+
+	/*
+	 * tap_threshold:
+	 * holds the threshold value for tap detection/interrupts.
+	 * The data format is unsigned. The scale factor is 62.5 mg/LSB
+	 * (i.e. 0xFF = +16 g). A zero value may result in undesirable
+	 * behavior if Tap/Double Tap is enabled.
+	 */
+
+	u8 tap_threshold;
+
+	/*
+	 * tap_duration:
+	 * is an unsigned time value representing the maximum
+	 * time that an event must be above the tap_threshold threshold
+	 * to qualify as a tap event. The scale factor is 625 us/LSB. A zero
+	 * value will prevent Tap/Double Tap functions from working.
+	 */
+
+	u8 tap_duration;
+
+	/*
+	 * tap_latency:
+	 * is an unsigned time value representing the wait time
+	 * from the detection of a tap event to the opening of the time
+	 * window tap_window for a possible second tap event. The scale
+	 * factor is 1.25 ms/LSB. A zero value will disable the Double Tap
+	 * function.
+	 */
+
+	u8 tap_latency;
+
+	/*
+	 * tap_window:
+	 * is an unsigned time value representing the amount
+	 * of time after the expiration of tap_latency during which a second
+	 * tap can begin. The scale factor is 1.25 ms/LSB. A zero value will
+	 * disable the Double Tap function.
+	 */
+
+	u8 tap_window;
+
+	/*
+	 * act_axis_control:
+	 * X/Y/Z Enable: A '1' enables X, Y, or Z participation in activity
+	 * or inactivity detection. A '0' excludes the selected axis from
+	 * participation. If all of the axes are excluded, the function is
+	 * disabled.
+	 * AC/DC: A '0' = DC coupled operation and a '1' = AC coupled
+	 * operation. In DC coupled operation, the current acceleration is
+	 * compared with activity_threshold and inactivity_threshold directly
+	 * to determine whether activity or inactivity is detected. In AC
+	 * coupled operation for activity detection, the acceleration value
+	 * at the start of activity detection is taken as a reference value.
+	 * New samples of acceleration are then compared to this
+	 * reference value and if the magnitude of the difference exceeds
+	 * activity_threshold the device will trigger an activity interrupt. In
+	 * AC coupled operation for inactivity detection, a reference value
+	 * is used again for comparison and is updated whenever the
+	 * device exceeds the inactivity threshold. Once the reference
+	 * value is selected, the device compares the magnitude of the
+	 * difference between the reference value and the current
+	 * acceleration with inactivity_threshold. If the difference is below
+	 * inactivity_threshold for a total of inactivity_time, the device is
+	 * considered inactive and the inactivity interrupt is triggered.
+	 */
+
+#define ADXL_ACT_ACDC		(1 << 7)
+#define ADXL_ACT_X_EN		(1 << 6)
+#define ADXL_ACT_Y_EN		(1 << 5)
+#define ADXL_ACT_Z_EN		(1 << 4)
+#define ADXL_INACT_ACDC		(1 << 3)
+#define ADXL_INACT_X_EN		(1 << 2)
+#define ADXL_INACT_Y_EN		(1 << 1)
+#define ADXL_INACT_Z_EN		(1 << 0)
+
+	u8 act_axis_control;
+
+	/*
+	 * activity_threshold:
+	 * holds the threshold value for activity detection.
+	 * The data format is unsigned. The scale factor is
+	 * 62.5 mg/LSB. A zero value may result in undesirable behavior if
+	 * Activity interrupt is enabled.
+	 */
+
+	u8 activity_threshold;
+
+	/*
+	 * inactivity_threshold:
+	 * holds the threshold value for inactivity
+	 * detection. The data format is unsigned. The scale
+	 * factor is 62.5 mg/LSB. A zero value may result in undesirable
+	 * behavior if Inactivity interrupt is enabled.
+	 */
+
+	u8 inactivity_threshold;
+
+	/*
+	 * inactivity_time:
+	 * is an unsigned time value representing the
+	 * amount of time that acceleration must be below the value in
+	 * inactivity_threshold for inactivity to be declared. The scale factor
+	 * is 1 second/LSB. Unlike the other interrupt functions, which
+	 * operate on unfiltered data, the inactivity function operates on the
+	 * filtered output data. At least one output sample must be
+	 * generated for the inactivity interrupt to be triggered. This will
+	 * result in the function appearing un-responsive if the
+	 * inactivity_time register is set with a value less than the time
+	 * constant of the Output Data Rate. A zero value will result in an
+	 * interrupt when the output data is below inactivity_threshold.
+	 */
+
+	u8 inactivity_time;
+
+	/*
+	 * free_fall_threshold:
+	 * holds the threshold value for Free-Fall detection.
+	 * The data format is unsigned. The root-sum-square(RSS) value
+	 * of all axes is calculated and compared to the value in
+	 * free_fall_threshold to determine if a free fall event may be
+	 * occurring.  The scale factor is 62.5 mg/LSB. A zero value may
+	 * result in undesirable behavior if Free-Fall interrupt is
+	 * enabled. Values between 300 and 600 mg (0x05 to 0x09) are
+	 * recommended.
+	 */
+
+	u8 free_fall_threshold;
+
+	/*
+	 * free_fall_time:
+	 * is an unsigned time value representing the minimum
+	 * time that the RSS value of all axes must be less than
+	 * free_fall_threshold to generate a Free-Fall interrupt. The
+	 * scale factor is 5 ms/LSB. A zero value may result in
+	 * undesirable behavior if Free-Fall interrupt is enabled.
+	 * Values between 100 to 350 ms (0x14 to 0x46) are recommended.
+	 */
+
+	u8 free_fall_time;
+
+	/*
+	 * data_rate:
+	 * Selects device bandwidth and output data rate.
+	 * RATE = 3200 Hz / (2^(15 - x)). Default value is 0x0A, or 100 Hz
+	 * Output Data Rate. An Output Data Rate should be selected that
+	 * is appropriate for the communication protocol and frequency
+	 * selected. Selecting too high of an Output Data Rate with a low
+	 * communication speed will result in samples being discarded.
+	 */
+
+	u8 data_rate;
+
+	/*
+	 * data_range:
+	 * FULL_RES: When this bit is set, the device is
+	 * in Full-Resolution Mode, where the output resolution increases
+	 * with RANGE to maintain a 4 mg/LSB scale factor. When this
+	 * bit is cleared the device is in 10-bit Mode and RANGE determines the
+	 * maximum g-Range and scale factor.
+	 */
+
+#define ADXL_FULL_RES		(1 << 3)
+#define ADXL_RANGE_PM_2g	0
+#define ADXL_RANGE_PM_4g	1
+#define ADXL_RANGE_PM_8g	2
+#define ADXL_RANGE_PM_16g	3
+
+	u8 data_range;
+
+	/*
+	 * low_power_mode:
+	 * A '0' = Normal operation and a '1' = Reduced
+	 * power operation with somewhat higher noise.
+	 */
+
+	u8 low_power_mode;
+
+	/*
+	 * power_mode:
+	 * LINK: A '1' with both the activity and inactivity functions
+	 * enabled will delay the start of the activity function until
+	 * inactivity is detected. Once activity is detected, inactivity
+	 * detection will begin and prevent the detection of activity. This
+	 * bit serially links the activity and inactivity functions. When '0'
+	 * the inactivity and activity functions are concurrent. Additional
+	 * information can be found in the Application section under Link
+	 * Mode.
+	 * AUTO_SLEEP: A '1' sets the ADXL34x to switch to Sleep Mode
+	 * when inactivity (acceleration has been below inactivity_threshold
+	 * for at least inactivity_time) is detected and the LINK bit is set.
+	 * A '0' disables automatic switching to Sleep Mode. See SLEEP
+	 * for further description.
+	 */
+
+#define ADXL_LINK	(1 << 5)
+#define ADXL_AUTO_SLEEP	(1 << 4)
+
+	u8 power_mode;
+
+	/*
+	 * fifo_mode:
+	 * BYPASS The FIFO is bypassed
+	 * FIFO   FIFO collects up to 32 values then stops collecting data
+	 * STREAM FIFO holds the last 32 data values. Once full, the FIFO's
+	 *        oldest data is lost as it is replaced with newer data
+	 *
+	 * DEFAULT should be ADXL_FIFO_STREAM
+	 */
+
+#define ADXL_FIFO_BYPASS	0
+#define ADXL_FIFO_FIFO		1
+#define ADXL_FIFO_STREAM	2
+
+	u8 fifo_mode;
+
+	/*
+	 * watermark:
+	 * The Watermark feature can be used to reduce the interrupt load
+	 * of the system. The FIFO fills up to the value stored in watermark
+	 * [1..32] and then generates an interrupt.
+	 * A '0' disables the watermark feature.
+	 */
+
+	u8 watermark;
+
+	u32 ev_type;	/* EV_ABS or EV_REL */
+
+	u32 ev_code_x;	/* ABS_X,Y,Z or REL_X,Y,Z */
+	u32 ev_code_y;	/* ABS_X,Y,Z or REL_X,Y,Z */
+	u32 ev_code_z;	/* ABS_X,Y,Z or REL_X,Y,Z */
+
+	/*
+	 * A valid BTN or KEY Code; use tap_axis_control to disable
+	 * event reporting
+	 */
+
+	u32 ev_code_tap[3];	/* EV_KEY {X-Axis, Y-Axis, Z-Axis} */
+
+	/*
+	 * A valid BTN or KEY Code for Free-Fall or Activity enables
+	 * input event reporting. A '0' disables the Free-Fall or
+	 * Activity reporting.
+	 */
+
+	u32 ev_code_ff;	/* EV_KEY */
+	u32 ev_code_act_inactivity;	/* EV_KEY */
+
+	u8 use_int2;
+};
+#endif
-- 
cgit v1.2.3-70-g09d2


From 671386bb23c57e5448f386a41101ed65ad1d488c Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Fri, 25 Jun 2010 08:44:10 -0700
Subject: Input: adxl34x - add support for ADXL346 orientation sensing

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/misc/adxl34x.c  | 62 ++++++++++++++++++++++++++++++++++++++++---
 include/linux/input/adxl34x.h | 56 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/input/misc/adxl34x.c b/drivers/input/misc/adxl34x.c
index 07f9ef63154..77fb4098705 100644
--- a/drivers/input/misc/adxl34x.c
+++ b/drivers/input/misc/adxl34x.c
@@ -196,6 +196,8 @@ struct adxl34x {
 	struct axis_triple hwcal;
 	struct axis_triple saved;
 	char phys[32];
+	unsigned orient2d_saved;
+	unsigned orient3d_saved;
 	bool disabled;	/* P: mutex */
 	bool opened;	/* P: mutex */
 	bool fifo_delay;
@@ -296,7 +298,7 @@ static irqreturn_t adxl34x_irq(int irq, void *handle)
 {
 	struct adxl34x *ac = handle;
 	struct adxl34x_platform_data *pdata = &ac->pdata;
-	int int_stat, tap_stat, samples;
+	int int_stat, tap_stat, samples, orient, orient_code;
 
 	/*
 	 * ACT_TAP_STATUS should be read before clearing the interrupt
@@ -332,6 +334,36 @@ static irqreturn_t adxl34x_irq(int irq, void *handle)
 					 pdata->ev_code_act_inactivity, 0);
 	}
 
+	/*
+	 * ORIENTATION SENSING ADXL346 only
+	 */
+	if (pdata->orientation_enable) {
+		orient = AC_READ(ac, ORIENT);
+		if ((pdata->orientation_enable & ADXL_EN_ORIENTATION_2D) &&
+		    (orient & ADXL346_2D_VALID)) {
+
+			orient_code = ADXL346_2D_ORIENT(orient);
+			/* Report orientation only when it changes */
+			if (ac->orient2d_saved != orient_code) {
+				ac->orient2d_saved = orient_code;
+				adxl34x_report_key_single(ac->input,
+					pdata->ev_codes_orient_2d[orient_code]);
+			}
+		}
+
+		if ((pdata->orientation_enable & ADXL_EN_ORIENTATION_3D) &&
+		    (orient & ADXL346_3D_VALID)) {
+
+			orient_code = ADXL346_3D_ORIENT(orient) - 1;
+			/* Report orientation only when it changes */
+			if (ac->orient3d_saved != orient_code) {
+				ac->orient3d_saved = orient_code;
+				adxl34x_report_key_single(ac->input,
+					pdata->ev_codes_orient_3d[orient_code]);
+			}
+		}
+	}
+
 	if (int_stat & (DATA_READY | WATERMARK)) {
 
 		if (pdata->fifo_mode)
@@ -641,7 +673,7 @@ struct adxl34x *adxl34x_probe(struct device *dev, int irq,
 	struct adxl34x *ac;
 	struct input_dev *input_dev;
 	const struct adxl34x_platform_data *pdata;
-	int err, range;
+	int err, range, i;
 	unsigned char revid;
 
 	if (!irq) {
@@ -797,12 +829,34 @@ struct adxl34x *adxl34x_probe(struct device *dev, int irq,
 	AC_WRITE(ac, FIFO_CTL, FIFO_MODE(pdata->fifo_mode) |
 			SAMPLES(pdata->watermark));
 
-	if (pdata->use_int2)
+	if (pdata->use_int2) {
 		/* Map all INTs to INT2 */
 		AC_WRITE(ac, INT_MAP, ac->int_mask | OVERRUN);
-	else
+	} else {
 		/* Map all INTs to INT1 */
 		AC_WRITE(ac, INT_MAP, 0);
+	}
+
+	if (ac->model == 346 && ac->pdata.orientation_enable) {
+		AC_WRITE(ac, ORIENT_CONF,
+			ORIENT_DEADZONE(ac->pdata.deadzone_angle) |
+			ORIENT_DIVISOR(ac->pdata.divisor_length));
+
+		ac->orient2d_saved = 1234;
+		ac->orient3d_saved = 1234;
+
+		if (pdata->orientation_enable & ADXL_EN_ORIENTATION_3D)
+			for (i = 0; i < ARRAY_SIZE(pdata->ev_codes_orient_3d); i++)
+				__set_bit(pdata->ev_codes_orient_3d[i],
+					  input_dev->keybit);
+
+		if (pdata->orientation_enable & ADXL_EN_ORIENTATION_2D)
+			for (i = 0; i < ARRAY_SIZE(pdata->ev_codes_orient_2d); i++)
+				__set_bit(pdata->ev_codes_orient_2d[i],
+					  input_dev->keybit);
+	} else {
+		ac->pdata.orientation_enable = 0;
+	}
 
 	AC_WRITE(ac, INT_ENABLE, ac->int_mask | OVERRUN);
 
diff --git a/include/linux/input/adxl34x.h b/include/linux/input/adxl34x.h
index 71211823803..df00d998a44 100644
--- a/include/linux/input/adxl34x.h
+++ b/include/linux/input/adxl34x.h
@@ -288,6 +288,62 @@ struct adxl34x_platform_data {
 	u32 ev_code_ff;	/* EV_KEY */
 	u32 ev_code_act_inactivity;	/* EV_KEY */
 
+	/*
+	 * Use ADXL34x INT2 instead of INT1
+	 */
 	u8 use_int2;
+
+	/*
+	 * ADXL346 only ORIENTATION SENSING feature
+	 * The orientation function of the ADXL346 reports both 2-D and
+	 * 3-D orientation concurrently.
+	 */
+
+#define ADXL_EN_ORIENTATION_2D		1
+#define ADXL_EN_ORIENTATION_3D		2
+#define ADXL_EN_ORIENTATION_2D_3D	3
+
+	u8 orientation_enable;
+
+	/*
+	 * The width of the deadzone region between two or more
+	 * orientation positions is determined by setting the Deadzone
+	 * value. The deadzone region size can be specified with a
+	 * resolution of 3.6deg. The deadzone angle represents the total
+	 * angle where the orientation is considered invalid.
+	 */
+
+#define ADXL_DEADZONE_ANGLE_0p0		0	/* !!!0.0 [deg] */
+#define ADXL_DEADZONE_ANGLE_3p6		1	/* 3.6 [deg] */
+#define ADXL_DEADZONE_ANGLE_7p2		2	/* 7.2 [deg] */
+#define ADXL_DEADZONE_ANGLE_10p8	3	/* 10.8 [deg] */
+#define ADXL_DEADZONE_ANGLE_14p4	4	/* 14.4 [deg] */
+#define ADXL_DEADZONE_ANGLE_18p0	5	/* 18.0 [deg] */
+#define ADXL_DEADZONE_ANGLE_21p6	6	/* 21.6 [deg] */
+#define ADXL_DEADZONE_ANGLE_25p2	7	/* 25.2 [deg] */
+
+	u8 deadzone_angle;
+
+	/*
+	 * To eliminate most human motion such as walking or shaking,
+	 * a Divisor value should be selected to effectively limit the
+	 * orientation bandwidth. Set the depth of the filter used to
+	 * low-pass filter the measured acceleration for stable
+	 * orientation sensing
+	 */
+
+#define ADXL_LP_FILTER_DIVISOR_2	0
+#define ADXL_LP_FILTER_DIVISOR_4	1
+#define ADXL_LP_FILTER_DIVISOR_8	2
+#define ADXL_LP_FILTER_DIVISOR_16	3
+#define ADXL_LP_FILTER_DIVISOR_32	4
+#define ADXL_LP_FILTER_DIVISOR_64	5
+#define ADXL_LP_FILTER_DIVISOR_128	6
+#define ADXL_LP_FILTER_DIVISOR_256	7
+
+	u8 divisor_length;
+
+	u32 ev_codes_orient_2d[4];	/* EV_KEY {+X, -X, +Y, -Y} */
+	u32 ev_codes_orient_3d[6];	/* EV_KEY {+Z, +Y, +X, -X, -Y, -Z} */
 };
 #endif
-- 
cgit v1.2.3-70-g09d2


From b51cae21ee66f77a368428e6bdf75a0c012c9fd7 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 25 Jun 2010 14:54:16 -0400
Subject: Add wait4() back to the set of <asm-generic/unistd.h> syscalls.

The initial pass at the generic ABI assumed that wait4() could be
easily expressed using waitid().  Although it's true that wait4()
can be built on waitid(), it's awkward enough that it makes more
sense to continue to include wait4 in the generic syscall ABI.

Since there is already a deprecated wait4 in the ABI, this change
converts that wait4 into oldwait4, and puts wait4 in the next
available slot for new supported syscalls, after the platform-specific
syscalls at number 260.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/unistd.h | 9 ++++++---
 scripts/checksyscalls.sh     | 1 -
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 30218b4fa4e..c17cebc4995 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -638,8 +638,11 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg)
  */
 #define __NR_arch_specific_syscall 244
 
+#define __NR_wait4 260
+__SYSCALL(__NR_wait4, sys_wait4)
+
 #undef __NR_syscalls
-#define __NR_syscalls 260
+#define __NR_syscalls 261
 
 /*
  * All syscalls below here should go away really,
@@ -776,8 +779,8 @@ __SYSCALL(__NR_epoll_wait, sys_epoll_wait)
 __SYSCALL(__NR_ustat, sys_ustat)
 #define __NR_vfork 1071
 __SYSCALL(__NR_vfork, sys_vfork)
-#define __NR_wait4 1072
-__SYSCALL(__NR_wait4, sys_wait4)
+#define __NR_oldwait4 1072
+__SYSCALL(__NR_oldwait4, sys_wait4)
 #define __NR_recv 1073
 __SYSCALL(__NR_recv, sys_recv)
 #define __NR_send 1074
diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh
index 66ad375612f..6bb42e72e0e 100755
--- a/scripts/checksyscalls.sh
+++ b/scripts/checksyscalls.sh
@@ -183,7 +183,6 @@ cat << EOF
 #define __IGNORE_ustat		/* statfs */
 #define __IGNORE_utime		/* utimes */
 #define __IGNORE_vfork		/* clone */
-#define __IGNORE_wait4		/* waitid */
 
 /* sync_file_range had a stupid ABI. Allow sync_file_range2 instead */
 #ifdef __NR_sync_file_range2
-- 
cgit v1.2.3-70-g09d2


From 01f2f3f6ef4d076c0c10a8a7b42624416d56b523 Mon Sep 17 00:00:00 2001
From: Hagen Paul Pfeifer <hagen@jauu.net>
Date: Sat, 19 Jun 2010 17:05:36 +0000
Subject: net: optimize Berkeley Packet Filter (BPF) processing

Gcc is currently not able to optimize the switch statement in
sk_run_filter() because of dense case labels. This patch replaces the
OR'd labels with ordered, sequential case labels. The sk_chk_filter()
function is modified to patch/replace the original OPCODES with an
ordered but equivalent form. gcc is now able to transform the
switch statement in sk_run_filter() into a jump table of complexity O(1).

Until this patch gcc generates a sequence of conditional branches (O(n)) of 567
byte .text segment size (arch x86_64):

7ff: 8b 06                 mov    (%rsi),%eax
801: 66 83 f8 35           cmp    $0x35,%ax
805: 0f 84 d0 02 00 00     je     adb <sk_run_filter+0x31d>
80b: 0f 87 07 01 00 00     ja     918 <sk_run_filter+0x15a>
811: 66 83 f8 15           cmp    $0x15,%ax
815: 0f 84 c5 02 00 00     je     ae0 <sk_run_filter+0x322>
81b: 77 73                 ja     890 <sk_run_filter+0xd2>
81d: 66 83 f8 04           cmp    $0x4,%ax
821: 0f 84 17 02 00 00     je     a3e <sk_run_filter+0x280>
827: 77 29                 ja     852 <sk_run_filter+0x94>
829: 66 83 f8 01           cmp    $0x1,%ax
[...]

With the modification the compiler translates the switch statement into
the following jump table fragment:

7ff: 66 83 3e 2c           cmpw   $0x2c,(%rsi)
803: 0f 87 1f 02 00 00     ja     a28 <sk_run_filter+0x26a>
809: 0f b7 06              movzwl (%rsi),%eax
80c: ff 24 c5 00 00 00 00  jmpq   *0x0(,%rax,8)
813: 44 89 e3              mov    %r12d,%ebx
816: e9 43 03 00 00        jmpq   b5e <sk_run_filter+0x3a0>
81b: 41 89 dc              mov    %ebx,%r12d
81e: e9 3b 03 00 00        jmpq   b5e <sk_run_filter+0x3a0>

Furthermore, I reordered the instructions to reduce cache line misses by
ordering the most common instructions first.

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |  48 +++++++++++
 net/core/filter.c      | 212 +++++++++++++++++++++++++++++++++++++------------
 2 files changed, 209 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 151f5d703b7..69b43dbea6c 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -91,6 +91,54 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define         BPF_TAX         0x00
 #define         BPF_TXA         0x80
 
+enum {
+	BPF_S_RET_K = 0,
+	BPF_S_RET_A,
+	BPF_S_ALU_ADD_K,
+	BPF_S_ALU_ADD_X,
+	BPF_S_ALU_SUB_K,
+	BPF_S_ALU_SUB_X,
+	BPF_S_ALU_MUL_K,
+	BPF_S_ALU_MUL_X,
+	BPF_S_ALU_DIV_X,
+	BPF_S_ALU_AND_K,
+	BPF_S_ALU_AND_X,
+	BPF_S_ALU_OR_K,
+	BPF_S_ALU_OR_X,
+	BPF_S_ALU_LSH_K,
+	BPF_S_ALU_LSH_X,
+	BPF_S_ALU_RSH_K,
+	BPF_S_ALU_RSH_X,
+	BPF_S_ALU_NEG,
+	BPF_S_LD_W_ABS,
+	BPF_S_LD_H_ABS,
+	BPF_S_LD_B_ABS,
+	BPF_S_LD_W_LEN,
+	BPF_S_LD_W_IND,
+	BPF_S_LD_H_IND,
+	BPF_S_LD_B_IND,
+	BPF_S_LD_IMM,
+	BPF_S_LDX_W_LEN,
+	BPF_S_LDX_B_MSH,
+	BPF_S_LDX_IMM,
+	BPF_S_MISC_TAX,
+	BPF_S_MISC_TXA,
+	BPF_S_ALU_DIV_K,
+	BPF_S_LD_MEM,
+	BPF_S_LDX_MEM,
+	BPF_S_ST,
+	BPF_S_STX,
+	BPF_S_JMP_JA,
+	BPF_S_JMP_JEQ_K,
+	BPF_S_JMP_JEQ_X,
+	BPF_S_JMP_JGE_K,
+	BPF_S_JMP_JGE_X,
+	BPF_S_JMP_JGT_K,
+	BPF_S_JMP_JGT_X,
+	BPF_S_JMP_JSET_K,
+	BPF_S_JMP_JSET_X,
+};
+
 #ifndef BPF_MAXINSNS
 #define BPF_MAXINSNS 4096
 #endif
diff --git a/net/core/filter.c b/net/core/filter.c
index da69fb728d3..52b051f82a0 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -128,87 +128,87 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
 		fentry = &filter[pc];
 
 		switch (fentry->code) {
-		case BPF_ALU|BPF_ADD|BPF_X:
+		case BPF_S_ALU_ADD_X:
 			A += X;
 			continue;
-		case BPF_ALU|BPF_ADD|BPF_K:
+		case BPF_S_ALU_ADD_K:
 			A += fentry->k;
 			continue;
-		case BPF_ALU|BPF_SUB|BPF_X:
+		case BPF_S_ALU_SUB_X:
 			A -= X;
 			continue;
-		case BPF_ALU|BPF_SUB|BPF_K:
+		case BPF_S_ALU_SUB_K:
 			A -= fentry->k;
 			continue;
-		case BPF_ALU|BPF_MUL|BPF_X:
+		case BPF_S_ALU_MUL_X:
 			A *= X;
 			continue;
-		case BPF_ALU|BPF_MUL|BPF_K:
+		case BPF_S_ALU_MUL_K:
 			A *= fentry->k;
 			continue;
-		case BPF_ALU|BPF_DIV|BPF_X:
+		case BPF_S_ALU_DIV_X:
 			if (X == 0)
 				return 0;
 			A /= X;
 			continue;
-		case BPF_ALU|BPF_DIV|BPF_K:
+		case BPF_S_ALU_DIV_K:
 			A /= fentry->k;
 			continue;
-		case BPF_ALU|BPF_AND|BPF_X:
+		case BPF_S_ALU_AND_X:
 			A &= X;
 			continue;
-		case BPF_ALU|BPF_AND|BPF_K:
+		case BPF_S_ALU_AND_K:
 			A &= fentry->k;
 			continue;
-		case BPF_ALU|BPF_OR|BPF_X:
+		case BPF_S_ALU_OR_X:
 			A |= X;
 			continue;
-		case BPF_ALU|BPF_OR|BPF_K:
+		case BPF_S_ALU_OR_K:
 			A |= fentry->k;
 			continue;
-		case BPF_ALU|BPF_LSH|BPF_X:
+		case BPF_S_ALU_LSH_X:
 			A <<= X;
 			continue;
-		case BPF_ALU|BPF_LSH|BPF_K:
+		case BPF_S_ALU_LSH_K:
 			A <<= fentry->k;
 			continue;
-		case BPF_ALU|BPF_RSH|BPF_X:
+		case BPF_S_ALU_RSH_X:
 			A >>= X;
 			continue;
-		case BPF_ALU|BPF_RSH|BPF_K:
+		case BPF_S_ALU_RSH_K:
 			A >>= fentry->k;
 			continue;
-		case BPF_ALU|BPF_NEG:
+		case BPF_S_ALU_NEG:
 			A = -A;
 			continue;
-		case BPF_JMP|BPF_JA:
+		case BPF_S_JMP_JA:
 			pc += fentry->k;
 			continue;
-		case BPF_JMP|BPF_JGT|BPF_K:
+		case BPF_S_JMP_JGT_K:
 			pc += (A > fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JGE|BPF_K:
+		case BPF_S_JMP_JGE_K:
 			pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JEQ|BPF_K:
+		case BPF_S_JMP_JEQ_K:
 			pc += (A == fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JSET|BPF_K:
+		case BPF_S_JMP_JSET_K:
 			pc += (A & fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JGT|BPF_X:
+		case BPF_S_JMP_JGT_X:
 			pc += (A > X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JGE|BPF_X:
+		case BPF_S_JMP_JGE_X:
 			pc += (A >= X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JEQ|BPF_X:
+		case BPF_S_JMP_JEQ_X:
 			pc += (A == X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JSET|BPF_X:
+		case BPF_S_JMP_JSET_X:
 			pc += (A & X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_LD|BPF_W|BPF_ABS:
+		case BPF_S_LD_W_ABS:
 			k = fentry->k;
 load_w:
 			ptr = load_pointer(skb, k, 4, &tmp);
@@ -217,7 +217,7 @@ load_w:
 				continue;
 			}
 			break;
-		case BPF_LD|BPF_H|BPF_ABS:
+		case BPF_S_LD_H_ABS:
 			k = fentry->k;
 load_h:
 			ptr = load_pointer(skb, k, 2, &tmp);
@@ -226,7 +226,7 @@ load_h:
 				continue;
 			}
 			break;
-		case BPF_LD|BPF_B|BPF_ABS:
+		case BPF_S_LD_B_ABS:
 			k = fentry->k;
 load_b:
 			ptr = load_pointer(skb, k, 1, &tmp);
@@ -235,54 +235,54 @@ load_b:
 				continue;
 			}
 			break;
-		case BPF_LD|BPF_W|BPF_LEN:
+		case BPF_S_LD_W_LEN:
 			A = skb->len;
 			continue;
-		case BPF_LDX|BPF_W|BPF_LEN:
+		case BPF_S_LDX_W_LEN:
 			X = skb->len;
 			continue;
-		case BPF_LD|BPF_W|BPF_IND:
+		case BPF_S_LD_W_IND:
 			k = X + fentry->k;
 			goto load_w;
-		case BPF_LD|BPF_H|BPF_IND:
+		case BPF_S_LD_H_IND:
 			k = X + fentry->k;
 			goto load_h;
-		case BPF_LD|BPF_B|BPF_IND:
+		case BPF_S_LD_B_IND:
 			k = X + fentry->k;
 			goto load_b;
-		case BPF_LDX|BPF_B|BPF_MSH:
+		case BPF_S_LDX_B_MSH:
 			ptr = load_pointer(skb, fentry->k, 1, &tmp);
 			if (ptr != NULL) {
 				X = (*(u8 *)ptr & 0xf) << 2;
 				continue;
 			}
 			return 0;
-		case BPF_LD|BPF_IMM:
+		case BPF_S_LD_IMM:
 			A = fentry->k;
 			continue;
-		case BPF_LDX|BPF_IMM:
+		case BPF_S_LDX_IMM:
 			X = fentry->k;
 			continue;
-		case BPF_LD|BPF_MEM:
+		case BPF_S_LD_MEM:
 			A = mem[fentry->k];
 			continue;
-		case BPF_LDX|BPF_MEM:
+		case BPF_S_LDX_MEM:
 			X = mem[fentry->k];
 			continue;
-		case BPF_MISC|BPF_TAX:
+		case BPF_S_MISC_TAX:
 			X = A;
 			continue;
-		case BPF_MISC|BPF_TXA:
+		case BPF_S_MISC_TXA:
 			A = X;
 			continue;
-		case BPF_RET|BPF_K:
+		case BPF_S_RET_K:
 			return fentry->k;
-		case BPF_RET|BPF_A:
+		case BPF_S_RET_A:
 			return A;
-		case BPF_ST:
+		case BPF_S_ST:
 			mem[fentry->k] = A;
 			continue;
-		case BPF_STX:
+		case BPF_S_STX:
 			mem[fentry->k] = X;
 			continue;
 		default:
@@ -390,53 +390,128 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 		/* Only allow valid instructions */
 		switch (ftest->code) {
 		case BPF_ALU|BPF_ADD|BPF_K:
+			ftest->code = BPF_S_ALU_ADD_K;
+			break;
 		case BPF_ALU|BPF_ADD|BPF_X:
+			ftest->code = BPF_S_ALU_ADD_X;
+			break;
 		case BPF_ALU|BPF_SUB|BPF_K:
+			ftest->code = BPF_S_ALU_SUB_K;
+			break;
 		case BPF_ALU|BPF_SUB|BPF_X:
+			ftest->code = BPF_S_ALU_SUB_X;
+			break;
 		case BPF_ALU|BPF_MUL|BPF_K:
+			ftest->code = BPF_S_ALU_MUL_K;
+			break;
 		case BPF_ALU|BPF_MUL|BPF_X:
+			ftest->code = BPF_S_ALU_MUL_X;
+			break;
 		case BPF_ALU|BPF_DIV|BPF_X:
+			ftest->code = BPF_S_ALU_DIV_X;
+			break;
 		case BPF_ALU|BPF_AND|BPF_K:
+			ftest->code = BPF_S_ALU_AND_K;
+			break;
 		case BPF_ALU|BPF_AND|BPF_X:
+			ftest->code = BPF_S_ALU_AND_X;
+			break;
 		case BPF_ALU|BPF_OR|BPF_K:
+			ftest->code = BPF_S_ALU_OR_K;
+			break;
 		case BPF_ALU|BPF_OR|BPF_X:
+			ftest->code = BPF_S_ALU_OR_X;
+			break;
 		case BPF_ALU|BPF_LSH|BPF_K:
+			ftest->code = BPF_S_ALU_LSH_K;
+			break;
 		case BPF_ALU|BPF_LSH|BPF_X:
+			ftest->code = BPF_S_ALU_LSH_X;
+			break;
 		case BPF_ALU|BPF_RSH|BPF_K:
+			ftest->code = BPF_S_ALU_RSH_K;
+			break;
 		case BPF_ALU|BPF_RSH|BPF_X:
+			ftest->code = BPF_S_ALU_RSH_X;
+			break;
 		case BPF_ALU|BPF_NEG:
+			ftest->code = BPF_S_ALU_NEG;
+			break;
 		case BPF_LD|BPF_W|BPF_ABS:
+			ftest->code = BPF_S_LD_W_ABS;
+			break;
 		case BPF_LD|BPF_H|BPF_ABS:
+			ftest->code = BPF_S_LD_H_ABS;
+			break;
 		case BPF_LD|BPF_B|BPF_ABS:
+			ftest->code = BPF_S_LD_B_ABS;
+			break;
 		case BPF_LD|BPF_W|BPF_LEN:
+			ftest->code = BPF_S_LD_W_LEN;
+			break;
 		case BPF_LD|BPF_W|BPF_IND:
+			ftest->code = BPF_S_LD_W_IND;
+			break;
 		case BPF_LD|BPF_H|BPF_IND:
+			ftest->code = BPF_S_LD_H_IND;
+			break;
 		case BPF_LD|BPF_B|BPF_IND:
+			ftest->code = BPF_S_LD_B_IND;
+			break;
 		case BPF_LD|BPF_IMM:
+			ftest->code = BPF_S_LD_IMM;
+			break;
 		case BPF_LDX|BPF_W|BPF_LEN:
+			ftest->code = BPF_S_LDX_W_LEN;
+			break;
 		case BPF_LDX|BPF_B|BPF_MSH:
+			ftest->code = BPF_S_LDX_B_MSH;
+			break;
 		case BPF_LDX|BPF_IMM:
+			ftest->code = BPF_S_LDX_IMM;
+			break;
 		case BPF_MISC|BPF_TAX:
+			ftest->code = BPF_S_MISC_TAX;
+			break;
 		case BPF_MISC|BPF_TXA:
+			ftest->code = BPF_S_MISC_TXA;
+			break;
 		case BPF_RET|BPF_K:
+			ftest->code = BPF_S_RET_K;
+			break;
 		case BPF_RET|BPF_A:
+			ftest->code = BPF_S_RET_A;
 			break;
 
 		/* Some instructions need special checks */
 
-		case BPF_ALU|BPF_DIV|BPF_K:
 			/* check for division by zero */
+		case BPF_ALU|BPF_DIV|BPF_K:
 			if (ftest->k == 0)
 				return -EINVAL;
+			ftest->code = BPF_S_ALU_DIV_K;
 			break;
 
+		/* check for invalid memory addresses */
 		case BPF_LD|BPF_MEM:
+			if (ftest->k >= BPF_MEMWORDS)
+				return -EINVAL;
+			ftest->code = BPF_S_LD_MEM;
+			break;
 		case BPF_LDX|BPF_MEM:
+			if (ftest->k >= BPF_MEMWORDS)
+				return -EINVAL;
+			ftest->code = BPF_S_LDX_MEM;
+			break;
 		case BPF_ST:
+			if (ftest->k >= BPF_MEMWORDS)
+				return -EINVAL;
+			ftest->code = BPF_S_ST;
+			break;
 		case BPF_STX:
-			/* check for invalid memory addresses */
 			if (ftest->k >= BPF_MEMWORDS)
 				return -EINVAL;
+			ftest->code = BPF_S_STX;
 			break;
 
 		case BPF_JMP|BPF_JA:
@@ -447,28 +522,63 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 			 */
 			if (ftest->k >= (unsigned)(flen-pc-1))
 				return -EINVAL;
+			ftest->code = BPF_S_JMP_JA;
 			break;
 
 		case BPF_JMP|BPF_JEQ|BPF_K:
+			ftest->code = BPF_S_JMP_JEQ_K;
+			break;
 		case BPF_JMP|BPF_JEQ|BPF_X:
+			ftest->code = BPF_S_JMP_JEQ_X;
+			break;
 		case BPF_JMP|BPF_JGE|BPF_K:
+			ftest->code = BPF_S_JMP_JGE_K;
+			break;
 		case BPF_JMP|BPF_JGE|BPF_X:
+			ftest->code = BPF_S_JMP_JGE_X;
+			break;
 		case BPF_JMP|BPF_JGT|BPF_K:
+			ftest->code = BPF_S_JMP_JGT_K;
+			break;
 		case BPF_JMP|BPF_JGT|BPF_X:
+			ftest->code = BPF_S_JMP_JGT_X;
+			break;
 		case BPF_JMP|BPF_JSET|BPF_K:
+			ftest->code = BPF_S_JMP_JSET_K;
+			break;
 		case BPF_JMP|BPF_JSET|BPF_X:
+			ftest->code = BPF_S_JMP_JSET_X;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
 			/* for conditionals both must be safe */
+		switch (ftest->code) {
+		case BPF_S_JMP_JEQ_K:
+		case BPF_S_JMP_JEQ_X:
+		case BPF_S_JMP_JGE_K:
+		case BPF_S_JMP_JGE_X:
+		case BPF_S_JMP_JGT_K:
+		case BPF_S_JMP_JGT_X:
+		case BPF_S_JMP_JSET_X:
+		case BPF_S_JMP_JSET_K:
 			if (pc + ftest->jt + 1 >= flen ||
 			    pc + ftest->jf + 1 >= flen)
 				return -EINVAL;
-			break;
+		}
+	}
 
+	/* last instruction must be a RET code */
+	switch (filter[flen - 1].code) {
+	case BPF_S_RET_K:
+	case BPF_S_RET_A:
+		return 0;
+		break;
 		default:
 			return -EINVAL;
 		}
-	}
-
-	return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
 }
 EXPORT_SYMBOL(sk_chk_filter);
 
-- 
cgit v1.2.3-70-g09d2


From 1823e4c80eeae2a774c75569ce3035070e5ee009 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 22 Jun 2010 20:58:41 +0000
Subject: snmp: add align parameter to snmp_mib_init()

In preparation for 64bit snmp counters for some mibs,
add an 'align' parameter to snmp_mib_init(), instead
of assuming mibs only contain 'unsigned long' fields.

Callers can use __alignof__(type) to provide correct
alignment.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
CC: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
CC: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h       |  2 +-
 net/dccp/proto.c       |  3 ++-
 net/ipv4/af_inet.c     | 27 +++++++++++++++++----------
 net/ipv6/addrconf.c    |  9 ++++++---
 net/ipv6/af_inet6.c    | 15 ++++++++++-----
 net/sctp/protocol.c    |  3 ++-
 net/xfrm/xfrm_policy.c |  3 ++-
 7 files changed, 40 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index d52f0118036..3b524df7ddd 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -178,7 +178,7 @@ extern struct ipv4_config ipv4_config;
 #define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd)
 
 extern unsigned long snmp_fold_field(void __percpu *mib[], int offt);
-extern int snmp_mib_init(void __percpu *ptr[2], size_t mibsize);
+extern int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align);
 extern void snmp_mib_free(void __percpu *ptr[2]);
 
 extern struct local_ports {
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index f79bcef5088..096250d1323 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1002,7 +1002,8 @@ EXPORT_SYMBOL_GPL(dccp_shutdown);
 static inline int dccp_mib_init(void)
 {
 	return snmp_mib_init((void __percpu **)dccp_statistics,
-			     sizeof(struct dccp_mib));
+			     sizeof(struct dccp_mib),
+			     __alignof__(struct dccp_mib));
 }
 
 static inline void dccp_mib_exit(void)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b4c0969137c..640db9b9033 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1427,13 +1427,13 @@ unsigned long snmp_fold_field(void __percpu *mib[], int offt)
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
-int snmp_mib_init(void __percpu *ptr[2], size_t mibsize)
+int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
 {
 	BUG_ON(ptr == NULL);
-	ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long));
+	ptr[0] = __alloc_percpu(mibsize, align);
 	if (!ptr[0])
 		goto err0;
-	ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long));
+	ptr[1] = __alloc_percpu(mibsize, align);
 	if (!ptr[1])
 		goto err1;
 	return 0;
@@ -1490,25 +1490,32 @@ static const struct net_protocol icmp_protocol = {
 static __net_init int ipv4_mib_init_net(struct net *net)
 {
 	if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
-			  sizeof(struct tcp_mib)) < 0)
+			  sizeof(struct tcp_mib),
+			  __alignof__(struct tcp_mib)) < 0)
 		goto err_tcp_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
-			  sizeof(struct ipstats_mib)) < 0)
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
-			  sizeof(struct linux_mib)) < 0)
+			  sizeof(struct linux_mib),
+			  __alignof__(struct linux_mib)) < 0)
 		goto err_net_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
-			  sizeof(struct udp_mib)) < 0)
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udp_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
-			  sizeof(struct udp_mib)) < 0)
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udplite_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
-			  sizeof(struct icmp_mib)) < 0)
+			  sizeof(struct icmp_mib),
+			  __alignof__(struct icmp_mib)) < 0)
 		goto err_icmp_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics,
-			  sizeof(struct icmpmsg_mib)) < 0)
+			  sizeof(struct icmpmsg_mib),
+			  __alignof__(struct icmpmsg_mib)) < 0)
 		goto err_icmpmsg_mib;
 
 	tcp_mib_init(net);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b97bb1f3080..c20a7c260a8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -284,13 +284,16 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
 static int snmp6_alloc_dev(struct inet6_dev *idev)
 {
 	if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
-			  sizeof(struct ipstats_mib)) < 0)
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip;
 	if (snmp_mib_init((void __percpu **)idev->stats.icmpv6,
-			  sizeof(struct icmpv6_mib)) < 0)
+			  sizeof(struct icmpv6_mib),
+			  __alignof__(struct icmpv6_mib)) < 0)
 		goto err_icmp;
 	if (snmp_mib_init((void __percpu **)idev->stats.icmpv6msg,
-			  sizeof(struct icmpv6msg_mib)) < 0)
+			  sizeof(struct icmpv6msg_mib),
+			  __alignof__(struct icmpv6msg_mib)) < 0)
 		goto err_icmpmsg;
 
 	return 0;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 94b1b9c954b..e830cd4f9d0 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -971,19 +971,24 @@ static void ipv6_packet_cleanup(void)
 static int __net_init ipv6_init_mibs(struct net *net)
 {
 	if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6,
-			  sizeof (struct udp_mib)) < 0)
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		return -ENOMEM;
 	if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6,
-			  sizeof (struct udp_mib)) < 0)
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udplite_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics,
-			  sizeof(struct ipstats_mib)) < 0)
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics,
-			  sizeof(struct icmpv6_mib)) < 0)
+			  sizeof(struct icmpv6_mib),
+			  __alignof__(struct icmpv6_mib)) < 0)
 		goto err_icmp_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.icmpv6msg_statistics,
-			  sizeof(struct icmpv6msg_mib)) < 0)
+			  sizeof(struct icmpv6msg_mib),
+			  __alignof__(struct icmpv6msg_mib)) < 0)
 		goto err_icmpmsg_mib;
 	return 0;
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index a0e1a7fdebb..c0e162aeb0b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1002,7 +1002,8 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family)
 static inline int init_sctp_mibs(void)
 {
 	return snmp_mib_init((void __percpu **)sctp_statistics,
-			     sizeof(struct sctp_mib));
+			     sizeof(struct sctp_mib),
+			     __alignof__(struct sctp_mib));
 }
 
 static inline void cleanup_sctp_mibs(void)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 4bf27d90133..593c06be6b6 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2480,7 +2480,8 @@ static int __net_init xfrm_statistics_init(struct net *net)
 	int rv;
 
 	if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
-			  sizeof(struct linux_xfrm_mib)) < 0)
+			  sizeof(struct linux_xfrm_mib),
+			  __alignof__(struct linux_xfrm_mib)) < 0)
 		return -ENOMEM;
 	rv = xfrm_proc_init(net);
 	if (rv < 0)
-- 
cgit v1.2.3-70-g09d2


From 172d69e63c7f1e8300d0e1c1bbd8eb0f630faa15 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 21 Jun 2010 11:48:45 +0000
Subject: syncookies: add support for ECN

Allows use of ECN when syncookies are in effect by encoding ecn_ok
into the syn-ack tcp timestamp.

While at it, remove an unneeded #ifdef CONFIG_SYN_COOKIES.
With CONFIG_SYN_COOKIES=n, want_cookie is ifdef'd to 0 and gcc
removes the "if (0)".

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  2 +-
 net/ipv4/syncookies.c | 15 ++++++++++-----
 net/ipv4/tcp_ipv4.c   |  6 ++----
 net/ipv6/syncookies.c |  5 +++--
 net/ipv6/tcp_ipv6.c   |  2 +-
 5 files changed, 17 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 18c246c9b00..c2f96c2cc89 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -464,7 +464,7 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
 				     __u16 *mss);
 
 extern __u32 cookie_init_timestamp(struct request_sock *req);
-extern bool cookie_check_timestamp(struct tcp_options_received *tcp_opt);
+extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *);
 
 /* From net/ipv6/syncookies.c */
 extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 8896329aebd..650cace2180 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -19,7 +19,7 @@
 #include <net/route.h>
 
 /* Timestamps: lowest bits store TCP options */
-#define TSBITS 5
+#define TSBITS 6
 #define TSMASK (((__u32)1 << TSBITS) - 1)
 
 extern int sysctl_tcp_syncookies;
@@ -73,6 +73,7 @@ __u32 cookie_init_timestamp(struct request_sock *req)
 
 	options = ireq->wscale_ok ? ireq->snd_wscale : 0xf;
 	options |= ireq->sack_ok << 4;
+	options |= ireq->ecn_ok << 5;
 
 	ts = ts_now & ~TSMASK;
 	ts |= options;
@@ -226,11 +227,11 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
  * This extracts these options from the timestamp echo.
  *
  * The lowest 4 bits store snd_wscale.
- * The next lsb is for sack_ok
+ * next 2 bits indicate SACK and ECN support.
  *
  * return false if we decode an option that should not be.
  */
-bool cookie_check_timestamp(struct tcp_options_received *tcp_opt)
+bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok)
 {
 	/* echoed timestamp, lowest bits contain options */
 	u32 options = tcp_opt->rcv_tsecr & TSMASK;
@@ -244,6 +245,9 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt)
 		return false;
 
 	tcp_opt->sack_ok = (options >> 4) & 0x1;
+	*ecn_ok = (options >> 5) & 1;
+	if (*ecn_ok && !sysctl_tcp_ecn)
+		return false;
 
 	if (tcp_opt->sack_ok && !sysctl_tcp_sack)
 		return false;
@@ -272,6 +276,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	int mss;
 	struct rtable *rt;
 	__u8 rcv_wscale;
+	bool ecn_ok;
 
 	if (!sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
@@ -288,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
 
-	if (!cookie_check_timestamp(&tcp_opt))
+	if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
 		goto out;
 
 	ret = NULL;
@@ -305,7 +310,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->rmt_port		= th->source;
 	ireq->loc_addr		= ip_hdr(skb)->daddr;
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
-	ireq->ecn_ok		= 0;
+	ireq->ecn_ok		= ecn_ok;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2e41e6f9296..8fa32f5ae2c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1328,14 +1328,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_free;
 
-	if (!want_cookie)
+	if (!want_cookie || tmp_opt.tstamp_ok)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
 	if (want_cookie) {
-#ifdef CONFIG_SYN_COOKIES
-		req->cookie_ts = tmp_opt.tstamp_ok;
-#endif
 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
+		req->cookie_ts = tmp_opt.tstamp_ok;
 	} else if (!isn) {
 		struct inet_peer *peer = NULL;
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 84d818cfae1..09fd34f0dbf 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -164,6 +164,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	int mss;
 	struct dst_entry *dst;
 	__u8 rcv_wscale;
+	bool ecn_ok;
 
 	if (!sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
@@ -180,7 +181,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
 
-	if (!cookie_check_timestamp(&tcp_opt))
+	if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
 		goto out;
 
 	ret = NULL;
@@ -215,7 +216,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	req->expires = 0UL;
 	req->retrans = 0;
-	ireq->ecn_ok		= 0;
+	ireq->ecn_ok		= ecn_ok;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f8753456936..5ebc27ecebd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1269,7 +1269,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	treq = inet6_rsk(req);
 	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
-	if (!want_cookie)
+	if (!want_cookie || tmp_opt.tstamp_ok)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
 	if (!isn) {
-- 
cgit v1.2.3-70-g09d2


From 4ba6ce250e406b20bcd6f0f3aed6b3d80965e6c2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 27 Jun 2010 18:49:59 +0200
Subject: percpu: make @dyn_size always mean min dyn_size in first chunk init
 functions

In pcpu_build_alloc_info() and pcpu_embed_first_chunk(), @dyn_size was
ssize_t, -1 meant auto-size, 0 forced 0 and positive meant minimum
size.  There's no use case for forcing 0 and the upcoming early alloc
support always requires non-zero dynamic size.  Make @dyn_size always
mean minimum dyn_size.

While at it, make pcpu_build_alloc_info() static, since it doesn't have
any external callers, as suggested by David Rientjes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
---
 include/linux/percpu.h |  7 +------
 mm/percpu.c            | 35 ++++++++++-------------------------
 2 files changed, 11 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index d3a38d68710..3ffd05e550d 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -104,16 +104,11 @@ extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
 							     int nr_units);
 extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai);
 
-extern struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-				size_t reserved_size, ssize_t dyn_size,
-				size_t atom_size,
-				pcpu_fc_cpu_distance_fn_t cpu_distance_fn);
-
 extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 					 void *base_addr);
 
 #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
-extern int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
+extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 				size_t atom_size,
 				pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
 				pcpu_fc_alloc_fn_t alloc_fn,
diff --git a/mm/percpu.c b/mm/percpu.c
index 6470e771023..c3e7010c6d7 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1013,20 +1013,6 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
 		return page_to_phys(pcpu_addr_to_page(addr));
 }
 
-static inline size_t pcpu_calc_fc_sizes(size_t static_size,
-					size_t reserved_size,
-					ssize_t *dyn_sizep)
-{
-	size_t size_sum;
-
-	size_sum = PFN_ALIGN(static_size + reserved_size +
-			     (*dyn_sizep >= 0 ? *dyn_sizep : 0));
-	if (*dyn_sizep != 0)
-		*dyn_sizep = size_sum - static_size - reserved_size;
-
-	return size_sum;
-}
-
 /**
  * pcpu_alloc_alloc_info - allocate percpu allocation info
  * @nr_groups: the number of groups
@@ -1085,7 +1071,7 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
 /**
  * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
  * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
+ * @dyn_size: minimum free size for dynamic allocation in bytes
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
  *
@@ -1103,8 +1089,8 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
  * On success, pointer to the new allocation_info is returned.  On
  * failure, ERR_PTR value is returned.
  */
-struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-				size_t reserved_size, ssize_t dyn_size,
+static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
+				size_t reserved_size, size_t dyn_size,
 				size_t atom_size,
 				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
 {
@@ -1123,13 +1109,15 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info(
 	memset(group_map, 0, sizeof(group_map));
 	memset(group_cnt, 0, sizeof(group_cnt));
 
+	size_sum = PFN_ALIGN(static_size + reserved_size + dyn_size);
+	dyn_size = size_sum - static_size - reserved_size;
+
 	/*
 	 * Determine min_unit_size, alloc_size and max_upa such that
 	 * alloc_size is multiple of atom_size and is the smallest
 	 * which can accomodate 4k aligned segments which are equal to
 	 * or larger than min_unit_size.
 	 */
-	size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size);
 	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
 
 	alloc_size = roundup(min_unit_size, atom_size);
@@ -1532,7 +1520,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
 /**
  * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
  * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
+ * @dyn_size: minimum free size for dynamic allocation in bytes
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
  * @alloc_fn: function to allocate percpu page
@@ -1553,10 +1541,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
  * vmalloc space is not orders of magnitude larger than distances
  * between node memory addresses (ie. 32bit NUMA machines).
  *
- * When @dyn_size is positive, dynamic area might be larger than
- * specified to fill page alignment.  When @dyn_size is auto,
- * @dyn_size is just big enough to fill page alignment after static
- * and reserved areas.
+ * @dyn_size specifies the minimum dynamic area size.
  *
  * If the needed size is smaller than the minimum or specified unit
  * size, the leftover is returned using @free_fn.
@@ -1564,7 +1549,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
  * RETURNS:
  * 0 on success, -errno on failure.
  */
-int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
+int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 				  size_t atom_size,
 				  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
 				  pcpu_fc_alloc_fn_t alloc_fn,
@@ -1695,7 +1680,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 
 	snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);
 
-	ai = pcpu_build_alloc_info(reserved_size, -1, PAGE_SIZE, NULL);
+	ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL);
 	if (IS_ERR(ai))
 		return PTR_ERR(ai);
 	BUG_ON(ai->nr_groups != 1);
-- 
cgit v1.2.3-70-g09d2


From 099a19d91ca429944743d51bef8fee240e94d8e3 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 27 Jun 2010 18:50:00 +0200
Subject: percpu: allow limited allocation before slab is online

This patch updates the percpu allocator such that it can serve a limited
amount of allocations before slab comes online.  This is primarily to
allow slab to depend on a working percpu allocator.

Two parameters, PERCPU_DYNAMIC_EARLY_SIZE and SLOTS, determine how
much memory space and allocation map slots are reserved.  If this
reserved area is exhausted, WARN_ON_ONCE() will trigger and allocation
will fail till slab comes online.

The following changes are made to implement early alloc.

* pcpu_mem_alloc() now checks slab_is_available()

* Chunks are allocated using pcpu_mem_alloc()

* Init paths make sure ai->dyn_size is at least as large as
  PERCPU_DYNAMIC_EARLY_SIZE.

* Initial alloc maps are allocated in __initdata and copied to
  kmalloc'd areas once slab is online.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
---
 include/linux/percpu.h | 13 +++++++++++++
 init/main.c            |  1 +
 mm/percpu.c            | 52 ++++++++++++++++++++++++++++++++++++++------------
 3 files changed, 54 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 3ffd05e550d..b8b9084527b 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -44,6 +44,16 @@
 /* minimum unit size, also is the maximum supported allocation size */
 #define PCPU_MIN_UNIT_SIZE		PFN_ALIGN(64 << 10)
 
+/*
+ * Percpu allocator can serve percpu allocations before slab is
+ * initialized which allows slab to depend on the percpu allocator.
+ * The following two parameters decide how much resource to
+ * preallocate for this.  Keep PERCPU_DYNAMIC_RESERVE equal to or
+ * larger than PERCPU_DYNAMIC_EARLY_SIZE.
+ */
+#define PERCPU_DYNAMIC_EARLY_SLOTS	128
+#define PERCPU_DYNAMIC_EARLY_SIZE	(12 << 10)
+
 /*
  * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
  * back on the first chunk for dynamic percpu allocation if arch is
@@ -135,6 +145,7 @@ extern bool is_kernel_percpu_address(unsigned long addr);
 #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void __init setup_per_cpu_areas(void);
 #endif
+extern void __init percpu_init_late(void);
 
 #else /* CONFIG_SMP */
 
@@ -148,6 +159,8 @@ static inline bool is_kernel_percpu_address(unsigned long addr)
 
 static inline void __init setup_per_cpu_areas(void) { }
 
+static inline void __init percpu_init_late(void) { }
+
 static inline void *pcpu_lpage_remapped(void *kaddr)
 {
 	return NULL;
diff --git a/init/main.c b/init/main.c
index 3bdb152f412..3ff8dd0fb51 100644
--- a/init/main.c
+++ b/init/main.c
@@ -522,6 +522,7 @@ static void __init mm_init(void)
 	page_cgroup_init_flatmem();
 	mem_init();
 	kmem_cache_init();
+	percpu_init_late();
 	pgtable_cache_init();
 	vmalloc_init();
 }
diff --git a/mm/percpu.c b/mm/percpu.c
index c3e7010c6d7..e61dc2cc587 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -282,6 +282,9 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
  */
 static void *pcpu_mem_alloc(size_t size)
 {
+	if (WARN_ON_ONCE(!slab_is_available()))
+		return NULL;
+
 	if (size <= PAGE_SIZE)
 		return kzalloc(size, GFP_KERNEL);
 	else {
@@ -392,13 +395,6 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
 	old_size = chunk->map_alloc * sizeof(chunk->map[0]);
 	memcpy(new, chunk->map, old_size);
 
-	/*
-	 * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
-	 * one of the first chunks and still using static map.
-	 */
-	if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
-		old = chunk->map;
-
 	chunk->map_alloc = new_alloc;
 	chunk->map = new;
 	new = NULL;
@@ -604,7 +600,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 {
 	struct pcpu_chunk *chunk;
 
-	chunk = kzalloc(pcpu_chunk_struct_size, GFP_KERNEL);
+	chunk = pcpu_mem_alloc(pcpu_chunk_struct_size);
 	if (!chunk)
 		return NULL;
 
@@ -1109,7 +1105,9 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
 	memset(group_map, 0, sizeof(group_map));
 	memset(group_cnt, 0, sizeof(group_cnt));
 
-	size_sum = PFN_ALIGN(static_size + reserved_size + dyn_size);
+	/* calculate size_sum and ensure dyn_size is enough for early alloc */
+	size_sum = PFN_ALIGN(static_size + reserved_size +
+			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
 	dyn_size = size_sum - static_size - reserved_size;
 
 	/*
@@ -1338,7 +1336,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 				  void *base_addr)
 {
 	static char cpus_buf[4096] __initdata;
-	static int smap[2], dmap[2];
+	static int smap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
+	static int dmap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
 	size_t dyn_size = ai->dyn_size;
 	size_t size_sum = ai->static_size + ai->reserved_size + dyn_size;
 	struct pcpu_chunk *schunk, *dchunk = NULL;
@@ -1361,14 +1360,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 } while (0)
 
 	/* sanity checks */
-	BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC ||
-		     ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
 	PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
 	PCPU_SETUP_BUG_ON(!ai->static_size);
 	PCPU_SETUP_BUG_ON(!base_addr);
 	PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
 	PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
 	PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
+	PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
 	PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
 
 	/* process group information and build config tables accordingly */
@@ -1806,3 +1804,33 @@ void __init setup_per_cpu_areas(void)
 		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
 }
 #endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
+
+/*
+ * First and reserved chunks are initialized with temporary allocation
+ * map in initdata so that they can be used before slab is online.
+ * This function is called after slab is brought up and replaces those
+ * with properly allocated maps.
+ */
+void __init percpu_init_late(void)
+{
+	struct pcpu_chunk *target_chunks[] =
+		{ pcpu_first_chunk, pcpu_reserved_chunk, NULL };
+	struct pcpu_chunk *chunk;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; (chunk = target_chunks[i]); i++) {
+		int *map;
+		const size_t size = PERCPU_DYNAMIC_EARLY_SLOTS * sizeof(map[0]);
+
+		BUILD_BUG_ON(size > PAGE_SIZE);
+
+		map = pcpu_mem_alloc(size);
+		BUG_ON(!map);
+
+		spin_lock_irqsave(&pcpu_lock, flags);
+		memcpy(map, chunk->map, size);
+		chunk->map = map;
+		spin_unlock_irqrestore(&pcpu_lock, flags);
+	}
+}
-- 
cgit v1.2.3-70-g09d2


From 5daeba34d2aab669aea07abee13d53cd116578fb Mon Sep 17 00:00:00 2001
From: David Dillow <dave@thedillows.org>
Date: Sun, 27 Jun 2010 00:13:20 +0200
Subject: ALSA: pcm_lib: avoid timing jitter in snd_pcm_read/write()

When using poll() to wait for the next period -- or avail_min samples --
one gets a consistent delay for each system call that is usually just a
little short of the selected period time. However, when using
snd_pcm_read/write(), one gets a jittery delay that alternates between
less than a millisecond and approximately two period times. This is
caused by snd_pcm_lib_{read,write}1() transferring any available samples
to the user's buffer and adjusting the application pointer prior to
sleeping to the end of the current period. When the next period
interrupt occurs, there are then fewer than avail_min samples remaining to
be transferred in the period, so we end up sleeping until a second
period occurs.

This is solved by using runtime->twake as the number of samples needed
for a wakeup in addition to selecting the proper wait queue to wake in
snd_pcm_update_state(). This requires twake to be non-zero when used
by snd_pcm_lib_{read,write}1() even if avail_min is zero.

Signed-off-by: Dave Dillow <dave@thedillows.org>
Signed-off-by: Jaroslav Kysela <perex@perex.cz>
---
 include/sound/pcm.h  |  2 +-
 sound/core/pcm_lib.c | 23 +++++++++++++++--------
 2 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index dd76cdede64..83c6fa6aac4 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -313,7 +313,7 @@ struct snd_pcm_runtime {
 	struct snd_pcm_mmap_control *control;
 
 	/* -- locking / scheduling -- */
-	unsigned int twake: 1;		/* do transfer (!poll) wakeup */
+	snd_pcm_uframes_t twake; 	/* do transfer (!poll) wakeup if non-zero */
 	wait_queue_head_t sleep;	/* poll sleep */
 	wait_queue_head_t tsleep;	/* transfer sleep */
 	struct fasync_struct *fasync;
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index e9d98be190c..bcf95d3ff5c 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -287,8 +287,11 @@ int snd_pcm_update_state(struct snd_pcm_substream *substream,
 			return -EPIPE;
 		}
 	}
-	if (avail >= runtime->control->avail_min)
-		wake_up(runtime->twake ? &runtime->tsleep : &runtime->sleep);
+	if (runtime->twake) {
+		if (avail >= runtime->twake)
+			wake_up(&runtime->tsleep);
+	} else if (avail >= runtime->control->avail_min)
+		wake_up(&runtime->sleep);
 	return 0;
 }
 
@@ -1707,7 +1710,7 @@ EXPORT_SYMBOL(snd_pcm_period_elapsed);
  * The available space is stored on availp.  When err = 0 and avail = 0
  * on the capture stream, it indicates the stream is in DRAINING state.
  */
-static int wait_for_avail_min(struct snd_pcm_substream *substream,
+static int wait_for_avail(struct snd_pcm_substream *substream,
 			      snd_pcm_uframes_t *availp)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
@@ -1757,7 +1760,7 @@ static int wait_for_avail_min(struct snd_pcm_substream *substream,
 			avail = snd_pcm_playback_avail(runtime);
 		else
 			avail = snd_pcm_capture_avail(runtime);
-		if (avail >= runtime->control->avail_min)
+		if (avail >= runtime->twake)
 			break;
 	}
  _endloop:
@@ -1820,7 +1823,7 @@ static snd_pcm_sframes_t snd_pcm_lib_write1(struct snd_pcm_substream *substream,
 		goto _end_unlock;
 	}
 
-	runtime->twake = 1;
+	runtime->twake = runtime->control->avail_min ? : 1;
 	while (size > 0) {
 		snd_pcm_uframes_t frames, appl_ptr, appl_ofs;
 		snd_pcm_uframes_t avail;
@@ -1833,7 +1836,9 @@ static snd_pcm_sframes_t snd_pcm_lib_write1(struct snd_pcm_substream *substream,
 				err = -EAGAIN;
 				goto _end_unlock;
 			}
-			err = wait_for_avail_min(substream, &avail);
+			runtime->twake = min_t(snd_pcm_uframes_t, size,
+					runtime->control->avail_min ? : 1);
+			err = wait_for_avail(substream, &avail);
 			if (err < 0)
 				goto _end_unlock;
 		}
@@ -2042,7 +2047,7 @@ static snd_pcm_sframes_t snd_pcm_lib_read1(struct snd_pcm_substream *substream,
 		goto _end_unlock;
 	}
 
-	runtime->twake = 1;
+	runtime->twake = runtime->control->avail_min ? : 1;
 	while (size > 0) {
 		snd_pcm_uframes_t frames, appl_ptr, appl_ofs;
 		snd_pcm_uframes_t avail;
@@ -2060,7 +2065,9 @@ static snd_pcm_sframes_t snd_pcm_lib_read1(struct snd_pcm_substream *substream,
 				err = -EAGAIN;
 				goto _end_unlock;
 			}
-			err = wait_for_avail_min(substream, &avail);
+			runtime->twake = min_t(snd_pcm_uframes_t, size,
+					runtime->control->avail_min ? : 1);
+			err = wait_for_avail(substream, &avail);
 			if (err < 0)
 				goto _end_unlock;
 			if (!avail)
-- 
cgit v1.2.3-70-g09d2


From 7804302b14032d357d889e4a23e463eb6a6c5136 Mon Sep 17 00:00:00 2001
From: Anatolij Gustschin <agust@denx.de>
Date: Mon, 28 Jun 2010 01:25:19 -0700
Subject: Input: ads7846 - allow specifying irq trigger type in platform data

On some platforms, for example with GPIO interrupts on mpc5121,
it is not possible to configure falling edge interrupts.

Specifying irq trigger type in platform data structure
allows using ads7846 driver on such platforms.

Signed-off-by: Anatolij Gustschin <agust@denx.de>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/ads7846.c | 5 ++++-
 include/linux/spi/ads7846.h         | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index a9fdf55c023..69210cb56c5 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -1174,7 +1174,10 @@ static int __devinit ads7846_probe(struct spi_device *spi)
 		goto err_put_regulator;
 	}
 
-	if (request_irq(spi->irq, ads7846_irq, IRQF_TRIGGER_FALLING,
+	if (!pdata->irq_flags)
+		pdata->irq_flags = IRQF_TRIGGER_FALLING;
+
+	if (request_irq(spi->irq, ads7846_irq, pdata->irq_flags,
 			spi->dev.driver->name, ts)) {
 		dev_info(&spi->dev,
 			"trying pin change workaround on irq %d\n", spi->irq);
diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h
index b4ae570d3c9..95d36bfb34b 100644
--- a/include/linux/spi/ads7846.h
+++ b/include/linux/spi/ads7846.h
@@ -54,5 +54,6 @@ struct ads7846_platform_data {
 	void	(*filter_cleanup)(void *filter_data);
 	void	(*wait_for_sync)(void);
 	bool	wakeup;
+	unsigned long irq_flags;
 };
 
-- 
cgit v1.2.3-70-g09d2


From 7eb9282cd0efac08b8377cbd5037ba297c77e3f7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 28 Jun 2010 14:16:08 +0200
Subject: netfilter: ipt_LOG/ip6t_LOG: add option to print decoded MAC header

The LOG targets print the entire MAC header as one long string, which is not
very readable:

IN=eth0 OUT= MAC=00:15:f2:24:91:f8:00:1b:24:dc:61:e6:08:00 ...

Add an option to decode known header formats (currently just ARPHRD_ETHER devices)
into their individual fields:

IN=eth0 OUT= MACSRC=00:1b:24:dc:61:e6 MACDST=00:15:f2:24:91:f8 MACPROTO=0800 ...
IN=eth0 OUT= MACSRC=00:1b:24:dc:61:e6 MACDST=00:15:f2:24:91:f8 MACPROTO=86dd ...

The option needs to be explicitly enabled by userspace to avoid breaking
existing parsers.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_ipv4/ipt_LOG.h  |  3 +-
 include/linux/netfilter_ipv6/ip6t_LOG.h |  3 +-
 net/ipv4/netfilter/ipt_LOG.c            | 54 ++++++++++++++++------
 net/ipv6/netfilter/ip6t_LOG.c           | 81 +++++++++++++++++++++------------
 4 files changed, 97 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h
index 90fa6525ef9..dcdbadf9fd4 100644
--- a/include/linux/netfilter_ipv4/ipt_LOG.h
+++ b/include/linux/netfilter_ipv4/ipt_LOG.h
@@ -7,7 +7,8 @@
 #define IPT_LOG_IPOPT		0x04	/* Log IP options */
 #define IPT_LOG_UID		0x08	/* Log UID owning local socket */
 #define IPT_LOG_NFLOG		0x10	/* Unsupported, don't reuse */
-#define IPT_LOG_MASK		0x1f
+#define IPT_LOG_MACDECODE	0x20	/* Decode MAC header */
+#define IPT_LOG_MASK		0x2f
 
 struct ipt_log_info {
 	unsigned char level;
diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h
index 0d0119b0458..9dd5579e02e 100644
--- a/include/linux/netfilter_ipv6/ip6t_LOG.h
+++ b/include/linux/netfilter_ipv6/ip6t_LOG.h
@@ -7,7 +7,8 @@
 #define IP6T_LOG_IPOPT		0x04	/* Log IP options */
 #define IP6T_LOG_UID		0x08	/* Log UID owning local socket */
 #define IP6T_LOG_NFLOG		0x10	/* Unsupported, don't use */
-#define IP6T_LOG_MASK		0x1f
+#define IP6T_LOG_MACDECODE	0x20	/* Decode MAC header */
+#define IP6T_LOG_MASK		0x2f
 
 struct ip6t_log_info {
 	unsigned char level;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 0a452a54adb..915fc17d7ce 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/skbuff.h>
+#include <linux/if_arp.h>
 #include <linux/ip.h>
 #include <net/icmp.h>
 #include <net/udp.h>
@@ -363,6 +364,42 @@ static void dump_packet(const struct nf_loginfo *info,
 	/* maxlen = 230+   91  + 230 + 252 = 803 */
 }
 
+static void dump_mac_header(const struct nf_loginfo *info,
+			    const struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	unsigned int logflags = 0;
+
+	if (info->type == NF_LOG_TYPE_LOG)
+		logflags = info->u.log.logflags;
+
+	if (!(logflags & IPT_LOG_MACDECODE))
+		goto fallback;
+
+	switch (dev->type) {
+	case ARPHRD_ETHER:
+		printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
+		       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+		       ntohs(eth_hdr(skb)->h_proto));
+		return;
+	default:
+		break;
+	}
+
+fallback:
+	printk("MAC=");
+	if (dev->hard_header_len &&
+	    skb->mac_header != skb->network_header) {
+		const unsigned char *p = skb_mac_header(skb);
+		unsigned int i;
+
+		printk("%02x", *p++);
+		for (i = 1; i < dev->hard_header_len; i++, p++)
+			printk(":%02x", *p);
+	}
+	printk(" ");
+}
+
 static struct nf_loginfo default_loginfo = {
 	.type	= NF_LOG_TYPE_LOG,
 	.u = {
@@ -404,20 +441,9 @@ ipt_log_packet(u_int8_t pf,
 	}
 #endif
 
-	if (in && !out) {
-		/* MAC logging for input chain only. */
-		printk("MAC=");
-		if (skb->dev && skb->dev->hard_header_len &&
-		    skb->mac_header != skb->network_header) {
-			int i;
-			const unsigned char *p = skb_mac_header(skb);
-
-			printk("%02x", *p++);
-			for (i = 1; i < skb->dev->hard_header_len; i++, p++)
-				printk(":%02x", *p);
-		}
-		printk(" ");
-	}
+	/* MAC logging for input path only. */
+	if (in && !out)
+		dump_mac_header(loginfo, skb);
 
 	dump_packet(loginfo, skb, 0);
 	printk("\n");
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 4c7ddac7c62..0a07ae7b933 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -373,6 +373,56 @@ static void dump_packet(const struct nf_loginfo *info,
 		printk("MARK=0x%x ", skb->mark);
 }
 
+static void dump_mac_header(const struct nf_loginfo *info,
+			    const struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	unsigned int logflags = 0;
+
+	if (info->type == NF_LOG_TYPE_LOG)
+		logflags = info->u.log.logflags;
+
+	if (!(logflags & IP6T_LOG_MACDECODE))
+		goto fallback;
+
+	switch (dev->type) {
+	case ARPHRD_ETHER:
+		printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
+		       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+		       ntohs(eth_hdr(skb)->h_proto));
+		return;
+	default:
+		break;
+	}
+
+fallback:
+	printk("MAC=");
+	if (dev->hard_header_len &&
+	    skb->mac_header != skb->network_header) {
+		const unsigned char *p = skb_mac_header(skb);
+		unsigned int len = dev->hard_header_len;
+		unsigned int i;
+
+		if (dev->type == ARPHRD_SIT &&
+		    (p -= ETH_HLEN) < skb->head)
+			p = NULL;
+
+		if (p != NULL) {
+			printk("%02x", *p++);
+			for (i = 1; i < len; i++)
+				printk(":%02x", p[i]);
+		}
+		printk(" ");
+
+		if (dev->type == ARPHRD_SIT) {
+			const struct iphdr *iph =
+				(struct iphdr *)skb_mac_header(skb);
+			printk("TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr);
+		}
+	} else
+		printk(" ");
+}
+
 static struct nf_loginfo default_loginfo = {
 	.type	= NF_LOG_TYPE_LOG,
 	.u = {
@@ -400,35 +450,10 @@ ip6t_log_packet(u_int8_t pf,
 		prefix,
 		in ? in->name : "",
 		out ? out->name : "");
-	if (in && !out) {
-		unsigned int len;
-		/* MAC logging for input chain only. */
-		printk("MAC=");
-		if (skb->dev && (len = skb->dev->hard_header_len) &&
-		    skb->mac_header != skb->network_header) {
-			const unsigned char *p = skb_mac_header(skb);
-			int i;
-
-			if (skb->dev->type == ARPHRD_SIT &&
-			    (p -= ETH_HLEN) < skb->head)
-				p = NULL;
-
-			if (p != NULL) {
-				printk("%02x", *p++);
-				for (i = 1; i < len; i++)
-					printk(":%02x", p[i]);
-			}
-			printk(" ");
 
-			if (skb->dev->type == ARPHRD_SIT) {
-				const struct iphdr *iph =
-					(struct iphdr *)skb_mac_header(skb);
-				printk("TUNNEL=%pI4->%pI4 ",
-				       &iph->saddr, &iph->daddr);
-			}
-		} else
-			printk(" ");
-	}
+	/* MAC logging for input path only. */
+	if (in && !out)
+		dump_mac_header(loginfo, skb);
 
 	dump_packet(loginfo, skb, skb_network_offset(skb), 1);
 	printk("\n");
-- 
cgit v1.2.3-70-g09d2


From b505ff5e7291cca6379549297e3852ce3622d550 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Fri, 18 Jun 2010 11:09:59 -0600
Subject: of: kill struct of_device

Now that the device tree node pointer has been moved out of struct
of_device and into the common struct device, there isn't anything
unique about of_device anymore.  In fact, there isn't much need
for a separate of_bus when all busses have access to OF style
probing.

arch/powerpc and arch/microblaze are moving away from using the of_bus
and using the regular platform bus instead for mmio devices.  This
patch makes of_device the same as platform_device as a stepping stone
in migrating of_platform_drivers over to the platform bus.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/microblaze/include/asm/of_device.h | 10 ----------
 arch/powerpc/include/asm/of_device.h    | 11 -----------
 arch/powerpc/include/asm/smu.h          |  4 ++--
 arch/sparc/include/asm/device.h         |  4 ++--
 arch/sparc/include/asm/of_device.h      | 14 --------------
 drivers/net/niu.h                       |  4 ++--
 include/linux/of_device.h               | 17 +++++++++++++++++
 7 files changed, 23 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/arch/microblaze/include/asm/of_device.h b/arch/microblaze/include/asm/of_device.h
index 73cb9804098..0a5f3f914b4 100644
--- a/arch/microblaze/include/asm/of_device.h
+++ b/arch/microblaze/include/asm/of_device.h
@@ -15,16 +15,6 @@
 #include <linux/device.h>
 #include <linux/of.h>
 
-/*
- * The of_device is a kind of "base class" that is a superset of
- * struct device for use by devices attached to an OF node and
- * probed using OF properties.
- */
-struct of_device {
-	struct device		dev; /* Generic device interface */
-	struct pdev_archdata	archdata;
-};
-
 extern ssize_t of_device_get_modalias(struct of_device *ofdev,
 					char *str, ssize_t len);
 
diff --git a/arch/powerpc/include/asm/of_device.h b/arch/powerpc/include/asm/of_device.h
index 444e97e2982..cb36632f953 100644
--- a/arch/powerpc/include/asm/of_device.h
+++ b/arch/powerpc/include/asm/of_device.h
@@ -5,17 +5,6 @@
 #include <linux/device.h>
 #include <linux/of.h>
 
-/*
- * The of_device is a kind of "base class" that is a superset of
- * struct device for use by devices attached to an OF node and
- * probed using OF properties.
- */
-struct of_device
-{
-	struct device		dev;		/* Generic device interface */
-	struct pdev_archdata	archdata;
-};
-
 extern struct of_device *of_device_alloc(struct device_node *np,
 					 const char *bus_id,
 					 struct device *parent);
diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h
index 7ae2753da56..e3bdada8c54 100644
--- a/arch/powerpc/include/asm/smu.h
+++ b/arch/powerpc/include/asm/smu.h
@@ -457,8 +457,8 @@ extern void smu_poll(void);
  */
 extern int smu_init(void);
 extern int smu_present(void);
-struct of_device;
-extern struct of_device *smu_get_ofdev(void);
+struct platform_device;
+extern struct platform_device *smu_get_ofdev(void);
 
 
 /*
diff --git a/arch/sparc/include/asm/device.h b/arch/sparc/include/asm/device.h
index f9740d065fe..fb220e48203 100644
--- a/arch/sparc/include/asm/device.h
+++ b/arch/sparc/include/asm/device.h
@@ -9,13 +9,13 @@
 #include <asm/openprom.h>
 
 struct device_node;
-struct of_device;
+struct platform_device;
 
 struct dev_archdata {
 	void			*iommu;
 	void			*stc;
 	void			*host_controller;
-	struct of_device	*op;
+	struct platform_device	*op;
 	int			numa_node;
 };
 
diff --git a/arch/sparc/include/asm/of_device.h b/arch/sparc/include/asm/of_device.h
index 6d1844a547b..22b9828fe69 100644
--- a/arch/sparc/include/asm/of_device.h
+++ b/arch/sparc/include/asm/of_device.h
@@ -7,20 +7,6 @@
 #include <linux/mod_devicetable.h>
 #include <asm/openprom.h>
 
-/*
- * The of_device is a kind of "base class" that is a superset of
- * struct device for use by devices attached to an OF node and
- * probed using OF properties.
- */
-struct of_device
-{
-	struct device			dev;
-	u32				num_resources;
-	struct resource			*resource;
-
-	struct pdev_archdata		archdata;
-};
-
 extern void __iomem *of_ioremap(struct resource *res, unsigned long offset, unsigned long size, char *name);
 extern void of_iounmap(struct resource *res, void __iomem *base, unsigned long size);
 
diff --git a/drivers/net/niu.h b/drivers/net/niu.h
index d6715465f35..a41fa8ebe05 100644
--- a/drivers/net/niu.h
+++ b/drivers/net/niu.h
@@ -3236,7 +3236,7 @@ struct niu_phy_ops {
 	int (*link_status)(struct niu *np, int *);
 };
 
-struct of_device;
+struct platform_device;
 struct niu {
 	void __iomem			*regs;
 	struct net_device		*dev;
@@ -3297,7 +3297,7 @@ struct niu {
 	struct niu_vpd			vpd;
 	u32				eeprom_len;
 
-	struct of_device		*op;
+	struct platform_device		*op;
 	void __iomem			*vir_regs_1;
 	void __iomem			*vir_regs_2;
 };
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index 11651facc5f..a3ae5900fc5 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -3,9 +3,26 @@
 
 #ifdef CONFIG_OF_DEVICE
 #include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/of.h>
 #include <linux/mod_devicetable.h>
 
+
+/*
+ * The of_device *was* a kind of "base class" that was a superset of
+ * struct device for use by devices attached to an OF node and probed
+ * using OF properties.  However, the important bit of OF-style
+ * probing, namely the device node pointer, has been moved into the
+ * common struct device when CONFIG_OF is set to make OF-style probing
+ * available to all bus types.  So now, just make of_device and
+ * platform_device equivalent so that current of_platform bus users
+ * can be transparently migrated over to using the platform bus.
+ *
+ * This line will go away once all references to of_device are removed
+ * from the kernel.
+ */
+#define of_device platform_device
+
 #include <asm/of_device.h>
 
 #define	to_of_device(d) container_of(d, struct of_device, dev)
-- 
cgit v1.2.3-70-g09d2


From e3873444990dd6f8a095d1f72b5ad45192f8c506 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Fri, 18 Jun 2010 11:09:59 -0600
Subject: of/irq: Move irq_of_parse_and_map() to common code

Merge common code between PowerPC and Microblaze.  SPARC implements
irq_of_parse_and_map(), but the implementation is different, so it
does not use this code.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michal Simek <monstr@monstr.eu>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Jeremy Kerr <jeremy.kerr@canonical.com>
---
 arch/microblaze/include/asm/irq.h  | 24 --------------------
 arch/microblaze/include/asm/prom.h | 26 +---------------------
 arch/microblaze/kernel/irq.c       | 14 ++----------
 arch/powerpc/include/asm/irq.h     | 28 ------------------------
 arch/powerpc/include/asm/prom.h    | 27 +----------------------
 arch/powerpc/kernel/irq.c          | 14 ++----------
 arch/sparc/include/asm/prom.h      |  1 -
 drivers/of/Kconfig                 |  4 ++++
 drivers/of/Makefile                |  1 +
 drivers/of/irq.c                   | 45 ++++++++++++++++++++++++++++++++++++++
 drivers/of/of_mdio.c               |  1 +
 drivers/of/of_spi.c                |  1 +
 include/linux/of_irq.h             | 41 ++++++++++++++++++++++++++++++++++
 13 files changed, 99 insertions(+), 128 deletions(-)
 create mode 100644 drivers/of/irq.c
 create mode 100644 include/linux/of_irq.h

(limited to 'include')

diff --git a/arch/microblaze/include/asm/irq.h b/arch/microblaze/include/asm/irq.h
index 31a35c33df6..ec5583d6111 100644
--- a/arch/microblaze/include/asm/irq.h
+++ b/arch/microblaze/include/asm/irq.h
@@ -27,17 +27,6 @@ extern unsigned int nr_irq;
 struct pt_regs;
 extern void do_IRQ(struct pt_regs *regs);
 
-/**
- * irq_of_parse_and_map - Parse and Map an interrupt into linux virq space
- * @device: Device node of the device whose interrupt is to be mapped
- * @index: Index of the interrupt to map
- *
- * This function is a wrapper that chains of_irq_map_one() and
- * irq_create_of_mapping() to make things easier to callers
- */
-struct device_node;
-extern unsigned int irq_of_parse_and_map(struct device_node *dev, int index);
-
 /** FIXME - not implement
  * irq_dispose_mapping - Unmap an interrupt
  * @virq: linux virq number of the interrupt to unmap
@@ -62,17 +51,4 @@ struct irq_host;
 extern unsigned int irq_create_mapping(struct irq_host *host,
 					irq_hw_number_t hwirq);
 
-/**
- * irq_create_of_mapping - Map a hardware interrupt into linux virq space
- * @controller: Device node of the interrupt controller
- * @inspec: Interrupt specifier from the device-tree
- * @intsize: Size of the interrupt specifier from the device-tree
- *
- * This function is identical to irq_create_mapping except that it takes
- * as input informations straight from the device-tree (typically the results
- * of the of_irq_map_*() functions.
- */
-extern unsigned int irq_create_of_mapping(struct device_node *controller,
-					u32 *intspec, unsigned int intsize);
-
 #endif /* _ASM_MICROBLAZE_IRQ_H */
diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index e7d67a329bd..e9fb2eb0035 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -20,6 +20,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
+#include <linux/of_irq.h>
 #include <linux/of_fdt.h>
 #include <linux/proc_fs.h>
 #include <linux/platform_device.h>
@@ -92,18 +93,6 @@ extern const void *of_get_mac_address(struct device_node *np);
  * OF interrupt mapping
  */
 
-/* This structure is returned when an interrupt is mapped. The controller
- * field needs to be put() after use
- */
-
-#define OF_MAX_IRQ_SPEC		4 /* We handle specifiers of at most 4 cells */
-
-struct of_irq {
-	struct device_node *controller; /* Interrupt controller node */
-	u32 size; /* Specifier size */
-	u32 specifier[OF_MAX_IRQ_SPEC]; /* Specifier copy */
-};
-
 /**
  * of_irq_map_init - Initialize the irq remapper
  * @flags:	flags defining workarounds to enable
@@ -138,19 +127,6 @@ extern int of_irq_map_raw(struct device_node *parent, const u32 *intspec,
 			u32 ointsize, const u32 *addr,
 			struct of_irq *out_irq);
 
-/**
- * of_irq_map_one - Resolve an interrupt for a device
- * @device:	the device whose interrupt is to be resolved
- * @index:	index of the interrupt to resolve
- * @out_irq:	structure of_irq filled by this function
- *
- * This function resolves an interrupt, walking the tree, for a given
- * device-tree node. It's the high level pendant to of_irq_map_raw().
- * It also implements the workarounds for OldWolrd Macs.
- */
-extern int of_irq_map_one(struct device_node *device, int index,
-			struct of_irq *out_irq);
-
 /**
  * of_irq_map_pci - Resolve the interrupt for a PCI device
  * @pdev:	the device whose interrupt is to be resolved
diff --git a/arch/microblaze/kernel/irq.c b/arch/microblaze/kernel/irq.c
index 8f120aca123..dd32b09b4a3 100644
--- a/arch/microblaze/kernel/irq.c
+++ b/arch/microblaze/kernel/irq.c
@@ -17,20 +17,10 @@
 #include <linux/seq_file.h>
 #include <linux/kernel_stat.h>
 #include <linux/irq.h>
+#include <linux/of_irq.h>
 
 #include <asm/prom.h>
 
-unsigned int irq_of_parse_and_map(struct device_node *dev, int index)
-{
-	struct of_irq oirq;
-
-	if (of_irq_map_one(dev, index, &oirq))
-		return NO_IRQ;
-
-	return oirq.specifier[0];
-}
-EXPORT_SYMBOL_GPL(irq_of_parse_and_map);
-
 static u32 concurrent_irq;
 
 void __irq_entry do_IRQ(struct pt_regs *regs)
@@ -104,7 +94,7 @@ unsigned int irq_create_mapping(struct irq_host *host, irq_hw_number_t hwirq)
 EXPORT_SYMBOL_GPL(irq_create_mapping);
 
 unsigned int irq_create_of_mapping(struct device_node *controller,
-					u32 *intspec, unsigned int intsize)
+				   const u32 *intspec, unsigned int intsize)
 {
 	return intspec[0];
 }
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index e054baef184..4e3051595b2 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -300,34 +300,6 @@ extern unsigned int irq_alloc_virt(struct irq_host *host,
  */
 extern void irq_free_virt(unsigned int virq, unsigned int count);
 
-
-/* -- OF helpers -- */
-
-/**
- * irq_create_of_mapping - Map a hardware interrupt into linux virq space
- * @controller: Device node of the interrupt controller
- * @inspec: Interrupt specifier from the device-tree
- * @intsize: Size of the interrupt specifier from the device-tree
- *
- * This function is identical to irq_create_mapping except that it takes
- * as input informations straight from the device-tree (typically the results
- * of the of_irq_map_*() functions.
- */
-extern unsigned int irq_create_of_mapping(struct device_node *controller,
-					  const u32 *intspec, unsigned int intsize);
-
-/**
- * irq_of_parse_and_map - Parse and Map an interrupt into linux virq space
- * @device: Device node of the device whose interrupt is to be mapped
- * @index: Index of the interrupt to map
- *
- * This function is a wrapper that chains of_irq_map_one() and
- * irq_create_of_mapping() to make things easier to callers
- */
-extern unsigned int irq_of_parse_and_map(struct device_node *dev, int index);
-
-/* -- End OF helpers -- */
-
 /**
  * irq_early_init - Init irq remapping subsystem
  */
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index ddd408a93b5..47d41b67c94 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -18,6 +18,7 @@
  */
 #include <linux/types.h>
 #include <linux/of_fdt.h>
+#include <linux/of_irq.h>
 #include <linux/proc_fs.h>
 #include <linux/platform_device.h>
 #include <asm/irq.h>
@@ -108,18 +109,6 @@ extern const void *of_get_mac_address(struct device_node *np);
  * OF interrupt mapping
  */
 
-/* This structure is returned when an interrupt is mapped. The controller
- * field needs to be put() after use
- */
-
-#define OF_MAX_IRQ_SPEC		 4 /* We handle specifiers of at most 4 cells */
-
-struct of_irq {
-	struct device_node *controller;	/* Interrupt controller node */
-	u32 size;			/* Specifier size */
-	u32 specifier[OF_MAX_IRQ_SPEC];	/* Specifier copy */
-};
-
 /**
  * of_irq_map_init - Initialize the irq remapper
  * @flags:	flags defining workarounds to enable
@@ -154,20 +143,6 @@ extern int of_irq_map_raw(struct device_node *parent, const u32 *intspec,
 			  u32 ointsize, const u32 *addr,
 			  struct of_irq *out_irq);
 
-
-/**
- * of_irq_map_one - Resolve an interrupt for a device
- * @device:	the device whose interrupt is to be resolved
- * @index:     	index of the interrupt to resolve
- * @out_irq:	structure of_irq filled by this function
- *
- * This function resolves an interrupt, walking the tree, for a given
- * device-tree node. It's the high level pendant to of_irq_map_raw().
- * It also implements the workarounds for OldWolrd Macs.
- */
-extern int of_irq_map_one(struct device_node *device, int index,
-			  struct of_irq *out_irq);
-
 /**
  * of_irq_map_pci - Resolve the interrupt for a PCI device
  * @pdev:	the device whose interrupt is to be resolved
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 30817d9b20c..2676ef288bf 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -53,6 +53,8 @@
 #include <linux/bootmem.h>
 #include <linux/pci.h>
 #include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -813,18 +815,6 @@ unsigned int irq_create_of_mapping(struct device_node *controller,
 }
 EXPORT_SYMBOL_GPL(irq_create_of_mapping);
 
-unsigned int irq_of_parse_and_map(struct device_node *dev, int index)
-{
-	struct of_irq oirq;
-
-	if (of_irq_map_one(dev, index, &oirq))
-		return NO_IRQ;
-
-	return irq_create_of_mapping(oirq.controller, oirq.specifier,
-				     oirq.size);
-}
-EXPORT_SYMBOL_GPL(irq_of_parse_and_map);
-
 void irq_dispose_mapping(unsigned int virq)
 {
 	struct irq_host *host;
diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h
index f845828ca4c..ac695742df8 100644
--- a/arch/sparc/include/asm/prom.h
+++ b/arch/sparc/include/asm/prom.h
@@ -56,7 +56,6 @@ extern void of_fill_in_cpu_data(void);
  * register them in the of_device objects, whereas powerpc computes them
  * on request.
  */
-extern unsigned int irq_of_parse_and_map(struct device_node *node, int index);
 static inline void irq_dispose_mapping(unsigned int virq)
 {
 }
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 7cecc8fea9b..b87495efa16 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -6,6 +6,10 @@ config OF_DYNAMIC
 	def_bool y
 	depends on OF && PPC_OF
 
+config OF_IRQ
+	def_bool y
+	depends on OF && !SPARC
+
 config OF_DEVICE
 	def_bool y
 	depends on OF && (SPARC || PPC_OF || MICROBLAZE)
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index f232cc98ce0..3631a5ea0b4 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -1,5 +1,6 @@
 obj-y = base.o
 obj-$(CONFIG_OF_FLATTREE) += fdt.o
+obj-$(CONFIG_OF_IRQ)    += irq.o
 obj-$(CONFIG_OF_DEVICE) += device.o platform.o
 obj-$(CONFIG_OF_GPIO)   += gpio.o
 obj-$(CONFIG_OF_I2C)	+= of_i2c.o
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
new file mode 100644
index 00000000000..9b3397c2709
--- /dev/null
+++ b/drivers/of/irq.c
@@ -0,0 +1,45 @@
+/*
+ *  Derived from arch/i386/kernel/irq.c
+ *    Copyright (C) 1992 Linus Torvalds
+ *  Adapted from arch/i386 by Gary Thomas
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *  Updated and modified by Cort Dougan <cort@fsmlabs.com>
+ *    Copyright (C) 1996-2001 Cort Dougan
+ *  Adapted for Power Macintosh by Paul Mackerras
+ *    Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This file contains the code used to map IRQ descriptions in the
+ * device tree to actual irq numbers on an interrupt controller
+ * driver.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/string.h>
+
+/**
+ * irq_of_parse_and_map - Parse and map an interrupt into linux virq space
+ * @dev: Device node of the device whose interrupt is to be mapped
+ * @index: Index of the interrupt to map
+ *
+ * This function is a wrapper that chains of_irq_map_one() and
+ * irq_create_of_mapping() to make things easier for callers
+ */
+unsigned int irq_of_parse_and_map(struct device_node *dev, int index)
+{
+	struct of_irq oirq;
+
+	if (of_irq_map_one(dev, index, &oirq))
+		return NO_IRQ;
+
+	return irq_create_of_mapping(oirq.controller, oirq.specifier,
+				     oirq.size);
+}
+EXPORT_SYMBOL_GPL(irq_of_parse_and_map);
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 42a6715f8e8..1fce00eb421 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -15,6 +15,7 @@
 #include <linux/err.h>
 #include <linux/phy.h>
 #include <linux/of.h>
+#include <linux/of_irq.h>
 #include <linux/of_mdio.h>
 #include <linux/module.h>
 
diff --git a/drivers/of/of_spi.c b/drivers/of/of_spi.c
index 5fed7e3c7da..d504f1d1324 100644
--- a/drivers/of/of_spi.c
+++ b/drivers/of/of_spi.c
@@ -9,6 +9,7 @@
 #include <linux/of.h>
 #include <linux/device.h>
 #include <linux/spi/spi.h>
+#include <linux/of_irq.h>
 #include <linux/of_spi.h>
 
 /**
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
new file mode 100644
index 00000000000..0e37c05b7dd
--- /dev/null
+++ b/include/linux/of_irq.h
@@ -0,0 +1,41 @@
+#ifndef __OF_IRQ_H
+#define __OF_IRQ_H
+
+#if defined(CONFIG_OF)
+struct of_irq;
+#include <linux/types.h>
+#include <linux/of.h>
+
+/*
+ * irq_of_parse_and_map() is used by all OF enabled platforms; but SPARC
+ * implements it differently.  However, the prototype is the same for all,
+ * so declare it here regardless of the CONFIG_OF_IRQ setting.
+ */
+extern unsigned int irq_of_parse_and_map(struct device_node *node, int index);
+
+#if defined(CONFIG_OF_IRQ)
+/**
+ * of_irq - container for device_node/irq_specifier pair for an irq controller
+ * @controller: pointer to interrupt controller device tree node
+ * @size: size of interrupt specifier
+ * @specifier: array of cells @size long specifying the specific interrupt
+ *
+ * This structure is returned when an interrupt is mapped. The controller
+ * field needs to be put() after use
+ */
+#define OF_MAX_IRQ_SPEC		4 /* We handle specifiers of at most 4 cells */
+struct of_irq {
+	struct device_node *controller; /* Interrupt controller node */
+	u32 size; /* Specifier size */
+	u32 specifier[OF_MAX_IRQ_SPEC]; /* Specifier copy */
+};
+
+extern int of_irq_map_one(struct device_node *device, int index,
+			  struct of_irq *out_irq);
+extern unsigned int irq_create_of_mapping(struct device_node *controller,
+					  const u32 *intspec,
+					  unsigned int intsize);
+
+#endif /* CONFIG_OF_IRQ */
+#endif /* CONFIG_OF */
+#endif /* __OF_IRQ_H */
-- 
cgit v1.2.3-70-g09d2


From c9642c49aae1272d7c24008a40ae614470b957a6 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 24 May 2010 16:22:30 +0800
Subject: tracing: Use a global field list for all syscall exit events

All syscall exit events have the same fields.

The kernel size drops 2.5K:

   text    data     bss     dec     hex filename
7018612 2034376 7251132 16304120         f8c7f8 vmlinux.o.orig
7018612 2031888 7251132 16301632         f8be40 vmlinux.o

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4BFA3746.8070100@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/syscalls.h      | 2 --
 include/trace/syscall.h       | 1 -
 kernel/trace/trace_syscalls.c | 7 ++++---
 3 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 7f614ce274a..7994bd44eb5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -165,7 +165,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		.enter_event	= &event_enter_##sname,		\
 		.exit_event	= &event_exit_##sname,		\
 		.enter_fields	= LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \
-		.exit_fields	= LIST_HEAD_INIT(__syscall_meta_##sname.exit_fields), \
 	};
 
 #define SYSCALL_DEFINE0(sname)					\
@@ -180,7 +179,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		.enter_event	= &event_enter__##sname,	\
 		.exit_event	= &event_exit__##sname,		\
 		.enter_fields	= LIST_HEAD_INIT(__syscall_meta__##sname.enter_fields), \
-		.exit_fields	= LIST_HEAD_INIT(__syscall_meta__##sname.exit_fields), \
 	};							\
 	asmlinkage long sys_##sname(void)
 #else
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 257e08960d7..31966a4fb8c 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -26,7 +26,6 @@ struct syscall_metadata {
 	const char	**types;
 	const char	**args;
 	struct list_head enter_fields;
-	struct list_head exit_fields;
 
 	struct ftrace_event_call *enter_event;
 	struct ftrace_event_call *exit_event;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 34e35804304..bac752f0cfb 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -23,6 +23,9 @@ static int syscall_exit_register(struct ftrace_event_call *event,
 static int syscall_enter_define_fields(struct ftrace_event_call *call);
 static int syscall_exit_define_fields(struct ftrace_event_call *call);
 
+/* All syscall exit events have the same fields */
+static LIST_HEAD(syscall_exit_fields);
+
 static struct list_head *
 syscall_get_enter_fields(struct ftrace_event_call *call)
 {
@@ -34,9 +37,7 @@ syscall_get_enter_fields(struct ftrace_event_call *call)
 static struct list_head *
 syscall_get_exit_fields(struct ftrace_event_call *call)
 {
-	struct syscall_metadata *entry = call->data;
-
-	return &entry->exit_fields;
+	return &syscall_exit_fields;
 }
 
 struct trace_event_functions enter_syscall_print_funcs = {
-- 
cgit v1.2.3-70-g09d2


From 363d0f6490f319d0dd69b7ec7503c5f6cbab36d9 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 24 May 2010 16:23:15 +0800
Subject: tracing: Convert some timer events to DEFINE_EVENT

Use DECLARE_EVENT_CLASS, and save ~2.3K:

   text    data     bss     dec     hex filename
7018823 2031888 7251132 16301843         f8bf13 vmlinux.o.orig
7016727 2031696 7251132 16299555         f8b623 vmlinux.o

5 events are converted:

  timer_class:   timer_init, timer_expire_exit, timer_cancel
  hrtimer_class: hrtimer_init, hrtimer_cancel

No change in functionality.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4BFA3773.3060200@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/events/timer.h | 80 ++++++++++++++++++--------------------------
 1 file changed, 32 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 9496b965d62..c624126a9c8 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -8,11 +8,7 @@
 #include <linux/hrtimer.h>
 #include <linux/timer.h>
 
-/**
- * timer_init - called when the timer is initialized
- * @timer:	pointer to struct timer_list
- */
-TRACE_EVENT(timer_init,
+DECLARE_EVENT_CLASS(timer_class,
 
 	TP_PROTO(struct timer_list *timer),
 
@@ -29,6 +25,17 @@ TRACE_EVENT(timer_init,
 	TP_printk("timer=%p", __entry->timer)
 );
 
+/**
+ * timer_init - called when the timer is initialized
+ * @timer:	pointer to struct timer_list
+ */
+DEFINE_EVENT(timer_class, timer_init,
+
+	TP_PROTO(struct timer_list *timer),
+
+	TP_ARGS(timer)
+);
+
 /**
  * timer_start - called when the timer is started
  * @timer:	pointer to struct timer_list
@@ -94,42 +101,22 @@ TRACE_EVENT(timer_expire_entry,
  * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might
  * be invalid. We solely track the pointer.
  */
-TRACE_EVENT(timer_expire_exit,
+DEFINE_EVENT(timer_class, timer_expire_exit,
 
 	TP_PROTO(struct timer_list *timer),
 
-	TP_ARGS(timer),
-
-	TP_STRUCT__entry(
-		__field(void *,	timer	)
-	),
-
-	TP_fast_assign(
-		__entry->timer	= timer;
-	),
-
-	TP_printk("timer=%p", __entry->timer)
+	TP_ARGS(timer)
 );
 
 /**
  * timer_cancel - called when the timer is canceled
  * @timer:	pointer to struct timer_list
  */
-TRACE_EVENT(timer_cancel,
+DEFINE_EVENT(timer_class, timer_cancel,
 
 	TP_PROTO(struct timer_list *timer),
 
-	TP_ARGS(timer),
-
-	TP_STRUCT__entry(
-		__field( void *,	timer	)
-	),
-
-	TP_fast_assign(
-		__entry->timer	= timer;
-	),
-
-	TP_printk("timer=%p", __entry->timer)
+	TP_ARGS(timer)
 );
 
 /**
@@ -224,14 +211,7 @@ TRACE_EVENT(hrtimer_expire_entry,
 		  (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now }))
  );
 
-/**
- * hrtimer_expire_exit - called immediately after the hrtimer callback returns
- * @timer:	pointer to struct hrtimer
- *
- * When used in combination with the hrtimer_expire_entry tracepoint we can
- * determine the runtime of the callback function.
- */
-TRACE_EVENT(hrtimer_expire_exit,
+DECLARE_EVENT_CLASS(hrtimer_class,
 
 	TP_PROTO(struct hrtimer *hrtimer),
 
@@ -249,24 +229,28 @@ TRACE_EVENT(hrtimer_expire_exit,
 );
 
 /**
- * hrtimer_cancel - called when the hrtimer is canceled
- * @hrtimer:	pointer to struct hrtimer
+ * hrtimer_expire_exit - called immediately after the hrtimer callback returns
+ * @hrtimer:	pointer to struct hrtimer
+ *
+ * When used in combination with the hrtimer_expire_entry tracepoint we can
+ * determine the runtime of the callback function.
  */
-TRACE_EVENT(hrtimer_cancel,
+DEFINE_EVENT(hrtimer_class, hrtimer_expire_exit,
 
 	TP_PROTO(struct hrtimer *hrtimer),
 
-	TP_ARGS(hrtimer),
+	TP_ARGS(hrtimer)
+);
 
-	TP_STRUCT__entry(
-		__field( void *,	hrtimer	)
-	),
+/**
+ * hrtimer_cancel - called when the hrtimer is canceled
+ * @hrtimer:	pointer to struct hrtimer
+ */
+DEFINE_EVENT(hrtimer_class, hrtimer_cancel,
 
-	TP_fast_assign(
-		__entry->hrtimer	= hrtimer;
-	),
+	TP_PROTO(struct hrtimer *hrtimer),
 
-	TP_printk("hrtimer=%p", __entry->hrtimer)
+	TP_ARGS(hrtimer)
 );
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 210f766915207636acccba7bec42248bfe882998 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 24 May 2010 16:23:35 +0800
Subject: tracing: Convert more sched events to DEFINE_EVENT

Convert sched_wait_task to DEFINE_EVENT, and save ~1K:

   text    data     bss     dec     hex filename
 104595    9424    4992  119011   1d0e3 kernel/sched.o.orig
 103619    9344    4992  117955   1ccc3 kernel/sched.o

No change in functionality.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4BFA3787.2040800@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/events/sched.h | 32 +++++++-------------------------
 1 file changed, 7 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index b9e1dd6c620..9208c92aeab 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -49,31 +49,6 @@ TRACE_EVENT(sched_kthread_stop_ret,
 	TP_printk("ret=%d", __entry->ret)
 );
 
-/*
- * Tracepoint for waiting on task to unschedule:
- */
-TRACE_EVENT(sched_wait_task,
-
-	TP_PROTO(struct task_struct *p),
-
-	TP_ARGS(p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid	= p->pid;
-		__entry->prio	= p->prio;
-	),
-
-	TP_printk("comm=%s pid=%d prio=%d",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
 /*
  * Tracepoint for waking up a task:
  */
@@ -239,6 +214,13 @@ DEFINE_EVENT(sched_process_template, sched_process_exit,
 	     TP_PROTO(struct task_struct *p),
 	     TP_ARGS(p));
 
+/*
+ * Tracepoint for waiting on task to unschedule:
+ */
+DEFINE_EVENT(sched_process_template, sched_wait_task,
+	TP_PROTO(struct task_struct *p),
+	TP_ARGS(p));
+
 /*
  * Tracepoint for a waiting task:
  */
-- 
cgit v1.2.3-70-g09d2


From a1d0ce8213e9ddf4046ef5ba95c55762d075f541 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 8 Jun 2010 11:22:06 -0400
Subject: tracing: Use class->reg() for all registering of events

Because kprobes and syscalls need special processing to register
events, the class->reg() method was created to handle the differences.

But instead of creating a default ->reg for perf and ftrace events,
the code was scattered with:

	if (class->reg)
		class->reg();
	else
		default_reg();

This is messy and can also lead to bugs.

This patch cleans up this code and creates a default reg() entry for
the events allowing for the code to directly call the class->reg()
without the condition.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h    |  3 +++
 include/trace/ftrace.h          |  2 ++
 kernel/trace/trace_event_perf.c | 19 +++-----------
 kernel/trace/trace_events.c     | 55 +++++++++++++++++++++++++++--------------
 4 files changed, 44 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 0af31cd335d..01df7ca4ead 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -146,6 +146,9 @@ struct ftrace_event_class {
 	int			(*raw_init)(struct ftrace_event_call *);
 };
 
+extern int ftrace_event_reg(struct ftrace_event_call *event,
+			    enum trace_reg type);
+
 enum {
 	TRACE_EVENT_FL_ENABLED_BIT,
 	TRACE_EVENT_FL_FILTERED_BIT,
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index fc013a8201e..55c1fd1bbc3 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -439,6 +439,7 @@ static inline notrace int ftrace_get_offsets_##call(			\
  *	.fields			= LIST_HEAD_INIT(event_class_##call.fields),
  *	.raw_init		= trace_event_raw_init,
  *	.probe			= ftrace_raw_event_##call,
+ *	.reg			= ftrace_event_reg,
  * };
  *
  * static struct ftrace_event_call __used
@@ -567,6 +568,7 @@ static struct ftrace_event_class __used event_class_##call = {		\
 	.fields			= LIST_HEAD_INIT(event_class_##call.fields),\
 	.raw_init		= trace_event_raw_init,			\
 	.probe			= ftrace_raw_event_##call,		\
+	.reg			= ftrace_event_reg,			\
 	_TRACE_PERF_INIT(call)						\
 };
 
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 6053982dc30..23751659582 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -54,13 +54,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
 		}
 	}
 
-	if (tp_event->class->reg)
-		ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
-	else
-		ret = tracepoint_probe_register(tp_event->name,
-						tp_event->class->perf_probe,
-						tp_event);
-
+	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
 	if (ret)
 		goto fail;
 
@@ -94,9 +88,7 @@ int perf_trace_init(struct perf_event *p_event)
 	mutex_lock(&event_mutex);
 	list_for_each_entry(tp_event, &ftrace_events, list) {
 		if (tp_event->event.type == event_id &&
-		    tp_event->class &&
-		    (tp_event->class->perf_probe ||
-		     tp_event->class->reg) &&
+		    tp_event->class && tp_event->class->reg &&
 		    try_module_get(tp_event->mod)) {
 			ret = perf_trace_event_init(tp_event, p_event);
 			break;
@@ -136,12 +128,7 @@ void perf_trace_destroy(struct perf_event *p_event)
 	if (--tp_event->perf_refcount > 0)
 		goto out;
 
-	if (tp_event->class->reg)
-		tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
-	else
-		tracepoint_probe_unregister(tp_event->name,
-					    tp_event->class->perf_probe,
-					    tp_event);
+	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
 
 	/*
 	 * Ensure our callback won't be called anymore. See
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 69bee4cc0e1..e8e6043f4d2 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -141,6 +141,35 @@ int trace_event_raw_init(struct ftrace_event_call *call)
 }
 EXPORT_SYMBOL_GPL(trace_event_raw_init);
 
+int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
+{
+	switch (type) {
+	case TRACE_REG_REGISTER:
+		return tracepoint_probe_register(call->name,
+						 call->class->probe,
+						 call);
+	case TRACE_REG_UNREGISTER:
+		tracepoint_probe_unregister(call->name,
+					    call->class->probe,
+					    call);
+		return 0;
+
+#ifdef CONFIG_PERF_EVENTS
+	case TRACE_REG_PERF_REGISTER:
+		return tracepoint_probe_register(call->name,
+						 call->class->perf_probe,
+						 call);
+	case TRACE_REG_PERF_UNREGISTER:
+		tracepoint_probe_unregister(call->name,
+					    call->class->perf_probe,
+					    call);
+		return 0;
+#endif
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ftrace_event_reg);
+
 static int ftrace_event_enable_disable(struct ftrace_event_call *call,
 					int enable)
 {
@@ -151,23 +180,13 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
 		if (call->flags & TRACE_EVENT_FL_ENABLED) {
 			call->flags &= ~TRACE_EVENT_FL_ENABLED;
 			tracing_stop_cmdline_record();
-			if (call->class->reg)
-				call->class->reg(call, TRACE_REG_UNREGISTER);
-			else
-				tracepoint_probe_unregister(call->name,
-							    call->class->probe,
-							    call);
+			call->class->reg(call, TRACE_REG_UNREGISTER);
 		}
 		break;
 	case 1:
 		if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {
 			tracing_start_cmdline_record();
-			if (call->class->reg)
-				ret = call->class->reg(call, TRACE_REG_REGISTER);
-			else
-				ret = tracepoint_probe_register(call->name,
-								call->class->probe,
-								call);
+			ret = call->class->reg(call, TRACE_REG_REGISTER);
 			if (ret) {
 				tracing_stop_cmdline_record();
 				pr_info("event trace: Could not enable event "
@@ -205,8 +224,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
 	mutex_lock(&event_mutex);
 	list_for_each_entry(call, &ftrace_events, list) {
 
-		if (!call->name || !call->class ||
-		    (!call->class->probe && !call->class->reg))
+		if (!call->name || !call->class || !call->class->reg)
 			continue;
 
 		if (match &&
@@ -332,7 +350,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 		 * The ftrace subsystem is for showing formats only.
 		 * They can not be enabled or disabled via the event files.
 		 */
-		if (call->class && (call->class->probe || call->class->reg))
+		if (call->class && call->class->reg)
 			return call;
 	}
 
@@ -485,8 +503,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 
 	mutex_lock(&event_mutex);
 	list_for_each_entry(call, &ftrace_events, list) {
-		if (!call->name || !call->class ||
-		    (!call->class->probe && !call->class->reg))
+		if (!call->name || !call->class || !call->class->reg)
 			continue;
 
 		if (system && strcmp(call->class->system, system) != 0)
@@ -977,12 +994,12 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
 		return -1;
 	}
 
-	if (call->class->probe || call->class->reg)
+	if (call->class->reg)
 		trace_create_file("enable", 0644, call->dir, call,
 				  enable);
 
 #ifdef CONFIG_PERF_EVENTS
-	if (call->event.type && (call->class->perf_probe || call->class->reg))
+	if (call->event.type && call->class->reg)
 		trace_create_file("id", 0444, call->dir, call,
 		 		  id);
 #endif
-- 
cgit v1.2.3-70-g09d2


From b6b3ecc71a0664d44ed8d087d583aee98fbf492a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 24 Jun 2010 00:04:38 +0000
Subject: net: u64_stats_sync improvements

- Add a comment about interrupts:

6) If counter might be written by an interrupt, readers should block
interrupts.

- Fix a typo in sample of use.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/u64_stats_sync.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index d0505156ed5..b38e3a58de8 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -23,6 +23,10 @@
  *    pure reads. But if they have to fetch many values, it's better to not allow
  *    preemptions/interruptions to avoid many retries.
  *
+ * 6) If counter might be written by an interrupt, readers should block interrupts.
+ *    (On UP, there is no seqcount_t protection, a reader allowing interrupts could
+ *     read partial values)
+ *
  * Usage :
  *
  * Stats producer (writer) should use following template granted it already got
@@ -46,7 +50,7 @@
  *         start = u64_stats_fetch_begin(&stats->syncp);
  *         tbytes = stats->bytes64; // non atomic operation
  *         tpackets = stats->packets64; // non atomic operation
- * } while (u64_stats_fetch_retry(&stats->lock, syncp));
+ * } while (u64_stats_fetch_retry(&stats->syncp, start));
  *
  *
  * Example of use in drivers/net/loopback.c, using per_cpu containers,
-- 
cgit v1.2.3-70-g09d2


From 33d91f00c73ba0012bce18c1690cb8313ca7adaa Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 24 Jun 2010 00:54:06 +0000
Subject: net: u64_stats_fetch_begin_bh() and u64_stats_fetch_retry_bh()

- Must disable preemption in case of 32bit UP in u64_stats_fetch_begin()
and u64_stats_fetch_retry()

- Add new u64_stats_fetch_begin_bh() and u64_stats_fetch_retry_bh() for
network usage, disabling BH on 32bit UP only.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/u64_stats_sync.h | 59 +++++++++++++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index b38e3a58de8..fa261a0da28 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -27,6 +27,9 @@
  *    (On UP, there is no seqcount_t protection, a reader allowing interrupts could
  *     read partial values)
  *
+ * 7) For softirq uses, readers can use u64_stats_fetch_begin_bh() and
+ *    u64_stats_fetch_retry_bh() helpers
+ *
  * Usage :
  *
  * Stats producer (writer) should use following template granted it already got
@@ -58,54 +61,80 @@
  */
 #include <linux/seqlock.h>
 
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
 struct u64_stats_sync {
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
 	seqcount_t	seq;
+#endif
 };
 
 static void inline u64_stats_update_begin(struct u64_stats_sync *syncp)
 {
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
 	write_seqcount_begin(&syncp->seq);
+#endif
 }
 
 static void inline u64_stats_update_end(struct u64_stats_sync *syncp)
 {
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
 	write_seqcount_end(&syncp->seq);
+#endif
 }
 
 static unsigned int inline u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
 {
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
 	return read_seqcount_begin(&syncp->seq);
+#else
+#if BITS_PER_LONG==32
+	preempt_disable();
+#endif
+	return 0;
+#endif
 }
 
 static bool inline u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
 					 unsigned int start)
 {
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
 	return read_seqcount_retry(&syncp->seq, start);
-}
-
 #else
-struct u64_stats_sync {
-};
-
-static void inline u64_stats_update_begin(struct u64_stats_sync *syncp)
-{
-}
-
-static void inline u64_stats_update_end(struct u64_stats_sync *syncp)
-{
+#if BITS_PER_LONG==32
+	preempt_enable();
+#endif
+	return false;
+#endif
 }
 
-static unsigned int inline u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+/*
+ * In case softirq handlers can update u64 counters, readers can use following helpers
+ * - SMP 32bit arches use seqcount protection, irq safe.
+ * - UP 32bit must disable BH.
+ * - 64bit have no problem atomically reading u64 values, irq safe.
+ */
+static unsigned int inline u64_stats_fetch_begin_bh(const struct u64_stats_sync *syncp)
 {
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+	return read_seqcount_begin(&syncp->seq);
+#else
+#if BITS_PER_LONG==32
+	local_bh_disable();
+#endif
 	return 0;
+#endif
 }
 
-static bool inline u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+static bool inline u64_stats_fetch_retry_bh(const struct u64_stats_sync *syncp,
 					 unsigned int start)
 {
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+	return read_seqcount_retry(&syncp->seq, start);
+#else
+#if BITS_PER_LONG==32
+	local_bh_enable();
+#endif
 	return false;
-}
 #endif
+}
 
 #endif /* _LINUX_U64_STATS_SYNC_H */
-- 
cgit v1.2.3-70-g09d2


From bc66154efe163a80f269d448572f7906756e9338 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 24 Jun 2010 00:54:21 +0000
Subject: macvlan: 64 bit rx counters

Use u64_stats_sync infrastructure to implement 64bit stats.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c      | 37 +++++++++++++++++++++++--------------
 include/linux/if_macvlan.h | 19 ++++++++++++-------
 2 files changed, 35 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index e096875aa05..e6d626e7851 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -431,29 +431,38 @@ static void macvlan_uninit(struct net_device *dev)
 	free_percpu(vlan->rx_stats);
 }
 
-static struct net_device_stats *macvlan_dev_get_stats(struct net_device *dev)
+static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev)
 {
-	struct net_device_stats *stats = &dev->stats;
+	struct rtnl_link_stats64 *stats = &dev->stats64;
 	struct macvlan_dev *vlan = netdev_priv(dev);
 
-	dev_txq_stats_fold(dev, stats);
+	dev_txq_stats_fold(dev, &dev->stats);
 
 	if (vlan->rx_stats) {
-		struct macvlan_rx_stats *p, rx = {0};
+		struct macvlan_rx_stats *p, accum = {0};
+		u64 rx_packets, rx_bytes, rx_multicast;
+		unsigned int start;
 		int i;
 
 		for_each_possible_cpu(i) {
 			p = per_cpu_ptr(vlan->rx_stats, i);
-			rx.rx_packets += p->rx_packets;
-			rx.rx_bytes   += p->rx_bytes;
-			rx.rx_errors  += p->rx_errors;
-			rx.multicast  += p->multicast;
+			do {
+				start = u64_stats_fetch_begin_bh(&p->syncp);
+				rx_packets	= p->rx_packets;
+				rx_bytes	= p->rx_bytes;
+				rx_multicast	= p->rx_multicast;
+			} while (u64_stats_fetch_retry_bh(&p->syncp, start));
+			accum.rx_packets	+= rx_packets;
+			accum.rx_bytes		+= rx_bytes;
+			accum.rx_multicast	+= rx_multicast;
+			/* rx_errors is an ulong, updated without syncp protection */
+			accum.rx_errors		+= p->rx_errors;
 		}
-		stats->rx_packets = rx.rx_packets;
-		stats->rx_bytes   = rx.rx_bytes;
-		stats->rx_errors  = rx.rx_errors;
-		stats->rx_dropped = rx.rx_errors;
-		stats->multicast  = rx.multicast;
+		stats->rx_packets = accum.rx_packets;
+		stats->rx_bytes   = accum.rx_bytes;
+		stats->rx_errors  = accum.rx_errors;
+		stats->rx_dropped = accum.rx_errors;
+		stats->multicast  = accum.rx_multicast;
 	}
 	return stats;
 }
@@ -502,7 +511,7 @@ static const struct net_device_ops macvlan_netdev_ops = {
 	.ndo_change_rx_flags	= macvlan_change_rx_flags,
 	.ndo_set_mac_address	= macvlan_set_mac_address,
 	.ndo_set_multicast_list	= macvlan_set_multicast_list,
-	.ndo_get_stats		= macvlan_dev_get_stats,
+	.ndo_get_stats64	= macvlan_dev_get_stats64,
 	.ndo_validate_addr	= eth_validate_addr,
 };
 
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index c26a0e4f0ce..e24ce6ea1fa 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -6,6 +6,7 @@
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <net/netlink.h>
+#include <linux/u64_stats_sync.h>
 
 #if defined(CONFIG_MACVTAP) || defined(CONFIG_MACVTAP_MODULE)
 struct socket *macvtap_get_socket(struct file *);
@@ -27,14 +28,16 @@ struct macvtap_queue;
  *	struct macvlan_rx_stats - MACVLAN percpu rx stats
  *	@rx_packets: number of received packets
  *	@rx_bytes: number of received bytes
- *	@multicast: number of received multicast packets
+ *	@rx_multicast: number of received multicast packets
+ *	@syncp: synchronization point for 64bit counters
  *	@rx_errors: number of errors
  */
 struct macvlan_rx_stats {
-	unsigned long rx_packets;
-	unsigned long rx_bytes;
-	unsigned long multicast;
-	unsigned long rx_errors;
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			rx_multicast;
+	struct u64_stats_sync	syncp;
+	unsigned long		rx_errors;
 };
 
 struct macvlan_dev {
@@ -56,12 +59,14 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
 {
 	struct macvlan_rx_stats *rx_stats;
 
-	rx_stats = per_cpu_ptr(vlan->rx_stats, smp_processor_id());
+	rx_stats = this_cpu_ptr(vlan->rx_stats);
 	if (likely(success)) {
+		u64_stats_update_begin(&rx_stats->syncp);
 		rx_stats->rx_packets++;;
 		rx_stats->rx_bytes += len;
 		if (multicast)
-			rx_stats->multicast++;
+			rx_stats->rx_multicast++;
+		u64_stats_update_end(&rx_stats->syncp);
 	} else {
 		rx_stats->rx_errors++;
 	}
-- 
cgit v1.2.3-70-g09d2


From 210d6de78c5d7c785fc532556cea340e517955e1 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Thu, 24 Jun 2010 16:25:12 +0000
Subject: act_mirred: don't clone skb when skb isn't shared

don't clone skb when skb isn't shared

When the tcf_action is TC_ACT_STOLEN, and the skb isn't shared, we don't need
to clone a new skb. As the skb will be freed after this function returns, we
can use it freely once we get a reference to it.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/sch_generic.h |   11 +++++++++--
 net/sched/act_mirred.c    |    6 +++---
 2 files changed, 12 insertions(+), 5 deletions(-)
Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 11 +++++++++--
 net/sched/act_mirred.c    |  6 +++---
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b35301b0c7b..977ec06ed0c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -594,9 +594,16 @@ static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen)
 }
 
 #ifdef CONFIG_NET_CLS_ACT
-static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask)
+static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask,
+					    int action)
 {
-	struct sk_buff *n = skb_clone(skb, gfp_mask);
+	struct sk_buff *n;
+
+	if ((action == TC_ACT_STOLEN || action == TC_ACT_QUEUED) &&
+	    !skb_shared(skb))
+		n = skb_get(skb);
+	else
+		n = skb_clone(skb, gfp_mask);
 
 	if (n) {
 		n->tc_verd = SET_TC_VERD(n->tc_verd, 0);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index c0b6863e3b8..2e9a7b91aa1 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -169,13 +169,13 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
 		goto out;
 	}
 
-	skb2 = skb_act_clone(skb, GFP_ATOMIC);
+	at = G_TC_AT(skb->tc_verd);
+	skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action);
 	if (skb2 == NULL)
 		goto out;
 
 	m->tcf_bstats.bytes += qdisc_pkt_len(skb2);
 	m->tcf_bstats.packets++;
-	at = G_TC_AT(skb->tc_verd);
 	if (!(at & AT_EGRESS)) {
 		if (m->tcfm_ok_push)
 			skb_push(skb2, skb2->dev->hard_header_len);
@@ -185,8 +185,8 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
 	if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
 		skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
 
-	skb2->dev = dev;
 	skb2->skb_iif = skb->dev->ifindex;
+	skb2->dev = dev;
 	dev_queue_xmit(skb2);
 	err = 0;
 
-- 
cgit v1.2.3-70-g09d2


From 529d6dad5bc69de14cdd24831e2a14264e93daa4 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Tue, 29 Jun 2010 00:08:21 -0700
Subject: caif-driver: Add CAIF-SPI Protocol driver.

This patch introduces the CAIF SPI Protocol Driver for
CAIF Link Layer.

This driver implements a platform driver to accommodate for a
platform specific SPI device. A general platform driver is not
possible as there are no SPI Slave side Kernel API defined.
A sample CAIF SPI Platform device can be found in
.../Documentation/networking/caif/spi_porting.txt

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/caif/spi_porting.txt | 208 +++++++
 drivers/net/caif/Kconfig                      |  19 +
 drivers/net/caif/Makefile                     |   4 +
 drivers/net/caif/caif_spi.c                   | 847 ++++++++++++++++++++++++++
 drivers/net/caif/caif_spi_slave.c             | 252 ++++++++
 include/net/caif/caif_spi.h                   | 153 +++++
 6 files changed, 1483 insertions(+)
 create mode 100644 Documentation/networking/caif/spi_porting.txt
 create mode 100644 drivers/net/caif/caif_spi.c
 create mode 100644 drivers/net/caif/caif_spi_slave.c
 create mode 100644 include/net/caif/caif_spi.h

(limited to 'include')

diff --git a/Documentation/networking/caif/spi_porting.txt b/Documentation/networking/caif/spi_porting.txt
new file mode 100644
index 00000000000..61d7c924745
--- /dev/null
+++ b/Documentation/networking/caif/spi_porting.txt
@@ -0,0 +1,208 @@
+- CAIF SPI porting -
+
+- CAIF SPI basics:
+
+Running CAIF over SPI needs some extra setup, owing to the nature of SPI.
+Two extra GPIOs have been added in order to negotiate the transfers
+ between the master and the slave. The minimum requirement for running
+CAIF over SPI is a SPI slave chip and two GPIOs (more details below).
+Please note that running as a slave implies that you need to keep up
+with the master clock. An overrun or underrun event is fatal.
+
+- CAIF SPI framework:
+
+To make porting as easy as possible, the CAIF SPI has been divided in
+two parts. The first part (called the interface part) deals with all
+generic functionality such as length framing, SPI frame negotiation
+and SPI frame delivery and transmission. The other part is the CAIF
+SPI slave device part, which is the module that you have to write if
+you want to run SPI CAIF on a new hardware. This part takes care of
+the physical hardware, both with regard to SPI and to GPIOs.
+
+- Implementing a CAIF SPI device:
+
+	- Functionality provided by the CAIF SPI slave device:
+
+	In order to implement a SPI device you will, as a minimum,
+	need to implement the following
+	functions:
+
+	int (*init_xfer) (struct cfspi_xfer * xfer, struct cfspi_dev *dev):
+
+	This function is called by the CAIF SPI interface to give
+	you a chance to set up your hardware to be ready to receive
+	a stream of data from the master. The xfer structure contains
+	both physical and logical addresses, as well as the total length
+	of the transfer in both directions. The dev parameter can be used
+	to map to different CAIF SPI slave devices.
+
+	void (*sig_xfer) (bool xfer, struct cfspi_dev *dev):
+
+	This function is called by the CAIF SPI interface when the output
+	(SPI_INT) GPIO needs to change state. The boolean value of the xfer
+	variable indicates whether the GPIO should be asserted (HIGH) or
+	deasserted (LOW). The dev parameter can be used to map to different CAIF
+	SPI slave devices.
+
+	- Functionality provided by the CAIF SPI interface:
+
+	void (*ss_cb) (bool assert, struct cfspi_ifc *ifc);
+
+	This function is called by the CAIF SPI slave device in order to
+	signal a change of state of the input GPIO (SS) to the interface.
+	Only active edges are mandatory to be reported.
+	This function can be called from IRQ context (recommended in order
+	not to introduce latency). The ifc parameter should be the pointer
+	returned from the platform probe function in the SPI device structure.
+
+	void (*xfer_done_cb) (struct cfspi_ifc *ifc);
+
+	This function is called by the CAIF SPI slave device in order to
+	report that a transfer is completed. This function should only be
+	called once both the transmission and the reception are completed.
+	This function can be called from IRQ context (recommended in order
+	not to introduce latency). The ifc parameter should be the pointer
+	returned from the platform probe function in the SPI device structure.
+
+	- Connecting the bits and pieces:
+
+		- Filling in the SPI slave device structure:
+
+		Connect the necessary callback functions.
+		Indicate clock speed (used to calculate toggle delays).
+		Choose a suitable name (helps debugging if you use several CAIF
+		SPI slave devices).
+		Assign your private data (can be used to map to your structure).
+
+		- Filling in the SPI slave platform device structure:
+		Add name of driver to connect to ("cfspi_sspi").
+		Assign the SPI slave device structure as platform data.
+
+- Padding:
+
+In order to optimize throughput, a number of SPI padding options are provided.
+Padding can be enabled independently for uplink and downlink transfers.
+Padding can be enabled for the head, the tail and for the total frame size.
+The padding needs to be correctly configured on both sides of the link.
+The padding can be changed via module parameters in cfspi_sspi.c or via
+the sysfs directory of the cfspi_sspi driver (before device registration).
+
+- CAIF SPI device template:
+
+/*
+ *	Copyright (C) ST-Ericsson AB 2010
+ *	Author: Daniel Martensson / Daniel.Martensson@stericsson.com
+ *	License terms: GNU General Public License (GPL), version 2.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <net/caif/caif_spi.h>
+
+MODULE_LICENSE("GPL");
+
+struct sspi_struct {
+	struct cfspi_dev sdev;
+	struct cfspi_xfer *xfer;
+};
+
+static struct sspi_struct slave;
+static struct platform_device slave_device;
+
+static irqreturn_t sspi_irq(int irq, void *arg)
+{
+	/* You only need to trigger on an edge to the active state of the
+	 * SS signal. Once an edge is detected, the ss_cb() function should be
+	 * called with the parameter assert set to true. It is OK
+	 * (and even advised) to call the ss_cb() function in IRQ context in
+	 * order not to add any delay. */
+
+	return IRQ_HANDLED;
+}
+
+static void sspi_complete(void *context)
+{
+	/* Normally the DMA or the SPI framework will call you back
+	 * in something similar to this. The only thing you need to
+	 * do is to call the xfer_done_cb() function, providing the pointer
+	 * to the CAIF SPI interface. It is OK to call this function
+	 * from IRQ context. */
+}
+
+static int sspi_init_xfer(struct cfspi_xfer *xfer, struct cfspi_dev *dev)
+{
+	/* Store transfer info. For a normal implementation you should
+	 * set up your DMA here and make sure that you are ready to
+	 * receive the data from the master SPI. */
+
+	struct sspi_struct *sspi = (struct sspi_struct *)dev->priv;
+
+	sspi->xfer = xfer;
+
+	return 0;
+}
+
+void sspi_sig_xfer(bool xfer, struct cfspi_dev *dev)
+{
+	/* If xfer is true then you should assert the SPI_INT to indicate to
+	 * the master that you are ready to receive the data from the master
+	 * SPI. If xfer is false then you should de-assert SPI_INT to indicate
+	 * that the transfer is done.
+	 */
+
+	struct sspi_struct *sspi = (struct sspi_struct *)dev->priv;
+}
+
+static void sspi_release(struct device *dev)
+{
+	/*
+	 * Here you should release your SPI device resources.
+	 */
+}
+
+static int __init sspi_init(void)
+{
+	/* Here you should initialize your SPI device by providing the
+	 * necessary functions, clock speed, name and private data. Once
+	 * done, you can register your device with the
+	 * platform_device_register() function. This function will return
+	 * with the CAIF SPI interface initialized. This is probably also
+	 * the place where you should set up your GPIOs, interrupts and SPI
+	 * resources. */
+
+	int res = 0;
+
+	/* Initialize slave device. */
+	slave.sdev.init_xfer = sspi_init_xfer;
+	slave.sdev.sig_xfer = sspi_sig_xfer;
+	slave.sdev.clk_mhz = 13;
+	slave.sdev.priv = &slave;
+	slave.sdev.name = "spi_sspi";
+	slave_device.dev.release = sspi_release;
+
+	/* Initialize platform device. */
+	slave_device.name = "cfspi_sspi";
+	slave_device.dev.platform_data = &slave.sdev;
+
+	/* Register platform device. */
+	res = platform_device_register(&slave_device);
+	if (res) {
+		printk(KERN_WARNING "sspi_init: failed to register dev.\n");
+		return -ENODEV;
+	}
+
+	return res;
+}
+
+static void __exit sspi_exit(void)
+{
+	platform_device_del(&slave_device);
+}
+
+module_init(sspi_init);
+module_exit(sspi_exit);
diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig
index 6f33ee453f4..631a6242b01 100644
--- a/drivers/net/caif/Kconfig
+++ b/drivers/net/caif/Kconfig
@@ -12,3 +12,22 @@ config CAIF_TTY
 	The CAIF TTY transport driver is a Line Discipline (ldisc)
 	identified as N_CAIF. When this ldisc is opened from user space
 	it will redirect the TTY's traffic into the CAIF stack.
+
+config CAIF_SPI_SLAVE
+	tristate "CAIF SPI transport driver for slave interface"
+	depends on CAIF
+	default n
+	---help---
+	The CAIF Link layer SPI Protocol driver for Slave SPI interface.
+	This driver implements a platform driver to accommodate for a
+	platform specific SPI device. A sample CAIF SPI Platform device is
+	provided in Documentation/networking/caif/spi_porting.txt
+
+config CAIF_SPI_SYNC
+	bool "Next command and length in start of frame"
+	depends on CAIF_SPI_SLAVE
+	default n
+	---help---
+	Putting the next command and length in the start of the frame can
+	help to synchronize to the next transfer in case of over or under-runs.
+	This option also needs to be enabled on the modem.
diff --git a/drivers/net/caif/Makefile b/drivers/net/caif/Makefile
index e6d3ca06ecf..3a11d619452 100644
--- a/drivers/net/caif/Makefile
+++ b/drivers/net/caif/Makefile
@@ -4,3 +4,7 @@ endif
 
 # Serial interface
 obj-$(CONFIG_CAIF_TTY) += caif_serial.o
+
+# SPI slave physical interfaces module
+cfspi_slave-objs := caif_spi.o caif_spi_slave.o
+obj-$(CONFIG_CAIF_SPI_SLAVE) += cfspi_slave.o
diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c
new file mode 100644
index 00000000000..03049e86e8a
--- /dev/null
+++ b/drivers/net/caif/caif_spi.c
@@ -0,0 +1,847 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com
+ * Author:  Daniel Martensson / Daniel.Martensson@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2.
+ */
+
+#include <linux/version.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+#include <linux/list.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/debugfs.h>
+#include <linux/if_arp.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/caif_spi.h>
+
+#ifndef CONFIG_CAIF_SPI_SYNC
+#define FLAVOR "Flavour: Vanilla.\n"
+#else
+#define FLAVOR "Flavour: Master CMD&LEN at start.\n"
+#endif /* CONFIG_CAIF_SPI_SYNC */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Martensson<daniel.martensson@stericsson.com>");
+MODULE_DESCRIPTION("CAIF SPI driver");
+
+static int spi_loop;
+module_param(spi_loop, bool, S_IRUGO);
+MODULE_PARM_DESC(spi_loop, "SPI running in loopback mode.");
+
+/* SPI frame alignment. */
+module_param(spi_frm_align, int, S_IRUGO);
+MODULE_PARM_DESC(spi_frm_align, "SPI frame alignment.");
+
+/* SPI padding options. */
+module_param(spi_up_head_align, int, S_IRUGO);
+MODULE_PARM_DESC(spi_up_head_align, "SPI uplink head alignment.");
+
+module_param(spi_up_tail_align, int, S_IRUGO);
+MODULE_PARM_DESC(spi_up_tail_align, "SPI uplink tail alignment.");
+
+module_param(spi_down_head_align, int, S_IRUGO);
+MODULE_PARM_DESC(spi_down_head_align, "SPI downlink head alignment.");
+
+module_param(spi_down_tail_align, int, S_IRUGO);
+MODULE_PARM_DESC(spi_down_tail_align, "SPI downlink tail alignment.");
+
+#ifdef CONFIG_ARM
+#define BYTE_HEX_FMT "%02X"
+#else
+#define BYTE_HEX_FMT "%02hhX"
+#endif
+
+#define SPI_MAX_PAYLOAD_SIZE 4096
+/*
+ * Threshold values for the SPI packet queue. Flow control will be asserted
+ * when the number of packets exceeds HIGH_WATER_MARK. It will not be
+ * deasserted before the number of packets drops below LOW_WATER_MARK.
+ */
+#define LOW_WATER_MARK   100
+#define HIGH_WATER_MARK  (LOW_WATER_MARK*5)
+
+#ifdef CONFIG_UML
+
+/*
+ * We sometimes use UML for debugging, but it cannot handle
+ * dma_alloc_coherent so we have to wrap it.
+ */
+static inline void *dma_alloc(dma_addr_t *daddr)
+{
+	return kmalloc(SPI_DMA_BUF_LEN, GFP_KERNEL);
+}
+
+static inline void dma_free(void *cpu_addr, dma_addr_t handle)
+{
+	kfree(cpu_addr);
+}
+
+#else
+
+static inline void *dma_alloc(dma_addr_t *daddr)
+{
+	return dma_alloc_coherent(NULL, SPI_DMA_BUF_LEN, daddr,
+				GFP_KERNEL);
+}
+
+static inline void dma_free(void *cpu_addr, dma_addr_t handle)
+{
+	dma_free_coherent(NULL, SPI_DMA_BUF_LEN, cpu_addr, handle);
+}
+#endif	/* CONFIG_UML */
+
+#ifdef CONFIG_DEBUG_FS
+
+#define DEBUGFS_BUF_SIZE	4096
+
+static struct dentry *dbgfs_root;
+
+static inline void driver_debugfs_create(void)
+{
+	dbgfs_root = debugfs_create_dir(cfspi_spi_driver.driver.name, NULL);
+}
+
+static inline void driver_debugfs_remove(void)
+{
+	debugfs_remove(dbgfs_root);
+}
+
+static inline void dev_debugfs_rem(struct cfspi *cfspi)
+{
+	debugfs_remove(cfspi->dbgfs_frame);
+	debugfs_remove(cfspi->dbgfs_state);
+	debugfs_remove(cfspi->dbgfs_dir);
+}
+
+static int dbgfs_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+static ssize_t dbgfs_state(struct file *file, char __user *user_buf,
+			   size_t count, loff_t *ppos)
+{
+	char *buf;
+	int len = 0;
+	ssize_t size;
+	struct cfspi *cfspi = (struct cfspi *)file->private_data;
+
+	buf = kzalloc(DEBUGFS_BUF_SIZE, GFP_KERNEL);
+	if (!buf)
+		return 0;
+
+	/* Print out debug information. */
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"CAIF SPI debug information:\n");
+
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), FLAVOR);
+
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"STATE: %d\n", cfspi->dbg_state);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Previous CMD: 0x%x\n", cfspi->pcmd);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Current CMD: 0x%x\n", cfspi->cmd);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Previous TX len: %d\n", cfspi->tx_ppck_len);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Previous RX len: %d\n", cfspi->rx_ppck_len);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Current TX len: %d\n", cfspi->tx_cpck_len);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Current RX len: %d\n", cfspi->rx_cpck_len);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Next TX len: %d\n", cfspi->tx_npck_len);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Next RX len: %d\n", cfspi->rx_npck_len);
+
+	size = simple_read_from_buffer(user_buf, count, ppos, buf, len);
+	kfree(buf);
+
+	return size;
+}
+
+/*
+ * Hex-dump count bytes of frm into buf, never writing more than size bytes
+ * (scnprintf keeps len below size). Frames longer than 2 * cut are elided.
+ */
+static ssize_t print_frame(char *buf, size_t size, char *frm,
+			   size_t count, size_t cut)
+{
+	int len = 0;
+	int i;
+	for (i = 0; i < count; i++) {
+		len += scnprintf((buf + len), (size - len),
+				 "[0x" BYTE_HEX_FMT "]", frm[i]);
+		if ((i == cut) && (count > (cut * 2))) {
+			/* Fast forward. */
+			i = count - cut;
+			len += scnprintf((buf + len), (size - len),
+					 "--- %u bytes skipped ---\n",
+					 (unsigned int)(count - (cut * 2)));
+		}
+		if ((!(i % 10)) && i)
+			len += scnprintf((buf + len), (size - len), "\n");
+	}
+	len += scnprintf((buf + len), (size - len), "\n");
+	return len;
+}
+
+static ssize_t dbgfs_frame(struct file *file, char __user *user_buf,
+			   size_t count, loff_t *ppos)
+{
+	char *buf;
+	int len = 0;
+	ssize_t size;
+	struct cfspi *cfspi;
+
+	cfspi = (struct cfspi *)file->private_data;
+	buf = kzalloc(DEBUGFS_BUF_SIZE, GFP_KERNEL);
+	if (!buf)
+		return 0;
+
+	/* Print out debug information. */
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Current frame:\n");
+
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Tx data (Len: %d):\n", cfspi->tx_cpck_len);
+
+	len += print_frame((buf + len), (DEBUGFS_BUF_SIZE - len),
+			   cfspi->xfer.va_tx,
+			   (cfspi->tx_cpck_len + SPI_CMD_SZ), 100);
+
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Rx data (Len: %d):\n", cfspi->rx_cpck_len);
+
+	len += print_frame((buf + len), (DEBUGFS_BUF_SIZE - len),
+			   cfspi->xfer.va_rx,
+			   (cfspi->rx_cpck_len + SPI_CMD_SZ), 100);
+
+	size = simple_read_from_buffer(user_buf, count, ppos, buf, len);
+	kfree(buf);
+
+	return size;
+}
+
+static const struct file_operations dbgfs_state_fops = {
+	.open = dbgfs_open,
+	.read = dbgfs_state,
+	.owner = THIS_MODULE
+};
+
+static const struct file_operations dbgfs_frame_fops = {
+	.open = dbgfs_open,
+	.read = dbgfs_frame,
+	.owner = THIS_MODULE
+};
+
+static inline void dev_debugfs_add(struct cfspi *cfspi)
+{
+	cfspi->dbgfs_dir = debugfs_create_dir(cfspi->pdev->name, dbgfs_root);
+	cfspi->dbgfs_state = debugfs_create_file("state", S_IRUGO,
+						 cfspi->dbgfs_dir, cfspi,
+						 &dbgfs_state_fops);
+	cfspi->dbgfs_frame = debugfs_create_file("frame", S_IRUGO,
+						 cfspi->dbgfs_dir, cfspi,
+						 &dbgfs_frame_fops);
+}
+
+inline void cfspi_dbg_state(struct cfspi *cfspi, int state)
+{
+	cfspi->dbg_state = state;
+};
+#else
+
+static inline void driver_debugfs_create(void)
+{
+}
+
+static inline void driver_debugfs_remove(void)
+{
+}
+
+static inline void dev_debugfs_add(struct cfspi *cfspi)
+{
+}
+
+static inline void dev_debugfs_rem(struct cfspi *cfspi)
+{
+}
+
+inline void cfspi_dbg_state(struct cfspi *cfspi, int state)
+{
+}
+#endif				/* CONFIG_DEBUG_FS */
+
+static LIST_HEAD(cfspi_list);
+static spinlock_t cfspi_list_lock;
+
+/* SPI uplink head alignment. */
+static ssize_t show_up_head_align(struct device_driver *driver, char *buf)
+{
+	return sprintf(buf, "%d\n", spi_up_head_align);
+}
+
+static DRIVER_ATTR(up_head_align, S_IRUSR, show_up_head_align, NULL);
+
+/* SPI uplink tail alignment. */
+static ssize_t show_up_tail_align(struct device_driver *driver, char *buf)
+{
+	return sprintf(buf, "%d\n", spi_up_tail_align);
+}
+
+static DRIVER_ATTR(up_tail_align, S_IRUSR, show_up_tail_align, NULL);
+
+/* SPI downlink head alignment. */
+static ssize_t show_down_head_align(struct device_driver *driver, char *buf)
+{
+	return sprintf(buf, "%d\n", spi_down_head_align);
+}
+
+static DRIVER_ATTR(down_head_align, S_IRUSR, show_down_head_align, NULL);
+
+/* SPI downlink tail alignment. */
+static ssize_t show_down_tail_align(struct device_driver *driver, char *buf)
+{
+	return sprintf(buf, "%d\n", spi_down_tail_align);
+}
+
+static DRIVER_ATTR(down_tail_align, S_IRUSR, show_down_tail_align, NULL);
+
+/* SPI frame alignment. */
+static ssize_t show_frame_align(struct device_driver *driver, char *buf)
+{
+	return sprintf(buf, "%d\n", spi_frm_align);
+}
+
+static DRIVER_ATTR(frame_align, S_IRUSR, show_frame_align, NULL);
+
+int cfspi_xmitfrm(struct cfspi *cfspi, u8 *buf, size_t len)
+{
+	u8 *dst = buf;
+	caif_assert(buf);
+
+	do {
+		struct sk_buff *skb;
+		struct caif_payload_info *info;
+		int spad = 0;
+		int epad;
+
+		skb = skb_dequeue(&cfspi->chead);
+		if (!skb)
+			break;
+
+		/*
+		 * Calculate length of frame including SPI padding.
+		 * The payload position is found in the control buffer.
+		 */
+		info = (struct caif_payload_info *)&skb->cb;
+
+		/*
+		 * Compute head offset i.e. number of bytes to add to
+		 * get the start of the payload aligned.
+		 */
+		if (spi_up_head_align) {
+			spad = 1 + ((info->hdr_len + 1) & spi_up_head_align);
+			*dst = (u8)(spad - 1);
+			dst += spad;
+		}
+
+		/* Copy in CAIF frame. */
+		skb_copy_bits(skb, 0, dst, skb->len);
+		dst += skb->len;
+		cfspi->ndev->stats.tx_packets++;
+		cfspi->ndev->stats.tx_bytes += skb->len;
+
+		/*
+		 * Compute tail offset i.e. number of bytes to add to
+		 * get the complete CAIF frame aligned.
+		 */
+		epad = (skb->len + spad) & spi_up_tail_align;
+		dst += epad;
+
+		dev_kfree_skb(skb);
+
+	} while ((dst - buf) < len);
+
+	return dst - buf;
+}
+
+/*
+ * Move as many queued packets as fit in one SPI frame from the transmit
+ * queue (qhead) to the committed queue (chead). Returns the length of the
+ * resulting frame in bytes, SPI head and tail padding included.
+ */
+int cfspi_xmitlen(struct cfspi *cfspi)
+{
+	struct sk_buff *skb = NULL;
+	int frm_len = 0;
+	int pkts = 0;
+
+	/* Decommit previously committed frames, preserving their order. */
+	while (skb_peek(&cfspi->chead)) {
+		skb = skb_dequeue_tail(&cfspi->chead);
+		skb_queue_head(&cfspi->qhead, skb);
+	}
+
+	do {
+		struct caif_payload_info *info = NULL;
+		int spad = 0;
+		int epad = 0;
+
+		skb = skb_dequeue(&cfspi->qhead);
+		if (!skb)
+			break;
+
+		/*
+		 * Calculate length of frame including SPI padding.
+		 * The payload position is found in the control buffer.
+		 */
+		info = (struct caif_payload_info *)&skb->cb;
+
+		/*
+		 * Compute head offset i.e. number of bytes to add to
+		 * get the start of the payload aligned.
+		 */
+		if (spi_up_head_align)
+			spad = 1 + ((info->hdr_len + 1) & spi_up_head_align);
+
+		/*
+		 * Compute tail offset i.e. number of bytes to add to
+		 * get the complete CAIF frame aligned.
+		 */
+		epad = (skb->len + spad) & spi_up_tail_align;
+
+		if ((skb->len + spad + epad + frm_len) <= CAIF_MAX_SPI_FRAME) {
+			skb_queue_tail(&cfspi->chead, skb);
+			pkts++;
+			frm_len += skb->len + spad + epad;
+		} else {
+			/* Put back packet; it does not fit this frame. */
+			skb_queue_head(&cfspi->qhead, skb);
+			break;
+		}
+	} while (pkts <= CAIF_MAX_SPI_PKTS);
+
+	/* Send flow on if flow off was sent and we dropped below low water. */
+	if (cfspi->flow_off_sent && cfspi->qhead.qlen < cfspi->qd_low_mark &&
+		cfspi->cfdev.flowctrl) {
+		cfspi->flow_off_sent = 0;
+		cfspi->cfdev.flowctrl(cfspi->ndev, 1);
+	}
+
+	return frm_len;
+}
+
+static void cfspi_ss_cb(bool assert, struct cfspi_ifc *ifc)
+{
+	struct cfspi *cfspi = (struct cfspi *)ifc->priv;
+
+	if (!in_interrupt())
+		spin_lock(&cfspi->lock);
+	if (assert) {
+		set_bit(SPI_SS_ON, &cfspi->state);
+		set_bit(SPI_XFER, &cfspi->state);
+	} else {
+		set_bit(SPI_SS_OFF, &cfspi->state);
+	}
+	if (!in_interrupt())
+		spin_unlock(&cfspi->lock);
+
+	/* Wake up the xfer thread. */
+	wake_up_interruptible(&cfspi->wait);
+}
+
+static void cfspi_xfer_done_cb(struct cfspi_ifc *ifc)
+{
+	struct cfspi *cfspi = (struct cfspi *)ifc->priv;
+
+	/* Transfer done, complete work queue */
+	complete(&cfspi->comp);
+}
+
+static int cfspi_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct cfspi *cfspi = NULL;
+	unsigned long flags;
+	if (!dev)
+		return -EINVAL;
+
+	cfspi = netdev_priv(dev);
+
+	skb_queue_tail(&cfspi->qhead, skb);
+
+	spin_lock_irqsave(&cfspi->lock, flags);
+	if (!test_and_set_bit(SPI_XFER, &cfspi->state)) {
+		/* Wake up xfer thread. */
+		wake_up_interruptible(&cfspi->wait);
+	}
+	spin_unlock_irqrestore(&cfspi->lock, flags);
+
+	/* Send flow off if number of packets is above high water mark */
+	if (!cfspi->flow_off_sent &&
+		cfspi->qhead.qlen > cfspi->qd_high_mark &&
+		cfspi->cfdev.flowctrl) {
+		cfspi->flow_off_sent = 1;
+		cfspi->cfdev.flowctrl(cfspi->ndev, 0);
+	}
+
+	return 0;
+}
+
+int cfspi_rxfrm(struct cfspi *cfspi, u8 *buf, size_t len)
+{
+	u8 *src = buf;
+
+	caif_assert(buf != NULL);
+
+	do {
+		int res;
+		struct sk_buff *skb = NULL;
+		int spad = 0;
+		int epad = 0;
+		u8 *dst = NULL;
+		int pkt_len = 0;
+
+		/*
+		 * Compute head offset i.e. number of bytes added to
+		 * get the start of the payload aligned.
+		 */
+		if (spi_down_head_align) {
+			spad = 1 + *src;
+			src += spad;
+		}
+
+		/* Read length of CAIF frame (little endian). */
+		pkt_len = *src;
+		pkt_len |= ((*(src+1)) << 8) & 0xFF00;
+		pkt_len += 2;	/* Add FCS fields. */
+
+		/* Get a suitable caif packet and copy in data. */
+
+		skb = netdev_alloc_skb(cfspi->ndev, pkt_len + 1);
+		caif_assert(skb != NULL);
+
+		dst = skb_put(skb, pkt_len);
+		memcpy(dst, src, pkt_len);
+		src += pkt_len;
+
+		skb->protocol = htons(ETH_P_CAIF);
+		skb_reset_mac_header(skb);
+		skb->dev = cfspi->ndev;
+
+		/*
+		 * Push received packet up the stack.
+		 */
+		if (!spi_loop)
+			res = netif_rx_ni(skb);
+		else
+			res = cfspi_xmit(skb, cfspi->ndev);
+
+		if (!res) {
+			cfspi->ndev->stats.rx_packets++;
+			cfspi->ndev->stats.rx_bytes += pkt_len;
+		} else
+			cfspi->ndev->stats.rx_dropped++;
+
+		/*
+		 * Compute tail offset i.e. number of bytes added to
+		 * get the complete CAIF frame aligned.
+		 */
+		epad = (pkt_len + spad) & spi_down_tail_align;
+		src += epad;
+	} while ((src - buf) < len);
+
+	return src - buf;
+}
+
+static int cfspi_open(struct net_device *dev)
+{
+	netif_wake_queue(dev);
+	return 0;
+}
+
+static int cfspi_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	return 0;
+}
+static const struct net_device_ops cfspi_ops = {
+	.ndo_open = cfspi_open,
+	.ndo_stop = cfspi_close,
+	.ndo_start_xmit = cfspi_xmit
+};
+
+static void cfspi_setup(struct net_device *dev)
+{
+	struct cfspi *cfspi = netdev_priv(dev);
+	dev->features = 0;
+	dev->netdev_ops = &cfspi_ops;
+	dev->type = ARPHRD_CAIF;
+	dev->flags = IFF_NOARP | IFF_POINTOPOINT;
+	dev->tx_queue_len = 0;
+	dev->mtu = SPI_MAX_PAYLOAD_SIZE;
+	dev->destructor = free_netdev;
+	skb_queue_head_init(&cfspi->qhead);
+	skb_queue_head_init(&cfspi->chead);
+	cfspi->cfdev.link_select = CAIF_LINK_HIGH_BANDW;
+	cfspi->cfdev.use_frag = false;
+	cfspi->cfdev.use_stx = false;
+	cfspi->cfdev.use_fcs = false;
+	cfspi->ndev = dev;
+}
+
+/*
+ * Platform probe: allocate and register the cfspi net device for the SPI
+ * slave device passed as platform data. Returns 0 or a negative errno.
+ */
+int cfspi_spi_probe(struct platform_device *pdev)
+{
+	struct cfspi *cfspi = NULL;
+	struct net_device *ndev;
+	struct cfspi_dev *dev;
+	int res;
+	dev = (struct cfspi_dev *)pdev->dev.platform_data;
+	if (!dev)
+		return -ENODEV;
+
+	ndev = alloc_netdev(sizeof(struct cfspi), "cfspi%d", cfspi_setup);
+	if (!ndev)
+		return -ENOMEM;
+
+	cfspi = netdev_priv(ndev);
+	netif_stop_queue(ndev);
+	cfspi->ndev = ndev;
+	cfspi->pdev = pdev;
+
+	/* Set flow info */
+	cfspi->flow_off_sent = 0;
+	cfspi->qd_low_mark = LOW_WATER_MARK;
+	cfspi->qd_high_mark = HIGH_WATER_MARK;
+
+	/* Link the SPI device and this SPI interface to each other. */
+	cfspi->dev = dev;
+	dev->ifc = &cfspi->ifc;
+
+	/* Allocate DMA buffers. */
+	cfspi->xfer.va_tx = dma_alloc(&cfspi->xfer.pa_tx);
+	if (!cfspi->xfer.va_tx) {
+		printk(KERN_WARNING
+		       "CFSPI: failed to allocate dma TX buffer.\n");
+		res = -ENODEV;
+		goto err_dma_alloc_tx;
+	}
+
+	cfspi->xfer.va_rx = dma_alloc(&cfspi->xfer.pa_rx);
+	if (!cfspi->xfer.va_rx) {
+		printk(KERN_WARNING
+		       "CFSPI: failed to allocate dma RX buffer.\n");
+		res = -ENODEV;
+		goto err_dma_alloc_rx;
+	}
+
+	/* Initialize the work queue. */
+	INIT_WORK(&cfspi->work, cfspi_xfer);
+	/* Initialize spin locks. */
+	spin_lock_init(&cfspi->lock);
+	/* Initialize flow control state. */
+	cfspi->flow_stop = false;
+	/* Initialize wait queue. */
+	init_waitqueue_head(&cfspi->wait);
+
+	/* Create work thread. */
+	cfspi->wq = create_singlethread_workqueue(dev->name);
+	if (!cfspi->wq) {
+		printk(KERN_WARNING "CFSPI: failed to create work queue.\n");
+		res = -ENODEV;
+		goto err_create_wq;
+	}
+
+	/* Initialize the transfer-done completion. */
+	init_completion(&cfspi->comp);
+
+	/* Create debugfs entries. */
+	dev_debugfs_add(cfspi);
+
+	/* Set up the ifc. */
+	cfspi->ifc.ss_cb = cfspi_ss_cb;
+	cfspi->ifc.xfer_done_cb = cfspi_xfer_done_cb;
+	cfspi->ifc.priv = cfspi;
+
+	/* Schedule the work queue. */
+	queue_work(cfspi->wq, &cfspi->work);
+
+	/* Register network device. */
+	res = register_netdev(ndev);
+	if (res) {
+		printk(KERN_ERR "CFSPI: Reg. error: %d.\n", res);
+		goto err_net_reg;
+	}
+
+	/* List the device only now: a failed probe must not leave it listed. */
+	spin_lock(&cfspi_list_lock);
+	list_add_tail(&cfspi->list, &cfspi_list);
+	spin_unlock(&cfspi_list_lock);
+	return res;
+
+ err_net_reg:
+	dev_debugfs_rem(cfspi);
+	set_bit(SPI_TERMINATE, &cfspi->state);
+	wake_up_interruptible(&cfspi->wait);
+	destroy_workqueue(cfspi->wq);
+ err_create_wq:
+	dma_free(cfspi->xfer.va_rx, cfspi->xfer.pa_rx);
+ err_dma_alloc_rx:
+	dma_free(cfspi->xfer.va_tx, cfspi->xfer.pa_tx);
+ err_dma_alloc_tx:
+	free_netdev(ndev);
+
+	return res;
+}
+
+/*
+ * Platform remove: tear down the cfspi bound to pdev. 0 or -ENODEV if none.
+ */
+int cfspi_spi_remove(struct platform_device *pdev)
+{
+	struct cfspi *cfspi, *found = NULL;
+	struct cfspi_dev *dev = (struct cfspi_dev *)pdev->dev.platform_data;
+
+	spin_lock(&cfspi_list_lock);
+	list_for_each_entry(cfspi, &cfspi_list, list) {
+		if (cfspi->dev == dev) {
+			found = cfspi;
+			list_del(&found->list);
+			break;
+		}
+	}
+	spin_unlock(&cfspi_list_lock);
+	if (!found)
+		return -ENODEV;
+
+	/* Teardown may sleep, so it runs after the list lock is dropped. */
+	set_bit(SPI_TERMINATE, &found->state);
+	wake_up_interruptible(&found->wait);
+	destroy_workqueue(found->wq);
+	/* Work queue is gone; the DMA buffers are no longer in use. */
+	dma_free(found->xfer.va_rx, found->xfer.pa_rx);
+	dma_free(found->xfer.va_tx, found->xfer.pa_tx);
+	dev_debugfs_rem(found);
+	unregister_netdev(found->ndev);
+	return 0;
+}
+
+static void __exit cfspi_exit_module(void)
+{
+	struct list_head *list_node;
+	struct list_head *n;
+	struct cfspi *cfspi = NULL;
+
+	list_for_each_safe(list_node, n, &cfspi_list) {
+		cfspi = list_entry(list_node, struct cfspi, list);
+		platform_device_unregister(cfspi->pdev);
+	}
+
+	/* Destroy sysfs files. */
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_up_head_align);
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_up_tail_align);
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_down_head_align);
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_down_tail_align);
+	driver_remove_file(&cfspi_spi_driver.driver, &driver_attr_frame_align);
+	/* Unregister platform driver. */
+	platform_driver_unregister(&cfspi_spi_driver);
+	/* Destroy debugfs root directory. */
+	driver_debugfs_remove();
+}
+
+/*
+ * Module init: register the platform driver, then publish the alignment
+ * settings as driver sysfs attributes. On failure everything done so far,
+ * including the driver registration, is undone before returning the error.
+ */
+static int __init cfspi_init_module(void)
+{
+	int result;
+
+	/* Initialize spin lock. */
+	spin_lock_init(&cfspi_list_lock);
+
+	/* Register platform driver. */
+	result = platform_driver_register(&cfspi_spi_driver);
+	if (result) {
+		printk(KERN_ERR "Could not register platform SPI driver.\n");
+		goto err_dev_register;
+	}
+
+	/* Create sysfs files. */
+	result = driver_create_file(&cfspi_spi_driver.driver,
+				    &driver_attr_up_head_align);
+	if (result) {
+		printk(KERN_ERR "Sysfs creation failed 1.\n");
+		goto err_create_up_head_align;
+	}
+
+	result = driver_create_file(&cfspi_spi_driver.driver,
+				    &driver_attr_up_tail_align);
+	if (result) {
+		printk(KERN_ERR "Sysfs creation failed 2.\n");
+		goto err_create_up_tail_align;
+	}
+
+	result = driver_create_file(&cfspi_spi_driver.driver,
+				    &driver_attr_down_head_align);
+	if (result) {
+		printk(KERN_ERR "Sysfs creation failed 3.\n");
+		goto err_create_down_head_align;
+	}
+
+	result = driver_create_file(&cfspi_spi_driver.driver,
+				    &driver_attr_down_tail_align);
+	if (result) {
+		printk(KERN_ERR "Sysfs creation failed 4.\n");
+		goto err_create_down_tail_align;
+	}
+
+	result = driver_create_file(&cfspi_spi_driver.driver,
+				    &driver_attr_frame_align);
+	if (result) {
+		printk(KERN_ERR "Sysfs creation failed 5.\n");
+		goto err_create_frame_align;
+	}
+	driver_debugfs_create();
+	return result;
+
+ err_create_frame_align:
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_down_tail_align);
+ err_create_down_tail_align:
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_down_head_align);
+ err_create_down_head_align:
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_up_tail_align);
+ err_create_up_tail_align:
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_up_head_align);
+ err_create_up_head_align:
+	platform_driver_unregister(&cfspi_spi_driver);
+ err_dev_register:
+	return result;
+}
+
+module_init(cfspi_init_module);
+module_exit(cfspi_exit_module);
diff --git a/drivers/net/caif/caif_spi_slave.c b/drivers/net/caif/caif_spi_slave.c
new file mode 100644
index 00000000000..077ccf840ed
--- /dev/null
+++ b/drivers/net/caif/caif_spi_slave.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com
+ * Author:  Daniel Martensson / Daniel.Martensson@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2.
+ */
+#include <linux/version.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+#include <linux/semaphore.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+#include <linux/list.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/debugfs.h>
+#include <net/caif/caif_spi.h>
+
+#ifndef CONFIG_CAIF_SPI_SYNC
+#define SPI_DATA_POS SPI_CMD_SZ
+static inline int forward_to_spi_cmd(struct cfspi *cfspi)
+{
+	return cfspi->rx_cpck_len;
+}
+#else
+#define SPI_DATA_POS 0
+static inline int forward_to_spi_cmd(struct cfspi *cfspi)
+{
+	return 0;
+}
+#endif
+
+int spi_frm_align = 2;
+int spi_up_head_align = 1;
+int spi_up_tail_align;
+int spi_down_head_align = 3;
+int spi_down_tail_align = 1;
+
+#ifdef CONFIG_DEBUG_FS
+static inline void debugfs_store_prev(struct cfspi *cfspi)
+{
+	/* Remember the current command as the previous one (debug only). */
+	cfspi->pcmd = cfspi->cmd;
+	/* Remember the current TX/RX packet lengths as the previous ones. */
+	cfspi->tx_ppck_len = cfspi->tx_cpck_len;
+	cfspi->rx_ppck_len = cfspi->rx_cpck_len;
+}
+#else
+static inline void debugfs_store_prev(struct cfspi *cfspi)
+{
+}
+#endif
+
+void cfspi_xfer(struct work_struct *work)
+{
+	struct cfspi *cfspi;
+	u8 *ptr = NULL;
+	unsigned long flags;
+	int ret;
+	cfspi = container_of(work, struct cfspi, work);
+
+	/* Initialize state: start out as if the last command was EOT. */
+	cfspi->cmd = SPI_CMD_EOT;
+
+	for (;;) {
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_WAITING);
+
+		/* Wait for master talk or transmit event. */
+		wait_event_interruptible(cfspi->wait,
+				 test_bit(SPI_XFER, &cfspi->state) ||
+				 test_bit(SPI_TERMINATE, &cfspi->state));
+
+		if (test_bit(SPI_TERMINATE, &cfspi->state))
+			return;
+
+#if CFSPI_DBG_PREFILL
+		/* Prefill buffers for easier debugging. */
+		memset(cfspi->xfer.va_tx, 0xFF, SPI_DMA_BUF_LEN);
+		memset(cfspi->xfer.va_rx, 0xFF, SPI_DMA_BUF_LEN);
+#endif	/* CFSPI_DBG_PREFILL */
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_AWAKE);
+
+		/* Check whether we have a committed frame. */
+		if (cfspi->tx_cpck_len) {
+			int len;
+
+			cfspi_dbg_state(cfspi, CFSPI_STATE_FETCH_PKT);
+
+			/* Copy committed SPI frames after the indication. */
+			ptr = (u8 *) cfspi->xfer.va_tx;
+			ptr += SPI_IND_SZ;
+			len = cfspi_xmitfrm(cfspi, ptr, cfspi->tx_cpck_len);
+			WARN_ON(len != cfspi->tx_cpck_len);
+	}
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_GET_NEXT);
+
+		/* Get length of next frame to commit. */
+		cfspi->tx_npck_len = cfspi_xmitlen(cfspi);
+
+		WARN_ON(cfspi->tx_npck_len > SPI_DMA_BUF_LEN);
+
+		/*
+		 * Add indication and length at the beginning of the frame,
+		 * using little endian.
+		 */
+		ptr = (u8 *) cfspi->xfer.va_tx;
+		*ptr++ = SPI_CMD_IND;
+		*ptr++ = (SPI_CMD_IND  & 0xFF00) >> 8;
+		*ptr++ = cfspi->tx_npck_len & 0x00FF;
+		*ptr++ = (cfspi->tx_npck_len & 0xFF00) >> 8;
+
+		/* Calculate length of DMAs. */
+		cfspi->xfer.tx_dma_len = cfspi->tx_cpck_len + SPI_IND_SZ;
+		cfspi->xfer.rx_dma_len = cfspi->rx_cpck_len + SPI_CMD_SZ;
+
+		/* Add SPI TX frame alignment padding, if necessary. */
+		if (cfspi->tx_cpck_len &&
+			(cfspi->xfer.tx_dma_len % spi_frm_align)) {
+
+			cfspi->xfer.tx_dma_len += spi_frm_align -
+			    (cfspi->xfer.tx_dma_len % spi_frm_align);
+		}
+
+		/* Add SPI RX frame alignment padding, if necessary. */
+		if (cfspi->rx_cpck_len &&
+			(cfspi->xfer.rx_dma_len % spi_frm_align)) {
+
+			cfspi->xfer.rx_dma_len += spi_frm_align -
+			    (cfspi->xfer.rx_dma_len % spi_frm_align);
+		}
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_INIT_XFER);
+
+		/* Start transfer. */
+		ret = cfspi->dev->init_xfer(&cfspi->xfer, cfspi->dev);
+		WARN_ON(ret);
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_WAIT_ACTIVE);
+
+		/*
+		 * TODO: We might be able to make an assumption if this is the
+		 * first loop. Make sure that minimum toggle time is respected.
+		 */
+		udelay(MIN_TRANSITION_TIME_USEC);
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_SIG_ACTIVE);
+
+		/* Signal that we are ready to receive data. */
+		cfspi->dev->sig_xfer(true, cfspi->dev);
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_WAIT_XFER_DONE);
+
+		/* Wait for transfer completion. */
+		wait_for_completion(&cfspi->comp);
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_XFER_DONE);
+
+		if (cfspi->cmd == SPI_CMD_EOT) {
+			/*
+			 * Clear the master talk bit. A xfer is always at
+			 * least two bursts.
+			 */
+			clear_bit(SPI_SS_ON, &cfspi->state);
+		}
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_WAIT_INACTIVE);
+
+		/* Make sure that the minimum toggle time is respected. */
+		if (SPI_XFER_TIME_USEC(cfspi->xfer.tx_dma_len,
+					cfspi->dev->clk_mhz) <
+			MIN_TRANSITION_TIME_USEC) {
+
+			udelay(MIN_TRANSITION_TIME_USEC -
+				SPI_XFER_TIME_USEC
+				(cfspi->xfer.tx_dma_len, cfspi->dev->clk_mhz));
+		}
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_SIG_INACTIVE);
+
+		/* De-assert transfer signal. */
+		cfspi->dev->sig_xfer(false, cfspi->dev);
+
+		/* Check whether we received a CAIF packet. */
+		if (cfspi->rx_cpck_len) {
+			int len;
+
+			cfspi_dbg_state(cfspi, CFSPI_STATE_DELIVER_PKT);
+
+			/* Parse SPI frame. */
+			ptr = ((u8 *)(cfspi->xfer.va_rx + SPI_DATA_POS));
+
+			len = cfspi_rxfrm(cfspi, ptr, cfspi->rx_cpck_len);
+			WARN_ON(len != cfspi->rx_cpck_len);
+		}
+
+		/* Check the next SPI command and length. */
+		ptr = (u8 *) cfspi->xfer.va_rx;
+
+		ptr += forward_to_spi_cmd(cfspi);
+
+		cfspi->cmd = *ptr++;
+		cfspi->cmd |= ((*ptr++) << 8) & 0xFF00;
+		cfspi->rx_npck_len = *ptr++;
+		cfspi->rx_npck_len |= ((*ptr++) << 8) & 0xFF00;
+
+		WARN_ON(cfspi->rx_npck_len > SPI_DMA_BUF_LEN);
+		WARN_ON(cfspi->cmd > SPI_CMD_EOT);
+
+		debugfs_store_prev(cfspi);
+
+		/* Check whether the master issued an EOT command. */
+		if (cfspi->cmd == SPI_CMD_EOT) {
+			/* Reset state. */
+			cfspi->tx_cpck_len = 0;
+			cfspi->rx_cpck_len = 0;
+		} else {
+			/* Update state: the next lengths become current. */
+			cfspi->tx_cpck_len = cfspi->tx_npck_len;
+			cfspi->rx_cpck_len = cfspi->rx_npck_len;
+		}
+
+		/*
+		 * Check whether we need to clear the xfer bit.
+		 * Spin lock needed for packet insertion.
+		 * Test and clear of different bits
+		 * are not supported.
+		 */
+		spin_lock_irqsave(&cfspi->lock, flags);
+		if (cfspi->cmd == SPI_CMD_EOT && !cfspi_xmitlen(cfspi)
+			&& !test_bit(SPI_SS_ON, &cfspi->state))
+			clear_bit(SPI_XFER, &cfspi->state);
+
+		spin_unlock_irqrestore(&cfspi->lock, flags);
+	}
+}
+
+struct platform_driver cfspi_spi_driver = {
+	.probe = cfspi_spi_probe,
+	.remove = cfspi_spi_remove,
+	.driver = {
+		   .name = "cfspi_sspi",
+		   .owner = THIS_MODULE,
+		   },
+};
diff --git a/include/net/caif/caif_spi.h b/include/net/caif/caif_spi.h
new file mode 100644
index 00000000000..ce4570dff02
--- /dev/null
+++ b/include/net/caif/caif_spi.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Daniel Martensson / Daniel.Martensson@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#ifndef CAIF_SPI_H_
+#define CAIF_SPI_H_
+
+#include <net/caif/caif_device.h>
+
+#define SPI_CMD_WR			0x00
+#define SPI_CMD_RD			0x01
+#define SPI_CMD_EOT			0x02
+#define SPI_CMD_IND			0x04
+
+#define SPI_DMA_BUF_LEN			8192
+
+#define WL_SZ				2	/* 16 bits. */
+#define SPI_CMD_SZ			4	/* 32 bits. */
+#define SPI_IND_SZ			4	/* 32 bits. */
+
+#define SPI_XFER			0
+#define SPI_SS_ON			1
+#define SPI_SS_OFF			2
+#define SPI_TERMINATE			3
+
+/* Minimum time between different levels is 50 microseconds. */
+#define MIN_TRANSITION_TIME_USEC	50
+
+/* Duration of an SPI transfer in microseconds, for a given number of
+ * bytes and a clock rate in MHz (bits divided by MHz yields usec).
+ */
+#define SPI_MASTER_CLK_MHZ		13
+#define SPI_XFER_TIME_USEC(bytes, clk) (((bytes) * 8) / (clk))
+
+/* Normally this should be aligned on the modem in order to benefit from full
+ * duplex transfers. However a size of 8188 provokes errors when running with
+ * the modem. These errors occur when packet sizes approach 4 kB of data.
+ */
+#define CAIF_MAX_SPI_FRAME 4092
+
+/* Maximum number of uplink CAIF frames that can reside in the same SPI frame.
+ * This number should correspond with the modem setting. The application side
+ * CAIF accepts any number of embedded downlink CAIF frames.
+ */
+#define CAIF_MAX_SPI_PKTS 9
+
+/* Decides if SPI buffers should be prefilled with 0xFF pattern for easier
+ * debugging. Both TX and RX buffers will be filled before the transfer.
+ */
+#define CFSPI_DBG_PREFILL		0
+
+/* Structure describing a SPI transfer. */
+struct cfspi_xfer {
+	u16 tx_dma_len;
+	u16 rx_dma_len;
+	void *va_tx;
+	dma_addr_t pa_tx;
+	void *va_rx;
+	dma_addr_t pa_rx;
+};
+
+/* Structure implemented by the SPI interface. */
+struct cfspi_ifc {
+	void (*ss_cb) (bool assert, struct cfspi_ifc *ifc);
+	void (*xfer_done_cb) (struct cfspi_ifc *ifc);
+	void *priv;
+};
+
+/* Structure implemented by SPI clients. */
+struct cfspi_dev {
+	int (*init_xfer) (struct cfspi_xfer *xfer, struct cfspi_dev *dev);
+	void (*sig_xfer) (bool xfer, struct cfspi_dev *dev);
+	struct cfspi_ifc *ifc;
+	char *name;
+	u32 clk_mhz;
+	void *priv;
+};
+
+/* Enumeration describing the CAIF SPI state. */
+enum cfspi_state {
+	CFSPI_STATE_WAITING = 0,
+	CFSPI_STATE_AWAKE,
+	CFSPI_STATE_FETCH_PKT,
+	CFSPI_STATE_GET_NEXT,
+	CFSPI_STATE_INIT_XFER,
+	CFSPI_STATE_WAIT_ACTIVE,
+	CFSPI_STATE_SIG_ACTIVE,
+	CFSPI_STATE_WAIT_XFER_DONE,
+	CFSPI_STATE_XFER_DONE,
+	CFSPI_STATE_WAIT_INACTIVE,
+	CFSPI_STATE_SIG_INACTIVE,
+	CFSPI_STATE_DELIVER_PKT,
+	CFSPI_STATE_MAX,
+};
+
+/* Structure implemented by SPI physical interfaces. */
+struct cfspi {
+	struct caif_dev_common cfdev;
+	struct net_device *ndev;
+	struct platform_device *pdev;
+	struct sk_buff_head qhead;
+	struct sk_buff_head chead;
+	u16 cmd;
+	u16 tx_cpck_len;
+	u16 tx_npck_len;
+	u16 rx_cpck_len;
+	u16 rx_npck_len;
+	struct cfspi_ifc ifc;
+	struct cfspi_xfer xfer;
+	struct cfspi_dev *dev;
+	unsigned long state;
+	struct work_struct work;
+	struct workqueue_struct *wq;
+	struct list_head list;
+	int    flow_off_sent;
+	u32 qd_low_mark;
+	u32 qd_high_mark;
+	struct completion comp;
+	wait_queue_head_t wait;
+	spinlock_t lock;
+	bool flow_stop;
+#ifdef CONFIG_DEBUG_FS
+	enum cfspi_state dbg_state;
+	u16 pcmd;
+	u16 tx_ppck_len;
+	u16 rx_ppck_len;
+	struct dentry *dbgfs_dir;
+	struct dentry *dbgfs_state;
+	struct dentry *dbgfs_frame;
+#endif				/* CONFIG_DEBUG_FS */
+};
+
+extern int spi_frm_align;
+extern int spi_up_head_align;
+extern int spi_up_tail_align;
+extern int spi_down_head_align;
+extern int spi_down_tail_align;
+extern struct platform_driver cfspi_spi_driver;
+
+void cfspi_dbg_state(struct cfspi *cfspi, int state);
+int cfspi_xmitfrm(struct cfspi *cfspi, u8 *buf, size_t len);
+int cfspi_xmitlen(struct cfspi *cfspi);
+int cfspi_rxfrm(struct cfspi *cfspi, u8 *buf, size_t len);
+int cfspi_spi_remove(struct platform_device *pdev);
+int cfspi_spi_probe(struct platform_device *pdev);
+int cfspi_xmitfrm(struct cfspi *cfspi, u8 *buf, size_t len);
+int cfspi_xmitlen(struct cfspi *cfspi);
+int cfspi_rxfrm(struct cfspi *cfspi, u8 *buf, size_t len);
+void cfspi_xfer(struct work_struct *work);
+
+#endif				/* CAIF_SPI_H_ */
-- 
cgit v1.2.3-70-g09d2


From b56c0d8937e665a27d90517ee7a746d0aa05af46 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:09 +0200
Subject: kthread: implement kthread_worker

Implement simple work processor for kthread.  This is to ease using
kthread.  Single thread workqueue used to be used for things like this
but workqueue won't guarantee fixed kthread association anymore to
enable worker sharing.

This can be used in cases where specific kthread association is
necessary, for example, when it should have RT priority or be assigned
to certain cgroup.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kthread.h |  64 +++++++++++++++++++++
 kernel/kthread.c        | 149 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 213 insertions(+)

(limited to 'include')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index aabc8a13ba7..f93cb6979ed 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -34,4 +34,68 @@ int kthread_should_stop(void);
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
 
+/*
+ * Simple work processor based on kthread.
+ *
+ * This provides easier way to make use of kthreads.  A kthread_work
+ * can be queued and flushed using queue/flush_kthread_work()
+ * respectively.  Queued kthread_works are processed by a kthread
+ * running kthread_worker_fn().
+ *
+ * A kthread_work can't be freed while it is executing.
+ */
+struct kthread_work;
+typedef void (*kthread_work_func_t)(struct kthread_work *work);
+
+struct kthread_worker {
+	spinlock_t		lock;
+	struct list_head	work_list;
+	struct task_struct	*task;
+};
+
+struct kthread_work {
+	struct list_head	node;
+	kthread_work_func_t	func;
+	wait_queue_head_t	done;
+	atomic_t		flushing;
+	int			queue_seq;
+	int			done_seq;
+};
+
+#define KTHREAD_WORKER_INIT(worker)	{				\
+	.lock = __SPIN_LOCK_UNLOCKED((worker).lock),			\
+	.work_list = LIST_HEAD_INIT((worker).work_list),		\
+	}	/* lock is named after the worker so lockdep can track it */
+
+#define KTHREAD_WORK_INIT(work, fn)	{				\
+	.node = LIST_HEAD_INIT((work).node),				\
+	.func = (fn),							\
+	.done = __WAIT_QUEUE_HEAD_INITIALIZER((work).done),		\
+	.flushing = ATOMIC_INIT(0),					\
+	}
+
+#define DEFINE_KTHREAD_WORKER(worker)					\
+	struct kthread_worker worker = KTHREAD_WORKER_INIT(worker)
+
+#define DEFINE_KTHREAD_WORK(work, fn)					\
+	struct kthread_work work = KTHREAD_WORK_INIT(work, fn)
+
+static inline void init_kthread_worker(struct kthread_worker *worker)
+{
+	*worker = (struct kthread_worker)KTHREAD_WORKER_INIT(*worker);
+}
+
+static inline void init_kthread_work(struct kthread_work *work,
+				     kthread_work_func_t fn)
+{
+	*work = (struct kthread_work)KTHREAD_WORK_INIT(*work, fn);
+}
+
+int kthread_worker_fn(void *worker_ptr);
+
+bool queue_kthread_work(struct kthread_worker *worker,
+			struct kthread_work *work);
+void flush_kthread_work(struct kthread_work *work);
+void flush_kthread_worker(struct kthread_worker *worker);
+
 #endif /* _LINUX_KTHREAD_H */
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 83911c78017..8b63c7fee73 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -14,6 +14,8 @@
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/freezer.h>
 #include <trace/events/sched.h>
 
 static DEFINE_SPINLOCK(kthread_create_lock);
@@ -247,3 +249,150 @@ int kthreadd(void *unused)
 
 	return 0;
 }
+
+/**
+ * kthread_worker_fn - kthread function to process kthread_worker
+ * @worker_ptr: pointer to initialized kthread_worker
+ *
+ * This function can be used as @threadfn to kthread_create() or
+ * kthread_run() with @worker_ptr argument pointing to an initialized
+ * kthread_worker.  The started kthread will process work_list until
+ * it is stopped with kthread_stop().  A kthread can also call
+ * this function directly after extra initialization.
+ *
+ * Different kthreads can be used for the same kthread_worker as long
+ * as there's only one kthread attached to it at any given time.  A
+ * kthread_worker without an attached kthread simply collects queued
+ * kthread_works.
+ */
+int kthread_worker_fn(void *worker_ptr)
+{
+	struct kthread_worker *worker = worker_ptr;
+	struct kthread_work *work;
+
+	WARN_ON(worker->task);
+	worker->task = current;
+repeat:
+	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */
+
+	if (kthread_should_stop()) {
+		__set_current_state(TASK_RUNNING);
+		spin_lock_irq(&worker->lock);
+		worker->task = NULL;
+		spin_unlock_irq(&worker->lock);
+		return 0;
+	}
+
+	work = NULL;
+	spin_lock_irq(&worker->lock);
+	if (!list_empty(&worker->work_list)) {
+		work = list_first_entry(&worker->work_list,
+					struct kthread_work, node);
+		list_del_init(&work->node);
+	}
+	spin_unlock_irq(&worker->lock);
+
+	if (work) {
+		__set_current_state(TASK_RUNNING);
+		work->func(work);
+		smp_wmb();	/* wmb worker-b0 paired with flush-b1 */
+		work->done_seq = work->queue_seq;
+		smp_mb();	/* mb worker-b1 paired with flush-b0 */
+		if (atomic_read(&work->flushing))
+			wake_up_all(&work->done);
+	} else if (!freezing(current))
+		schedule();
+
+	try_to_freeze();
+	goto repeat;
+}
+EXPORT_SYMBOL_GPL(kthread_worker_fn);
+
+/**
+ * queue_kthread_work - queue a kthread_work
+ * @worker: target kthread_worker
+ * @work: kthread_work to queue
+ *
+ * Queue @work to work processor @worker for async execution.  @worker
+ * must have been initialized, e.g. with init_kthread_worker().  Returns
+ * %true if @work was successfully queued, %false if it was already pending.
+ */
+bool queue_kthread_work(struct kthread_worker *worker,
+			struct kthread_work *work)
+{
+	bool ret = false;
+	unsigned long flags;
+
+	spin_lock_irqsave(&worker->lock, flags);
+	if (list_empty(&work->node)) {
+		list_add_tail(&work->node, &worker->work_list);
+		work->queue_seq++;
+		if (likely(worker->task))
+			wake_up_process(worker->task);
+		ret = true;
+	}
+	spin_unlock_irqrestore(&worker->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(queue_kthread_work);
+
+/**
+ * flush_kthread_work - flush a kthread_work
+ * @work: work to flush
+ *
+ * If @work is queued or executing, wait for it to finish execution.
+ */
+void flush_kthread_work(struct kthread_work *work)
+{
+	int seq = work->queue_seq;
+
+	atomic_inc(&work->flushing);
+
+	/*
+	 * mb flush-b0 paired with worker-b1, to make sure either
+	 * worker sees the above increment or we see done_seq update.
+	 */
+	smp_mb__after_atomic_inc();
+
+	/* A - B <= 0 tests whether B is in front of A regardless of overflow */
+	wait_event(work->done, seq - work->done_seq <= 0);
+	atomic_dec(&work->flushing);
+
+	/*
+	 * rmb flush-b1 paired with worker-b0, to make sure our caller
+	 * sees every change made by work->func().
+	 */
+	smp_mb__after_atomic_dec();
+}
+EXPORT_SYMBOL_GPL(flush_kthread_work);
+
+struct kthread_flush_work {
+	struct kthread_work	work;
+	struct completion	done;
+};
+
+static void kthread_flush_work_fn(struct kthread_work *work)
+{
+	struct kthread_flush_work *fwork =
+		container_of(work, struct kthread_flush_work, work);
+	complete(&fwork->done);
+}
+
+/**
+ * flush_kthread_worker - flush all current works on a kthread_worker
+ * @worker: worker to flush
+ *
+ * Wait until all currently executing or pending works on @worker are
+ * finished.
+ */
+void flush_kthread_worker(struct kthread_worker *worker)
+{
+	struct kthread_flush_work fwork = {
+		KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
+		COMPLETION_INITIALIZER_ONSTACK(fwork.done),
+	};
+
+	queue_kthread_work(worker, &fwork.work);
+	wait_for_completion(&fwork.done);
+}
+EXPORT_SYMBOL_GPL(flush_kthread_worker);
-- 
cgit v1.2.3-70-g09d2


From 82805ab77d25643f579d90397dcd34f05d1b750a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:09 +0200
Subject: kthread: implement kthread_data()

Implement kthread_data() which takes @task pointing to a kthread and
returns @data specified when creating the kthread.  The caller is
responsible for ensuring the validity of @task when calling this
function.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/kthread.h |  1 +
 kernel/kthread.c        | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index f93cb6979ed..685ea65eb80 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -30,6 +30,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
 void kthread_bind(struct task_struct *k, unsigned int cpu);
 int kthread_stop(struct task_struct *k);
 int kthread_should_stop(void);
+void *kthread_data(struct task_struct *k);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 8b63c7fee73..2dc3786349d 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -37,6 +37,7 @@ struct kthread_create_info
 
 struct kthread {
 	int should_stop;
+	void *data;
 	struct completion exited;
 };
 
@@ -56,6 +57,19 @@ int kthread_should_stop(void)
 }
 EXPORT_SYMBOL(kthread_should_stop);
 
+/**
+ * kthread_data - return data value specified on kthread creation
+ * @task: kthread task in question
+ *
+ * Return the data value specified when kthread @task was created.
+ * The caller is responsible for ensuring the validity of @task when
+ * calling this function.
+ */
+void *kthread_data(struct task_struct *task)
+{
+	return to_kthread(task)->data;
+}
+
 static int kthread(void *_create)
 {
 	/* Copy data: it's on kthread's stack */
@@ -66,6 +80,7 @@ static int kthread(void *_create)
 	int ret;
 
 	self.should_stop = 0;
+	self.data = data;
 	init_completion(&self.exited);
 	current->vfork_done = &self.exited;
 
-- 
cgit v1.2.3-70-g09d2


From c790bce0481857412c964c5e9d46d56e41c4b051 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:09 +0200
Subject: workqueue: kill RT workqueue

With stop_machine() converted to use cpu_stop, RT workqueue doesn't
have any user left.  Kill RT workqueue support.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 20 +++++++++-----------
 kernel/workqueue.c        |  6 ------
 2 files changed, 9 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 9466e860d8c..0697946c66a 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -181,12 +181,11 @@ static inline void destroy_work_on_stack(struct work_struct *work) { }
 
 
 extern struct workqueue_struct *
-__create_workqueue_key(const char *name, int singlethread,
-		       int freezeable, int rt, struct lock_class_key *key,
-		       const char *lock_name);
+__create_workqueue_key(const char *name, int singlethread, int freezeable,
+		       struct lock_class_key *key, const char *lock_name);
 
 #ifdef CONFIG_LOCKDEP
-#define __create_workqueue(name, singlethread, freezeable, rt)	\
+#define __create_workqueue(name, singlethread, freezeable)	\
 ({								\
 	static struct lock_class_key __key;			\
 	const char *__lock_name;				\
@@ -197,19 +196,18 @@ __create_workqueue_key(const char *name, int singlethread,
 		__lock_name = #name;				\
 								\
 	__create_workqueue_key((name), (singlethread),		\
-			       (freezeable), (rt), &__key,	\
+			       (freezeable), &__key,		\
 			       __lock_name);			\
 })
 #else
-#define __create_workqueue(name, singlethread, freezeable, rt)	\
-	__create_workqueue_key((name), (singlethread), (freezeable), (rt), \
+#define __create_workqueue(name, singlethread, freezeable)	\
+	__create_workqueue_key((name), (singlethread), (freezeable), \
 			       NULL, NULL)
 #endif
 
-#define create_workqueue(name) __create_workqueue((name), 0, 0, 0)
-#define create_rt_workqueue(name) __create_workqueue((name), 0, 0, 1)
-#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0)
-#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0)
+#define create_workqueue(name) __create_workqueue((name), 0, 0)
+#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1)
+#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 327d2deb445..1a47fbf92fa 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -62,7 +62,6 @@ struct workqueue_struct {
 	const char *name;
 	int singlethread;
 	int freezeable;		/* Freeze threads during suspend */
-	int rt;
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map lockdep_map;
 #endif
@@ -947,7 +946,6 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
 
 static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
-	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 	struct workqueue_struct *wq = cwq->wq;
 	const char *fmt = is_wq_single_threaded(wq) ? "%s" : "%s/%d";
 	struct task_struct *p;
@@ -963,8 +961,6 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 	 */
 	if (IS_ERR(p))
 		return PTR_ERR(p);
-	if (cwq->wq->rt)
-		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
 	cwq->thread = p;
 
 	trace_workqueue_creation(cwq->thread, cpu);
@@ -986,7 +982,6 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 struct workqueue_struct *__create_workqueue_key(const char *name,
 						int singlethread,
 						int freezeable,
-						int rt,
 						struct lock_class_key *key,
 						const char *lock_name)
 {
@@ -1008,7 +1003,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
 	wq->singlethread = singlethread;
 	wq->freezeable = freezeable;
-	wq->rt = rt;
 	INIT_LIST_HEAD(&wq->list);
 
 	if (singlethread) {
-- 
cgit v1.2.3-70-g09d2


From 4690c4ab56c71919893ca25252f2dd65b58188c7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:10 +0200
Subject: workqueue: misc/cosmetic updates

Make the following updates in preparation of concurrency managed
workqueue.  None of these changes causes any visible behavior
difference.

* Add comments and adjust indentations to data structures and several
  functions.

* Rename wq_per_cpu() to get_cwq() and swap the position of two
  parameters for consistency.  Convert a direct per_cpu_ptr() access
  to wq->cpu_wq to get_cwq().

* Add work_static() and update set_wq_data() such that it sets the
  flags part to WORK_STRUCT_PENDING | WORK_STRUCT_STATIC if static |
  @extra_flags.

* Move sanity check on work->entry emptiness from queue_work_on() to
  __queue_work() which all queueing paths share.

* Make __queue_work() take @cpu and @wq instead of @cwq.

* Restructure flush_work() and __create_workqueue_key() to make them
  easier to modify.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |   5 ++
 kernel/workqueue.c        | 131 +++++++++++++++++++++++++++++-----------------
 2 files changed, 89 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 0697946c66a..e724dafc9e6 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -96,9 +96,14 @@ struct execute_work {
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 extern void __init_work(struct work_struct *work, int onstack);
 extern void destroy_work_on_stack(struct work_struct *work);
+static inline unsigned int work_static(struct work_struct *work)
+{
+	return *work_data_bits(work) & (1 << WORK_STRUCT_STATIC);
+}
 #else
 static inline void __init_work(struct work_struct *work, int onstack) { }
 static inline void destroy_work_on_stack(struct work_struct *work) { }
+static inline unsigned int work_static(struct work_struct *work) { return 0; }
 #endif
 
 /*
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1a47fbf92fa..c56146a755e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -36,6 +36,16 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
 
+/*
+ * Structure fields follow one of the following exclusion rules.
+ *
+ * I: Set during initialization and read-only afterwards.
+ *
+ * L: cwq->lock protected.  Access with cwq->lock held.
+ *
+ * W: workqueue_lock protected.
+ */
+
 /*
  * The per-CPU workqueue (if single thread, we always use the first
  * possible cpu).
@@ -48,8 +58,8 @@ struct cpu_workqueue_struct {
 	wait_queue_head_t more_work;
 	struct work_struct *current_work;
 
-	struct workqueue_struct *wq;
-	struct task_struct *thread;
+	struct workqueue_struct *wq;		/* I: the owning workqueue */
+	struct task_struct	*thread;
 } ____cacheline_aligned;
 
 /*
@@ -57,13 +67,13 @@ struct cpu_workqueue_struct {
  * per-CPU workqueues:
  */
 struct workqueue_struct {
-	struct cpu_workqueue_struct *cpu_wq;
-	struct list_head list;
-	const char *name;
+	struct cpu_workqueue_struct *cpu_wq;	/* I: cwq's */
+	struct list_head	list;		/* W: list of all workqueues */
+	const char		*name;		/* I: workqueue name */
 	int singlethread;
 	int freezeable;		/* Freeze threads during suspend */
 #ifdef CONFIG_LOCKDEP
-	struct lockdep_map lockdep_map;
+	struct lockdep_map	lockdep_map;
 #endif
 };
 
@@ -204,8 +214,8 @@ static const struct cpumask *wq_cpu_map(struct workqueue_struct *wq)
 		? cpu_singlethread_map : cpu_populated_map;
 }
 
-static
-struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu)
+static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
+					    struct workqueue_struct *wq)
 {
 	if (unlikely(is_wq_single_threaded(wq)))
 		cpu = singlethread_cpu;
@@ -217,15 +227,13 @@ struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu)
  * - Must *only* be called if the pending flag is set
  */
 static inline void set_wq_data(struct work_struct *work,
-				struct cpu_workqueue_struct *cwq)
+			       struct cpu_workqueue_struct *cwq,
+			       unsigned long extra_flags)
 {
-	unsigned long new;
-
 	BUG_ON(!work_pending(work));
 
-	new = (unsigned long) cwq | (1UL << WORK_STRUCT_PENDING);
-	new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work);
-	atomic_long_set(&work->data, new);
+	atomic_long_set(&work->data, (unsigned long)cwq | work_static(work) |
+			(1UL << WORK_STRUCT_PENDING) | extra_flags);
 }
 
 /*
@@ -233,9 +241,7 @@ static inline void set_wq_data(struct work_struct *work,
  */
 static inline void clear_wq_data(struct work_struct *work)
 {
-	unsigned long flags = *work_data_bits(work) &
-				(1UL << WORK_STRUCT_STATIC);
-	atomic_long_set(&work->data, flags);
+	atomic_long_set(&work->data, work_static(work));
 }
 
 static inline
@@ -244,29 +250,47 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
 	return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
 }
 
+/**
+ * insert_work - insert a work into cwq
+ * @cwq: cwq @work belongs to
+ * @work: work to insert
+ * @head: insertion point
+ * @extra_flags: extra WORK_STRUCT_* flags to set
+ *
+ * Insert @work into @cwq before @head.
+ *
+ * CONTEXT:
+ * spin_lock_irq(cwq->lock).
+ */
 static void insert_work(struct cpu_workqueue_struct *cwq,
-			struct work_struct *work, struct list_head *head)
+			struct work_struct *work, struct list_head *head,
+			unsigned int extra_flags)
 {
 	trace_workqueue_insertion(cwq->thread, work);
 
-	set_wq_data(work, cwq);
+	/* we own @work, set data and link */
+	set_wq_data(work, cwq, extra_flags);
+
 	/*
 	 * Ensure that we get the right work->data if we see the
 	 * result of list_add() below, see try_to_grab_pending().
 	 */
 	smp_wmb();
+
 	list_add_tail(&work->entry, head);
 	wake_up(&cwq->more_work);
 }
 
-static void __queue_work(struct cpu_workqueue_struct *cwq,
+static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 			 struct work_struct *work)
 {
+	struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
 	unsigned long flags;
 
 	debug_work_activate(work);
 	spin_lock_irqsave(&cwq->lock, flags);
-	insert_work(cwq, work, &cwq->worklist);
+	BUG_ON(!list_empty(&work->entry));
+	insert_work(cwq, work, &cwq->worklist, 0);
 	spin_unlock_irqrestore(&cwq->lock, flags);
 }
 
@@ -308,8 +332,7 @@ queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
 	int ret = 0;
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
-		BUG_ON(!list_empty(&work->entry));
-		__queue_work(wq_per_cpu(wq, cpu), work);
+		__queue_work(cpu, wq, work);
 		ret = 1;
 	}
 	return ret;
@@ -320,9 +343,8 @@ static void delayed_work_timer_fn(unsigned long __data)
 {
 	struct delayed_work *dwork = (struct delayed_work *)__data;
 	struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);
-	struct workqueue_struct *wq = cwq->wq;
 
-	__queue_work(wq_per_cpu(wq, smp_processor_id()), &dwork->work);
+	__queue_work(smp_processor_id(), cwq->wq, &dwork->work);
 }
 
 /**
@@ -366,7 +388,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 		timer_stats_timer_set_start_info(&dwork->timer);
 
 		/* This stores cwq for the moment, for the timer_fn */
-		set_wq_data(work, wq_per_cpu(wq, raw_smp_processor_id()));
+		set_wq_data(work, get_cwq(raw_smp_processor_id(), wq), 0);
 		timer->expires = jiffies + delay;
 		timer->data = (unsigned long)dwork;
 		timer->function = delayed_work_timer_fn;
@@ -430,6 +452,12 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 	spin_unlock_irq(&cwq->lock);
 }
 
+/**
+ * worker_thread - the worker thread function
+ * @__cwq: cwq to serve
+ *
+ * The cwq worker thread function.
+ */
 static int worker_thread(void *__cwq)
 {
 	struct cpu_workqueue_struct *cwq = __cwq;
@@ -468,6 +496,17 @@ static void wq_barrier_func(struct work_struct *work)
 	complete(&barr->done);
 }
 
+/**
+ * insert_wq_barrier - insert a barrier work
+ * @cwq: cwq to insert barrier into
+ * @barr: wq_barrier to insert
+ * @head: insertion point
+ *
+ * Insert barrier @barr into @cwq before @head.
+ *
+ * CONTEXT:
+ * spin_lock_irq(cwq->lock).
+ */
 static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
 			struct wq_barrier *barr, struct list_head *head)
 {
@@ -479,11 +518,10 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
 	 */
 	INIT_WORK_ON_STACK(&barr->work, wq_barrier_func);
 	__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
-
 	init_completion(&barr->done);
 
 	debug_work_activate(&barr->work);
-	insert_work(cwq, &barr->work, head);
+	insert_work(cwq, &barr->work, head, 0);
 }
 
 static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
@@ -517,9 +555,6 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
  *
  * We sleep until all works which were queued on entry have been handled,
  * but we are not livelocked by new incoming ones.
- *
- * This function used to run the workqueues itself.  Now we just wait for the
- * helper threads to do it.
  */
 void flush_workqueue(struct workqueue_struct *wq)
 {
@@ -558,7 +593,6 @@ int flush_work(struct work_struct *work)
 	lock_map_acquire(&cwq->wq->lockdep_map);
 	lock_map_release(&cwq->wq->lockdep_map);
 
-	prev = NULL;
 	spin_lock_irq(&cwq->lock);
 	if (!list_empty(&work->entry)) {
 		/*
@@ -567,22 +601,22 @@ int flush_work(struct work_struct *work)
 		 */
 		smp_rmb();
 		if (unlikely(cwq != get_wq_data(work)))
-			goto out;
+			goto already_gone;
 		prev = &work->entry;
 	} else {
 		if (cwq->current_work != work)
-			goto out;
+			goto already_gone;
 		prev = &cwq->worklist;
 	}
 	insert_wq_barrier(cwq, &barr, prev->next);
-out:
-	spin_unlock_irq(&cwq->lock);
-	if (!prev)
-		return 0;
 
+	spin_unlock_irq(&cwq->lock);
 	wait_for_completion(&barr.done);
 	destroy_work_on_stack(&barr.work);
 	return 1;
+already_gone:
+	spin_unlock_irq(&cwq->lock);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(flush_work);
 
@@ -665,7 +699,7 @@ static void wait_on_work(struct work_struct *work)
 	cpu_map = wq_cpu_map(wq);
 
 	for_each_cpu(cpu, cpu_map)
-		wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
+		wait_on_cpu_work(get_cwq(cpu, wq), work);
 }
 
 static int __cancel_work_timer(struct work_struct *work,
@@ -782,9 +816,8 @@ EXPORT_SYMBOL(schedule_delayed_work);
 void flush_delayed_work(struct delayed_work *dwork)
 {
 	if (del_timer_sync(&dwork->timer)) {
-		struct cpu_workqueue_struct *cwq;
-		cwq = wq_per_cpu(get_wq_data(&dwork->work)->wq, get_cpu());
-		__queue_work(cwq, &dwork->work);
+		__queue_work(get_cpu(), get_wq_data(&dwork->work)->wq,
+			     &dwork->work);
 		put_cpu();
 	}
 	flush_work(&dwork->work);
@@ -991,13 +1024,11 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 
 	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
 	if (!wq)
-		return NULL;
+		goto err;
 
 	wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
-	if (!wq->cpu_wq) {
-		kfree(wq);
-		return NULL;
-	}
+	if (!wq->cpu_wq)
+		goto err;
 
 	wq->name = name;
 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
@@ -1041,6 +1072,12 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 		wq = NULL;
 	}
 	return wq;
+err:
+	if (wq) {
+		free_percpu(wq->cpu_wq);
+		kfree(wq);
+	}
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(__create_workqueue_key);
 
-- 
cgit v1.2.3-70-g09d2


From 97e37d7b9e65a6ac939f796f91081135b7a08acc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:10 +0200
Subject: workqueue: merge feature parameters into flags

Currently, __create_workqueue_key() takes @singlethread and
@freezeable parameters and stores them separately in workqueue_struct.
Merge them into a single flags parameter and field and use
WQ_FREEZEABLE and WQ_SINGLE_THREAD.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 25 +++++++++++++++----------
 kernel/workqueue.c        | 17 +++++++----------
 2 files changed, 22 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index e724dafc9e6..d89cfc143b1 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -184,13 +184,17 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 #define work_clear_pending(work) \
 	clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
 
+enum {
+	WQ_FREEZEABLE		= 1 << 0, /* freeze during suspend */
+	WQ_SINGLE_THREAD	= 1 << 1, /* no per-cpu worker */
+};
 
 extern struct workqueue_struct *
-__create_workqueue_key(const char *name, int singlethread, int freezeable,
+__create_workqueue_key(const char *name, unsigned int flags,
 		       struct lock_class_key *key, const char *lock_name);
 
 #ifdef CONFIG_LOCKDEP
-#define __create_workqueue(name, singlethread, freezeable)	\
+#define __create_workqueue(name, flags)				\
 ({								\
 	static struct lock_class_key __key;			\
 	const char *__lock_name;				\
@@ -200,19 +204,20 @@ __create_workqueue_key(const char *name, int singlethread, int freezeable,
 	else							\
 		__lock_name = #name;				\
 								\
-	__create_workqueue_key((name), (singlethread),		\
-			       (freezeable), &__key,		\
+	__create_workqueue_key((name), (flags), &__key,		\
 			       __lock_name);			\
 })
 #else
-#define __create_workqueue(name, singlethread, freezeable)	\
-	__create_workqueue_key((name), (singlethread), (freezeable), \
-			       NULL, NULL)
+#define __create_workqueue(name, flags)				\
+	__create_workqueue_key((name), (flags), NULL, NULL)
 #endif
 
-#define create_workqueue(name) __create_workqueue((name), 0, 0)
-#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1)
-#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0)
+#define create_workqueue(name)					\
+	__create_workqueue((name), 0)
+#define create_freezeable_workqueue(name)			\
+	__create_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_THREAD)
+#define create_singlethread_workqueue(name)			\
+	__create_workqueue((name), WQ_SINGLE_THREAD)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c56146a755e..68e4dd808ec 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -67,11 +67,10 @@ struct cpu_workqueue_struct {
  * per-CPU workqueues:
  */
 struct workqueue_struct {
+	unsigned int		flags;		/* I: WQ_* flags */
 	struct cpu_workqueue_struct *cpu_wq;	/* I: cwq's */
 	struct list_head	list;		/* W: list of all workqueues */
 	const char		*name;		/* I: workqueue name */
-	int singlethread;
-	int freezeable;		/* Freeze threads during suspend */
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map	lockdep_map;
 #endif
@@ -203,9 +202,9 @@ static const struct cpumask *cpu_singlethread_map __read_mostly;
 static cpumask_var_t cpu_populated_map __read_mostly;
 
 /* If it's single threaded, it isn't in the list of workqueues. */
-static inline int is_wq_single_threaded(struct workqueue_struct *wq)
+static inline bool is_wq_single_threaded(struct workqueue_struct *wq)
 {
-	return wq->singlethread;
+	return wq->flags & WQ_SINGLE_THREAD;
 }
 
 static const struct cpumask *wq_cpu_map(struct workqueue_struct *wq)
@@ -463,7 +462,7 @@ static int worker_thread(void *__cwq)
 	struct cpu_workqueue_struct *cwq = __cwq;
 	DEFINE_WAIT(wait);
 
-	if (cwq->wq->freezeable)
+	if (cwq->wq->flags & WQ_FREEZEABLE)
 		set_freezable();
 
 	for (;;) {
@@ -1013,8 +1012,7 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 }
 
 struct workqueue_struct *__create_workqueue_key(const char *name,
-						int singlethread,
-						int freezeable,
+						unsigned int flags,
 						struct lock_class_key *key,
 						const char *lock_name)
 {
@@ -1030,13 +1028,12 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 	if (!wq->cpu_wq)
 		goto err;
 
+	wq->flags = flags;
 	wq->name = name;
 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
-	wq->singlethread = singlethread;
-	wq->freezeable = freezeable;
 	INIT_LIST_HEAD(&wq->list);
 
-	if (singlethread) {
+	if (flags & WQ_SINGLE_THREAD) {
 		cwq = init_cpu_workqueue(wq, singlethread_cpu);
 		err = create_workqueue_thread(cwq, singlethread_cpu);
 		start_workqueue_thread(cwq, -1);
-- 
cgit v1.2.3-70-g09d2


From 22df02bb3fab24af97bff4c69cc6fd8529fc66fe Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:10 +0200
Subject: workqueue: define masks for work flags and conditionalize STATIC
 flags

Work flags are about to see more traditional mask handling.  Define
WORK_STRUCT_*_BIT as the bit position constant and redefine
WORK_STRUCT_* as bit masks.  Also, make WORK_STRUCT_STATIC_* flags
conditional on CONFIG_DEBUG_OBJECTS_WORK.

While at it, re-define these constants as enums and use
WORK_STRUCT_STATIC instead of hard-coding 2 in
WORK_DATA_STATIC_INIT().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 29 +++++++++++++++++++++--------
 kernel/workqueue.c        | 12 ++++++------
 2 files changed, 27 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index d89cfc143b1..d60c5701ab4 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -22,12 +22,25 @@ typedef void (*work_func_t)(struct work_struct *work);
  */
 #define work_data_bits(work) ((unsigned long *)(&(work)->data))
 
+enum {
+	WORK_STRUCT_PENDING_BIT	= 0,	/* work item is pending execution */
+#ifdef CONFIG_DEBUG_OBJECTS_WORK
+	WORK_STRUCT_STATIC_BIT	= 1,	/* static initializer (debugobjects) */
+#endif
+
+	WORK_STRUCT_PENDING	= 1 << WORK_STRUCT_PENDING_BIT,
+#ifdef CONFIG_DEBUG_OBJECTS_WORK
+	WORK_STRUCT_STATIC	= 1 << WORK_STRUCT_STATIC_BIT,
+#else
+	WORK_STRUCT_STATIC	= 0,
+#endif
+
+	WORK_STRUCT_FLAG_MASK	= 3UL,
+	WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
+};
+
 struct work_struct {
 	atomic_long_t data;
-#define WORK_STRUCT_PENDING 0		/* T if work item pending execution */
-#define WORK_STRUCT_STATIC  1		/* static initializer (debugobjects) */
-#define WORK_STRUCT_FLAG_MASK (3UL)
-#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
 	struct list_head entry;
 	work_func_t func;
 #ifdef CONFIG_LOCKDEP
@@ -36,7 +49,7 @@ struct work_struct {
 };
 
 #define WORK_DATA_INIT()	ATOMIC_LONG_INIT(0)
-#define WORK_DATA_STATIC_INIT()	ATOMIC_LONG_INIT(2)
+#define WORK_DATA_STATIC_INIT()	ATOMIC_LONG_INIT(WORK_STRUCT_STATIC)
 
 struct delayed_work {
 	struct work_struct work;
@@ -98,7 +111,7 @@ extern void __init_work(struct work_struct *work, int onstack);
 extern void destroy_work_on_stack(struct work_struct *work);
 static inline unsigned int work_static(struct work_struct *work)
 {
-	return *work_data_bits(work) & (1 << WORK_STRUCT_STATIC);
+	return *work_data_bits(work) & WORK_STRUCT_STATIC;
 }
 #else
 static inline void __init_work(struct work_struct *work, int onstack) { }
@@ -167,7 +180,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
  * @work: The work item in question
  */
 #define work_pending(work) \
-	test_bit(WORK_STRUCT_PENDING, work_data_bits(work))
+	test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))
 
 /**
  * delayed_work_pending - Find out whether a delayable work item is currently
@@ -182,7 +195,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
  * @work: The work item in question
  */
 #define work_clear_pending(work) \
-	clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
+	clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))
 
 enum {
 	WQ_FREEZEABLE		= 1 << 0, /* freeze during suspend */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 68e4dd808ec..5c49d762293 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -115,7 +115,7 @@ static int work_fixup_activate(void *addr, enum debug_obj_state state)
 		 * statically initialized. We just make sure that it
 		 * is tracked in the object tracker.
 		 */
-		if (test_bit(WORK_STRUCT_STATIC, work_data_bits(work))) {
+		if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
 			debug_object_init(work, &work_debug_descr);
 			debug_object_activate(work, &work_debug_descr);
 			return 0;
@@ -232,7 +232,7 @@ static inline void set_wq_data(struct work_struct *work,
 	BUG_ON(!work_pending(work));
 
 	atomic_long_set(&work->data, (unsigned long)cwq | work_static(work) |
-			(1UL << WORK_STRUCT_PENDING) | extra_flags);
+			WORK_STRUCT_PENDING | extra_flags);
 }
 
 /*
@@ -330,7 +330,7 @@ queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
 {
 	int ret = 0;
 
-	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
+	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
 		__queue_work(cpu, wq, work);
 		ret = 1;
 	}
@@ -380,7 +380,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 	struct timer_list *timer = &dwork->timer;
 	struct work_struct *work = &dwork->work;
 
-	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
+	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
 		BUG_ON(timer_pending(timer));
 		BUG_ON(!list_empty(&work->entry));
 
@@ -516,7 +516,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
 	 * might deadlock.
 	 */
 	INIT_WORK_ON_STACK(&barr->work, wq_barrier_func);
-	__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
+	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
 	init_completion(&barr->done);
 
 	debug_work_activate(&barr->work);
@@ -628,7 +628,7 @@ static int try_to_grab_pending(struct work_struct *work)
 	struct cpu_workqueue_struct *cwq;
 	int ret = -1;
 
-	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
+	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
 		return 0;
 
 	/*
-- 
cgit v1.2.3-70-g09d2


From 64166699752006f1a23a9cf7c96ae36654ccfc2c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:11 +0200
Subject: workqueue: temporarily remove workqueue tracing

Strip tracing code from workqueue and remove workqueue tracing.  This
is a temporary measure until concurrency managed workqueue is complete.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/trace/events/workqueue.h | 92 ----------------------------------------
 kernel/trace/Kconfig             | 11 -----
 kernel/workqueue.c               | 14 ++----
 3 files changed, 3 insertions(+), 114 deletions(-)
 delete mode 100644 include/trace/events/workqueue.h

(limited to 'include')

diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
deleted file mode 100644
index d6c974474e7..00000000000
--- a/include/trace/events/workqueue.h
+++ /dev/null
@@ -1,92 +0,0 @@
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM workqueue
-
-#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_WORKQUEUE_H
-
-#include <linux/workqueue.h>
-#include <linux/sched.h>
-#include <linux/tracepoint.h>
-
-DECLARE_EVENT_CLASS(workqueue,
-
-	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-
-	TP_ARGS(wq_thread, work),
-
-	TP_STRUCT__entry(
-		__array(char,		thread_comm,	TASK_COMM_LEN)
-		__field(pid_t,		thread_pid)
-		__field(work_func_t,	func)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
-		__entry->thread_pid	= wq_thread->pid;
-		__entry->func		= work->func;
-	),
-
-	TP_printk("thread=%s:%d func=%pf", __entry->thread_comm,
-		__entry->thread_pid, __entry->func)
-);
-
-DEFINE_EVENT(workqueue, workqueue_insertion,
-
-	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-
-	TP_ARGS(wq_thread, work)
-);
-
-DEFINE_EVENT(workqueue, workqueue_execution,
-
-	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-
-	TP_ARGS(wq_thread, work)
-);
-
-/* Trace the creation of one workqueue thread on a cpu */
-TRACE_EVENT(workqueue_creation,
-
-	TP_PROTO(struct task_struct *wq_thread, int cpu),
-
-	TP_ARGS(wq_thread, cpu),
-
-	TP_STRUCT__entry(
-		__array(char,	thread_comm,	TASK_COMM_LEN)
-		__field(pid_t,	thread_pid)
-		__field(int,	cpu)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
-		__entry->thread_pid	= wq_thread->pid;
-		__entry->cpu		= cpu;
-	),
-
-	TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm,
-		__entry->thread_pid, __entry->cpu)
-);
-
-TRACE_EVENT(workqueue_destruction,
-
-	TP_PROTO(struct task_struct *wq_thread),
-
-	TP_ARGS(wq_thread),
-
-	TP_STRUCT__entry(
-		__array(char,	thread_comm,	TASK_COMM_LEN)
-		__field(pid_t,	thread_pid)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
-		__entry->thread_pid	= wq_thread->pid;
-	),
-
-	TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid)
-);
-
-#endif /* _TRACE_WORKQUEUE_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8b1797c4545..a0d95c1f3f8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -391,17 +391,6 @@ config KMEMTRACE
 
 	  If unsure, say N.
 
-config WORKQUEUE_TRACER
-	bool "Trace workqueues"
-	select GENERIC_TRACER
-	help
-	  The workqueue tracer provides some statistical information
-          about each cpu workqueue thread such as the number of the
-          works inserted and executed since their creation. It can help
-          to evaluate the amount of work each of them has to perform.
-          For example it can help a developer to decide whether he should
-          choose a per-cpu workqueue instead of a singlethreaded one.
-
 config BLK_DEV_IO_TRACE
 	bool "Support for tracing block IO actions"
 	depends on SYSFS
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8e3082b76c7..f7ab703285a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -33,8 +33,6 @@
 #include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
-#define CREATE_TRACE_POINTS
-#include <trace/events/workqueue.h>
 
 /*
  * Structure fields follow one of the following exclusion rules.
@@ -243,10 +241,10 @@ static inline void clear_wq_data(struct work_struct *work)
 	atomic_long_set(&work->data, work_static(work));
 }
 
-static inline
-struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
+static inline struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
 {
-	return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
+	return (void *)(atomic_long_read(&work->data) &
+			WORK_STRUCT_WQ_DATA_MASK);
 }
 
 /**
@@ -265,8 +263,6 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
 			struct work_struct *work, struct list_head *head,
 			unsigned int extra_flags)
 {
-	trace_workqueue_insertion(cwq->thread, work);
-
 	/* we own @work, set data and link */
 	set_wq_data(work, cwq, extra_flags);
 
@@ -431,7 +427,6 @@ static void process_one_work(struct cpu_workqueue_struct *cwq,
 	struct lockdep_map lockdep_map = work->lockdep_map;
 #endif
 	/* claim and process */
-	trace_workqueue_execution(cwq->thread, work);
 	debug_work_deactivate(work);
 	cwq->current_work = work;
 	list_del_init(&work->entry);
@@ -1017,8 +1012,6 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 		return PTR_ERR(p);
 	cwq->thread = p;
 
-	trace_workqueue_creation(cwq->thread, cpu);
-
 	return 0;
 }
 
@@ -1123,7 +1116,6 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 	 * checks list_empty(), and a "normal" queue_work() can't use
 	 * a dead CPU.
 	 */
-	trace_workqueue_destruction(cwq->thread);
 	kthread_stop(cwq->thread);
 	cwq->thread = NULL;
 }
-- 
cgit v1.2.3-70-g09d2


From 0f900049cbe2767d47c2a62b54f0e822e1d66840 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:11 +0200
Subject: workqueue: update cwq alignment

work->data field is used for two purposes.  It points to cwq it's
queued on and the lower bits are used for flags.  Currently, two bits
are reserved which is always safe as 4 byte alignment is guaranteed on
every architecture.  However, future changes will need more flag bits.

On SMP, the percpu allocator is capable of honoring larger alignment
(there are other users which depend on it) and larger alignment works
just fine.  On UP, the percpu allocator is a thin wrapper around
kzalloc/kfree() and doesn't honor alignment requests.

This patch introduces WORK_STRUCT_FLAG_BITS and implements
alloc/free_cwqs() which guarantee max(1 << WORK_STRUCT_FLAG_BITS,
__alignof__(unsigned long long)) alignment both on SMP and UP.  On SMP,
simply wrapping percpu allocator is enough.  On UP, extra space is
allocated so that cwq can be aligned and the original pointer can be
stored after it which is used in the free path.

* Alignment problem on UP was reported by Michal Simek.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Reported-by: Michal Simek <michal.simek@petalogix.com>
---
 include/linux/workqueue.h |  5 +++-
 kernel/workqueue.c        | 60 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 59 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index d60c5701ab4..b90958a037d 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -26,6 +26,9 @@ enum {
 	WORK_STRUCT_PENDING_BIT	= 0,	/* work item is pending execution */
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 	WORK_STRUCT_STATIC_BIT	= 1,	/* static initializer (debugobjects) */
+	WORK_STRUCT_FLAG_BITS	= 2,
+#else
+	WORK_STRUCT_FLAG_BITS	= 1,
 #endif
 
 	WORK_STRUCT_PENDING	= 1 << WORK_STRUCT_PENDING_BIT,
@@ -35,7 +38,7 @@ enum {
 	WORK_STRUCT_STATIC	= 0,
 #endif
 
-	WORK_STRUCT_FLAG_MASK	= 3UL,
+	WORK_STRUCT_FLAG_MASK	= (1UL << WORK_STRUCT_FLAG_BITS) - 1,
 	WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
 };
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index dc78956ccf0..74a38499b19 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -46,7 +46,9 @@
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
- * possible cpu).
+ * possible cpu).  The lower WORK_STRUCT_FLAG_BITS of
+ * work_struct->data are used for flags and thus cwqs need to be
+ * aligned at two's power of the number of flag bits.
  */
 struct cpu_workqueue_struct {
 
@@ -59,7 +61,7 @@ struct cpu_workqueue_struct {
 
 	struct workqueue_struct *wq;		/* I: the owning workqueue */
 	struct task_struct	*thread;
-} ____cacheline_aligned;
+};
 
 /*
  * The externally visible workqueue abstraction is an array of
@@ -967,6 +969,53 @@ int current_is_keventd(void)
 
 }
 
+static struct cpu_workqueue_struct *alloc_cwqs(void)
+{
+	/*
+	 * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
+	 * Make sure that the alignment isn't lower than that of
+	 * unsigned long long.
+	 */
+	const size_t size = sizeof(struct cpu_workqueue_struct);
+	const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
+				   __alignof__(unsigned long long));
+	struct cpu_workqueue_struct *cwqs;
+#ifndef CONFIG_SMP
+	void *ptr;
+
+	/*
+	 * On UP, percpu allocator doesn't honor alignment parameter
+	 * and simply uses arch-dependent default.  Allocate enough
+	 * room to align cwq and put an extra pointer at the end
+	 * pointing back to the originally allocated pointer which
+	 * will be used for free.
+	 *
+	 * FIXME: This really belongs to UP percpu code.  Update UP
+	 * percpu code to honor alignment and remove this ugliness.
+	 */
+	ptr = __alloc_percpu(size + align + sizeof(void *), 1);
+	cwqs = PTR_ALIGN(ptr, align);
+	*(void **)per_cpu_ptr(cwqs + 1, 0) = ptr;
+#else
+	/* On SMP, percpu allocator can do it itself */
+	cwqs = __alloc_percpu(size, align);
+#endif
+	/* just in case, make sure it's actually aligned */
+	BUG_ON(!IS_ALIGNED((unsigned long)cwqs, align));
+	return cwqs;
+}
+
+static void free_cwqs(struct cpu_workqueue_struct *cwqs)
+{
+#ifndef CONFIG_SMP
+	/* on UP, the pointer to free is stored right after the cwq */
+	if (cwqs)
+		free_percpu(*(void **)per_cpu_ptr(cwqs + 1, 0));
+#else
+	free_percpu(cwqs);
+#endif
+}
+
 static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
 	struct workqueue_struct *wq = cwq->wq;
@@ -1012,7 +1061,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 	if (!wq)
 		goto err;
 
-	wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
+	wq->cpu_wq = alloc_cwqs();
 	if (!wq->cpu_wq)
 		goto err;
 
@@ -1031,6 +1080,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 	for_each_possible_cpu(cpu) {
 		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
 
+		BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
 		cwq->wq = wq;
 		cwq->cpu = cpu;
 		spin_lock_init(&cwq->lock);
@@ -1059,7 +1109,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 	return wq;
 err:
 	if (wq) {
-		free_percpu(wq->cpu_wq);
+		free_cwqs(wq->cpu_wq);
 		kfree(wq);
 	}
 	return NULL;
@@ -1112,7 +1162,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 	for_each_possible_cpu(cpu)
 		cleanup_workqueue_thread(get_cwq(cpu, wq));
 
-	free_percpu(wq->cpu_wq);
+	free_cwqs(wq->cpu_wq);
 	kfree(wq);
 }
 EXPORT_SYMBOL_GPL(destroy_workqueue);
-- 
cgit v1.2.3-70-g09d2


From 73f53c4aa732eced5fcb1844d3d452c30905f20f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:11 +0200
Subject: workqueue: reimplement workqueue flushing using color coded works

Reimplement workqueue flushing using color coded works.  wq has the
current work color which is painted on the works being issued via
cwqs.  Flushing a workqueue is achieved by advancing the current work
colors of cwqs and waiting for all the works which have any of the
previous colors to drain.

Currently there are 16 possible colors, one is reserved for no color
and 15 colors are useable allowing 14 concurrent flushes.  When color
space gets full, flush attempts are batched up and processed together
when color frees up, so even with many concurrent flushers, the new
implementation won't build up huge queue of flushers which has to be
processed one after another.

Only works which are queued via __queue_work() are colored.  Works
which are directly put on queue using insert_work() use NO_COLOR and
don't participate in workqueue flushing.  Currently only works used
for work-specific flush fall in this category.

This new implementation leaves only cleanup_workqueue_thread() as the
user of flush_cpu_workqueue().  Just make its users use
flush_workqueue() and kthread_stop() directly and kill
cleanup_workqueue_thread().  As workqueue flushing doesn't use barrier
request anymore, the comment describing the complex synchronization
around it in cleanup_workqueue_thread() is removed together with the
function.

This new implementation is to allow having and sharing multiple
workers per cpu.

Please note that one more bit is reserved for a future work flag by
this patch.  This is to avoid shifting bits and updating comments
later.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |  21 ++-
 kernel/workqueue.c        | 355 +++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 322 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index b90958a037d..8762f62103d 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -26,11 +26,13 @@ enum {
 	WORK_STRUCT_PENDING_BIT	= 0,	/* work item is pending execution */
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 	WORK_STRUCT_STATIC_BIT	= 1,	/* static initializer (debugobjects) */
-	WORK_STRUCT_FLAG_BITS	= 2,
+	WORK_STRUCT_COLOR_SHIFT	= 3,	/* color for workqueue flushing */
 #else
-	WORK_STRUCT_FLAG_BITS	= 1,
+	WORK_STRUCT_COLOR_SHIFT	= 2,	/* color for workqueue flushing */
 #endif
 
+	WORK_STRUCT_COLOR_BITS	= 4,
+
 	WORK_STRUCT_PENDING	= 1 << WORK_STRUCT_PENDING_BIT,
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 	WORK_STRUCT_STATIC	= 1 << WORK_STRUCT_STATIC_BIT,
@@ -38,6 +40,21 @@ enum {
 	WORK_STRUCT_STATIC	= 0,
 #endif
 
+	/*
+	 * The last color is no color used for works which don't
+	 * participate in workqueue flushing.
+	 */
+	WORK_NR_COLORS		= (1 << WORK_STRUCT_COLOR_BITS) - 1,
+	WORK_NO_COLOR		= WORK_NR_COLORS,
+
+	/*
+	 * Reserve 6 bits off of cwq pointer w/ debugobjects turned
+	 * off.  This makes cwqs aligned to 64 bytes which isn't too
+	 * excessive while allowing 15 workqueue flush colors.
+	 */
+	WORK_STRUCT_FLAG_BITS	= WORK_STRUCT_COLOR_SHIFT +
+				  WORK_STRUCT_COLOR_BITS,
+
 	WORK_STRUCT_FLAG_MASK	= (1UL << WORK_STRUCT_FLAG_BITS) - 1,
 	WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
 };
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 74a38499b19..56e47c59d73 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -41,6 +41,8 @@
  *
  * L: cwq->lock protected.  Access with cwq->lock held.
  *
+ * F: wq->flush_mutex protected.
+ *
  * W: workqueue_lock protected.
  */
 
@@ -60,9 +62,22 @@ struct cpu_workqueue_struct {
 	unsigned int		cpu;
 
 	struct workqueue_struct *wq;		/* I: the owning workqueue */
+	int			work_color;	/* L: current color */
+	int			flush_color;	/* L: flushing color */
+	int			nr_in_flight[WORK_NR_COLORS];
+						/* L: nr of in_flight works */
 	struct task_struct	*thread;
 };
 
+/*
+ * Structure used to wait for workqueue flush.
+ */
+struct wq_flusher {
+	struct list_head	list;		/* F: list of flushers */
+	int			flush_color;	/* F: flush color waiting for */
+	struct completion	done;		/* flush completion */
+};
+
 /*
  * The externally visible workqueue abstraction is an array of
  * per-CPU workqueues:
@@ -71,6 +86,15 @@ struct workqueue_struct {
 	unsigned int		flags;		/* I: WQ_* flags */
 	struct cpu_workqueue_struct *cpu_wq;	/* I: cwq's */
 	struct list_head	list;		/* W: list of all workqueues */
+
+	struct mutex		flush_mutex;	/* protects wq flushing */
+	int			work_color;	/* F: current work color */
+	int			flush_color;	/* F: current flush color */
+	atomic_t		nr_cwqs_to_flush; /* flush in progress */
+	struct wq_flusher	*first_flusher;	/* F: first flusher */
+	struct list_head	flusher_queue;	/* F: flush waiters */
+	struct list_head	flusher_overflow; /* F: flush overflow list */
+
 	const char		*name;		/* I: workqueue name */
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map	lockdep_map;
@@ -207,6 +231,22 @@ static struct cpu_workqueue_struct *target_cwq(unsigned int cpu,
 	return get_cwq(cpu, wq);
 }
 
+static unsigned int work_color_to_flags(int color)
+{
+	return color << WORK_STRUCT_COLOR_SHIFT;
+}
+
+static int get_work_color(struct work_struct *work)
+{
+	return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
+		((1 << WORK_STRUCT_COLOR_BITS) - 1);
+}
+
+static int work_next_color(int color)
+{
+	return (color + 1) % WORK_NR_COLORS;
+}
+
 /*
  * Set the workqueue on which a work item is to be run
  * - Must *only* be called if the pending flag is set
@@ -273,7 +313,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 	debug_work_activate(work);
 	spin_lock_irqsave(&cwq->lock, flags);
 	BUG_ON(!list_empty(&work->entry));
-	insert_work(cwq, work, &cwq->worklist, 0);
+	cwq->nr_in_flight[cwq->work_color]++;
+	insert_work(cwq, work, &cwq->worklist,
+		    work_color_to_flags(cwq->work_color));
 	spin_unlock_irqrestore(&cwq->lock, flags);
 }
 
@@ -386,6 +428,44 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
 
+/**
+ * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
+ * @cwq: cwq of interest
+ * @color: color of work which left the queue
+ *
+ * A work either has completed or is removed from pending queue,
+ * decrement nr_in_flight of its cwq and handle workqueue flushing.
+ *
+ * CONTEXT:
+ * spin_lock_irq(cwq->lock).
+ */
+static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
+{
+	/* ignore uncolored works */
+	if (color == WORK_NO_COLOR)
+		return;
+
+	cwq->nr_in_flight[color]--;
+
+	/* is flush in progress and are we at the flushing tip? */
+	if (likely(cwq->flush_color != color))
+		return;
+
+	/* are there still in-flight works? */
+	if (cwq->nr_in_flight[color])
+		return;
+
+	/* this cwq is done, clear flush_color */
+	cwq->flush_color = -1;
+
+	/*
+	 * If this was the last cwq, wake up the first flusher.  It
+	 * will handle the rest.
+	 */
+	if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
+		complete(&cwq->wq->first_flusher->done);
+}
+
 /**
  * process_one_work - process single work
  * @cwq: cwq to process work for
@@ -404,6 +484,7 @@ static void process_one_work(struct cpu_workqueue_struct *cwq,
 			     struct work_struct *work)
 {
 	work_func_t f = work->func;
+	int work_color;
 #ifdef CONFIG_LOCKDEP
 	/*
 	 * It is permissible to free the struct work_struct from
@@ -417,6 +498,7 @@ static void process_one_work(struct cpu_workqueue_struct *cwq,
 	/* claim and process */
 	debug_work_deactivate(work);
 	cwq->current_work = work;
+	work_color = get_work_color(work);
 	list_del_init(&work->entry);
 
 	spin_unlock_irq(&cwq->lock);
@@ -443,6 +525,7 @@ static void process_one_work(struct cpu_workqueue_struct *cwq,
 
 	/* we're done with it, release */
 	cwq->current_work = NULL;
+	cwq_dec_nr_in_flight(cwq, work_color);
 }
 
 static void run_workqueue(struct cpu_workqueue_struct *cwq)
@@ -529,29 +612,78 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
 	init_completion(&barr->done);
 
 	debug_work_activate(&barr->work);
-	insert_work(cwq, &barr->work, head, 0);
+	insert_work(cwq, &barr->work, head, work_color_to_flags(WORK_NO_COLOR));
 }
 
-static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
+/**
+ * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing
+ * @wq: workqueue being flushed
+ * @flush_color: new flush color, < 0 for no-op
+ * @work_color: new work color, < 0 for no-op
+ *
+ * Prepare cwqs for workqueue flushing.
+ *
+ * If @flush_color is non-negative, flush_color on all cwqs should be
+ * -1.  If no cwq has in-flight works at the specified color, all
+ * cwq->flush_color's stay at -1 and %false is returned.  If any cwq
+ * has in-flight works, its cwq->flush_color is set to
+ * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq
+ * wakeup logic is armed and %true is returned.
+ *
+ * The caller should have initialized @wq->first_flusher prior to
+ * calling this function with non-negative @flush_color.  If
+ * @flush_color is negative, no flush color update is done and %false
+ * is returned.
+ *
+ * If @work_color is non-negative, all cwqs should have the same
+ * work_color which is previous to @work_color and all will be
+ * advanced to @work_color.
+ *
+ * CONTEXT:
+ * mutex_lock(wq->flush_mutex).
+ *
+ * RETURNS:
+ * %true if @flush_color >= 0 and there's something to flush.  %false
+ * otherwise.
+ */
+static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq,
+				      int flush_color, int work_color)
 {
-	int active = 0;
-	struct wq_barrier barr;
+	bool wait = false;
+	unsigned int cpu;
 
-	WARN_ON(cwq->thread == current);
-
-	spin_lock_irq(&cwq->lock);
-	if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
-		insert_wq_barrier(cwq, &barr, &cwq->worklist);
-		active = 1;
+	if (flush_color >= 0) {
+		BUG_ON(atomic_read(&wq->nr_cwqs_to_flush));
+		atomic_set(&wq->nr_cwqs_to_flush, 1);
 	}
-	spin_unlock_irq(&cwq->lock);
 
-	if (active) {
-		wait_for_completion(&barr.done);
-		destroy_work_on_stack(&barr.work);
+	for_each_possible_cpu(cpu) {
+		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+
+		spin_lock_irq(&cwq->lock);
+
+		if (flush_color >= 0) {
+			BUG_ON(cwq->flush_color != -1);
+
+			if (cwq->nr_in_flight[flush_color]) {
+				cwq->flush_color = flush_color;
+				atomic_inc(&wq->nr_cwqs_to_flush);
+				wait = true;
+			}
+		}
+
+		if (work_color >= 0) {
+			BUG_ON(work_color != work_next_color(cwq->work_color));
+			cwq->work_color = work_color;
+		}
+
+		spin_unlock_irq(&cwq->lock);
 	}
 
-	return active;
+	if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush))
+		complete(&wq->first_flusher->done);
+
+	return wait;
 }
 
 /**
@@ -566,13 +698,143 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
  */
 void flush_workqueue(struct workqueue_struct *wq)
 {
-	int cpu;
+	struct wq_flusher this_flusher = {
+		.list = LIST_HEAD_INIT(this_flusher.list),
+		.flush_color = -1,
+		.done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
+	};
+	int next_color;
 
-	might_sleep();
 	lock_map_acquire(&wq->lockdep_map);
 	lock_map_release(&wq->lockdep_map);
-	for_each_possible_cpu(cpu)
-		flush_cpu_workqueue(get_cwq(cpu, wq));
+
+	mutex_lock(&wq->flush_mutex);
+
+	/*
+	 * Start-to-wait phase
+	 */
+	next_color = work_next_color(wq->work_color);
+
+	if (next_color != wq->flush_color) {
+		/*
+		 * Color space is not full.  The current work_color
+		 * becomes our flush_color and work_color is advanced
+		 * by one.
+		 */
+		BUG_ON(!list_empty(&wq->flusher_overflow));
+		this_flusher.flush_color = wq->work_color;
+		wq->work_color = next_color;
+
+		if (!wq->first_flusher) {
+			/* no flush in progress, become the first flusher */
+			BUG_ON(wq->flush_color != this_flusher.flush_color);
+
+			wq->first_flusher = &this_flusher;
+
+			if (!flush_workqueue_prep_cwqs(wq, wq->flush_color,
+						       wq->work_color)) {
+				/* nothing to flush, done */
+				wq->flush_color = next_color;
+				wq->first_flusher = NULL;
+				goto out_unlock;
+			}
+		} else {
+			/* wait in queue */
+			BUG_ON(wq->flush_color == this_flusher.flush_color);
+			list_add_tail(&this_flusher.list, &wq->flusher_queue);
+			flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
+		}
+	} else {
+		/*
+		 * Oops, color space is full, wait on overflow queue.
+		 * The next flush completion will assign us
+		 * flush_color and transfer to flusher_queue.
+		 */
+		list_add_tail(&this_flusher.list, &wq->flusher_overflow);
+	}
+
+	mutex_unlock(&wq->flush_mutex);
+
+	wait_for_completion(&this_flusher.done);
+
+	/*
+	 * Wake-up-and-cascade phase
+	 *
+	 * First flushers are responsible for cascading flushes and
+	 * handling overflow.  Non-first flushers can simply return.
+	 */
+	if (wq->first_flusher != &this_flusher)
+		return;
+
+	mutex_lock(&wq->flush_mutex);
+
+	wq->first_flusher = NULL;
+
+	BUG_ON(!list_empty(&this_flusher.list));
+	BUG_ON(wq->flush_color != this_flusher.flush_color);
+
+	while (true) {
+		struct wq_flusher *next, *tmp;
+
+		/* complete all the flushers sharing the current flush color */
+		list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
+			if (next->flush_color != wq->flush_color)
+				break;
+			list_del_init(&next->list);
+			complete(&next->done);
+		}
+
+		BUG_ON(!list_empty(&wq->flusher_overflow) &&
+		       wq->flush_color != work_next_color(wq->work_color));
+
+		/* this flush_color is finished, advance by one */
+		wq->flush_color = work_next_color(wq->flush_color);
+
+		/* one color has been freed, handle overflow queue */
+		if (!list_empty(&wq->flusher_overflow)) {
+			/*
+			 * Assign the same color to all overflowed
+			 * flushers, advance work_color and append to
+			 * flusher_queue.  This is the start-to-wait
+			 * phase for these overflowed flushers.
+			 */
+			list_for_each_entry(tmp, &wq->flusher_overflow, list)
+				tmp->flush_color = wq->work_color;
+
+			wq->work_color = work_next_color(wq->work_color);
+
+			list_splice_tail_init(&wq->flusher_overflow,
+					      &wq->flusher_queue);
+			flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
+		}
+
+		if (list_empty(&wq->flusher_queue)) {
+			BUG_ON(wq->flush_color != wq->work_color);
+			break;
+		}
+
+		/*
+		 * Need to flush more colors.  Make the next flusher
+		 * the new first flusher and arm cwqs.
+		 */
+		BUG_ON(wq->flush_color == wq->work_color);
+		BUG_ON(wq->flush_color != next->flush_color);
+
+		list_del_init(&next->list);
+		wq->first_flusher = next;
+
+		if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1))
+			break;
+
+		/*
+		 * Meh... this color is already done, clear first
+		 * flusher and repeat cascading.
+		 */
+		wq->first_flusher = NULL;
+	}
+
+out_unlock:
+	mutex_unlock(&wq->flush_mutex);
 }
 EXPORT_SYMBOL_GPL(flush_workqueue);
 
@@ -659,6 +921,7 @@ static int try_to_grab_pending(struct work_struct *work)
 		if (cwq == get_wq_data(work)) {
 			debug_work_deactivate(work);
 			list_del_init(&work->entry);
+			cwq_dec_nr_in_flight(cwq, get_work_color(work));
 			ret = 1;
 		}
 	}
@@ -1066,6 +1329,10 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 		goto err;
 
 	wq->flags = flags;
+	mutex_init(&wq->flush_mutex);
+	atomic_set(&wq->nr_cwqs_to_flush, 0);
+	INIT_LIST_HEAD(&wq->flusher_queue);
+	INIT_LIST_HEAD(&wq->flusher_overflow);
 	wq->name = name;
 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
 	INIT_LIST_HEAD(&wq->list);
@@ -1083,6 +1350,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 		BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
 		cwq->wq = wq;
 		cwq->cpu = cpu;
+		cwq->flush_color = -1;
 		spin_lock_init(&cwq->lock);
 		INIT_LIST_HEAD(&cwq->worklist);
 		init_waitqueue_head(&cwq->more_work);
@@ -1116,33 +1384,6 @@ err:
 }
 EXPORT_SYMBOL_GPL(__create_workqueue_key);
 
-static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
-{
-	/*
-	 * Our caller is either destroy_workqueue() or CPU_POST_DEAD,
-	 * cpu_add_remove_lock protects cwq->thread.
-	 */
-	if (cwq->thread == NULL)
-		return;
-
-	lock_map_acquire(&cwq->wq->lockdep_map);
-	lock_map_release(&cwq->wq->lockdep_map);
-
-	flush_cpu_workqueue(cwq);
-	/*
-	 * If the caller is CPU_POST_DEAD and cwq->worklist was not empty,
-	 * a concurrent flush_workqueue() can insert a barrier after us.
-	 * However, in that case run_workqueue() won't return and check
-	 * kthread_should_stop() until it flushes all work_struct's.
-	 * When ->worklist becomes empty it is safe to exit because no
-	 * more work_structs can be queued on this cwq: flush_workqueue
-	 * checks list_empty(), and a "normal" queue_work() can't use
-	 * a dead CPU.
-	 */
-	kthread_stop(cwq->thread);
-	cwq->thread = NULL;
-}
-
 /**
  * destroy_workqueue - safely terminate a workqueue
  * @wq: target workqueue
@@ -1159,8 +1400,20 @@ void destroy_workqueue(struct workqueue_struct *wq)
 	spin_unlock(&workqueue_lock);
 	cpu_maps_update_done();
 
-	for_each_possible_cpu(cpu)
-		cleanup_workqueue_thread(get_cwq(cpu, wq));
+	flush_workqueue(wq);
+
+	for_each_possible_cpu(cpu) {
+		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+		int i;
+
+		if (cwq->thread) {
+			kthread_stop(cwq->thread);
+			cwq->thread = NULL;
+		}
+
+		for (i = 0; i < WORK_NR_COLORS; i++)
+			BUG_ON(cwq->nr_in_flight[i]);
+	}
 
 	free_cwqs(wq->cpu_wq);
 	kfree(wq);
@@ -1185,9 +1438,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 
 		switch (action) {
 		case CPU_POST_DEAD:
-			lock_map_acquire(&cwq->wq->lockdep_map);
-			lock_map_release(&cwq->wq->lockdep_map);
-			flush_cpu_workqueue(cwq);
+			flush_workqueue(wq);
 			break;
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From affee4b294a0fc97d67c8a77dc080c4dd262a79e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:12 +0200
Subject: workqueue: reimplement work flushing using linked works

A work is linked to the next one by having WORK_STRUCT_LINKED bit set
and these links can be chained.  When a linked work is dispatched to a
worker, all linked works are dispatched to the worker's newly added
->scheduled queue and processed back-to-back.

Currently, as there's only a single worker per cwq, having linked works
doesn't make any visible behavior difference.  This change is to
prepare for multiple shared workers per cpu.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |   4 +-
 kernel/workqueue.c        | 152 +++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 134 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 8762f62103d..4f4fdba722c 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -24,8 +24,9 @@ typedef void (*work_func_t)(struct work_struct *work);
 
 enum {
 	WORK_STRUCT_PENDING_BIT	= 0,	/* work item is pending execution */
+	WORK_STRUCT_LINKED_BIT	= 1,	/* next work is linked to this one */
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
-	WORK_STRUCT_STATIC_BIT	= 1,	/* static initializer (debugobjects) */
+	WORK_STRUCT_STATIC_BIT	= 2,	/* static initializer (debugobjects) */
 	WORK_STRUCT_COLOR_SHIFT	= 3,	/* color for workqueue flushing */
 #else
 	WORK_STRUCT_COLOR_SHIFT	= 2,	/* color for workqueue flushing */
@@ -34,6 +35,7 @@ enum {
 	WORK_STRUCT_COLOR_BITS	= 4,
 
 	WORK_STRUCT_PENDING	= 1 << WORK_STRUCT_PENDING_BIT,
+	WORK_STRUCT_LINKED	= 1 << WORK_STRUCT_LINKED_BIT,
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 	WORK_STRUCT_STATIC	= 1 << WORK_STRUCT_STATIC_BIT,
 #else
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 600db10a4db..9953d3c7bd1 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -51,6 +51,7 @@ struct cpu_workqueue_struct;
 
 struct worker {
 	struct work_struct	*current_work;	/* L: work being processed */
+	struct list_head	scheduled;	/* L: scheduled works */
 	struct task_struct	*task;		/* I: worker task */
 	struct cpu_workqueue_struct *cwq;	/* I: the associated cwq */
 	int			id;		/* I: worker id */
@@ -445,6 +446,8 @@ static struct worker *alloc_worker(void)
 	struct worker *worker;
 
 	worker = kzalloc(sizeof(*worker), GFP_KERNEL);
+	if (worker)
+		INIT_LIST_HEAD(&worker->scheduled);
 	return worker;
 }
 
@@ -530,6 +533,7 @@ static void destroy_worker(struct worker *worker)
 
 	/* sanity check frenzy */
 	BUG_ON(worker->current_work);
+	BUG_ON(!list_empty(&worker->scheduled));
 
 	kthread_stop(worker->task);
 	kfree(worker);
@@ -539,6 +543,47 @@ static void destroy_worker(struct worker *worker)
 	spin_unlock(&workqueue_lock);
 }
 
+/**
+ * move_linked_works - move linked works to a list
+ * @work: start of series of works to be scheduled
+ * @head: target list to append @work to
+ * @nextp: out parameter for nested worklist walking
+ *
+ * Schedule linked works starting from @work to @head.  Work series to
+ * be scheduled starts at @work and includes any consecutive work with
+ * WORK_STRUCT_LINKED set in its predecessor.
+ *
+ * If @nextp is not NULL, it's updated to point to the next work of
+ * the last scheduled work.  This allows move_linked_works() to be
+ * nested inside outer list_for_each_entry_safe().
+ *
+ * CONTEXT:
+ * spin_lock_irq(cwq->lock).
+ */
+static void move_linked_works(struct work_struct *work, struct list_head *head,
+			      struct work_struct **nextp)
+{
+	struct work_struct *n;
+
+	/*
+	 * Linked worklist will always end before the end of the list,
+	 * use NULL for list head.
+	 */
+	list_for_each_entry_safe_from(work, n, NULL, entry) {
+		list_move_tail(&work->entry, head);
+		if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
+			break;
+	}
+
+	/*
+	 * If we're already inside safe list traversal and have moved
+	 * multiple works to the scheduled queue, the next position
+	 * needs to be updated.
+	 */
+	if (nextp)
+		*nextp = n;
+}
+
 /**
  * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
  * @cwq: cwq of interest
@@ -639,17 +684,25 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
 	cwq_dec_nr_in_flight(cwq, work_color);
 }
 
-static void run_workqueue(struct worker *worker)
+/**
+ * process_scheduled_works - process scheduled works
+ * @worker: self
+ *
+ * Process all scheduled works.  Please note that the scheduled list
+ * may change while processing a work, so this function repeatedly
+ * fetches a work from the top and executes it.
+ *
+ * CONTEXT:
+ * spin_lock_irq(cwq->lock) which may be released and regrabbed
+ * multiple times.
+ */
+static void process_scheduled_works(struct worker *worker)
 {
-	struct cpu_workqueue_struct *cwq = worker->cwq;
-
-	spin_lock_irq(&cwq->lock);
-	while (!list_empty(&cwq->worklist)) {
-		struct work_struct *work = list_entry(cwq->worklist.next,
+	while (!list_empty(&worker->scheduled)) {
+		struct work_struct *work = list_first_entry(&worker->scheduled,
 						struct work_struct, entry);
 		process_one_work(worker, work);
 	}
-	spin_unlock_irq(&cwq->lock);
 }
 
 /**
@@ -684,7 +737,28 @@ static int worker_thread(void *__worker)
 					    get_cpu_mask(cwq->cpu))))
 			set_cpus_allowed_ptr(worker->task,
 					     get_cpu_mask(cwq->cpu));
-		run_workqueue(worker);
+
+		spin_lock_irq(&cwq->lock);
+
+		while (!list_empty(&cwq->worklist)) {
+			struct work_struct *work =
+				list_first_entry(&cwq->worklist,
+						 struct work_struct, entry);
+
+			if (likely(!(*work_data_bits(work) &
+				     WORK_STRUCT_LINKED))) {
+				/* optimization path, not strictly necessary */
+				process_one_work(worker, work);
+				if (unlikely(!list_empty(&worker->scheduled)))
+					process_scheduled_works(worker);
+			} else {
+				move_linked_works(work, &worker->scheduled,
+						  NULL);
+				process_scheduled_works(worker);
+			}
+		}
+
+		spin_unlock_irq(&cwq->lock);
 	}
 
 	return 0;
@@ -705,16 +779,33 @@ static void wq_barrier_func(struct work_struct *work)
  * insert_wq_barrier - insert a barrier work
  * @cwq: cwq to insert barrier into
  * @barr: wq_barrier to insert
- * @head: insertion point
+ * @target: target work to attach @barr to
+ * @worker: worker currently executing @target, NULL if @target is not executing
  *
- * Insert barrier @barr into @cwq before @head.
+ * @barr is linked to @target such that @barr is completed only after
+ * @target finishes execution.  Please note that the ordering
+ * guarantee is observed only with respect to @target and on the local
+ * cpu.
+ *
+ * Currently, a queued barrier can't be canceled.  This is because
+ * try_to_grab_pending() can't determine whether the work to be
+ * grabbed is at the head of the queue and thus can't clear LINKED
+ * flag of the previous work while there must be a valid next work
+ * after a work with LINKED flag set.
+ *
+ * Note that when @worker is non-NULL, @target may be modified
+ * underneath us, so we can't reliably determine cwq from @target.
  *
  * CONTEXT:
  * spin_lock_irq(cwq->lock).
  */
 static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
-			struct wq_barrier *barr, struct list_head *head)
+			      struct wq_barrier *barr,
+			      struct work_struct *target, struct worker *worker)
 {
+	struct list_head *head;
+	unsigned int linked = 0;
+
 	/*
 	 * debugobject calls are safe here even with cwq->lock locked
 	 * as we know for sure that this will not trigger any of the
@@ -725,8 +816,24 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
 	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
 	init_completion(&barr->done);
 
+	/*
+	 * If @target is currently being executed, schedule the
+	 * barrier to the worker; otherwise, put it after @target.
+	 */
+	if (worker)
+		head = worker->scheduled.next;
+	else {
+		unsigned long *bits = work_data_bits(target);
+
+		head = target->entry.next;
+		/* there can already be other linked works, inherit and set */
+		linked = *bits & WORK_STRUCT_LINKED;
+		__set_bit(WORK_STRUCT_LINKED_BIT, bits);
+	}
+
 	debug_work_activate(&barr->work);
-	insert_work(cwq, &barr->work, head, work_color_to_flags(WORK_NO_COLOR));
+	insert_work(cwq, &barr->work, head,
+		    work_color_to_flags(WORK_NO_COLOR) | linked);
 }
 
 /**
@@ -964,8 +1071,8 @@ EXPORT_SYMBOL_GPL(flush_workqueue);
  */
 int flush_work(struct work_struct *work)
 {
+	struct worker *worker = NULL;
 	struct cpu_workqueue_struct *cwq;
-	struct list_head *prev;
 	struct wq_barrier barr;
 
 	might_sleep();
@@ -985,14 +1092,14 @@ int flush_work(struct work_struct *work)
 		smp_rmb();
 		if (unlikely(cwq != get_wq_data(work)))
 			goto already_gone;
-		prev = &work->entry;
 	} else {
-		if (!cwq->worker || cwq->worker->current_work != work)
+		if (cwq->worker && cwq->worker->current_work == work)
+			worker = cwq->worker;
+		if (!worker)
 			goto already_gone;
-		prev = &cwq->worklist;
 	}
-	insert_wq_barrier(cwq, &barr, prev->next);
 
+	insert_wq_barrier(cwq, &barr, work, worker);
 	spin_unlock_irq(&cwq->lock);
 	wait_for_completion(&barr.done);
 	destroy_work_on_stack(&barr.work);
@@ -1048,16 +1155,19 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
 				struct work_struct *work)
 {
 	struct wq_barrier barr;
-	int running = 0;
+	struct worker *worker;
 
 	spin_lock_irq(&cwq->lock);
+
+	worker = NULL;
 	if (unlikely(cwq->worker && cwq->worker->current_work == work)) {
-		insert_wq_barrier(cwq, &barr, cwq->worklist.next);
-		running = 1;
+		worker = cwq->worker;
+		insert_wq_barrier(cwq, &barr, work, worker);
 	}
+
 	spin_unlock_irq(&cwq->lock);
 
-	if (unlikely(running)) {
+	if (unlikely(worker)) {
 		wait_for_completion(&barr.done);
 		destroy_work_on_stack(&barr.work);
 	}
-- 
cgit v1.2.3-70-g09d2


From 1e19ffc63dbbaea7a7d1c63d99c38d3e5a4c7edf Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:12 +0200
Subject: workqueue: implement per-cwq active work limit

Add cwq->nr_active, cwq->max_active and cwq->delayed_works.  nr_active
counts the number of active works per cwq.  A work is active if it's
flushable (colored) and is on cwq's worklist.  If nr_active reaches
max_active, new works are queued on cwq->delayed_works and activated
later as works on the cwq complete and decrement nr_active.

cwq->max_active can be specified via the new @max_active parameter to
__create_workqueue() and is set to 1 for all workqueues for now.  As
each cwq has only a single worker now, this double queueing doesn't
cause any behavior difference visible to its users.

This will be used to reimplement freeze/thaw and implement shared
worker pool.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 18 +++++++++---------
 kernel/workqueue.c        | 39 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 46 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 4f4fdba722c..eb753b7790e 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -225,11 +225,11 @@ enum {
 };
 
 extern struct workqueue_struct *
-__create_workqueue_key(const char *name, unsigned int flags,
+__create_workqueue_key(const char *name, unsigned int flags, int max_active,
 		       struct lock_class_key *key, const char *lock_name);
 
 #ifdef CONFIG_LOCKDEP
-#define __create_workqueue(name, flags)				\
+#define __create_workqueue(name, flags, max_active)		\
 ({								\
 	static struct lock_class_key __key;			\
 	const char *__lock_name;				\
@@ -239,20 +239,20 @@ __create_workqueue_key(const char *name, unsigned int flags,
 	else							\
 		__lock_name = #name;				\
 								\
-	__create_workqueue_key((name), (flags), &__key,		\
-			       __lock_name);			\
+	__create_workqueue_key((name), (flags), (max_active),	\
+				&__key, __lock_name);		\
 })
 #else
-#define __create_workqueue(name, flags)				\
-	__create_workqueue_key((name), (flags), NULL, NULL)
+#define __create_workqueue(name, flags, max_active)		\
+	__create_workqueue_key((name), (flags), (max_active), NULL, NULL)
 #endif
 
 #define create_workqueue(name)					\
-	__create_workqueue((name), 0)
+	__create_workqueue((name), 0, 1)
 #define create_freezeable_workqueue(name)			\
-	__create_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_THREAD)
+	__create_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_THREAD, 1)
 #define create_singlethread_workqueue(name)			\
-	__create_workqueue((name), WQ_SINGLE_THREAD)
+	__create_workqueue((name), WQ_SINGLE_THREAD, 1)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9953d3c7bd1..e541b5db67d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -77,6 +77,9 @@ struct cpu_workqueue_struct {
 	int			flush_color;	/* L: flushing color */
 	int			nr_in_flight[WORK_NR_COLORS];
 						/* L: nr of in_flight works */
+	int			nr_active;	/* L: nr of active works */
+	int			max_active;	/* I: max active works */
+	struct list_head	delayed_works;	/* L: delayed works */
 };
 
 /*
@@ -321,14 +324,24 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 			 struct work_struct *work)
 {
 	struct cpu_workqueue_struct *cwq = target_cwq(cpu, wq);
+	struct list_head *worklist;
 	unsigned long flags;
 
 	debug_work_activate(work);
+
 	spin_lock_irqsave(&cwq->lock, flags);
 	BUG_ON(!list_empty(&work->entry));
+
 	cwq->nr_in_flight[cwq->work_color]++;
-	insert_work(cwq, work, &cwq->worklist,
-		    work_color_to_flags(cwq->work_color));
+
+	if (likely(cwq->nr_active < cwq->max_active)) {
+		cwq->nr_active++;
+		worklist = &cwq->worklist;
+	} else
+		worklist = &cwq->delayed_works;
+
+	insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color));
+
 	spin_unlock_irqrestore(&cwq->lock, flags);
 }
 
@@ -584,6 +597,15 @@ static void move_linked_works(struct work_struct *work, struct list_head *head,
 		*nextp = n;
 }
 
+static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
+{
+	struct work_struct *work = list_first_entry(&cwq->delayed_works,
+						    struct work_struct, entry);
+
+	move_linked_works(work, &cwq->worklist, NULL);
+	cwq->nr_active++;
+}
+
 /**
  * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
  * @cwq: cwq of interest
@@ -602,6 +624,12 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
 		return;
 
 	cwq->nr_in_flight[color]--;
+	cwq->nr_active--;
+
+	/* one down, submit a delayed one */
+	if (!list_empty(&cwq->delayed_works) &&
+	    cwq->nr_active < cwq->max_active)
+		cwq_activate_first_delayed(cwq);
 
 	/* is flush in progress and are we at the flushing tip? */
 	if (likely(cwq->flush_color != color))
@@ -1505,6 +1533,7 @@ static void free_cwqs(struct cpu_workqueue_struct *cwqs)
 
 struct workqueue_struct *__create_workqueue_key(const char *name,
 						unsigned int flags,
+						int max_active,
 						struct lock_class_key *key,
 						const char *lock_name)
 {
@@ -1513,6 +1542,8 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 	bool failed = false;
 	unsigned int cpu;
 
+	max_active = clamp_val(max_active, 1, INT_MAX);
+
 	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
 	if (!wq)
 		goto err;
@@ -1544,8 +1575,10 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 		cwq->cpu = cpu;
 		cwq->wq = wq;
 		cwq->flush_color = -1;
+		cwq->max_active = max_active;
 		spin_lock_init(&cwq->lock);
 		INIT_LIST_HEAD(&cwq->worklist);
+		INIT_LIST_HEAD(&cwq->delayed_works);
 		init_waitqueue_head(&cwq->more_work);
 
 		if (failed)
@@ -1607,6 +1640,8 @@ void destroy_workqueue(struct workqueue_struct *wq)
 
 		for (i = 0; i < WORK_NR_COLORS; i++)
 			BUG_ON(cwq->nr_in_flight[i]);
+		BUG_ON(cwq->nr_active);
+		BUG_ON(!list_empty(&cwq->delayed_works));
 	}
 
 	free_cwqs(wq->cpu_wq);
-- 
cgit v1.2.3-70-g09d2


From a0a1a5fd4fb15ec61117c759fe9f5c16c53d9e9c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:12 +0200
Subject: workqueue: reimplement workqueue freeze using max_active

Currently, workqueue freezing is implemented by marking the worker
freezeable and calling try_to_freeze() from the dispatch loop.
Reimplement it using cwq->max_active so that the workqueue is frozen
instead of the worker.

* workqueue_struct->saved_max_active is added which stores the
  specified max_active on initialization.

* On freeze, all cwq->max_active's are quenched to zero.  Freezing is
  complete when nr_active on all cwqs reaches zero.

* On thaw, all cwq->max_active's are restored to wq->saved_max_active
  and the worklist is repopulated.

This new implementation allows having a single shared pool of workers
per cpu.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |   7 ++
 kernel/power/process.c    |  21 +++++-
 kernel/workqueue.c        | 163 +++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 179 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index eb753b7790e..ab0b7fb99bc 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -340,4 +340,11 @@ static inline long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
 #else
 long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg);
 #endif /* CONFIG_SMP */
+
+#ifdef CONFIG_FREEZER
+extern void freeze_workqueues_begin(void);
+extern bool freeze_workqueues_busy(void);
+extern void thaw_workqueues(void);
+#endif /* CONFIG_FREEZER */
+
 #endif
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 71ae29052ab..028a99598f4 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -15,6 +15,7 @@
 #include <linux/syscalls.h>
 #include <linux/freezer.h>
 #include <linux/delay.h>
+#include <linux/workqueue.h>
 
 /* 
  * Timeout for stopping processes
@@ -35,6 +36,7 @@ static int try_to_freeze_tasks(bool sig_only)
 	struct task_struct *g, *p;
 	unsigned long end_time;
 	unsigned int todo;
+	bool wq_busy = false;
 	struct timeval start, end;
 	u64 elapsed_csecs64;
 	unsigned int elapsed_csecs;
@@ -42,6 +44,10 @@ static int try_to_freeze_tasks(bool sig_only)
 	do_gettimeofday(&start);
 
 	end_time = jiffies + TIMEOUT;
+
+	if (!sig_only)
+		freeze_workqueues_begin();
+
 	while (true) {
 		todo = 0;
 		read_lock(&tasklist_lock);
@@ -63,6 +69,12 @@ static int try_to_freeze_tasks(bool sig_only)
 				todo++;
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
+
+		if (!sig_only) {
+			wq_busy = freeze_workqueues_busy();
+			todo += wq_busy;
+		}
+
 		if (!todo || time_after(jiffies, end_time))
 			break;
 
@@ -86,8 +98,12 @@ static int try_to_freeze_tasks(bool sig_only)
 		 */
 		printk("\n");
 		printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds "
-				"(%d tasks refusing to freeze):\n",
-				elapsed_csecs / 100, elapsed_csecs % 100, todo);
+		       "(%d tasks refusing to freeze, wq_busy=%d):\n",
+		       elapsed_csecs / 100, elapsed_csecs % 100,
+		       todo - wq_busy, wq_busy);
+
+		thaw_workqueues();
+
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
 			task_lock(p);
@@ -157,6 +173,7 @@ void thaw_processes(void)
 	oom_killer_enable();
 
 	printk("Restarting tasks ... ");
+	thaw_workqueues();
 	thaw_tasks(true);
 	thaw_tasks(false);
 	schedule();
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e541b5db67d..4d059c53279 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -78,7 +78,7 @@ struct cpu_workqueue_struct {
 	int			nr_in_flight[WORK_NR_COLORS];
 						/* L: nr of in_flight works */
 	int			nr_active;	/* L: nr of active works */
-	int			max_active;	/* I: max active works */
+	int			max_active;	/* L: max active works */
 	struct list_head	delayed_works;	/* L: delayed works */
 };
 
@@ -108,6 +108,7 @@ struct workqueue_struct {
 	struct list_head	flusher_queue;	/* F: flush waiters */
 	struct list_head	flusher_overflow; /* F: flush overflow list */
 
+	int			saved_max_active; /* I: saved cwq max_active */
 	const char		*name;		/* I: workqueue name */
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map	lockdep_map;
@@ -228,6 +229,7 @@ static inline void debug_work_deactivate(struct work_struct *work) { }
 static DEFINE_SPINLOCK(workqueue_lock);
 static LIST_HEAD(workqueues);
 static DEFINE_PER_CPU(struct ida, worker_ida);
+static bool workqueue_freezing;		/* W: have wqs started freezing? */
 
 static int worker_thread(void *__worker);
 
@@ -745,19 +747,13 @@ static int worker_thread(void *__worker)
 	struct cpu_workqueue_struct *cwq = worker->cwq;
 	DEFINE_WAIT(wait);
 
-	if (cwq->wq->flags & WQ_FREEZEABLE)
-		set_freezable();
-
 	for (;;) {
 		prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
-		if (!freezing(current) &&
-		    !kthread_should_stop() &&
+		if (!kthread_should_stop() &&
 		    list_empty(&cwq->worklist))
 			schedule();
 		finish_wait(&cwq->more_work, &wait);
 
-		try_to_freeze();
-
 		if (kthread_should_stop())
 			break;
 
@@ -1553,6 +1549,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 		goto err;
 
 	wq->flags = flags;
+	wq->saved_max_active = max_active;
 	mutex_init(&wq->flush_mutex);
 	atomic_set(&wq->nr_cwqs_to_flush, 0);
 	INIT_LIST_HEAD(&wq->flusher_queue);
@@ -1591,8 +1588,19 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 			failed = true;
 	}
 
+	/*
+	 * workqueue_lock protects global freeze state and workqueues
+	 * list.  Grab it, set max_active accordingly and add the new
+	 * workqueue to workqueues list.
+	 */
 	spin_lock(&workqueue_lock);
+
+	if (workqueue_freezing && wq->flags & WQ_FREEZEABLE)
+		for_each_possible_cpu(cpu)
+			get_cwq(cpu, wq)->max_active = 0;
+
 	list_add(&wq->list, &workqueues);
+
 	spin_unlock(&workqueue_lock);
 
 	cpu_maps_update_done();
@@ -1621,14 +1629,18 @@ void destroy_workqueue(struct workqueue_struct *wq)
 {
 	int cpu;
 
+	flush_workqueue(wq);
+
+	/*
+	 * wq list is used to freeze wq, remove from list after
+	 * flushing is complete in case freeze races us.
+	 */
 	cpu_maps_update_begin();
 	spin_lock(&workqueue_lock);
 	list_del(&wq->list);
 	spin_unlock(&workqueue_lock);
 	cpu_maps_update_done();
 
-	flush_workqueue(wq);
-
 	for_each_possible_cpu(cpu) {
 		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
 		int i;
@@ -1722,6 +1734,137 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
 EXPORT_SYMBOL_GPL(work_on_cpu);
 #endif /* CONFIG_SMP */
 
+#ifdef CONFIG_FREEZER
+
+/**
+ * freeze_workqueues_begin - begin freezing workqueues
+ *
+ * Start freezing workqueues.  After this function returns, all
+ * freezeable workqueues will queue new works to their delayed_works
+ * list instead of cwq->worklist.
+ *
+ * CONTEXT:
+ * Grabs and releases workqueue_lock and cwq->lock's.
+ */
+void freeze_workqueues_begin(void)
+{
+	struct workqueue_struct *wq;
+	unsigned int cpu;
+
+	spin_lock(&workqueue_lock);
+
+	BUG_ON(workqueue_freezing);
+	workqueue_freezing = true;
+
+	for_each_possible_cpu(cpu) {
+		list_for_each_entry(wq, &workqueues, list) {
+			struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+
+			spin_lock_irq(&cwq->lock);
+
+			if (wq->flags & WQ_FREEZEABLE)
+				cwq->max_active = 0;
+
+			spin_unlock_irq(&cwq->lock);
+		}
+	}
+
+	spin_unlock(&workqueue_lock);
+}
+
+/**
+ * freeze_workqueues_busy - are freezeable workqueues still busy?
+ *
+ * Check whether freezing is complete.  This function must be called
+ * between freeze_workqueues_begin() and thaw_workqueues().
+ *
+ * CONTEXT:
+ * Grabs and releases workqueue_lock.
+ *
+ * RETURNS:
+ * %true if some freezeable workqueues are still busy.  %false if
+ * freezing is complete.
+ */
+bool freeze_workqueues_busy(void)
+{
+	struct workqueue_struct *wq;
+	unsigned int cpu;
+	bool busy = false;
+
+	spin_lock(&workqueue_lock);
+
+	BUG_ON(!workqueue_freezing);
+
+	for_each_possible_cpu(cpu) {
+		/*
+		 * nr_active is monotonically decreasing.  It's safe
+		 * to peek without lock.
+		 */
+		list_for_each_entry(wq, &workqueues, list) {
+			struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+
+			if (!(wq->flags & WQ_FREEZEABLE))
+				continue;
+
+			BUG_ON(cwq->nr_active < 0);
+			if (cwq->nr_active) {
+				busy = true;
+				goto out_unlock;
+			}
+		}
+	}
+out_unlock:
+	spin_unlock(&workqueue_lock);
+	return busy;
+}
+
+/**
+ * thaw_workqueues - thaw workqueues
+ *
+ * Thaw workqueues.  Normal queueing is restored and all collected
+ * frozen works are transferred to their respective cwq worklists.
+ *
+ * CONTEXT:
+ * Grabs and releases workqueue_lock and cwq->lock's.
+ */
+void thaw_workqueues(void)
+{
+	struct workqueue_struct *wq;
+	unsigned int cpu;
+
+	spin_lock(&workqueue_lock);
+
+	if (!workqueue_freezing)
+		goto out_unlock;
+
+	for_each_possible_cpu(cpu) {
+		list_for_each_entry(wq, &workqueues, list) {
+			struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+
+			if (!(wq->flags & WQ_FREEZEABLE))
+				continue;
+
+			spin_lock_irq(&cwq->lock);
+
+			/* restore max_active and repopulate worklist */
+			cwq->max_active = wq->saved_max_active;
+
+			while (!list_empty(&cwq->delayed_works) &&
+			       cwq->nr_active < cwq->max_active)
+				cwq_activate_first_delayed(cwq);
+
+			wake_up(&cwq->more_work);
+
+			spin_unlock_irq(&cwq->lock);
+		}
+	}
+
+	workqueue_freezing = false;
+out_unlock:
+	spin_unlock(&workqueue_lock);
+}
+#endif /* CONFIG_FREEZER */
+
 void __init init_workqueues(void)
 {
 	unsigned int cpu;
-- 
cgit v1.2.3-70-g09d2


From db7bccf45cb87522096b8f43144e31ca605a9f24 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:12 +0200
Subject: workqueue: reimplement CPU hotplugging support using trustee

Reimplement CPU hotplugging support using trustee thread.  On CPU
down, a trustee thread is created and each step of CPU down is
executed by the trustee and workqueue_cpu_callback() simply drives and
waits for trustee state transitions.

CPU down operation no longer waits for works to be drained but trustee
sticks around till all pending works have been completed.  If CPU is
brought back up while works are still draining,
workqueue_cpu_callback() tells trustee to step down and tell workers
to rebind to the cpu.

As it's difficult to tell whether cwqs are empty if it's freezing or
frozen, trustee doesn't consider draining to be complete while a gcwq
is freezing or frozen (tracked by new GCWQ_FREEZING flag).  Also,
workers which get unbound from their cpu are marked with WORKER_ROGUE.

Trustee based implementation doesn't bring any new feature at this
point but it will be used to manage worker pool when dynamic shared
worker pool is implemented.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cpu.h |   2 +
 kernel/workqueue.c  | 293 +++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 279 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index de6b1722cdc..4823af64e9d 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -71,6 +71,8 @@ enum {
 	/* migration should happen before other stuff but after perf */
 	CPU_PRI_PERF		= 20,
 	CPU_PRI_MIGRATION	= 10,
+	/* prepare workqueues for other notifiers */
+	CPU_PRI_WORKQUEUE	= 5,
 };
 
 #ifdef CONFIG_SMP
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d64913aa486..f57855f718d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -36,14 +36,27 @@
 #include <linux/idr.h>
 
 enum {
+	/* global_cwq flags */
+	GCWQ_FREEZING		= 1 << 3,	/* freeze in progress */
+
 	/* worker flags */
 	WORKER_STARTED		= 1 << 0,	/* started */
 	WORKER_DIE		= 1 << 1,	/* die die die */
 	WORKER_IDLE		= 1 << 2,	/* is idle */
+	WORKER_ROGUE		= 1 << 4,	/* not bound to any cpu */
+
+	/* gcwq->trustee_state */
+	TRUSTEE_START		= 0,		/* start */
+	TRUSTEE_IN_CHARGE	= 1,		/* trustee in charge of gcwq */
+	TRUSTEE_BUTCHER		= 2,		/* butcher workers */
+	TRUSTEE_RELEASE		= 3,		/* release workers */
+	TRUSTEE_DONE		= 4,		/* trustee is done */
 
 	BUSY_WORKER_HASH_ORDER	= 6,		/* 64 pointers */
 	BUSY_WORKER_HASH_SIZE	= 1 << BUSY_WORKER_HASH_ORDER,
 	BUSY_WORKER_HASH_MASK	= BUSY_WORKER_HASH_SIZE - 1,
+
+	TRUSTEE_COOLDOWN	= HZ / 10,	/* for trustee draining */
 };
 
 /*
@@ -83,6 +96,7 @@ struct worker {
 struct global_cwq {
 	spinlock_t		lock;		/* the gcwq lock */
 	unsigned int		cpu;		/* I: the associated cpu */
+	unsigned int		flags;		/* L: GCWQ_* flags */
 
 	int			nr_workers;	/* L: total number of workers */
 	int			nr_idle;	/* L: currently idle ones */
@@ -93,6 +107,10 @@ struct global_cwq {
 						/* L: hash of busy workers */
 
 	struct ida		worker_ida;	/* L: for worker IDs */
+
+	struct task_struct	*trustee;	/* L: for gcwq shutdown */
+	unsigned int		trustee_state;	/* L: trustee state */
+	wait_queue_head_t	trustee_wait;	/* trustee wait */
 } ____cacheline_aligned_in_smp;
 
 /*
@@ -148,6 +166,10 @@ struct workqueue_struct {
 #endif
 };
 
+#define for_each_busy_worker(worker, i, pos, gcwq)			\
+	for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)			\
+		hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)
+
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 
 static struct debug_obj_descr work_debug_descr;
@@ -546,6 +568,9 @@ static void worker_enter_idle(struct worker *worker)
 
 	/* idle_list is LIFO */
 	list_add(&worker->entry, &gcwq->idle_list);
+
+	if (unlikely(worker->flags & WORKER_ROGUE))
+		wake_up_all(&gcwq->trustee_wait);
 }
 
 /**
@@ -622,8 +647,15 @@ static struct worker *create_worker(struct cpu_workqueue_struct *cwq, bool bind)
 	if (IS_ERR(worker->task))
 		goto fail;
 
+	/*
+	 * A rogue worker will become a regular one if CPU comes
+	 * online later on.  Make sure every worker has
+	 * PF_THREAD_BOUND set.
+	 */
 	if (bind)
 		kthread_bind(worker->task, gcwq->cpu);
+	else
+		worker->task->flags |= PF_THREAD_BOUND;
 
 	return worker;
 fail:
@@ -882,10 +914,6 @@ static int worker_thread(void *__worker)
 	struct cpu_workqueue_struct *cwq = worker->cwq;
 
 woke_up:
-	if (unlikely(!cpumask_equal(&worker->task->cpus_allowed,
-				    get_cpu_mask(gcwq->cpu))))
-		set_cpus_allowed_ptr(worker->task, get_cpu_mask(gcwq->cpu));
-
 	spin_lock_irq(&gcwq->lock);
 
 	/* DIE can be set only while we're idle, checking here is enough */
@@ -895,7 +923,7 @@ woke_up:
 	}
 
 	worker_leave_idle(worker);
-
+recheck:
 	/*
 	 * ->scheduled list can only be filled while a worker is
 	 * preparing to process a work or actually processing it.
@@ -908,6 +936,22 @@ woke_up:
 			list_first_entry(&cwq->worklist,
 					 struct work_struct, entry);
 
+		/*
+		 * The following is a rather inefficient way to close
+		 * race window against cpu hotplug operations.  Will
+		 * be replaced soon.
+		 */
+		if (unlikely(!(worker->flags & WORKER_ROGUE) &&
+			     !cpumask_equal(&worker->task->cpus_allowed,
+					    get_cpu_mask(gcwq->cpu)))) {
+			spin_unlock_irq(&gcwq->lock);
+			set_cpus_allowed_ptr(worker->task,
+					     get_cpu_mask(gcwq->cpu));
+			cpu_relax();
+			spin_lock_irq(&gcwq->lock);
+			goto recheck;
+		}
+
 		if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
 			/* optimization path, not strictly necessary */
 			process_one_work(worker, work);
@@ -1812,29 +1856,237 @@ void destroy_workqueue(struct workqueue_struct *wq)
 }
 EXPORT_SYMBOL_GPL(destroy_workqueue);
 
+/*
+ * CPU hotplug.
+ *
+ * CPU hotplug is implemented by allowing cwqs to be detached from
+ * CPU, running with unbound workers and allowing them to be
+ * reattached later if the cpu comes back online.  A separate thread
+ * is created to govern cwqs in such state and is called the trustee.
+ *
+ * Trustee states and their descriptions.
+ *
+ * START	Command state used on startup.  On CPU_DOWN_PREPARE, a
+ *		new trustee is started with this state.
+ *
+ * IN_CHARGE	Once started, trustee will enter this state after
+ *		making all existing workers rogue.  DOWN_PREPARE waits
+ *		for trustee to enter this state.  After reaching
+ *		IN_CHARGE, trustee tries to execute the pending
+ *		worklist until it's empty and the state is set to
+ *		BUTCHER, or the state is set to RELEASE.
+ *
+ * BUTCHER	Command state which is set by the cpu callback after
+ *		the cpu has went down.  Once this state is set trustee
+ *		knows that there will be no new works on the worklist
+ *		and once the worklist is empty it can proceed to
+ *		killing idle workers.
+ *
+ * RELEASE	Command state which is set by the cpu callback if the
+ *		cpu down has been canceled or it has come online
+ *		again.  After recognizing this state, trustee stops
+ *		trying to drain or butcher and transits to DONE.
+ *
+ * DONE		Trustee will enter this state after BUTCHER or RELEASE
+ *		is complete.
+ *
+ *          trustee                 CPU                draining
+ *         took over                down               complete
+ * START -----------> IN_CHARGE -----------> BUTCHER -----------> DONE
+ *                        |                     |                  ^
+ *                        | CPU is back online  v   return workers |
+ *                         ----------------> RELEASE --------------
+ */
+
+/**
+ * trustee_wait_event_timeout - timed event wait for trustee
+ * @cond: condition to wait for
+ * @timeout: timeout in jiffies
+ *
+ * wait_event_timeout() for trustee to use.  Handles locking and
+ * checks for RELEASE request.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock) which may be released and regrabbed
+ * multiple times.  To be used by trustee.
+ *
+ * RETURNS:
+ * Positive indicating left time if @cond is satisfied, 0 if timed
+ * out, -1 if canceled.
+ */
+#define trustee_wait_event_timeout(cond, timeout) ({			\
+	long __ret = (timeout);						\
+	while (!((cond) || (gcwq->trustee_state == TRUSTEE_RELEASE)) &&	\
+	       __ret) {							\
+		spin_unlock_irq(&gcwq->lock);				\
+		__wait_event_timeout(gcwq->trustee_wait, (cond) ||	\
+			(gcwq->trustee_state == TRUSTEE_RELEASE),	\
+			__ret);						\
+		spin_lock_irq(&gcwq->lock);				\
+	}								\
+	gcwq->trustee_state == TRUSTEE_RELEASE ? -1 : (__ret);		\
+})
+
+/**
+ * trustee_wait_event - event wait for trustee
+ * @cond: condition to wait for
+ *
+ * wait_event() for trustee to use.  Automatically handles locking and
+ * checks for RELEASE request.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock) which may be released and regrabbed
+ * multiple times.  To be used by trustee.
+ *
+ * RETURNS:
+ * 0 if @cond is satisfied, -1 if canceled.
+ */
+#define trustee_wait_event(cond) ({					\
+	long __ret1;							\
+	__ret1 = trustee_wait_event_timeout(cond, MAX_SCHEDULE_TIMEOUT);\
+	__ret1 < 0 ? -1 : 0;						\
+})
+
+static int __cpuinit trustee_thread(void *__gcwq)
+{
+	struct global_cwq *gcwq = __gcwq;
+	struct worker *worker;
+	struct hlist_node *pos;
+	int i;
+
+	BUG_ON(gcwq->cpu != smp_processor_id());
+
+	spin_lock_irq(&gcwq->lock);
+	/*
+	 * Make all multithread workers rogue.  Trustee must be bound
+	 * to the target cpu and can't be cancelled.
+	 */
+	BUG_ON(gcwq->cpu != smp_processor_id());
+
+	list_for_each_entry(worker, &gcwq->idle_list, entry)
+		if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD))
+			worker->flags |= WORKER_ROGUE;
+
+	for_each_busy_worker(worker, i, pos, gcwq)
+		if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD))
+			worker->flags |= WORKER_ROGUE;
+
+	/*
+	 * We're now in charge.  Notify and proceed to drain.  We need
+	 * to keep the gcwq running during the whole CPU down
+	 * procedure as other cpu hotunplug callbacks may need to
+	 * flush currently running tasks.
+	 */
+	gcwq->trustee_state = TRUSTEE_IN_CHARGE;
+	wake_up_all(&gcwq->trustee_wait);
+
+	/*
+	 * The original cpu is in the process of dying and may go away
+	 * anytime now.  When that happens, we and all workers would
+	 * be migrated to other cpus.  Try draining any left work.
+	 * Note that if the gcwq is frozen, there may be frozen works
+	 * in freezeable cwqs.  Don't declare completion while frozen.
+	 */
+	while (gcwq->nr_workers != gcwq->nr_idle ||
+	       gcwq->flags & GCWQ_FREEZING ||
+	       gcwq->trustee_state == TRUSTEE_IN_CHARGE) {
+		/* give a breather */
+		if (trustee_wait_event_timeout(false, TRUSTEE_COOLDOWN) < 0)
+			break;
+	}
+
+	/* notify completion */
+	gcwq->trustee = NULL;
+	gcwq->trustee_state = TRUSTEE_DONE;
+	wake_up_all(&gcwq->trustee_wait);
+	spin_unlock_irq(&gcwq->lock);
+	return 0;
+}
+
+/**
+ * wait_trustee_state - wait for trustee to enter the specified state
+ * @gcwq: gcwq the trustee of interest belongs to
+ * @state: target state to wait for
+ *
+ * Wait for the trustee to reach @state.  DONE is already matched.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock) which may be released and regrabbed
+ * multiple times.  To be used by cpu_callback.
+ */
+static void __cpuinit wait_trustee_state(struct global_cwq *gcwq, int state)
+{
+	if (!(gcwq->trustee_state == state ||
+	      gcwq->trustee_state == TRUSTEE_DONE)) {
+		spin_unlock_irq(&gcwq->lock);
+		__wait_event(gcwq->trustee_wait,
+			     gcwq->trustee_state == state ||
+			     gcwq->trustee_state == TRUSTEE_DONE);
+		spin_lock_irq(&gcwq->lock);
+	}
+}
+
 static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 						unsigned long action,
 						void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
-	struct cpu_workqueue_struct *cwq;
-	struct workqueue_struct *wq;
+	struct global_cwq *gcwq = get_gcwq(cpu);
+	struct task_struct *new_trustee = NULL;
+	struct worker *worker;
+	struct hlist_node *pos;
+	unsigned long flags;
+	int i;
 
 	action &= ~CPU_TASKS_FROZEN;
 
-	list_for_each_entry(wq, &workqueues, list) {
-		if (wq->flags & WQ_SINGLE_THREAD)
-			continue;
+	switch (action) {
+	case CPU_DOWN_PREPARE:
+		new_trustee = kthread_create(trustee_thread, gcwq,
+					     "workqueue_trustee/%d\n", cpu);
+		if (IS_ERR(new_trustee))
+			return notifier_from_errno(PTR_ERR(new_trustee));
+		kthread_bind(new_trustee, cpu);
+	}
 
-		cwq = get_cwq(cpu, wq);
+	/* some are called w/ irq disabled, don't disturb irq status */
+	spin_lock_irqsave(&gcwq->lock, flags);
 
-		switch (action) {
-		case CPU_POST_DEAD:
-			flush_workqueue(wq);
-			break;
+	switch (action) {
+	case CPU_DOWN_PREPARE:
+		/* initialize trustee and tell it to acquire the gcwq */
+		BUG_ON(gcwq->trustee || gcwq->trustee_state != TRUSTEE_DONE);
+		gcwq->trustee = new_trustee;
+		gcwq->trustee_state = TRUSTEE_START;
+		wake_up_process(gcwq->trustee);
+		wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE);
+		break;
+
+	case CPU_POST_DEAD:
+		gcwq->trustee_state = TRUSTEE_BUTCHER;
+		break;
+
+	case CPU_DOWN_FAILED:
+	case CPU_ONLINE:
+		if (gcwq->trustee_state != TRUSTEE_DONE) {
+			gcwq->trustee_state = TRUSTEE_RELEASE;
+			wake_up_process(gcwq->trustee);
+			wait_trustee_state(gcwq, TRUSTEE_DONE);
 		}
+
+		/* clear ROGUE from all multithread workers */
+		list_for_each_entry(worker, &gcwq->idle_list, entry)
+			if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD))
+				worker->flags &= ~WORKER_ROGUE;
+
+		for_each_busy_worker(worker, i, pos, gcwq)
+			if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD))
+				worker->flags &= ~WORKER_ROGUE;
+		break;
 	}
 
+	spin_unlock_irqrestore(&gcwq->lock, flags);
+
 	return notifier_from_errno(0);
 }
 
@@ -1912,6 +2164,9 @@ void freeze_workqueues_begin(void)
 
 		spin_lock_irq(&gcwq->lock);
 
+		BUG_ON(gcwq->flags & GCWQ_FREEZING);
+		gcwq->flags |= GCWQ_FREEZING;
+
 		list_for_each_entry(wq, &workqueues, list) {
 			struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
 
@@ -1995,6 +2250,9 @@ void thaw_workqueues(void)
 
 		spin_lock_irq(&gcwq->lock);
 
+		BUG_ON(!(gcwq->flags & GCWQ_FREEZING));
+		gcwq->flags &= ~GCWQ_FREEZING;
+
 		list_for_each_entry(wq, &workqueues, list) {
 			struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
 
@@ -2026,7 +2284,7 @@ void __init init_workqueues(void)
 	int i;
 
 	singlethread_cpu = cpumask_first(cpu_possible_mask);
-	hotcpu_notifier(workqueue_cpu_callback, 0);
+	hotcpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE);
 
 	/* initialize gcwqs */
 	for_each_possible_cpu(cpu) {
@@ -2040,6 +2298,9 @@ void __init init_workqueues(void)
 			INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
 
 		ida_init(&gcwq->worker_ida);
+
+		gcwq->trustee_state = TRUSTEE_DONE;
+		init_waitqueue_head(&gcwq->trustee_wait);
 	}
 
 	keventd_wq = create_workqueue("events");
-- 
cgit v1.2.3-70-g09d2


From 502ca9d819792e7d79b6e002afe9094c641fe410 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:13 +0200
Subject: workqueue: make single thread workqueue shared worker pool friendly

Reimplement st (single thread) workqueue so that it's friendly to
shared worker pool.  It was originally implemented by confining st
workqueues to use cwq of a fixed cpu and always having a worker for
the cpu.  This implementation isn't very friendly to shared worker
pool and suboptimal in that it ends up crossing cpu boundaries often.

Reimplement st workqueue using dynamic single cpu binding and
cwq->max_active.  WQ_SINGLE_THREAD is replaced with WQ_SINGLE_CPU.  In a
single cpu workqueue, at most a single cwq is bound to the wq at any
given time.  Arbitration is done using atomic accesses to
wq->single_cpu when queueing a work.  Once bound, the binding stays
till the workqueue is drained.

Note that the binding is never broken while a workqueue is frozen.
This is because idle cwqs may have works waiting in delayed_works
queue while frozen.  On thaw, the cwq is restarted if there are any
delayed works or unbound otherwise.

When combined with max_active limit of 1, single cpu workqueue has
exactly the same execution properties as the original single thread
workqueue while allowing sharing of per-cpu workers.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |   6 +--
 kernel/workqueue.c        | 135 ++++++++++++++++++++++++++++++++++------------
 2 files changed, 103 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index ab0b7fb99bc..10611f7fc80 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -221,7 +221,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 
 enum {
 	WQ_FREEZEABLE		= 1 << 0, /* freeze during suspend */
-	WQ_SINGLE_THREAD	= 1 << 1, /* no per-cpu worker */
+	WQ_SINGLE_CPU		= 1 << 1, /* only single cpu at a time */
 };
 
 extern struct workqueue_struct *
@@ -250,9 +250,9 @@ __create_workqueue_key(const char *name, unsigned int flags, int max_active,
 #define create_workqueue(name)					\
 	__create_workqueue((name), 0, 1)
 #define create_freezeable_workqueue(name)			\
-	__create_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_THREAD, 1)
+	__create_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_CPU, 1)
 #define create_singlethread_workqueue(name)			\
-	__create_workqueue((name), WQ_SINGLE_THREAD, 1)
+	__create_workqueue((name), WQ_SINGLE_CPU, 1)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f57855f718d..cfb8aa567e1 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -114,8 +114,7 @@ struct global_cwq {
 } ____cacheline_aligned_in_smp;
 
 /*
- * The per-CPU workqueue (if single thread, we always use the first
- * possible cpu).  The lower WORK_STRUCT_FLAG_BITS of
+ * The per-CPU workqueue.  The lower WORK_STRUCT_FLAG_BITS of
  * work_struct->data are used for flags and thus cwqs need to be
  * aligned at two's power of the number of flag bits.
  */
@@ -159,6 +158,8 @@ struct workqueue_struct {
 	struct list_head	flusher_queue;	/* F: flush waiters */
 	struct list_head	flusher_overflow; /* F: flush overflow list */
 
+	unsigned long		single_cpu;	/* cpu for single cpu wq */
+
 	int			saved_max_active; /* I: saved cwq max_active */
 	const char		*name;		/* I: workqueue name */
 #ifdef CONFIG_LOCKDEP
@@ -289,8 +290,6 @@ static DEFINE_PER_CPU(struct global_cwq, global_cwq);
 
 static int worker_thread(void *__worker);
 
-static int singlethread_cpu __read_mostly;
-
 static struct global_cwq *get_gcwq(unsigned int cpu)
 {
 	return &per_cpu(global_cwq, cpu);
@@ -302,14 +301,6 @@ static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
 	return per_cpu_ptr(wq->cpu_wq, cpu);
 }
 
-static struct cpu_workqueue_struct *target_cwq(unsigned int cpu,
-					       struct workqueue_struct *wq)
-{
-	if (unlikely(wq->flags & WQ_SINGLE_THREAD))
-		cpu = singlethread_cpu;
-	return get_cwq(cpu, wq);
-}
-
 static unsigned int work_color_to_flags(int color)
 {
 	return color << WORK_STRUCT_COLOR_SHIFT;
@@ -410,17 +401,87 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
 	wake_up_process(cwq->worker->task);
 }
 
+/**
+ * cwq_unbind_single_cpu - unbind cwq from single cpu workqueue processing
+ * @cwq: cwq to unbind
+ *
+ * Try to unbind @cwq from single cpu workqueue processing.  If
+ * @cwq->wq is frozen, unbind is delayed till the workqueue is thawed.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock).
+ */
+static void cwq_unbind_single_cpu(struct cpu_workqueue_struct *cwq)
+{
+	struct workqueue_struct *wq = cwq->wq;
+	struct global_cwq *gcwq = cwq->gcwq;
+
+	BUG_ON(wq->single_cpu != gcwq->cpu);
+	/*
+	 * Unbind from workqueue if @cwq is not frozen.  If frozen,
+	 * thaw_workqueues() will either restart processing on this
+	 * cpu or unbind if empty.  This keeps works queued while
+	 * frozen fully ordered and flushable.
+	 */
+	if (likely(!(gcwq->flags & GCWQ_FREEZING))) {
+		smp_wmb();	/* paired with cmpxchg() in __queue_work() */
+		wq->single_cpu = NR_CPUS;
+	}
+}
+
 static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 			 struct work_struct *work)
 {
-	struct cpu_workqueue_struct *cwq = target_cwq(cpu, wq);
-	struct global_cwq *gcwq = cwq->gcwq;
+	struct global_cwq *gcwq;
+	struct cpu_workqueue_struct *cwq;
 	struct list_head *worklist;
 	unsigned long flags;
+	bool arbitrate;
 
 	debug_work_activate(work);
 
-	spin_lock_irqsave(&gcwq->lock, flags);
+	/* determine gcwq to use */
+	if (!(wq->flags & WQ_SINGLE_CPU)) {
+		/* just use the requested cpu for multicpu workqueues */
+		gcwq = get_gcwq(cpu);
+		spin_lock_irqsave(&gcwq->lock, flags);
+	} else {
+		unsigned int req_cpu = cpu;
+
+		/*
+		 * It's a bit more complex for single cpu workqueues.
+		 * We first need to determine which cpu is going to be
+		 * used.  If no cpu is currently serving this
+		 * workqueue, arbitrate using atomic accesses to
+		 * wq->single_cpu; otherwise, use the current one.
+		 */
+	retry:
+		cpu = wq->single_cpu;
+		arbitrate = cpu == NR_CPUS;
+		if (arbitrate)
+			cpu = req_cpu;
+
+		gcwq = get_gcwq(cpu);
+		spin_lock_irqsave(&gcwq->lock, flags);
+
+		/*
+		 * The following cmpxchg() is a full barrier paired
+		 * with smp_wmb() in cwq_unbind_single_cpu() and
+		 * guarantees that all changes to wq->st_* fields are
+		 * visible on the new cpu after this point.
+		 */
+		if (arbitrate)
+			cmpxchg(&wq->single_cpu, NR_CPUS, cpu);
+
+		if (unlikely(wq->single_cpu != cpu)) {
+			spin_unlock_irqrestore(&gcwq->lock, flags);
+			goto retry;
+		}
+	}
+
+	/* gcwq determined, get cwq and queue */
+	cwq = get_cwq(gcwq->cpu, wq);
+
 	BUG_ON(!list_empty(&work->entry));
 
 	cwq->nr_in_flight[cwq->work_color]++;
@@ -530,7 +591,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 		timer_stats_timer_set_start_info(&dwork->timer);
 
 		/* This stores cwq for the moment, for the timer_fn */
-		set_wq_data(work, target_cwq(raw_smp_processor_id(), wq), 0);
+		set_wq_data(work, get_cwq(raw_smp_processor_id(), wq), 0);
 		timer->expires = jiffies + delay;
 		timer->data = (unsigned long)dwork;
 		timer->function = delayed_work_timer_fn;
@@ -790,10 +851,14 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
 	cwq->nr_in_flight[color]--;
 	cwq->nr_active--;
 
-	/* one down, submit a delayed one */
-	if (!list_empty(&cwq->delayed_works) &&
-	    cwq->nr_active < cwq->max_active)
-		cwq_activate_first_delayed(cwq);
+	if (!list_empty(&cwq->delayed_works)) {
+		/* one down, submit a delayed one */
+		if (cwq->nr_active < cwq->max_active)
+			cwq_activate_first_delayed(cwq);
+	} else if (!cwq->nr_active && cwq->wq->flags & WQ_SINGLE_CPU) {
+		/* this was the last work, unbind from single cpu */
+		cwq_unbind_single_cpu(cwq);
+	}
 
 	/* is flush in progress and are we at the flushing tip? */
 	if (likely(cwq->flush_color != color))
@@ -1727,7 +1792,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 						struct lock_class_key *key,
 						const char *lock_name)
 {
-	bool singlethread = flags & WQ_SINGLE_THREAD;
 	struct workqueue_struct *wq;
 	bool failed = false;
 	unsigned int cpu;
@@ -1748,6 +1812,8 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 	atomic_set(&wq->nr_cwqs_to_flush, 0);
 	INIT_LIST_HEAD(&wq->flusher_queue);
 	INIT_LIST_HEAD(&wq->flusher_overflow);
+	wq->single_cpu = NR_CPUS;
+
 	wq->name = name;
 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
 	INIT_LIST_HEAD(&wq->list);
@@ -1773,8 +1839,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 
 		if (failed)
 			continue;
-		cwq->worker = create_worker(cwq,
-					    cpu_online(cpu) && !singlethread);
+		cwq->worker = create_worker(cwq, cpu_online(cpu));
 		if (cwq->worker)
 			start_worker(cwq->worker);
 		else
@@ -1958,18 +2023,16 @@ static int __cpuinit trustee_thread(void *__gcwq)
 
 	spin_lock_irq(&gcwq->lock);
 	/*
-	 * Make all multithread workers rogue.  Trustee must be bound
-	 * to the target cpu and can't be cancelled.
+	 * Make all workers rogue.  Trustee must be bound to the
+	 * target cpu and can't be cancelled.
 	 */
 	BUG_ON(gcwq->cpu != smp_processor_id());
 
 	list_for_each_entry(worker, &gcwq->idle_list, entry)
-		if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD))
-			worker->flags |= WORKER_ROGUE;
+		worker->flags |= WORKER_ROGUE;
 
 	for_each_busy_worker(worker, i, pos, gcwq)
-		if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD))
-			worker->flags |= WORKER_ROGUE;
+		worker->flags |= WORKER_ROGUE;
 
 	/*
 	 * We're now in charge.  Notify and proceed to drain.  We need
@@ -2074,14 +2137,12 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 			wait_trustee_state(gcwq, TRUSTEE_DONE);
 		}
 
-		/* clear ROGUE from all multithread workers */
+		/* clear ROGUE from all workers */
 		list_for_each_entry(worker, &gcwq->idle_list, entry)
-			if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD))
-				worker->flags &= ~WORKER_ROGUE;
+			worker->flags &= ~WORKER_ROGUE;
 
 		for_each_busy_worker(worker, i, pos, gcwq)
-			if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD))
-				worker->flags &= ~WORKER_ROGUE;
+			worker->flags &= ~WORKER_ROGUE;
 		break;
 	}
 
@@ -2266,6 +2327,11 @@ void thaw_workqueues(void)
 			       cwq->nr_active < cwq->max_active)
 				cwq_activate_first_delayed(cwq);
 
+			/* perform delayed unbind from single cpu if empty */
+			if (wq->single_cpu == gcwq->cpu &&
+			    !cwq->nr_active && list_empty(&cwq->delayed_works))
+				cwq_unbind_single_cpu(cwq);
+
 			wake_up_process(cwq->worker->task);
 		}
 
@@ -2283,7 +2349,6 @@ void __init init_workqueues(void)
 	unsigned int cpu;
 	int i;
 
-	singlethread_cpu = cpumask_first(cpu_possible_mask);
 	hotcpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE);
 
 	/* initialize gcwqs */
-- 
cgit v1.2.3-70-g09d2


From 7a22ad757ec75186ad43a5b4670fa7423ee8f480 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:13 +0200
Subject: workqueue: carry cpu number in work data once execution starts

To implement non-reentrant workqueue, the last gcwq a work was
executed on must be reliably obtainable as long as the work structure
is valid even if the previous workqueue has been destroyed.

To achieve this, work->data will be overloaded to carry the last cpu
number once execution starts so that the previous gcwq can be located
reliably.  This means that cwq can't be obtained from work after
execution starts but only gcwq.

Implement set_work_{cwq|cpu}(), get_work_[g]cwq() and
clear_work_data() to set work data to the cpu number when starting
execution, access the overloaded work data and clear it after
cancellation.

queue_delayed_work_on() is updated to preserve the last cpu while the
work is in-flight in the timer, and other callers which depended on
getting cwq from work after execution starts are converted to depend
on gcwq instead.

* Anton Blanchard fixed compile error on powerpc due to missing
  linux/threads.h include.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Anton Blanchard <anton@samba.org>
---
 include/linux/workqueue.h |   7 +-
 kernel/workqueue.c        | 163 +++++++++++++++++++++++++++++-----------------
 2 files changed, 109 insertions(+), 61 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 10611f7fc80..0a7814131e6 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -9,6 +9,7 @@
 #include <linux/linkage.h>
 #include <linux/bitops.h>
 #include <linux/lockdep.h>
+#include <linux/threads.h>
 #include <asm/atomic.h>
 
 struct workqueue_struct;
@@ -59,6 +60,7 @@ enum {
 
 	WORK_STRUCT_FLAG_MASK	= (1UL << WORK_STRUCT_FLAG_BITS) - 1,
 	WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
+	WORK_STRUCT_NO_CPU	= NR_CPUS << WORK_STRUCT_FLAG_BITS,
 };
 
 struct work_struct {
@@ -70,8 +72,9 @@ struct work_struct {
 #endif
 };
 
-#define WORK_DATA_INIT()	ATOMIC_LONG_INIT(0)
-#define WORK_DATA_STATIC_INIT()	ATOMIC_LONG_INIT(WORK_STRUCT_STATIC)
+#define WORK_DATA_INIT()	ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU)
+#define WORK_DATA_STATIC_INIT()	\
+	ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU | WORK_STRUCT_STATIC)
 
 struct delayed_work {
 	struct work_struct work;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c276dec75ea..c68277c204a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -319,31 +319,71 @@ static int work_next_color(int color)
 }
 
 /*
- * Set the workqueue on which a work item is to be run
- * - Must *only* be called if the pending flag is set
+ * Work data points to the cwq while a work is on queue.  Once
+ * execution starts, it points to the cpu the work was last on.  This
+ * can be distinguished by comparing the data value against
+ * PAGE_OFFSET.
+ *
+ * set_work_{cwq|cpu}() and clear_work_data() can be used to set the
+ * cwq, cpu or clear work->data.  These functions should only be
+ * called while the work is owned - ie. while the PENDING bit is set.
+ *
+ * get_work_[g]cwq() can be used to obtain the gcwq or cwq
+ * corresponding to a work.  gcwq is available once the work has been
+ * queued anywhere after initialization.  cwq is available only from
+ * queueing until execution starts.
  */
-static inline void set_wq_data(struct work_struct *work,
-			       struct cpu_workqueue_struct *cwq,
-			       unsigned long extra_flags)
+static inline void set_work_data(struct work_struct *work, unsigned long data,
+				 unsigned long flags)
 {
 	BUG_ON(!work_pending(work));
+	atomic_long_set(&work->data, data | flags | work_static(work));
+}
 
-	atomic_long_set(&work->data, (unsigned long)cwq | work_static(work) |
-			WORK_STRUCT_PENDING | extra_flags);
+static void set_work_cwq(struct work_struct *work,
+			 struct cpu_workqueue_struct *cwq,
+			 unsigned long extra_flags)
+{
+	set_work_data(work, (unsigned long)cwq,
+		      WORK_STRUCT_PENDING | extra_flags);
 }
 
-/*
- * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued.
- */
-static inline void clear_wq_data(struct work_struct *work)
+static void set_work_cpu(struct work_struct *work, unsigned int cpu)
+{
+	set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, WORK_STRUCT_PENDING);
+}
+
+static void clear_work_data(struct work_struct *work)
+{
+	set_work_data(work, WORK_STRUCT_NO_CPU, 0);
+}
+
+static inline unsigned long get_work_data(struct work_struct *work)
+{
+	return atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK;
+}
+
+static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work)
 {
-	atomic_long_set(&work->data, work_static(work));
+	unsigned long data = get_work_data(work);
+
+	return data >= PAGE_OFFSET ? (void *)data : NULL;
 }
 
-static inline struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
+static struct global_cwq *get_work_gcwq(struct work_struct *work)
 {
-	return (void *)(atomic_long_read(&work->data) &
-			WORK_STRUCT_WQ_DATA_MASK);
+	unsigned long data = get_work_data(work);
+	unsigned int cpu;
+
+	if (data >= PAGE_OFFSET)
+		return ((struct cpu_workqueue_struct *)data)->gcwq;
+
+	cpu = data >> WORK_STRUCT_FLAG_BITS;
+	if (cpu == NR_CPUS)
+		return NULL;
+
+	BUG_ON(cpu >= num_possible_cpus());
+	return get_gcwq(cpu);
 }
 
 /**
@@ -443,7 +483,7 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
 			unsigned int extra_flags)
 {
 	/* we own @work, set data and link */
-	set_wq_data(work, cwq, extra_flags);
+	set_work_cwq(work, cwq, extra_flags);
 
 	/*
 	 * Ensure that we get the right work->data if we see the
@@ -599,7 +639,7 @@ EXPORT_SYMBOL_GPL(queue_work_on);
 static void delayed_work_timer_fn(unsigned long __data)
 {
 	struct delayed_work *dwork = (struct delayed_work *)__data;
-	struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);
+	struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
 
 	__queue_work(smp_processor_id(), cwq->wq, &dwork->work);
 }
@@ -639,13 +679,19 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 	struct work_struct *work = &dwork->work;
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+		struct global_cwq *gcwq = get_work_gcwq(work);
+		unsigned int lcpu = gcwq ? gcwq->cpu : raw_smp_processor_id();
+
 		BUG_ON(timer_pending(timer));
 		BUG_ON(!list_empty(&work->entry));
 
 		timer_stats_timer_set_start_info(&dwork->timer);
-
-		/* This stores cwq for the moment, for the timer_fn */
-		set_wq_data(work, get_cwq(raw_smp_processor_id(), wq), 0);
+		/*
+		 * This stores cwq for the moment, for the timer_fn.
+		 * Note that the work's gcwq is preserved to allow
+		 * reentrance detection for delayed works.
+		 */
+		set_work_cwq(work, get_cwq(lcpu, wq), 0);
 		timer->expires = jiffies + delay;
 		timer->data = (unsigned long)dwork;
 		timer->function = delayed_work_timer_fn;
@@ -970,11 +1016,14 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
 	worker->current_work = work;
 	worker->current_cwq = cwq;
 	work_color = get_work_color(work);
+
+	BUG_ON(get_work_cwq(work) != cwq);
+	/* record the current cpu number in the work data and dequeue */
+	set_work_cpu(work, gcwq->cpu);
 	list_del_init(&work->entry);
 
 	spin_unlock_irq(&gcwq->lock);
 
-	BUG_ON(get_wq_data(work) != cwq);
 	work_clear_pending(work);
 	lock_map_acquire(&cwq->wq->lockdep_map);
 	lock_map_acquire(&lockdep_map);
@@ -1406,37 +1455,39 @@ EXPORT_SYMBOL_GPL(flush_workqueue);
 int flush_work(struct work_struct *work)
 {
 	struct worker *worker = NULL;
-	struct cpu_workqueue_struct *cwq;
 	struct global_cwq *gcwq;
+	struct cpu_workqueue_struct *cwq;
 	struct wq_barrier barr;
 
 	might_sleep();
-	cwq = get_wq_data(work);
-	if (!cwq)
+	gcwq = get_work_gcwq(work);
+	if (!gcwq)
 		return 0;
-	gcwq = cwq->gcwq;
-
-	lock_map_acquire(&cwq->wq->lockdep_map);
-	lock_map_release(&cwq->wq->lockdep_map);
 
 	spin_lock_irq(&gcwq->lock);
 	if (!list_empty(&work->entry)) {
 		/*
 		 * See the comment near try_to_grab_pending()->smp_rmb().
-		 * If it was re-queued under us we are not going to wait.
+		 * If it was re-queued to a different gcwq under us, we
+		 * are not going to wait.
 		 */
 		smp_rmb();
-		if (unlikely(cwq != get_wq_data(work)))
+		cwq = get_work_cwq(work);
+		if (unlikely(!cwq || gcwq != cwq->gcwq))
 			goto already_gone;
 	} else {
-		if (cwq->worker && cwq->worker->current_work == work)
-			worker = cwq->worker;
+		worker = find_worker_executing_work(gcwq, work);
 		if (!worker)
 			goto already_gone;
+		cwq = worker->current_cwq;
 	}
 
 	insert_wq_barrier(cwq, &barr, work, worker);
 	spin_unlock_irq(&gcwq->lock);
+
+	lock_map_acquire(&cwq->wq->lockdep_map);
+	lock_map_release(&cwq->wq->lockdep_map);
+
 	wait_for_completion(&barr.done);
 	destroy_work_on_stack(&barr.work);
 	return 1;
@@ -1453,7 +1504,6 @@ EXPORT_SYMBOL_GPL(flush_work);
 static int try_to_grab_pending(struct work_struct *work)
 {
 	struct global_cwq *gcwq;
-	struct cpu_workqueue_struct *cwq;
 	int ret = -1;
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
@@ -1463,24 +1513,23 @@ static int try_to_grab_pending(struct work_struct *work)
 	 * The queueing is in progress, or it is already queued. Try to
 	 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
 	 */
-
-	cwq = get_wq_data(work);
-	if (!cwq)
+	gcwq = get_work_gcwq(work);
+	if (!gcwq)
 		return ret;
-	gcwq = cwq->gcwq;
 
 	spin_lock_irq(&gcwq->lock);
 	if (!list_empty(&work->entry)) {
 		/*
-		 * This work is queued, but perhaps we locked the wrong cwq.
+		 * This work is queued, but perhaps we locked the wrong gcwq.
 		 * In that case we must see the new value after rmb(), see
 		 * insert_work()->wmb().
 		 */
 		smp_rmb();
-		if (cwq == get_wq_data(work)) {
+		if (gcwq == get_work_gcwq(work)) {
 			debug_work_deactivate(work);
 			list_del_init(&work->entry);
-			cwq_dec_nr_in_flight(cwq, get_work_color(work));
+			cwq_dec_nr_in_flight(get_work_cwq(work),
+					     get_work_color(work));
 			ret = 1;
 		}
 	}
@@ -1489,20 +1538,16 @@ static int try_to_grab_pending(struct work_struct *work)
 	return ret;
 }
 
-static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
-				struct work_struct *work)
+static void wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work)
 {
-	struct global_cwq *gcwq = cwq->gcwq;
 	struct wq_barrier barr;
 	struct worker *worker;
 
 	spin_lock_irq(&gcwq->lock);
 
-	worker = NULL;
-	if (unlikely(cwq->worker && cwq->worker->current_work == work)) {
-		worker = cwq->worker;
-		insert_wq_barrier(cwq, &barr, work, worker);
-	}
+	worker = find_worker_executing_work(gcwq, work);
+	if (unlikely(worker))
+		insert_wq_barrier(worker->current_cwq, &barr, work, worker);
 
 	spin_unlock_irq(&gcwq->lock);
 
@@ -1514,8 +1559,6 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
 
 static void wait_on_work(struct work_struct *work)
 {
-	struct cpu_workqueue_struct *cwq;
-	struct workqueue_struct *wq;
 	int cpu;
 
 	might_sleep();
@@ -1523,14 +1566,8 @@ static void wait_on_work(struct work_struct *work)
 	lock_map_acquire(&work->lockdep_map);
 	lock_map_release(&work->lockdep_map);
 
-	cwq = get_wq_data(work);
-	if (!cwq)
-		return;
-
-	wq = cwq->wq;
-
 	for_each_possible_cpu(cpu)
-		wait_on_cpu_work(get_cwq(cpu, wq), work);
+		wait_on_cpu_work(get_gcwq(cpu), work);
 }
 
 static int __cancel_work_timer(struct work_struct *work,
@@ -1545,7 +1582,7 @@ static int __cancel_work_timer(struct work_struct *work,
 		wait_on_work(work);
 	} while (unlikely(ret < 0));
 
-	clear_wq_data(work);
+	clear_work_data(work);
 	return ret;
 }
 
@@ -1647,7 +1684,7 @@ EXPORT_SYMBOL(schedule_delayed_work);
 void flush_delayed_work(struct delayed_work *dwork)
 {
 	if (del_timer_sync(&dwork->timer)) {
-		__queue_work(get_cpu(), get_wq_data(&dwork->work)->wq,
+		__queue_work(get_cpu(), get_work_cwq(&dwork->work)->wq,
 			     &dwork->work);
 		put_cpu();
 	}
@@ -2405,6 +2442,14 @@ void __init init_workqueues(void)
 	unsigned int cpu;
 	int i;
 
+	/*
+	 * The pointer part of work->data is either pointing to the
+	 * cwq or contains the cpu number the work ran last on.  Make
+	 * sure cpu number won't overflow into kernel pointer area so
+	 * that they can be distinguished.
+	 */
+	BUILD_BUG_ON(NR_CPUS << WORK_STRUCT_FLAG_BITS >= PAGE_OFFSET);
+
 	hotcpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE);
 
 	/* initialize gcwqs */
-- 
cgit v1.2.3-70-g09d2


From 18aa9effad4adb2c1efe123af4eb24fec9f59b30 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:13 +0200
Subject: workqueue: implement WQ_NON_REENTRANT

With gcwq managing all the workers and work->data pointing to the last
gcwq it was on, non-reentrance can be easily implemented by checking
whether the work is still running on the previous gcwq on queueing.
Implement it.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |  1 +
 kernel/workqueue.c        | 32 +++++++++++++++++++++++++++++---
 2 files changed, 30 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 0a7814131e6..07cf5e5f91c 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -225,6 +225,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 enum {
 	WQ_FREEZEABLE		= 1 << 0, /* freeze during suspend */
 	WQ_SINGLE_CPU		= 1 << 1, /* only single cpu at a time */
+	WQ_NON_REENTRANT	= 1 << 2, /* guarantee non-reentrance */
 };
 
 extern struct workqueue_struct *
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c68277c204a..bce1074bdec 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -534,11 +534,37 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 
 	debug_work_activate(work);
 
-	/* determine gcwq to use */
+	/*
+	 * Determine gcwq to use.  SINGLE_CPU is inherently
+	 * NON_REENTRANT, so test it first.
+	 */
 	if (!(wq->flags & WQ_SINGLE_CPU)) {
-		/* just use the requested cpu for multicpu workqueues */
+		struct global_cwq *last_gcwq;
+
+		/*
+		 * It's multi cpu.  If @wq is non-reentrant and @work
+		 * was previously on a different cpu, it might still
+		 * be running there, in which case the work needs to
+		 * be queued on that cpu to guarantee non-reentrance.
+		 */
 		gcwq = get_gcwq(cpu);
-		spin_lock_irqsave(&gcwq->lock, flags);
+		if (wq->flags & WQ_NON_REENTRANT &&
+		    (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) {
+			struct worker *worker;
+
+			spin_lock_irqsave(&last_gcwq->lock, flags);
+
+			worker = find_worker_executing_work(last_gcwq, work);
+
+			if (worker && worker->current_cwq->wq == wq)
+				gcwq = last_gcwq;
+			else {
+				/* meh... not running there, queue here */
+				spin_unlock_irqrestore(&last_gcwq->lock, flags);
+				spin_lock_irqsave(&gcwq->lock, flags);
+			}
+		} else
+			spin_lock_irqsave(&gcwq->lock, flags);
 	} else {
 		unsigned int req_cpu = cpu;
 
-- 
cgit v1.2.3-70-g09d2


From e22bee782b3b00bd4534ae9b1c5fb2e8e6573c5c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:14 +0200
Subject: workqueue: implement concurrency managed dynamic worker pool

Instead of creating a worker for each cwq and putting it into the
shared pool, manage per-cpu workers dynamically.

Works aren't supposed to be cpu cycle hogs and maintaining just enough
concurrency to prevent work processing from stalling due to lack of
processing context is optimal.  gcwq keeps the number of concurrent
active workers to the minimum but no less.  As long as there's one or
more running workers on the cpu, no new worker is scheduled so that
works can be processed in batch as much as possible, but when the last
running worker blocks, gcwq immediately schedules a new worker so that
the cpu doesn't sit idle while there are works to be processed.

gcwq always keeps at least a single idle worker around.  When a new
worker is necessary and the worker is the last idle one, the worker
assumes the role of "manager" and manages the worker pool -
ie. creates another worker.  Forward-progress is guaranteed by having
dedicated rescue workers for workqueues which may be necessary while
creating a new worker.  When the manager is having problems creating
a new worker, the mayday timer activates and rescue workers are
summoned to the cpu and execute works which might be necessary to
create new workers.

Trustee is expanded to serve the role of manager while a CPU is being
taken down and stays down.  As no new works are supposed to be queued
on a dead cpu, it just needs to drain all the existing ones.  Trustee
continues to try to create new workers and summon rescuers as long as
there are pending works.  If the CPU is brought back up while the
trustee is still trying to drain the gcwq from the previous offlining,
the trustee will kill all idle ones and tell workers which are still
busy to rebind to the cpu, and pass control over to gcwq which assumes
the manager role as necessary.

Concurrency managed worker pool reduces the number of workers
drastically.  Only workers which are necessary to keep the processing
going are created and kept.  Also, it reduces cache footprint by
avoiding unnecessarily switching contexts between different workers.

Please note that this patch does not increase max_active of any
workqueue.  All workqueues can still only process one work per cpu.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |   8 +-
 kernel/workqueue.c        | 936 +++++++++++++++++++++++++++++++++++++++++-----
 kernel/workqueue_sched.h  |  13 +-
 3 files changed, 841 insertions(+), 116 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 07cf5e5f91c..b8f4ec45c40 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -226,6 +226,7 @@ enum {
 	WQ_FREEZEABLE		= 1 << 0, /* freeze during suspend */
 	WQ_SINGLE_CPU		= 1 << 1, /* only single cpu at a time */
 	WQ_NON_REENTRANT	= 1 << 2, /* guarantee non-reentrance */
+	WQ_RESCUER		= 1 << 3, /* has an rescue worker */
 };
 
 extern struct workqueue_struct *
@@ -252,11 +253,12 @@ __create_workqueue_key(const char *name, unsigned int flags, int max_active,
 #endif
 
 #define create_workqueue(name)					\
-	__create_workqueue((name), 0, 1)
+	__create_workqueue((name), WQ_RESCUER, 1)
 #define create_freezeable_workqueue(name)			\
-	__create_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_CPU, 1)
+	__create_workqueue((name),				\
+			   WQ_FREEZEABLE | WQ_SINGLE_CPU | WQ_RESCUER, 1)
 #define create_singlethread_workqueue(name)			\
-	__create_workqueue((name), WQ_SINGLE_CPU, 1)
+	__create_workqueue((name), WQ_SINGLE_CPU | WQ_RESCUER, 1)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4c31fde092c..0ad46523b42 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -34,17 +34,25 @@
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
 #include <linux/idr.h>
-#include <linux/delay.h>
+
+#include "workqueue_sched.h"
 
 enum {
 	/* global_cwq flags */
+	GCWQ_MANAGE_WORKERS	= 1 << 0,	/* need to manage workers */
+	GCWQ_MANAGING_WORKERS	= 1 << 1,	/* managing workers */
+	GCWQ_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */
 	GCWQ_FREEZING		= 1 << 3,	/* freeze in progress */
 
 	/* worker flags */
 	WORKER_STARTED		= 1 << 0,	/* started */
 	WORKER_DIE		= 1 << 1,	/* die die die */
 	WORKER_IDLE		= 1 << 2,	/* is idle */
+	WORKER_PREP		= 1 << 3,	/* preparing to run works */
 	WORKER_ROGUE		= 1 << 4,	/* not bound to any cpu */
+	WORKER_REBIND		= 1 << 5,	/* mom is home, come back */
+
+	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_ROGUE | WORKER_REBIND,
 
 	/* gcwq->trustee_state */
 	TRUSTEE_START		= 0,		/* start */
@@ -57,7 +65,19 @@ enum {
 	BUSY_WORKER_HASH_SIZE	= 1 << BUSY_WORKER_HASH_ORDER,
 	BUSY_WORKER_HASH_MASK	= BUSY_WORKER_HASH_SIZE - 1,
 
+	MAX_IDLE_WORKERS_RATIO	= 4,		/* 1/4 of busy can be idle */
+	IDLE_WORKER_TIMEOUT	= 300 * HZ,	/* keep idle ones for 5 mins */
+
+	MAYDAY_INITIAL_TIMEOUT	= HZ / 100,	/* call for help after 10ms */
+	MAYDAY_INTERVAL		= HZ / 10,	/* and then every 100ms */
+	CREATE_COOLDOWN		= HZ,		/* time to breath after fail */
 	TRUSTEE_COOLDOWN	= HZ / 10,	/* for trustee draining */
+
+	/*
+	 * Rescue workers are used only on emergencies and shared by
+	 * all cpus.  Give -20.
+	 */
+	RESCUER_NICE_LEVEL	= -20,
 };
 
 /*
@@ -65,8 +85,16 @@ enum {
  *
  * I: Set during initialization and read-only afterwards.
  *
+ * P: Preemption protected.  Disabling preemption is enough and should
+ *    only be modified and accessed from the local cpu.
+ *
  * L: gcwq->lock protected.  Access with gcwq->lock held.
  *
+ * X: During normal operation, modification requires gcwq->lock and
+ *    should be done only from local cpu.  Either disabling preemption
+ *    on local cpu or grabbing gcwq->lock is enough for read access.
+ *    While trustee is in charge, it's identical to L.
+ *
  * F: wq->flush_mutex protected.
  *
  * W: workqueue_lock protected.
@@ -74,6 +102,10 @@ enum {
 
 struct global_cwq;
 
+/*
+ * The poor guys doing the actual heavy lifting.  All on-duty workers
+ * are either serving the manager role, on idle list or on busy hash.
+ */
 struct worker {
 	/* on idle list while idle, on busy hash table while busy */
 	union {
@@ -86,12 +118,17 @@ struct worker {
 	struct list_head	scheduled;	/* L: scheduled works */
 	struct task_struct	*task;		/* I: worker task */
 	struct global_cwq	*gcwq;		/* I: the associated gcwq */
-	unsigned int		flags;		/* L: flags */
+	/* 64 bytes boundary on 64bit, 32 on 32bit */
+	unsigned long		last_active;	/* L: last active timestamp */
+	unsigned int		flags;		/* X: flags */
 	int			id;		/* I: worker id */
+	struct work_struct	rebind_work;	/* L: rebind worker to cpu */
 };
 
 /*
- * Global per-cpu workqueue.
+ * Global per-cpu workqueue.  There's one and only one for each cpu
+ * and all works are queued and processed here regardless of their
+ * target workqueues.
  */
 struct global_cwq {
 	spinlock_t		lock;		/* the gcwq lock */
@@ -103,15 +140,19 @@ struct global_cwq {
 	int			nr_idle;	/* L: currently idle ones */
 
 	/* workers are chained either in the idle_list or busy_hash */
-	struct list_head	idle_list;	/* L: list of idle workers */
+	struct list_head	idle_list;	/* X: list of idle workers */
 	struct hlist_head	busy_hash[BUSY_WORKER_HASH_SIZE];
 						/* L: hash of busy workers */
 
+	struct timer_list	idle_timer;	/* L: worker idle timeout */
+	struct timer_list	mayday_timer;	/* L: SOS timer for dworkers */
+
 	struct ida		worker_ida;	/* L: for worker IDs */
 
 	struct task_struct	*trustee;	/* L: for gcwq shutdown */
 	unsigned int		trustee_state;	/* L: trustee state */
 	wait_queue_head_t	trustee_wait;	/* trustee wait */
+	struct worker		*first_idle;	/* L: first idle worker */
 } ____cacheline_aligned_in_smp;
 
 /*
@@ -121,7 +162,6 @@ struct global_cwq {
  */
 struct cpu_workqueue_struct {
 	struct global_cwq	*gcwq;		/* I: the associated gcwq */
-	struct worker		*worker;
 	struct workqueue_struct *wq;		/* I: the owning workqueue */
 	int			work_color;	/* L: current color */
 	int			flush_color;	/* L: flushing color */
@@ -160,6 +200,9 @@ struct workqueue_struct {
 
 	unsigned long		single_cpu;	/* cpu for single cpu wq */
 
+	cpumask_var_t		mayday_mask;	/* cpus requesting rescue */
+	struct worker		*rescuer;	/* I: rescue worker */
+
 	int			saved_max_active; /* I: saved cwq max_active */
 	const char		*name;		/* I: workqueue name */
 #ifdef CONFIG_LOCKDEP
@@ -286,7 +329,13 @@ static DEFINE_SPINLOCK(workqueue_lock);
 static LIST_HEAD(workqueues);
 static bool workqueue_freezing;		/* W: have wqs started freezing? */
 
+/*
+ * The almighty global cpu workqueues.  nr_running is the only field
+ * which is expected to be used frequently by other cpus via
+ * try_to_wake_up().  Put it in a separate cacheline.
+ */
 static DEFINE_PER_CPU(struct global_cwq, global_cwq);
+static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, gcwq_nr_running);
 
 static int worker_thread(void *__worker);
 
@@ -295,6 +344,11 @@ static struct global_cwq *get_gcwq(unsigned int cpu)
 	return &per_cpu(global_cwq, cpu);
 }
 
+static atomic_t *get_gcwq_nr_running(unsigned int cpu)
+{
+	return &per_cpu(gcwq_nr_running, cpu);
+}
+
 static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
 					    struct workqueue_struct *wq)
 {
@@ -385,6 +439,63 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
 	return get_gcwq(cpu);
 }
 
+/*
+ * Policy functions.  These define the policies on how the global
+ * worker pool is managed.  Unless noted otherwise, these functions
+ * assume that they're being called with gcwq->lock held.
+ */
+
+/*
+ * Need to wake up a worker?  Called from anything but currently
+ * running workers.
+ */
+static bool need_more_worker(struct global_cwq *gcwq)
+{
+	atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu);
+
+	return !list_empty(&gcwq->worklist) && !atomic_read(nr_running);
+}
+
+/* Can I start working?  Called from busy but !running workers. */
+static bool may_start_working(struct global_cwq *gcwq)
+{
+	return gcwq->nr_idle;
+}
+
+/* Do I need to keep working?  Called from currently running workers. */
+static bool keep_working(struct global_cwq *gcwq)
+{
+	atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu);
+
+	return !list_empty(&gcwq->worklist) && atomic_read(nr_running) <= 1;
+}
+
+/* Do we need a new worker?  Called from manager. */
+static bool need_to_create_worker(struct global_cwq *gcwq)
+{
+	return need_more_worker(gcwq) && !may_start_working(gcwq);
+}
+
+/* Do I need to be the manager? */
+static bool need_to_manage_workers(struct global_cwq *gcwq)
+{
+	return need_to_create_worker(gcwq) || gcwq->flags & GCWQ_MANAGE_WORKERS;
+}
+
+/* Do we have too many workers and should some go away? */
+static bool too_many_workers(struct global_cwq *gcwq)
+{
+	bool managing = gcwq->flags & GCWQ_MANAGING_WORKERS;
+	int nr_idle = gcwq->nr_idle + managing; /* manager is considered idle */
+	int nr_busy = gcwq->nr_workers - nr_idle;
+
+	return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
+}
+
+/*
+ * Wake up functions.
+ */
+
 /* Return the first worker.  Safe with preemption disabled */
 static struct worker *first_worker(struct global_cwq *gcwq)
 {
@@ -412,12 +523,77 @@ static void wake_up_worker(struct global_cwq *gcwq)
 }
 
 /**
- * worker_set_flags - set worker flags
+ * wq_worker_waking_up - a worker is waking up
+ * @task: task waking up
+ * @cpu: CPU @task is waking up to
+ *
+ * This function is called during try_to_wake_up() when a worker is
+ * being awoken.
+ *
+ * CONTEXT:
+ * spin_lock_irq(rq->lock)
+ */
+void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
+{
+	struct worker *worker = kthread_data(task);
+
+	if (likely(!(worker->flags & WORKER_NOT_RUNNING)))
+		atomic_inc(get_gcwq_nr_running(cpu));
+}
+
+/**
+ * wq_worker_sleeping - a worker is going to sleep
+ * @task: task going to sleep
+ * @cpu: CPU in question, must be the current CPU number
+ *
+ * This function is called during schedule() when a busy worker is
+ * going to sleep.  Worker on the same cpu can be woken up by
+ * returning pointer to its task.
+ *
+ * CONTEXT:
+ * spin_lock_irq(rq->lock)
+ *
+ * RETURNS:
+ * Worker task on @cpu to wake up, %NULL if none.
+ */
+struct task_struct *wq_worker_sleeping(struct task_struct *task,
+				       unsigned int cpu)
+{
+	struct worker *worker = kthread_data(task), *to_wakeup = NULL;
+	struct global_cwq *gcwq = get_gcwq(cpu);
+	atomic_t *nr_running = get_gcwq_nr_running(cpu);
+
+	if (unlikely(worker->flags & WORKER_NOT_RUNNING))
+		return NULL;
+
+	/* this can only happen on the local cpu */
+	BUG_ON(cpu != raw_smp_processor_id());
+
+	/*
+	 * The counterpart of the following dec_and_test, implied mb,
+	 * worklist not empty test sequence is in insert_work().
+	 * Please read comment there.
+	 *
+	 * NOT_RUNNING is clear.  This means that trustee is not in
+	 * charge and we're running on the local cpu w/ rq lock held
+	 * and preemption disabled, which in turn means that none else
+	 * could be manipulating idle_list, so dereferencing idle_list
+	 * without gcwq lock is safe.
+	 */
+	if (atomic_dec_and_test(nr_running) && !list_empty(&gcwq->worklist))
+		to_wakeup = first_worker(gcwq);
+	return to_wakeup ? to_wakeup->task : NULL;
+}
+
+/**
+ * worker_set_flags - set worker flags and adjust nr_running accordingly
  * @worker: worker to set flags for
  * @flags: flags to set
  * @wakeup: wakeup an idle worker if necessary
  *
- * Set @flags in @worker->flags.
+ * Set @flags in @worker->flags and adjust nr_running accordingly.  If
+ * nr_running becomes zero and @wakeup is %true, an idle worker is
+ * woken up.
  *
  * LOCKING:
  * spin_lock_irq(gcwq->lock).
@@ -425,22 +601,49 @@ static void wake_up_worker(struct global_cwq *gcwq)
 static inline void worker_set_flags(struct worker *worker, unsigned int flags,
 				    bool wakeup)
 {
+	struct global_cwq *gcwq = worker->gcwq;
+
+	/*
+	 * If transitioning into NOT_RUNNING, adjust nr_running and
+	 * wake up an idle worker as necessary if requested by
+	 * @wakeup.
+	 */
+	if ((flags & WORKER_NOT_RUNNING) &&
+	    !(worker->flags & WORKER_NOT_RUNNING)) {
+		atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu);
+
+		if (wakeup) {
+			if (atomic_dec_and_test(nr_running) &&
+			    !list_empty(&gcwq->worklist))
+				wake_up_worker(gcwq);
+		} else
+			atomic_dec(nr_running);
+	}
+
 	worker->flags |= flags;
 }
 
 /**
- * worker_clr_flags - clear worker flags
+ * worker_clr_flags - clear worker flags and adjust nr_running accordingly
  * @worker: worker to set flags for
  * @flags: flags to clear
  *
- * Clear @flags in @worker->flags.
+ * Clear @flags in @worker->flags and adjust nr_running accordingly.
  *
  * LOCKING:
  * spin_lock_irq(gcwq->lock).
  */
 static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
 {
+	struct global_cwq *gcwq = worker->gcwq;
+	unsigned int oflags = worker->flags;
+
 	worker->flags &= ~flags;
+
+	/* if transitioning out of NOT_RUNNING, increment nr_running */
+	if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
+		if (!(worker->flags & WORKER_NOT_RUNNING))
+			atomic_inc(get_gcwq_nr_running(gcwq->cpu));
 }
 
 /**
@@ -540,6 +743,8 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
 			struct work_struct *work, struct list_head *head,
 			unsigned int extra_flags)
 {
+	struct global_cwq *gcwq = cwq->gcwq;
+
 	/* we own @work, set data and link */
 	set_work_cwq(work, cwq, extra_flags);
 
@@ -550,7 +755,16 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
 	smp_wmb();
 
 	list_add_tail(&work->entry, head);
-	wake_up_worker(cwq->gcwq);
+
+	/*
+	 * Ensure either worker_sched_deactivated() sees the above
+	 * list_add_tail() or we see zero nr_running to avoid workers
+	 * lying around lazily while there are works to be processed.
+	 */
+	smp_mb();
+
+	if (!atomic_read(get_gcwq_nr_running(gcwq->cpu)))
+		wake_up_worker(gcwq);
 }
 
 /**
@@ -810,11 +1024,16 @@ static void worker_enter_idle(struct worker *worker)
 
 	worker_set_flags(worker, WORKER_IDLE, false);
 	gcwq->nr_idle++;
+	worker->last_active = jiffies;
 
 	/* idle_list is LIFO */
 	list_add(&worker->entry, &gcwq->idle_list);
 
-	if (unlikely(worker->flags & WORKER_ROGUE))
+	if (likely(!(worker->flags & WORKER_ROGUE))) {
+		if (too_many_workers(gcwq) && !timer_pending(&gcwq->idle_timer))
+			mod_timer(&gcwq->idle_timer,
+				  jiffies + IDLE_WORKER_TIMEOUT);
+	} else
 		wake_up_all(&gcwq->trustee_wait);
 }
 
@@ -837,6 +1056,81 @@ static void worker_leave_idle(struct worker *worker)
 	list_del_init(&worker->entry);
 }
 
+/**
+ * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock gcwq
+ * @worker: self (the calling task; current's cpumask is what gets checked)
+ *
+ * Works which are scheduled while the cpu is online must at least be
+ * scheduled to a worker which is bound to the cpu so that if they are
+ * flushed from cpu callbacks while cpu is going down, they are
+ * guaranteed to execute on the cpu.
+ *
+ * This function is to be used by rogue workers and rescuers to bind
+ * themselves to the target cpu and may race with cpu going down or
+ * coming online.  kthread_bind() can't be used because it may put the
+ * worker on an already dead cpu and set_cpus_allowed_ptr() can't be
+ * used verbatim as it's best effort and blocking and gcwq may be
+ * [dis]associated in the meantime.
+ *
+ * This function tries set_cpus_allowed_ptr(), locks gcwq and verifies
+ * the binding against GCWQ_DISASSOCIATED which is set during
+ * CPU_DYING and cleared during CPU_ONLINE, so if the worker enters
+ * idle state or fetches works without dropping lock, it can guarantee
+ * the scheduling requirement described in the first paragraph.
+ *
+ * CONTEXT:
+ * Might sleep.  Called without any lock but returns with gcwq->lock
+ * held.
+ *
+ * RETURNS:
+ * %true if the associated gcwq is online (@worker is successfully
+ * bound), %false if offline.
+ */
+static bool worker_maybe_bind_and_lock(struct worker *worker)
+{
+	struct global_cwq *gcwq = worker->gcwq;
+	struct task_struct *task = worker->task;
+
+	while (true) {
+		/*
+		 * The following call may fail, succeed or succeed
+		 * without actually migrating the task to the cpu if
+		 * it races with cpu hotunplug operation.  Verify
+		 * against GCWQ_DISASSOCIATED.
+		 */
+		set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu));
+
+		spin_lock_irq(&gcwq->lock);
+		if (gcwq->flags & GCWQ_DISASSOCIATED)
+			return false;
+		if (task_cpu(task) == gcwq->cpu &&
+		    cpumask_equal(&current->cpus_allowed,
+				  get_cpu_mask(gcwq->cpu)))
+			return true;
+		spin_unlock_irq(&gcwq->lock);
+
+		/* CPU has come up in between, drop the lock and retry migration */
+		cpu_relax();
+	}
+}
+
+/*
+ * Function for worker->rebind_work used to rebind rogue busy workers
+ * to the associated cpu which is coming back online.  This is
+ * scheduled by cpu up but can race with other cpu hotplug operations
+ * and may be executed twice without intervening cpu down.
+ */
+static void worker_rebind_fn(struct work_struct *work)
+{
+	struct worker *worker = container_of(work, struct worker, rebind_work);
+	struct global_cwq *gcwq = worker->gcwq;
+
+	if (worker_maybe_bind_and_lock(worker))
+		worker_clr_flags(worker, WORKER_REBIND);
+	/* the call above returns with gcwq->lock held whether or not it bound */
+	spin_unlock_irq(&gcwq->lock);
+}
+
 static struct worker *alloc_worker(void)
 {
 	struct worker *worker;
@@ -845,6 +1139,9 @@ static struct worker *alloc_worker(void)
 	if (worker) {
 		INIT_LIST_HEAD(&worker->entry);
 		INIT_LIST_HEAD(&worker->scheduled);
+		INIT_WORK(&worker->rebind_work, worker_rebind_fn);
+		/* on creation a worker is in !idle && prep state */
+		worker->flags = WORKER_PREP;
 	}
 	return worker;
 }
@@ -963,6 +1260,220 @@ static void destroy_worker(struct worker *worker)
 	ida_remove(&gcwq->worker_ida, id);
 }
 
+static void idle_worker_timeout(unsigned long __gcwq)
+{
+	struct global_cwq *gcwq = (void *)__gcwq;
+
+	spin_lock_irq(&gcwq->lock);
+
+	if (too_many_workers(gcwq)) {
+		struct worker *worker;
+		unsigned long expires;
+
+		/* idle_list is LIFO, so the tail entry has been idle the longest */
+		worker = list_entry(gcwq->idle_list.prev, struct worker, entry);
+		expires = worker->last_active + IDLE_WORKER_TIMEOUT;
+
+		if (time_before(jiffies, expires))
+			mod_timer(&gcwq->idle_timer, expires);
+		else {
+			/* idle for too long: request management, wake a worker */
+			gcwq->flags |= GCWQ_MANAGE_WORKERS;
+			wake_up_worker(gcwq);
+		}
+	}
+
+	spin_unlock_irq(&gcwq->lock);
+}
+
+static bool send_mayday(struct work_struct *work)
+{
+	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
+	struct workqueue_struct *wq = cwq->wq;
+
+	if (!(wq->flags & WQ_RESCUER))
+		return false;
+
+	/* mayday mayday mayday - wake the rescuer unless already requested */
+	if (!cpumask_test_and_set_cpu(cwq->gcwq->cpu, wq->mayday_mask))
+		wake_up_process(wq->rescuer->task);
+	return true;
+}
+
+static void gcwq_mayday_timeout(unsigned long __gcwq)
+{
+	struct global_cwq *gcwq = (void *)__gcwq;
+	struct work_struct *work;
+
+	spin_lock_irq(&gcwq->lock);
+
+	if (need_to_create_worker(gcwq)) {
+		/*
+		 * We've been trying to create a new worker but
+		 * haven't been successful.  We might be hitting an
+		 * allocation deadlock.  Send distress signals to
+		 * rescuers.
+		 */
+		list_for_each_entry(work, &gcwq->worklist, entry)
+			send_mayday(work);
+	}
+
+	spin_unlock_irq(&gcwq->lock);
+	/* re-arm unconditionally: fires every MAYDAY_INTERVAL until deleted */
+	mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INTERVAL);
+}
+
+/**
+ * maybe_create_worker - create a new worker if necessary
+ * @gcwq: gcwq to create a new worker for
+ *
+ * Create a new worker for @gcwq if necessary.  @gcwq is guaranteed to
+ * have at least one idle worker on return from this function.  If
+ * creating a new worker takes longer than MAYDAY_INITIAL_TIMEOUT,
+ * mayday is sent to all rescuers with works scheduled on @gcwq, and
+ * again every MAYDAY_INTERVAL, to resolve possible allocation deadlock.
+ *
+ * On return, need_to_create_worker() is guaranteed to be false and
+ * may_start_working() true.
+ *
+ * LOCKING:
+ * spin_lock_irq(gcwq->lock) which may be released and regrabbed
+ * multiple times.  Does GFP_KERNEL allocations.  Called only from
+ * manager.
+ *
+ * RETURNS:
+ * false if no action was taken and gcwq->lock stayed locked, true
+ * otherwise.
+ */
+static bool maybe_create_worker(struct global_cwq *gcwq)
+{
+	if (!need_to_create_worker(gcwq))
+		return false;
+restart:
+	/* drop gcwq->lock exactly once; the whole retry loop runs unlocked */
+	spin_unlock_irq(&gcwq->lock);
+
+	/* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
+	mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
+
+	while (true) {
+		struct worker *worker;
+
+		worker = create_worker(gcwq, true);
+		if (worker) {
+			del_timer_sync(&gcwq->mayday_timer);
+			spin_lock_irq(&gcwq->lock);
+			start_worker(worker);
+			BUG_ON(need_to_create_worker(gcwq));
+			return true;
+		}
+
+		/* unlocked peeks only; rechecked under gcwq->lock below */
+		if (!need_to_create_worker(gcwq))
+			break;
+
+		__set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(CREATE_COOLDOWN);
+
+		if (!need_to_create_worker(gcwq))
+			break;
+	}
+
+	del_timer_sync(&gcwq->mayday_timer);
+	spin_lock_irq(&gcwq->lock);
+	if (need_to_create_worker(gcwq))
+		goto restart;
+	return true;
+}
+
+/**
+ * maybe_destroy_workers - destroy workers which have been idle for a while
+ * @gcwq: gcwq to destroy workers for
+ *
+ * Destroy @gcwq workers which have been idle for longer than
+ * IDLE_WORKER_TIMEOUT.
+ *
+ * LOCKING:
+ * spin_lock_irq(gcwq->lock) which may be released and regrabbed
+ * multiple times.  Called only from manager.
+ *
+ * RETURNS:
+ * false if no action was taken and gcwq->lock stayed locked, true
+ * otherwise.
+ */
+static bool maybe_destroy_workers(struct global_cwq *gcwq)
+{
+	bool ret = false;
+
+	while (too_many_workers(gcwq)) {
+		struct worker *worker;
+		unsigned long expires;
+		/* idle_list is LIFO, the tail entry has been idle the longest */
+		worker = list_entry(gcwq->idle_list.prev, struct worker, entry);
+		expires = worker->last_active + IDLE_WORKER_TIMEOUT;
+
+		if (time_before(jiffies, expires)) {
+			mod_timer(&gcwq->idle_timer, expires);
+			break;
+		}
+
+		destroy_worker(worker);
+		ret = true;
+	}
+
+	return ret;
+}
+
+/**
+ * manage_workers - manage worker pool
+ * @worker: self
+ *
+ * Assume the manager role and manage gcwq worker pool @worker belongs
+ * to.  At any given time, there can be only zero or one manager per
+ * gcwq.  The exclusion is handled automatically by this function.
+ *
+ * The caller can safely start processing works on false return.  On
+ * true return, it's guaranteed that need_to_create_worker() is false
+ * and may_start_working() is true.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock) which may be released and regrabbed
+ * multiple times.  Does GFP_KERNEL allocations.
+ *
+ * RETURNS:
+ * false if no action was taken and gcwq->lock stayed locked, true if
+ * some action was taken.
+ */
+static bool manage_workers(struct worker *worker)
+{
+	struct global_cwq *gcwq = worker->gcwq;
+	bool ret = false;
+	/* someone else already holds the manager role, nothing to do */
+	if (gcwq->flags & GCWQ_MANAGING_WORKERS)
+		return ret;
+	/* consume the pending request and claim the manager role */
+	gcwq->flags &= ~GCWQ_MANAGE_WORKERS;
+	gcwq->flags |= GCWQ_MANAGING_WORKERS;
+
+	/*
+	 * Destroy and then create so that may_start_working() is true
+	 * on return.
+	 */
+	ret |= maybe_destroy_workers(gcwq);
+	ret |= maybe_create_worker(gcwq);
+
+	gcwq->flags &= ~GCWQ_MANAGING_WORKERS;
+
+	/*
+	 * The trustee might be waiting to take over the manager
+	 * position, tell it we're done.
+	 */
+	if (unlikely(gcwq->trustee))
+		wake_up_all(&gcwq->trustee_wait);
+
+	return ret;
+}
+
 /**
  * move_linked_works - move linked works to a list
  * @work: start of series of works to be scheduled
@@ -1169,24 +1680,39 @@ static void process_scheduled_works(struct worker *worker)
  * worker_thread - the worker thread function
  * @__worker: self
  *
- * The cwq worker thread function.
+ * The gcwq worker thread function.  There's a single dynamic pool of
+ * these per each cpu.  These workers process all works regardless of
+ * their specific target workqueue.  The only exception is works which
+ * belong to workqueues with a rescuer which will be explained in
+ * rescuer_thread().
  */
 static int worker_thread(void *__worker)
 {
 	struct worker *worker = __worker;
 	struct global_cwq *gcwq = worker->gcwq;
 
+	/* tell the scheduler that this is a workqueue worker */
+	worker->task->flags |= PF_WQ_WORKER;
 woke_up:
 	spin_lock_irq(&gcwq->lock);
 
 	/* DIE can be set only while we're idle, checking here is enough */
 	if (worker->flags & WORKER_DIE) {
 		spin_unlock_irq(&gcwq->lock);
+		worker->task->flags &= ~PF_WQ_WORKER;
 		return 0;
 	}
 
 	worker_leave_idle(worker);
 recheck:
+	/* no more worker necessary? */
+	if (!need_more_worker(gcwq))
+		goto sleep;
+
+	/* do we need to manage? */
+	if (unlikely(!may_start_working(gcwq)) && manage_workers(worker))
+		goto recheck;
+
 	/*
 	 * ->scheduled list can only be filled while a worker is
 	 * preparing to process a work or actually processing it.
@@ -1194,27 +1720,18 @@ recheck:
 	 */
 	BUG_ON(!list_empty(&worker->scheduled));
 
-	while (!list_empty(&gcwq->worklist)) {
+	/*
+	 * When control reaches this point, we're guaranteed to have
+	 * at least one idle worker or that someone else has already
+	 * assumed the manager role.
+	 */
+	worker_clr_flags(worker, WORKER_PREP);
+
+	do {
 		struct work_struct *work =
 			list_first_entry(&gcwq->worklist,
 					 struct work_struct, entry);
 
-		/*
-		 * The following is a rather inefficient way to close
-		 * race window against cpu hotplug operations.  Will
-		 * be replaced soon.
-		 */
-		if (unlikely(!(worker->flags & WORKER_ROGUE) &&
-			     !cpumask_equal(&worker->task->cpus_allowed,
-					    get_cpu_mask(gcwq->cpu)))) {
-			spin_unlock_irq(&gcwq->lock);
-			set_cpus_allowed_ptr(worker->task,
-					     get_cpu_mask(gcwq->cpu));
-			cpu_relax();
-			spin_lock_irq(&gcwq->lock);
-			goto recheck;
-		}
-
 		if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
 			/* optimization path, not strictly necessary */
 			process_one_work(worker, work);
@@ -1224,13 +1741,19 @@ recheck:
 			move_linked_works(work, &worker->scheduled, NULL);
 			process_scheduled_works(worker);
 		}
-	}
+	} while (keep_working(gcwq));
+
+	worker_set_flags(worker, WORKER_PREP, false);
 
+	if (unlikely(need_to_manage_workers(gcwq)) && manage_workers(worker))
+		goto recheck;
+sleep:
 	/*
-	 * gcwq->lock is held and there's no work to process, sleep.
-	 * Workers are woken up only while holding gcwq->lock, so
-	 * setting the current state before releasing gcwq->lock is
-	 * enough to prevent losing any event.
+	 * gcwq->lock is held and there's no work to process and no
+	 * need to manage, sleep.  Workers are woken up only while
+	 * holding gcwq->lock or from local cpu, so setting the
+	 * current state before releasing gcwq->lock is enough to
+	 * prevent losing any event.
 	 */
 	worker_enter_idle(worker);
 	__set_current_state(TASK_INTERRUPTIBLE);
@@ -1239,6 +1762,68 @@ recheck:
 	goto woke_up;
 }
 
+/**
+ * rescuer_thread - the rescuer thread function
+ * @__wq: the associated workqueue
+ *
+ * Workqueue rescuer thread function.  There's one rescuer for each
+ * workqueue which has WQ_RESCUER set.
+ *
+ * Regular work processing on a gcwq may block trying to create a new
+ * worker which uses GFP_KERNEL allocation which has slight chance of
+ * developing into deadlock if some works currently on the same queue
+ * need to be processed to satisfy the GFP_KERNEL allocation.  This is
+ * the problem rescuer solves.
+ *
+ * When such condition is possible, the gcwq summons rescuers of all
+ * workqueues which have works queued on the gcwq and let them process
+ * those works so that forward progress can be guaranteed.
+ *
+ * This should happen rarely.
+ */
+static int rescuer_thread(void *__wq)
+{
+	struct workqueue_struct *wq = __wq;
+	struct worker *rescuer = wq->rescuer;
+	struct list_head *scheduled = &rescuer->scheduled;
+	unsigned int cpu;
+
+	set_user_nice(current, RESCUER_NICE_LEVEL);
+repeat:
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	if (kthread_should_stop())
+		return 0;
+
+	for_each_cpu(cpu, wq->mayday_mask) {
+		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+		struct global_cwq *gcwq = cwq->gcwq;
+		struct work_struct *work, *n;
+
+		__set_current_state(TASK_RUNNING);
+		cpumask_clear_cpu(cpu, wq->mayday_mask);
+
+		/* try to bind to @cpu; gcwq->lock is held on return */
+		rescuer->gcwq = gcwq;
+		worker_maybe_bind_and_lock(rescuer);
+
+		/*
+		 * Slurp in all works issued via this workqueue and
+		 * process'em.
+		 */
+		BUG_ON(!list_empty(&rescuer->scheduled));
+		list_for_each_entry_safe(work, n, &gcwq->worklist, entry)
+			if (get_work_cwq(work) == cwq)
+				move_linked_works(work, scheduled, &n);
+
+		process_scheduled_works(rescuer);
+		spin_unlock_irq(&gcwq->lock);
+	}
+	/* sleep until send_mayday() or kthread_stop() wakes us up */
+	schedule();
+	goto repeat;
+}
+
 struct wq_barrier {
 	struct work_struct	work;
 	struct completion	done;
@@ -1998,7 +2583,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 						const char *lock_name)
 {
 	struct workqueue_struct *wq;
-	bool failed = false;
 	unsigned int cpu;
 
 	max_active = clamp_val(max_active, 1, INT_MAX);
@@ -2023,13 +2607,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
 	INIT_LIST_HEAD(&wq->list);
 
-	cpu_maps_update_begin();
-	/*
-	 * We must initialize cwqs for each possible cpu even if we
-	 * are going to call destroy_workqueue() finally. Otherwise
-	 * cpu_up() can hit the uninitialized cwq once we drop the
-	 * lock.
-	 */
 	for_each_possible_cpu(cpu) {
 		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
 		struct global_cwq *gcwq = get_gcwq(cpu);
@@ -2040,14 +2617,25 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 		cwq->flush_color = -1;
 		cwq->max_active = max_active;
 		INIT_LIST_HEAD(&cwq->delayed_works);
+	}
 
-		if (failed)
-			continue;
-		cwq->worker = create_worker(gcwq, cpu_online(cpu));
-		if (cwq->worker)
-			start_worker(cwq->worker);
-		else
-			failed = true;
+	if (flags & WQ_RESCUER) {
+		struct worker *rescuer;
+
+		if (!alloc_cpumask_var(&wq->mayday_mask, GFP_KERNEL))
+			goto err;
+
+		wq->rescuer = rescuer = alloc_worker();
+		if (!rescuer)
+			goto err;
+
+		rescuer->task = kthread_create(rescuer_thread, wq, "%s", name);
+		if (IS_ERR(rescuer->task))
+			goto err;
+
+		wq->rescuer = rescuer;
+		rescuer->task->flags |= PF_THREAD_BOUND;
+		wake_up_process(rescuer->task);
 	}
 
 	/*
@@ -2065,16 +2653,12 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 
 	spin_unlock(&workqueue_lock);
 
-	cpu_maps_update_done();
-
-	if (failed) {
-		destroy_workqueue(wq);
-		wq = NULL;
-	}
 	return wq;
 err:
 	if (wq) {
 		free_cwqs(wq->cpu_wq);
+		free_cpumask_var(wq->mayday_mask);
+		kfree(wq->rescuer);
 		kfree(wq);
 	}
 	return NULL;
@@ -2097,42 +2681,26 @@ void destroy_workqueue(struct workqueue_struct *wq)
 	 * wq list is used to freeze wq, remove from list after
 	 * flushing is complete in case freeze races us.
 	 */
-	cpu_maps_update_begin();
 	spin_lock(&workqueue_lock);
 	list_del(&wq->list);
 	spin_unlock(&workqueue_lock);
-	cpu_maps_update_done();
 
+	/* sanity check */
 	for_each_possible_cpu(cpu) {
 		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
-		struct global_cwq *gcwq = cwq->gcwq;
 		int i;
 
-		if (cwq->worker) {
-		retry:
-			spin_lock_irq(&gcwq->lock);
-			/*
-			 * Worker can only be destroyed while idle.
-			 * Wait till it becomes idle.  This is ugly
-			 * and prone to starvation.  It will go away
-			 * once dynamic worker pool is implemented.
-			 */
-			if (!(cwq->worker->flags & WORKER_IDLE)) {
-				spin_unlock_irq(&gcwq->lock);
-				msleep(100);
-				goto retry;
-			}
-			destroy_worker(cwq->worker);
-			cwq->worker = NULL;
-			spin_unlock_irq(&gcwq->lock);
-		}
-
 		for (i = 0; i < WORK_NR_COLORS; i++)
 			BUG_ON(cwq->nr_in_flight[i]);
 		BUG_ON(cwq->nr_active);
 		BUG_ON(!list_empty(&cwq->delayed_works));
 	}
 
+	if (wq->flags & WQ_RESCUER) {
+		kthread_stop(wq->rescuer->task);
+		free_cpumask_var(wq->mayday_mask);
+	}
+
 	free_cwqs(wq->cpu_wq);
 	kfree(wq);
 }
@@ -2141,10 +2709,18 @@ EXPORT_SYMBOL_GPL(destroy_workqueue);
 /*
  * CPU hotplug.
  *
- * CPU hotplug is implemented by allowing cwqs to be detached from
- * CPU, running with unbound workers and allowing them to be
- * reattached later if the cpu comes back online.  A separate thread
- * is created to govern cwqs in such state and is called the trustee.
+ * There are two challenges in supporting CPU hotplug.  Firstly, there
+ * are a lot of assumptions on strong associations among work, cwq and
+ * gcwq which make migrating pending and scheduled works very
+ * difficult to implement without impacting hot paths.  Secondly,
+ * gcwqs serve mix of short, long and very long running works making
+ * blocked draining impractical.
+ *
+ * This is solved by allowing a gcwq to be detached from CPU, running
+ * it with unbound (rogue) workers and allowing it to be reattached
+ * later if the cpu comes back online.  A separate thread is created
+ * to govern a gcwq in such state and is called the trustee of the
+ * gcwq.
  *
  * Trustee states and their descriptions.
  *
@@ -2152,11 +2728,12 @@ EXPORT_SYMBOL_GPL(destroy_workqueue);
  *		new trustee is started with this state.
  *
  * IN_CHARGE	Once started, trustee will enter this state after
- *		making all existing workers rogue.  DOWN_PREPARE waits
- *		for trustee to enter this state.  After reaching
- *		IN_CHARGE, trustee tries to execute the pending
- *		worklist until it's empty and the state is set to
- *		BUTCHER, or the state is set to RELEASE.
+ *		assuming the manager role and making all existing
+ *		workers rogue.  DOWN_PREPARE waits for trustee to
+ *		enter this state.  After reaching IN_CHARGE, trustee
+ *		tries to execute the pending worklist until it's empty
+ *		and the state is set to BUTCHER, or the state is set
+ *		to RELEASE.
  *
  * BUTCHER	Command state which is set by the cpu callback after
  *		the cpu has went down.  Once this state is set trustee
@@ -2167,7 +2744,9 @@ EXPORT_SYMBOL_GPL(destroy_workqueue);
  * RELEASE	Command state which is set by the cpu callback if the
  *		cpu down has been canceled or it has come online
  *		again.  After recognizing this state, trustee stops
- *		trying to drain or butcher and transits to DONE.
+ *		trying to drain or butcher and clears ROGUE, rebinds
+ *		all remaining workers back to the cpu and releases
+ *		manager role.
  *
  * DONE		Trustee will enter this state after BUTCHER or RELEASE
  *		is complete.
@@ -2233,17 +2812,24 @@ static int __cpuinit trustee_thread(void *__gcwq)
 {
 	struct global_cwq *gcwq = __gcwq;
 	struct worker *worker;
+	struct work_struct *work;
 	struct hlist_node *pos;
+	long rc;
 	int i;
 
 	BUG_ON(gcwq->cpu != smp_processor_id());
 
 	spin_lock_irq(&gcwq->lock);
 	/*
-	 * Make all workers rogue.  Trustee must be bound to the
-	 * target cpu and can't be cancelled.
+	 * Claim the manager position and make all workers rogue.
+	 * Trustee must be bound to the target cpu and can't be
+	 * cancelled.
 	 */
 	BUG_ON(gcwq->cpu != smp_processor_id());
+	rc = trustee_wait_event(!(gcwq->flags & GCWQ_MANAGING_WORKERS));
+	BUG_ON(rc < 0);
+
+	gcwq->flags |= GCWQ_MANAGING_WORKERS;
 
 	list_for_each_entry(worker, &gcwq->idle_list, entry)
 		worker_set_flags(worker, WORKER_ROGUE, false);
@@ -2251,6 +2837,28 @@ static int __cpuinit trustee_thread(void *__gcwq)
 	for_each_busy_worker(worker, i, pos, gcwq)
 		worker_set_flags(worker, WORKER_ROGUE, false);
 
+	/*
+	 * Call schedule() so that we cross rq->lock and thus can
+	 * guarantee sched callbacks see the rogue flag.  This is
+	 * necessary as scheduler callbacks may be invoked from other
+	 * cpus.
+	 */
+	spin_unlock_irq(&gcwq->lock);
+	schedule();
+	spin_lock_irq(&gcwq->lock);
+
+	/*
+	 * Sched callbacks are disabled now.  gcwq->nr_running should
+	 * be zero and will stay that way, making need_more_worker()
+	 * and keep_working() always return true as long as the
+	 * worklist is not empty.
+	 */
+	WARN_ON_ONCE(atomic_read(get_gcwq_nr_running(gcwq->cpu)) != 0);
+
+	spin_unlock_irq(&gcwq->lock);
+	del_timer_sync(&gcwq->idle_timer);
+	spin_lock_irq(&gcwq->lock);
+
 	/*
 	 * We're now in charge.  Notify and proceed to drain.  We need
 	 * to keep the gcwq running during the whole CPU down
@@ -2263,18 +2871,90 @@ static int __cpuinit trustee_thread(void *__gcwq)
 	/*
 	 * The original cpu is in the process of dying and may go away
 	 * anytime now.  When that happens, we and all workers would
-	 * be migrated to other cpus.  Try draining any left work.
-	 * Note that if the gcwq is frozen, there may be frozen works
-	 * in freezeable cwqs.  Don't declare completion while frozen.
+	 * be migrated to other cpus.  Try draining any left work.  We
+	 * want to get it over with ASAP - spam rescuers, wake up as
+	 * many idlers as necessary and create new ones till the
+	 * worklist is empty.  Note that if the gcwq is frozen, there
+	 * may be frozen works in freezeable cwqs.  Don't declare
+	 * completion while frozen.
 	 */
 	while (gcwq->nr_workers != gcwq->nr_idle ||
 	       gcwq->flags & GCWQ_FREEZING ||
 	       gcwq->trustee_state == TRUSTEE_IN_CHARGE) {
+		int nr_works = 0;
+
+		list_for_each_entry(work, &gcwq->worklist, entry) {
+			send_mayday(work);
+			nr_works++;
+		}
+
+		list_for_each_entry(worker, &gcwq->idle_list, entry) {
+			if (!nr_works--)
+				break;
+			wake_up_process(worker->task);
+		}
+
+		if (need_to_create_worker(gcwq)) {
+			spin_unlock_irq(&gcwq->lock);
+			worker = create_worker(gcwq, false);
+			spin_lock_irq(&gcwq->lock);
+			if (worker) {
+				worker_set_flags(worker, WORKER_ROGUE, false);
+				start_worker(worker);
+			}
+		}
+
 		/* give a breather */
 		if (trustee_wait_event_timeout(false, TRUSTEE_COOLDOWN) < 0)
 			break;
 	}
 
+	/*
+	 * Either all works have been scheduled and cpu is down, or
+	 * cpu down has already been canceled.  Wait for and butcher
+	 * all workers till we're canceled.
+	 */
+	do {
+		rc = trustee_wait_event(!list_empty(&gcwq->idle_list));
+		while (!list_empty(&gcwq->idle_list))
+			destroy_worker(list_first_entry(&gcwq->idle_list,
+							struct worker, entry));
+	} while (gcwq->nr_workers && rc >= 0);
+
+	/*
+	 * At this point, either draining has completed and no worker
+	 * is left, or cpu down has been canceled or the cpu is being
+	 * brought back up.  There shouldn't be any idle one left.
+	 * Tell the remaining busy ones to rebind once it finishes the
+	 * currently scheduled works by scheduling the rebind_work.
+	 */
+	WARN_ON(!list_empty(&gcwq->idle_list));
+
+	for_each_busy_worker(worker, i, pos, gcwq) {
+		struct work_struct *rebind_work = &worker->rebind_work;
+
+		/*
+		 * Rebind_work may race with future cpu hotplug
+		 * operations.  Use a separate flag to mark that
+		 * rebinding is scheduled.
+		 */
+		worker_set_flags(worker, WORKER_REBIND, false);
+		worker_clr_flags(worker, WORKER_ROGUE);
+
+		/* queue rebind_work, wq doesn't matter, use the default one */
+		if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
+				     work_data_bits(rebind_work)))
+			continue;
+
+		debug_work_activate(rebind_work);
+		insert_work(get_cwq(gcwq->cpu, keventd_wq), rebind_work,
+			    worker->scheduled.next,
+			    work_color_to_flags(WORK_NO_COLOR));
+	}
+
+	/* relinquish manager role */
+	gcwq->flags &= ~GCWQ_MANAGING_WORKERS;
+
 	/* notify completion */
 	gcwq->trustee = NULL;
 	gcwq->trustee_state = TRUSTEE_DONE;
@@ -2313,10 +2993,8 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 	unsigned int cpu = (unsigned long)hcpu;
 	struct global_cwq *gcwq = get_gcwq(cpu);
 	struct task_struct *new_trustee = NULL;
-	struct worker *worker;
-	struct hlist_node *pos;
+	struct worker *uninitialized_var(new_worker);
 	unsigned long flags;
-	int i;
 
 	action &= ~CPU_TASKS_FROZEN;
 
@@ -2327,6 +3005,15 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 		if (IS_ERR(new_trustee))
 			return notifier_from_errno(PTR_ERR(new_trustee));
 		kthread_bind(new_trustee, cpu);
+		/* fall through */
+	case CPU_UP_PREPARE:
+		BUG_ON(gcwq->first_idle);
+		new_worker = create_worker(gcwq, false);
+		if (!new_worker) {
+			if (new_trustee)
+				kthread_stop(new_trustee);
+			return NOTIFY_BAD;
+		}
 	}
 
 	/* some are called w/ irq disabled, don't disturb irq status */
@@ -2340,26 +3027,50 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 		gcwq->trustee_state = TRUSTEE_START;
 		wake_up_process(gcwq->trustee);
 		wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE);
+		/* fall through */
+	case CPU_UP_PREPARE:
+		BUG_ON(gcwq->first_idle);
+		gcwq->first_idle = new_worker;
+		break;
+
+	case CPU_DYING:
+		/*
+		 * Before this, the trustee and all workers except for
+		 * the ones which are still executing works from
+		 * before the last CPU down must be on the cpu.  After
+		 * this, they'll all be diasporas.
+		 */
+		gcwq->flags |= GCWQ_DISASSOCIATED;
 		break;
 
 	case CPU_POST_DEAD:
 		gcwq->trustee_state = TRUSTEE_BUTCHER;
+		/* fall through */
+	case CPU_UP_CANCELED:
+		destroy_worker(gcwq->first_idle);
+		gcwq->first_idle = NULL;
 		break;
 
 	case CPU_DOWN_FAILED:
 	case CPU_ONLINE:
+		gcwq->flags &= ~GCWQ_DISASSOCIATED;
 		if (gcwq->trustee_state != TRUSTEE_DONE) {
 			gcwq->trustee_state = TRUSTEE_RELEASE;
 			wake_up_process(gcwq->trustee);
 			wait_trustee_state(gcwq, TRUSTEE_DONE);
 		}
 
-		/* clear ROGUE from all workers */
-		list_for_each_entry(worker, &gcwq->idle_list, entry)
-			worker_clr_flags(worker, WORKER_ROGUE);
-
-		for_each_busy_worker(worker, i, pos, gcwq)
-			worker_clr_flags(worker, WORKER_ROGUE);
+		/*
+		 * Trustee is done and there might be no worker left.
+		 * Put the first_idle in and request a real manager to
+		 * take a look.
+		 */
+		spin_unlock_irq(&gcwq->lock);
+		kthread_bind(gcwq->first_idle->task, cpu);
+		spin_lock_irq(&gcwq->lock);
+		gcwq->flags |= GCWQ_MANAGE_WORKERS;
+		start_worker(gcwq->first_idle);
+		gcwq->first_idle = NULL;
 		break;
 	}
 
@@ -2548,10 +3259,10 @@ void thaw_workqueues(void)
 			if (wq->single_cpu == gcwq->cpu &&
 			    !cwq->nr_active && list_empty(&cwq->delayed_works))
 				cwq_unbind_single_cpu(cwq);
-
-			wake_up_process(cwq->worker->task);
 		}
 
+		wake_up_worker(gcwq);
+
 		spin_unlock_irq(&gcwq->lock);
 	}
 
@@ -2588,12 +3299,31 @@ void __init init_workqueues(void)
 		for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
 			INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
 
+		init_timer_deferrable(&gcwq->idle_timer);
+		gcwq->idle_timer.function = idle_worker_timeout;
+		gcwq->idle_timer.data = (unsigned long)gcwq;
+
+		setup_timer(&gcwq->mayday_timer, gcwq_mayday_timeout,
+			    (unsigned long)gcwq);
+
 		ida_init(&gcwq->worker_ida);
 
 		gcwq->trustee_state = TRUSTEE_DONE;
 		init_waitqueue_head(&gcwq->trustee_wait);
 	}
 
+	/* create the initial worker */
+	for_each_online_cpu(cpu) {
+		struct global_cwq *gcwq = get_gcwq(cpu);
+		struct worker *worker;
+
+		worker = create_worker(gcwq, true);
+		BUG_ON(!worker);
+		spin_lock_irq(&gcwq->lock);
+		start_worker(worker);
+		spin_unlock_irq(&gcwq->lock);
+	}
+
 	keventd_wq = create_workqueue("events");
 	BUG_ON(!keventd_wq);
 }
diff --git a/kernel/workqueue_sched.h b/kernel/workqueue_sched.h
index af040babb74..2d10fc98dc7 100644
--- a/kernel/workqueue_sched.h
+++ b/kernel/workqueue_sched.h
@@ -4,13 +4,6 @@
  * Scheduler hooks for concurrency managed workqueue.  Only to be
  * included from sched.c and workqueue.c.
  */
-static inline void wq_worker_waking_up(struct task_struct *task,
-				       unsigned int cpu)
-{
-}
-
-static inline struct task_struct *wq_worker_sleeping(struct task_struct *task,
-						     unsigned int cpu)
-{
-	return NULL;
-}
+void wq_worker_waking_up(struct task_struct *task, unsigned int cpu);
+struct task_struct *wq_worker_sleeping(struct task_struct *task,
+				       unsigned int cpu);
-- 
cgit v1.2.3-70-g09d2


From b71ab8c2025caef8db719aa41af0ed735dc543cd Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:14 +0200
Subject: workqueue: increase max_active of keventd and kill
 current_is_keventd()

Define WQ_MAX_ACTIVE and create keventd with max_active set to half of
it which means that keventd now can process up to WQ_MAX_ACTIVE / 2 - 1
works concurrently.  Unless some combination can result in dependency
loop longer than max_active, deadlock won't happen and thus it's
unnecessary to check whether current_is_keventd() before trying to
schedule a work.  Kill current_is_keventd().

(Lockdep annotations are broken.  We need lock_map_acquire_read_norecurse())

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
---
 arch/ia64/kernel/smpboot.c |  2 +-
 arch/x86/kernel/smpboot.c  |  2 +-
 include/linux/workqueue.h  |  4 ++-
 kernel/workqueue.c         | 63 ++++++++++------------------------------------
 4 files changed, 18 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 6a1380e90f8..99dcc85193c 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -519,7 +519,7 @@ do_boot_cpu (int sapicid, int cpu)
 	/*
 	 * We can't use kernel_thread since we must avoid to reschedule the child.
 	 */
-	if (!keventd_up() || current_is_keventd())
+	if (!keventd_up())
 		c_idle.work.func(&c_idle.work);
 	else {
 		schedule_work(&c_idle.work);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c4f33b2e77d..4d90f376e98 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -735,7 +735,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 		goto do_rest;
 	}
 
-	if (!keventd_up() || current_is_keventd())
+	if (!keventd_up())
 		c_idle.work.func(&c_idle.work);
 	else {
 		schedule_work(&c_idle.work);
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index b8f4ec45c40..33e24e734d5 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -227,6 +227,9 @@ enum {
 	WQ_SINGLE_CPU		= 1 << 1, /* only single cpu at a time */
 	WQ_NON_REENTRANT	= 1 << 2, /* guarantee non-reentrance */
 	WQ_RESCUER		= 1 << 3, /* has an rescue worker */
+
+	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
+	WQ_DFL_ACTIVE		= WQ_MAX_ACTIVE / 2,
 };
 
 extern struct workqueue_struct *
@@ -280,7 +283,6 @@ extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay)
 extern int schedule_delayed_work_on(int cpu, struct delayed_work *work,
 					unsigned long delay);
 extern int schedule_on_each_cpu(work_func_t func);
-extern int current_is_keventd(void);
 extern int keventd_up(void);
 
 extern void init_workqueues(void);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0ad46523b42..4190e84cf99 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2398,7 +2398,6 @@ EXPORT_SYMBOL(schedule_delayed_work_on);
 int schedule_on_each_cpu(work_func_t func)
 {
 	int cpu;
-	int orig = -1;
 	struct work_struct *works;
 
 	works = alloc_percpu(struct work_struct);
@@ -2407,23 +2406,12 @@ int schedule_on_each_cpu(work_func_t func)
 
 	get_online_cpus();
 
-	/*
-	 * When running in keventd don't schedule a work item on
-	 * itself.  Can just call directly because the work queue is
-	 * already bound.  This also is faster.
-	 */
-	if (current_is_keventd())
-		orig = raw_smp_processor_id();
-
 	for_each_online_cpu(cpu) {
 		struct work_struct *work = per_cpu_ptr(works, cpu);
 
 		INIT_WORK(work, func);
-		if (cpu != orig)
-			schedule_work_on(cpu, work);
+		schedule_work_on(cpu, work);
 	}
-	if (orig >= 0)
-		func(per_cpu_ptr(works, orig));
 
 	for_each_online_cpu(cpu)
 		flush_work(per_cpu_ptr(works, cpu));
@@ -2494,41 +2482,6 @@ int keventd_up(void)
 	return keventd_wq != NULL;
 }
 
-int current_is_keventd(void)
-{
-	bool found = false;
-	unsigned int cpu;
-
-	/*
-	 * There no longer is one-to-one relation between worker and
-	 * work queue and a worker task might be unbound from its cpu
-	 * if the cpu was offlined.  Match all busy workers.  This
-	 * function will go away once dynamic pool is implemented.
-	 */
-	for_each_possible_cpu(cpu) {
-		struct global_cwq *gcwq = get_gcwq(cpu);
-		struct worker *worker;
-		struct hlist_node *pos;
-		unsigned long flags;
-		int i;
-
-		spin_lock_irqsave(&gcwq->lock, flags);
-
-		for_each_busy_worker(worker, i, pos, gcwq) {
-			if (worker->task == current) {
-				found = true;
-				break;
-			}
-		}
-
-		spin_unlock_irqrestore(&gcwq->lock, flags);
-		if (found)
-			break;
-	}
-
-	return found;
-}
-
 static struct cpu_workqueue_struct *alloc_cwqs(void)
 {
 	/*
@@ -2576,6 +2529,16 @@ static void free_cwqs(struct cpu_workqueue_struct *cwqs)
 #endif
 }
 
+static int wq_clamp_max_active(int max_active, const char *name)
+{
+	if (max_active < 1 || max_active > WQ_MAX_ACTIVE)
+		printk(KERN_WARNING "workqueue: max_active %d requested for %s "
+		       "is out of range, clamping between %d and %d\n",
+		       max_active, name, 1, WQ_MAX_ACTIVE);
+
+	return clamp_val(max_active, 1, WQ_MAX_ACTIVE);
+}
+
 struct workqueue_struct *__create_workqueue_key(const char *name,
 						unsigned int flags,
 						int max_active,
@@ -2585,7 +2548,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 	struct workqueue_struct *wq;
 	unsigned int cpu;
 
-	max_active = clamp_val(max_active, 1, INT_MAX);
+	max_active = wq_clamp_max_active(max_active, name);
 
 	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
 	if (!wq)
@@ -3324,6 +3287,6 @@ void __init init_workqueues(void)
 		spin_unlock_irq(&gcwq->lock);
 	}
 
-	keventd_wq = create_workqueue("events");
+	keventd_wq = __create_workqueue("events", 0, WQ_DFL_ACTIVE);
 	BUG_ON(!keventd_wq);
 }
-- 
cgit v1.2.3-70-g09d2


From d320c03830b17af64e4547075003b1eeb274bc6c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:14 +0200
Subject: workqueue: s/__create_workqueue()/alloc_workqueue()/, and add system
 workqueues

This patch makes changes to make new workqueue features available to
its users.

* Now that workqueue is more featureful, there should be a public
  workqueue creation function which takes parameters to control them.
  Rename __create_workqueue() to alloc_workqueue() and make 0
  max_active mean WQ_DFL_ACTIVE.  In the long run, all
  create_workqueue_*() will be converted over to alloc_workqueue().

* To further unify access interface, rename keventd_wq to system_wq
  and export it.

* Add system_long_wq and system_nrt_wq.  The former is to host long
  running works separately (so that flush_scheduled_work() doesn't
  take so long) and the latter guarantees any queued work item is
  never executed in parallel by multiple CPUs.  These will be used by
  future patches to update workqueue users.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 40 +++++++++++++++++++++++++++++-----------
 kernel/workqueue.c        | 42 +++++++++++++++++++++++++-----------------
 2 files changed, 54 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 33e24e734d5..48b7422f25a 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -232,12 +232,31 @@ enum {
 	WQ_DFL_ACTIVE		= WQ_MAX_ACTIVE / 2,
 };
 
+/*
+ * System-wide workqueues which are always present.
+ *
+ * system_wq is the one used by schedule[_delayed]_work[_on]().
+ * Multi-CPU multi-threaded.  There are users which expect relatively
+ * short queue flush time.  Don't queue works which can run for too
+ * long.
+ *
+ * system_long_wq is similar to system_wq but may host long running
+ * works.  Queue flushing might take relatively long.
+ *
+ * system_nrt_wq is non-reentrant and guarantees that any given work
+ * item is never executed in parallel by multiple CPUs.  Queue
+ * flushing might take relatively long.
+ */
+extern struct workqueue_struct *system_wq;
+extern struct workqueue_struct *system_long_wq;
+extern struct workqueue_struct *system_nrt_wq;
+
 extern struct workqueue_struct *
-__create_workqueue_key(const char *name, unsigned int flags, int max_active,
-		       struct lock_class_key *key, const char *lock_name);
+__alloc_workqueue_key(const char *name, unsigned int flags, int max_active,
+		      struct lock_class_key *key, const char *lock_name);
 
 #ifdef CONFIG_LOCKDEP
-#define __create_workqueue(name, flags, max_active)		\
+#define alloc_workqueue(name, flags, max_active)		\
 ({								\
 	static struct lock_class_key __key;			\
 	const char *__lock_name;				\
@@ -247,21 +266,20 @@ __create_workqueue_key(const char *name, unsigned int flags, int max_active,
 	else							\
 		__lock_name = #name;				\
 								\
-	__create_workqueue_key((name), (flags), (max_active),	\
-				&__key, __lock_name);		\
+	__alloc_workqueue_key((name), (flags), (max_active),	\
+			      &__key, __lock_name);		\
 })
 #else
-#define __create_workqueue(name, flags, max_active)		\
-	__create_workqueue_key((name), (flags), (max_active), NULL, NULL)
+#define alloc_workqueue(name, flags, max_active)		\
+	__alloc_workqueue_key((name), (flags), (max_active), NULL, NULL)
 #endif
 
 #define create_workqueue(name)					\
-	__create_workqueue((name), WQ_RESCUER, 1)
+	alloc_workqueue((name), WQ_RESCUER, 1)
 #define create_freezeable_workqueue(name)			\
-	__create_workqueue((name),				\
-			   WQ_FREEZEABLE | WQ_SINGLE_CPU | WQ_RESCUER, 1)
+	alloc_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_CPU | WQ_RESCUER, 1)
 #define create_singlethread_workqueue(name)			\
-	__create_workqueue((name), WQ_SINGLE_CPU | WQ_RESCUER, 1)
+	alloc_workqueue((name), WQ_SINGLE_CPU | WQ_RESCUER, 1)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4190e84cf99..16ce617974d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -210,6 +210,13 @@ struct workqueue_struct {
 #endif
 };
 
+struct workqueue_struct *system_wq __read_mostly;
+struct workqueue_struct *system_long_wq __read_mostly;
+struct workqueue_struct *system_nrt_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_wq);
+EXPORT_SYMBOL_GPL(system_long_wq);
+EXPORT_SYMBOL_GPL(system_nrt_wq);
+
 #define for_each_busy_worker(worker, i, pos, gcwq)			\
 	for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)			\
 		hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)
@@ -2306,8 +2313,6 @@ int cancel_delayed_work_sync(struct delayed_work *dwork)
 }
 EXPORT_SYMBOL(cancel_delayed_work_sync);
 
-static struct workqueue_struct *keventd_wq __read_mostly;
-
 /**
  * schedule_work - put work task in global workqueue
  * @work: job to be done
@@ -2321,7 +2326,7 @@ static struct workqueue_struct *keventd_wq __read_mostly;
  */
 int schedule_work(struct work_struct *work)
 {
-	return queue_work(keventd_wq, work);
+	return queue_work(system_wq, work);
 }
 EXPORT_SYMBOL(schedule_work);
 
@@ -2334,7 +2339,7 @@ EXPORT_SYMBOL(schedule_work);
  */
 int schedule_work_on(int cpu, struct work_struct *work)
 {
-	return queue_work_on(cpu, keventd_wq, work);
+	return queue_work_on(cpu, system_wq, work);
 }
 EXPORT_SYMBOL(schedule_work_on);
 
@@ -2349,7 +2354,7 @@ EXPORT_SYMBOL(schedule_work_on);
 int schedule_delayed_work(struct delayed_work *dwork,
 					unsigned long delay)
 {
-	return queue_delayed_work(keventd_wq, dwork, delay);
+	return queue_delayed_work(system_wq, dwork, delay);
 }
 EXPORT_SYMBOL(schedule_delayed_work);
 
@@ -2382,7 +2387,7 @@ EXPORT_SYMBOL(flush_delayed_work);
 int schedule_delayed_work_on(int cpu,
 			struct delayed_work *dwork, unsigned long delay)
 {
-	return queue_delayed_work_on(cpu, keventd_wq, dwork, delay);
+	return queue_delayed_work_on(cpu, system_wq, dwork, delay);
 }
 EXPORT_SYMBOL(schedule_delayed_work_on);
 
@@ -2447,7 +2452,7 @@ int schedule_on_each_cpu(work_func_t func)
  */
 void flush_scheduled_work(void)
 {
-	flush_workqueue(keventd_wq);
+	flush_workqueue(system_wq);
 }
 EXPORT_SYMBOL(flush_scheduled_work);
 
@@ -2479,7 +2484,7 @@ EXPORT_SYMBOL_GPL(execute_in_process_context);
 
 int keventd_up(void)
 {
-	return keventd_wq != NULL;
+	return system_wq != NULL;
 }
 
 static struct cpu_workqueue_struct *alloc_cwqs(void)
@@ -2539,15 +2544,16 @@ static int wq_clamp_max_active(int max_active, const char *name)
 	return clamp_val(max_active, 1, WQ_MAX_ACTIVE);
 }
 
-struct workqueue_struct *__create_workqueue_key(const char *name,
-						unsigned int flags,
-						int max_active,
-						struct lock_class_key *key,
-						const char *lock_name)
+struct workqueue_struct *__alloc_workqueue_key(const char *name,
+					       unsigned int flags,
+					       int max_active,
+					       struct lock_class_key *key,
+					       const char *lock_name)
 {
 	struct workqueue_struct *wq;
 	unsigned int cpu;
 
+	max_active = max_active ?: WQ_DFL_ACTIVE;
 	max_active = wq_clamp_max_active(max_active, name);
 
 	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
@@ -2626,7 +2632,7 @@ err:
 	}
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(__create_workqueue_key);
+EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
 
 /**
  * destroy_workqueue - safely terminate a workqueue
@@ -2910,7 +2916,7 @@ static int __cpuinit trustee_thread(void *__gcwq)
 			continue;
 
 		debug_work_activate(rebind_work);
-		insert_work(get_cwq(gcwq->cpu, keventd_wq), rebind_work,
+		insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
 			    worker->scheduled.next,
 			    work_color_to_flags(WORK_NO_COLOR));
 	}
@@ -3287,6 +3293,8 @@ void __init init_workqueues(void)
 		spin_unlock_irq(&gcwq->lock);
 	}
 
-	keventd_wq = __create_workqueue("events", 0, WQ_DFL_ACTIVE);
-	BUG_ON(!keventd_wq);
+	system_wq = alloc_workqueue("events", 0, 0);
+	system_long_wq = alloc_workqueue("events_long", 0, 0);
+	system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
+	BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq);
 }
-- 
cgit v1.2.3-70-g09d2


From dcd989cb73ab0f7b722d64ab6516f101d9f43f88 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:14 +0200
Subject: workqueue: implement several utility APIs

Implement the following utility APIs.

 workqueue_set_max_active()	: adjust max_active of a wq
 workqueue_congested()		: test whether a wq is congested
 work_cpu()			: determine the last / current cpu of a work
 work_busy()			: query whether a work is busy

* Anton Blanchard fixed missing ret initialization in work_busy().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Anton Blanchard <anton@samba.org>
---
 include/linux/workqueue.h |  11 ++++-
 kernel/workqueue.c        | 108 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 117 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 48b7422f25a..0a7f7972938 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -61,6 +61,10 @@ enum {
 	WORK_STRUCT_FLAG_MASK	= (1UL << WORK_STRUCT_FLAG_BITS) - 1,
 	WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
 	WORK_STRUCT_NO_CPU	= NR_CPUS << WORK_STRUCT_FLAG_BITS,
+
+	/* bit mask for work_busy() return values */
+	WORK_BUSY_PENDING	= 1 << 0,
+	WORK_BUSY_RUNNING	= 1 << 1,
 };
 
 struct work_struct {
@@ -307,9 +311,14 @@ extern void init_workqueues(void);
 int execute_in_process_context(work_func_t fn, struct execute_work *);
 
 extern int flush_work(struct work_struct *work);
-
 extern int cancel_work_sync(struct work_struct *work);
 
+extern void workqueue_set_max_active(struct workqueue_struct *wq,
+				     int max_active);
+extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq);
+extern unsigned int work_cpu(struct work_struct *work);
+extern unsigned int work_busy(struct work_struct *work);
+
 /*
  * Kill off a pending schedule_delayed_work().  Note that the work callback
  * function may still be running on return from cancel_delayed_work(), unless
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 16ce617974d..c1aa65c2ff3 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -203,7 +203,7 @@ struct workqueue_struct {
 	cpumask_var_t		mayday_mask;	/* cpus requesting rescue */
 	struct worker		*rescuer;	/* I: rescue worker */
 
-	int			saved_max_active; /* I: saved cwq max_active */
+	int			saved_max_active; /* W: saved cwq max_active */
 	const char		*name;		/* I: workqueue name */
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map	lockdep_map;
@@ -2675,6 +2675,112 @@ void destroy_workqueue(struct workqueue_struct *wq)
 }
 EXPORT_SYMBOL_GPL(destroy_workqueue);
 
+/**
+ * workqueue_set_max_active - adjust max_active of a workqueue
+ * @wq: target workqueue
+ * @max_active: new max_active value.
+ *
+ * Set max_active of @wq to @max_active.
+ *
+ * CONTEXT:
+ * Don't call from IRQ context.
+ */
+void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
+{
+	unsigned int cpu;
+
+	max_active = wq_clamp_max_active(max_active, wq->name);
+
+	spin_lock(&workqueue_lock);
+
+	wq->saved_max_active = max_active;
+
+	for_each_possible_cpu(cpu) {
+		struct global_cwq *gcwq = get_gcwq(cpu);
+
+		spin_lock_irq(&gcwq->lock);
+
+		if (!(wq->flags & WQ_FREEZEABLE) ||
+		    !(gcwq->flags & GCWQ_FREEZING))
+			get_cwq(gcwq->cpu, wq)->max_active = max_active;
+
+		spin_unlock_irq(&gcwq->lock);
+	}
+
+	spin_unlock(&workqueue_lock);
+}
+EXPORT_SYMBOL_GPL(workqueue_set_max_active);
+
+/**
+ * workqueue_congested - test whether a workqueue is congested
+ * @cpu: CPU in question
+ * @wq: target workqueue
+ *
+ * Test whether @wq's cpu workqueue for @cpu is congested.  There is
+ * no synchronization around this function and the test result is
+ * unreliable and only useful as advisory hints or for debugging.
+ *
+ * RETURNS:
+ * %true if congested, %false otherwise.
+ */
+bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq)
+{
+	struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+
+	return !list_empty(&cwq->delayed_works);
+}
+EXPORT_SYMBOL_GPL(workqueue_congested);
+
+/**
+ * work_cpu - return the last known associated cpu for @work
+ * @work: the work of interest
+ *
+ * RETURNS:
+ * CPU number if @work was ever queued.  NR_CPUS otherwise.
+ */
+unsigned int work_cpu(struct work_struct *work)
+{
+	struct global_cwq *gcwq = get_work_gcwq(work);
+
+	return gcwq ? gcwq->cpu : NR_CPUS;
+}
+EXPORT_SYMBOL_GPL(work_cpu);
+
+/**
+ * work_busy - test whether a work is currently pending or running
+ * @work: the work to be tested
+ *
+ * Test whether @work is currently pending or running.  There is no
+ * synchronization around this function and the test result is
+ * unreliable and only useful as advisory hints or for debugging.
+ * Especially for reentrant wqs, the pending state might hide the
+ * running state.
+ *
+ * RETURNS:
+ * OR'd bitmask of WORK_BUSY_* bits.
+ */
+unsigned int work_busy(struct work_struct *work)
+{
+	struct global_cwq *gcwq = get_work_gcwq(work);
+	unsigned long flags;
+	unsigned int ret = 0;
+
+	if (!gcwq)
+		return false;
+
+	spin_lock_irqsave(&gcwq->lock, flags);
+
+	if (work_pending(work))
+		ret |= WORK_BUSY_PENDING;
+	if (find_worker_executing_work(gcwq, work))
+		ret |= WORK_BUSY_RUNNING;
+
+	spin_unlock_irqrestore(&gcwq->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(work_busy);
+
 /*
  * CPU hotplug.
  *
-- 
cgit v1.2.3-70-g09d2


From 649027d73a6309ac34dc2886362e662bd73456dc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:14 +0200
Subject: workqueue: implement high priority workqueue

This patch implements high priority workqueue which can be specified
with WQ_HIGHPRI flag on creation.  A high priority workqueue has the
following properties.

* A work queued to it is queued at the head of the worklist of the
  respective gcwq after other highpri works, while normal works are
  always appended at the end.

* As long as there are highpri works on gcwq->worklist,
  [__]need_more_worker() remains %true and process_one_work() wakes up
  another worker before it starts executing a work.

The above two properties guarantee that works queued to high priority
workqueues are dispatched to workers and start execution as soon as
possible regardless of the state of other works.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/workqueue.h |  1 +
 kernel/workqueue.c        | 70 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 65 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 0a7f7972938..006dcf7e808 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -231,6 +231,7 @@ enum {
 	WQ_SINGLE_CPU		= 1 << 1, /* only single cpu at a time */
 	WQ_NON_REENTRANT	= 1 << 2, /* guarantee non-reentrance */
 	WQ_RESCUER		= 1 << 3, /* has an rescue worker */
+	WQ_HIGHPRI		= 1 << 4, /* high priority */
 
 	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
 	WQ_DFL_ACTIVE		= WQ_MAX_ACTIVE / 2,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c1aa65c2ff3..5775717288d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -43,6 +43,7 @@ enum {
 	GCWQ_MANAGING_WORKERS	= 1 << 1,	/* managing workers */
 	GCWQ_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */
 	GCWQ_FREEZING		= 1 << 3,	/* freeze in progress */
+	GCWQ_HIGHPRI_PENDING	= 1 << 4,	/* highpri works on queue */
 
 	/* worker flags */
 	WORKER_STARTED		= 1 << 0,	/* started */
@@ -452,15 +453,19 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
  * assume that they're being called with gcwq->lock held.
  */
 
+static bool __need_more_worker(struct global_cwq *gcwq)
+{
+	return !atomic_read(get_gcwq_nr_running(gcwq->cpu)) ||
+		gcwq->flags & GCWQ_HIGHPRI_PENDING;
+}
+
 /*
  * Need to wake up a worker?  Called from anything but currently
  * running workers.
  */
 static bool need_more_worker(struct global_cwq *gcwq)
 {
-	atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu);
-
-	return !list_empty(&gcwq->worklist) && !atomic_read(nr_running);
+	return !list_empty(&gcwq->worklist) && __need_more_worker(gcwq);
 }
 
 /* Can I start working?  Called from busy but !running workers. */
@@ -733,6 +738,43 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
 					    work);
 }
 
+/**
+ * gcwq_determine_ins_pos - find insertion position
+ * @gcwq: gcwq of interest
+ * @cwq: cwq a work is being queued for
+ *
+ * A work for @cwq is about to be queued on @gcwq, determine insertion
+ * position for the work.  If @cwq is for HIGHPRI wq, the work is
+ * queued at the head of the queue but in FIFO order with respect to
+ * other HIGHPRI works; otherwise, at the end of the queue.  This
+ * function also sets GCWQ_HIGHPRI_PENDING flag to hint @gcwq that
+ * there are HIGHPRI works pending.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock).
+ *
+ * RETURNS:
+ * Pointer to inserstion position.
+ */
+static inline struct list_head *gcwq_determine_ins_pos(struct global_cwq *gcwq,
+					       struct cpu_workqueue_struct *cwq)
+{
+	struct work_struct *twork;
+
+	if (likely(!(cwq->wq->flags & WQ_HIGHPRI)))
+		return &gcwq->worklist;
+
+	list_for_each_entry(twork, &gcwq->worklist, entry) {
+		struct cpu_workqueue_struct *tcwq = get_work_cwq(twork);
+
+		if (!(tcwq->wq->flags & WQ_HIGHPRI))
+			break;
+	}
+
+	gcwq->flags |= GCWQ_HIGHPRI_PENDING;
+	return &twork->entry;
+}
+
 /**
  * insert_work - insert a work into gcwq
  * @cwq: cwq @work belongs to
@@ -770,7 +812,7 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
 	 */
 	smp_mb();
 
-	if (!atomic_read(get_gcwq_nr_running(gcwq->cpu)))
+	if (__need_more_worker(gcwq))
 		wake_up_worker(gcwq);
 }
 
@@ -887,7 +929,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 
 	if (likely(cwq->nr_active < cwq->max_active)) {
 		cwq->nr_active++;
-		worklist = &gcwq->worklist;
+		worklist = gcwq_determine_ins_pos(gcwq, cwq);
 	} else
 		worklist = &cwq->delayed_works;
 
@@ -1526,8 +1568,9 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
 {
 	struct work_struct *work = list_first_entry(&cwq->delayed_works,
 						    struct work_struct, entry);
+	struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq);
 
-	move_linked_works(work, &cwq->gcwq->worklist, NULL);
+	move_linked_works(work, pos, NULL);
 	cwq->nr_active++;
 }
 
@@ -1634,6 +1677,21 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
 	set_work_cpu(work, gcwq->cpu);
 	list_del_init(&work->entry);
 
+	/*
+	 * If HIGHPRI_PENDING, check the next work, and, if HIGHPRI,
+	 * wake up another worker; otherwise, clear HIGHPRI_PENDING.
+	 */
+	if (unlikely(gcwq->flags & GCWQ_HIGHPRI_PENDING)) {
+		struct work_struct *nwork = list_first_entry(&gcwq->worklist,
+						struct work_struct, entry);
+
+		if (!list_empty(&gcwq->worklist) &&
+		    get_work_cwq(nwork)->wq->flags & WQ_HIGHPRI)
+			wake_up_worker(gcwq);
+		else
+			gcwq->flags &= ~GCWQ_HIGHPRI_PENDING;
+	}
+
 	spin_unlock_irq(&gcwq->lock);
 
 	work_clear_pending(work);
-- 
cgit v1.2.3-70-g09d2


From fb0e7beb5c1b6fb4da786ba709d7138373d5fb22 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 29 Jun 2010 10:07:15 +0200
Subject: workqueue: implement cpu intensive workqueue

This patch implements cpu intensive workqueue which can be specified
with WQ_CPU_INTENSIVE flag on creation.  Works queued to a cpu
intensive workqueue don't participate in concurrency management.  IOW,
it doesn't contribute to gcwq->nr_running and thus doesn't delay
execution of other works.

Note that although cpu intensive works won't delay other works, they
can be delayed by other works.  Combine with WQ_HIGHPRI to avoid being
delayed by other works too.

As the name suggests this is useful when using workqueue for cpu
intensive works.  Workers executing cpu intensive works are not
considered for workqueue concurrency management and left for the
scheduler to manage.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/workqueue.h |  1 +
 kernel/workqueue.c        | 16 +++++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 006dcf7e808..3f36d37ac5b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -232,6 +232,7 @@ enum {
 	WQ_NON_REENTRANT	= 1 << 2, /* guarantee non-reentrance */
 	WQ_RESCUER		= 1 << 3, /* has an rescue worker */
 	WQ_HIGHPRI		= 1 << 4, /* high priority */
+	WQ_CPU_INTENSIVE	= 1 << 5, /* cpu instensive workqueue */
 
 	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
 	WQ_DFL_ACTIVE		= WQ_MAX_ACTIVE / 2,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5775717288d..6fa847c5c5e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -52,8 +52,10 @@ enum {
 	WORKER_PREP		= 1 << 3,	/* preparing to run works */
 	WORKER_ROGUE		= 1 << 4,	/* not bound to any cpu */
 	WORKER_REBIND		= 1 << 5,	/* mom is home, come back */
+	WORKER_CPU_INTENSIVE	= 1 << 6,	/* cpu intensive */
 
-	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_ROGUE | WORKER_REBIND,
+	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_ROGUE | WORKER_REBIND |
+				  WORKER_CPU_INTENSIVE,
 
 	/* gcwq->trustee_state */
 	TRUSTEE_START		= 0,		/* start */
@@ -1641,6 +1643,7 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
 	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
 	struct global_cwq *gcwq = cwq->gcwq;
 	struct hlist_head *bwh = busy_worker_head(gcwq, work);
+	bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
 	work_func_t f = work->func;
 	int work_color;
 	struct worker *collision;
@@ -1692,6 +1695,13 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
 			gcwq->flags &= ~GCWQ_HIGHPRI_PENDING;
 	}
 
+	/*
+	 * CPU intensive works don't participate in concurrency
+	 * management.  They're the scheduler's responsibility.
+	 */
+	if (unlikely(cpu_intensive))
+		worker_set_flags(worker, WORKER_CPU_INTENSIVE, true);
+
 	spin_unlock_irq(&gcwq->lock);
 
 	work_clear_pending(work);
@@ -1713,6 +1723,10 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
 
 	spin_lock_irq(&gcwq->lock);
 
+	/* clear cpu intensive status */
+	if (unlikely(cpu_intensive))
+		worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
+
 	/* we're done with it, release */
 	hlist_del_init(&worker->hentry);
 	worker->current_work = NULL;
-- 
cgit v1.2.3-70-g09d2


From 2ec57d448b2e8fcfba539a46701b43f14f037f17 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Tue, 29 Jun 2010 12:02:01 +1000
Subject: sched: Fix spelling of sibling

No logic changes, only spelling.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Cc: linuxppc-dev@ozlabs.org
Cc: David Howells <dhowells@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <15249.1277776921@neuling.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/process.c | 2 +-
 include/linux/topology.h      | 2 +-
 kernel/sched_fair.c           | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9b41ece010b..22f08cb7e7d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1270,7 +1270,7 @@ unsigned long randomize_et_dyn(unsigned long base)
 }
 
 #ifdef CONFIG_SMP
-int arch_sd_sibiling_asym_packing(void)
+int arch_sd_sibling_asym_packing(void)
 {
 	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
 		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
diff --git a/include/linux/topology.h b/include/linux/topology.h
index cf57f30d0dc..b572e432d2f 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -103,7 +103,7 @@ int arch_update_cpu_topology(void);
 				| 1*SD_SHARE_PKG_RESOURCES		\
 				| 0*SD_SERIALIZE			\
 				| 0*SD_PREFER_SIBLING			\
-				| arch_sd_sibiling_asym_packing()	\
+				| arch_sd_sibling_asym_packing()	\
 				,					\
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5e8f98c103f..b4da534f4b8 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2567,7 +2567,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
 	} while (sg != sd->groups);
 }
 
-int __weak arch_sd_sibiling_asym_packing(void)
+int __weak arch_sd_sibling_asym_packing(void)
 {
        return 0*SD_ASYM_PACKING;
 }
-- 
cgit v1.2.3-70-g09d2


From afd2a5ca1ef6ffe1f9fd0846ae39795527ead555 Mon Sep 17 00:00:00 2001
From: Gertjan van Wingerde <gwingerde@gmail.com>
Date: Tue, 29 Jun 2010 21:43:44 +0200
Subject: eeprom_93cx6: Add support for 93c86 EEPROMs.

Signed-off-by: Gertjan van Wingerde <gwingerde@gmail.com>
Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/eeprom_93cx6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/eeprom_93cx6.h b/include/linux/eeprom_93cx6.h
index a55c873e8b6..c4627cbdb8e 100644
--- a/include/linux/eeprom_93cx6.h
+++ b/include/linux/eeprom_93cx6.h
@@ -30,6 +30,7 @@
 #define PCI_EEPROM_WIDTH_93C46	6
 #define PCI_EEPROM_WIDTH_93C56	8
 #define PCI_EEPROM_WIDTH_93C66	8
+#define PCI_EEPROM_WIDTH_93C86	8
 #define PCI_EEPROM_WIDTH_OPCODE	3
 #define PCI_EEPROM_WRITE_OPCODE	0x05
 #define PCI_EEPROM_READ_OPCODE	0x06
-- 
cgit v1.2.3-70-g09d2


From 787a34456dc34bdd75b29cebb53cb09c727674d6 Mon Sep 17 00:00:00 2001
From: Kulikov Vasiliy <segooon@gmail.com>
Date: Wed, 30 Jun 2010 06:08:15 +0000
Subject: net/neighbour.h: fix typo

'Shoul' must be 'should'.

Signed-off-by: Kulikov Vasiliy <segooon@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index eb21340a573..242879b6c4d 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -151,7 +151,7 @@ struct neigh_table {
 	void			(*proxy_redo)(struct sk_buff *skb);
 	char			*id;
 	struct neigh_parms	parms;
-	/* HACK. gc_* shoul follow parms without a gap! */
+	/* HACK. gc_* should follow parms without a gap! */
 	int			gc_interval;
 	int			gc_thresh1;
 	int			gc_thresh2;
-- 
cgit v1.2.3-70-g09d2


From 4ce3c183fcade7f4b30a33dae90cd774c3d9e094 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 30 Jun 2010 13:31:19 -0700
Subject: snmp: 64bit ipstats_mib for all arches

/proc/net/snmp and /proc/net/netstat expose SNMP counters.

Width of these counters is either 32 or 64 bits, depending on the size
of "unsigned long" in kernel.

This means user program parsing these files must already be prepared to
deal with 64bit values, regardless of user program being 32 or 64 bit.

This patch introduces 64bit snmp values for IPSTAT mib, where some
counters can wrap pretty fast if they are 32bit wide.

# netstat -s|egrep "InOctets|OutOctets"
    InOctets: 244068329096
    OutOctets: 244069348848

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h    | 20 +++++++++-----
 include/net/ipv6.h  | 12 ++++-----
 include/net/snmp.h  | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 net/ipv4/af_inet.c  | 36 +++++++++++++++++++++++++
 net/ipv4/proc.c     | 15 ++++++-----
 net/ipv6/addrconf.c | 18 ++++++++++++-
 net/ipv6/proc.c     | 17 +++++++++---
 7 files changed, 167 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 3b524df7ddd..890f9725d68 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -165,12 +165,12 @@ struct ipv4_config {
 };
 
 extern struct ipv4_config ipv4_config;
-#define IP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.ip_statistics, field)
-#define IP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.ip_statistics, field)
-#define IP_ADD_STATS(net, field, val)	SNMP_ADD_STATS((net)->mib.ip_statistics, field, val)
-#define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS_BH((net)->mib.ip_statistics, field, val)
-#define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS((net)->mib.ip_statistics, field, val)
-#define IP_UPD_PO_STATS_BH(net, field, val) SNMP_UPD_PO_STATS_BH((net)->mib.ip_statistics, field, val)
+#define IP_INC_STATS(net, field)	SNMP_INC_STATS64((net)->mib.ip_statistics, field)
+#define IP_INC_STATS_BH(net, field)	SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field)
+#define IP_ADD_STATS(net, field, val)	SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val)
+#define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val)
+#define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
+#define IP_UPD_PO_STATS_BH(net, field, val) SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define NET_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.net_statistics, field)
 #define NET_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
 #define NET_INC_STATS_USER(net, field) 	SNMP_INC_STATS_USER((net)->mib.net_statistics, field)
@@ -178,6 +178,14 @@ extern struct ipv4_config ipv4_config;
 #define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd)
 
 extern unsigned long snmp_fold_field(void __percpu *mib[], int offt);
+#if BITS_PER_LONG==32
+extern u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t sync_off);
+#else
+static inline u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_off)
+{
+	return snmp_fold_field(mib, offt);
+}
+#endif
 extern int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align);
 extern void snmp_mib_free(void __percpu *ptr[2]);
 
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index f5808d596aa..1f841241099 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -136,17 +136,17 @@ extern struct ctl_path net_ipv6_ctl_path[];
 /* MIBs */
 
 #define IP6_INC_STATS(net, idev,field)		\
-		_DEVINC(net, ipv6, , idev, field)
+		_DEVINC(net, ipv6, 64, idev, field)
 #define IP6_INC_STATS_BH(net, idev,field)	\
-		_DEVINC(net, ipv6, _BH, idev, field)
+		_DEVINC(net, ipv6, 64_BH, idev, field)
 #define IP6_ADD_STATS(net, idev,field,val)	\
-		_DEVADD(net, ipv6, , idev, field, val)
+		_DEVADD(net, ipv6, 64, idev, field, val)
 #define IP6_ADD_STATS_BH(net, idev,field,val)	\
-		_DEVADD(net, ipv6, _BH, idev, field, val)
+		_DEVADD(net, ipv6, 64_BH, idev, field, val)
 #define IP6_UPD_PO_STATS(net, idev,field,val)   \
-		_DEVUPD(net, ipv6, , idev, field, val)
+		_DEVUPD(net, ipv6, 64, idev, field, val)
 #define IP6_UPD_PO_STATS_BH(net, idev,field,val)   \
-		_DEVUPD(net, ipv6, _BH, idev, field, val)
+		_DEVUPD(net, ipv6, 64_BH, idev, field, val)
 #define ICMP6_INC_STATS(net, idev, field)	\
 		_DEVINC(net, icmpv6, , idev, field)
 #define ICMP6_INC_STATS_BH(net, idev, field)	\
diff --git a/include/net/snmp.h b/include/net/snmp.h
index 899003d18db..a0e61806d48 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -47,15 +47,16 @@ struct snmp_mib {
 }
 
 /*
- * We use all unsigned longs. Linux will soon be so reliable that even 
- * these will rapidly get too small 8-). Seriously consider the IpInReceives 
- * count on the 20Gb/s + networks people expect in a few years time!
+ * We use unsigned longs for most mibs but u64 for ipstats.
  */
+#include <linux/u64_stats_sync.h>
 
 /* IPstats */
 #define IPSTATS_MIB_MAX	__IPSTATS_MIB_MAX
 struct ipstats_mib {
-	unsigned long	mibs[IPSTATS_MIB_MAX];
+	/* mibs[] must be first field of struct ipstats_mib */
+	u64		mibs[IPSTATS_MIB_MAX];
+	struct u64_stats_sync syncp;
 };
 
 /* ICMP */
@@ -155,4 +156,70 @@ struct linux_xfrm_mib {
 		ptr->mibs[basefield##PKTS]++; \
 		ptr->mibs[basefield##OCTETS] += addend;\
 	} while (0)
+
+
+#if BITS_PER_LONG==32
+
+#define SNMP_ADD_STATS64_BH(mib, field, addend) 			\
+	do {								\
+		__typeof__(*mib[0]) *ptr = __this_cpu_ptr((mib)[0]);	\
+		u64_stats_update_begin(&ptr->syncp);			\
+		ptr->mibs[field] += addend;				\
+		u64_stats_update_end(&ptr->syncp);			\
+	} while (0)
+#define SNMP_ADD_STATS64_USER(mib, field, addend) 			\
+	do {								\
+		__typeof__(*mib[0]) *ptr;				\
+		preempt_disable();					\
+		ptr = __this_cpu_ptr((mib)[1]);				\
+		u64_stats_update_begin(&ptr->syncp);			\
+		ptr->mibs[field] += addend;				\
+		u64_stats_update_end(&ptr->syncp);			\
+		preempt_enable();					\
+	} while (0)
+#define SNMP_ADD_STATS64(mib, field, addend)				\
+	do {								\
+		__typeof__(*mib[0]) *ptr;				\
+		preempt_disable();					\
+		ptr = __this_cpu_ptr((mib)[!in_softirq()]);		\
+		u64_stats_update_begin(&ptr->syncp);			\
+		ptr->mibs[field] += addend;				\
+		u64_stats_update_end(&ptr->syncp);			\
+		preempt_enable();					\
+	} while (0)
+#define SNMP_INC_STATS64_BH(mib, field) SNMP_ADD_STATS64_BH(mib, field, 1)
+#define SNMP_INC_STATS64_USER(mib, field) SNMP_ADD_STATS64_USER(mib, field, 1)
+#define SNMP_INC_STATS64(mib, field) SNMP_ADD_STATS64(mib, field, 1)
+#define SNMP_UPD_PO_STATS64(mib, basefield, addend)			\
+	do {								\
+		__typeof__(*mib[0]) *ptr;				\
+		preempt_disable();					\
+		ptr = __this_cpu_ptr((mib)[!in_softirq()]);		\
+		u64_stats_update_begin(&ptr->syncp);			\
+		ptr->mibs[basefield##PKTS]++;				\
+		ptr->mibs[basefield##OCTETS] += addend;			\
+		u64_stats_update_end(&ptr->syncp);			\
+		preempt_enable();					\
+	} while (0)
+#define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend)			\
+	do {								\
+		__typeof__(*mib[0]) *ptr;				\
+		ptr = __this_cpu_ptr((mib)[!in_softirq()]);		\
+		u64_stats_update_begin(&ptr->syncp);			\
+		ptr->mibs[basefield##PKTS]++;				\
+		ptr->mibs[basefield##OCTETS] += addend;			\
+		u64_stats_update_end(&ptr->syncp);			\
+	} while (0)
+#else
+#define SNMP_INC_STATS64_BH(mib, field)		SNMP_INC_STATS_BH(mib, field)
+#define SNMP_INC_STATS64_USER(mib, field)	SNMP_INC_STATS_USER(mib, field)
+#define SNMP_INC_STATS64(mib, field)		SNMP_INC_STATS(mib, field)
+#define SNMP_DEC_STATS64(mib, field)		SNMP_DEC_STATS(mib, field)
+#define SNMP_ADD_STATS64_BH(mib, field, addend) SNMP_ADD_STATS_BH(mib, field, addend)
+#define SNMP_ADD_STATS64_USER(mib, field, addend) SNMP_ADD_STATS_USER(mib, field, addend)
+#define SNMP_ADD_STATS64(mib, field, addend)	SNMP_ADD_STATS(mib, field, addend)
+#define SNMP_UPD_PO_STATS64(mib, basefield, addend) SNMP_UPD_PO_STATS(mib, basefield, addend)
+#define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend) SNMP_UPD_PO_STATS_BH(mib, basefield, addend)
+#endif
+
 #endif
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 640db9b9033..3ceb025b16f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1427,6 +1427,42 @@ unsigned long snmp_fold_field(void __percpu *mib[], int offt)
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
+#if BITS_PER_LONG==32
+
+u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
+{
+	u64 res = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		void *bhptr, *userptr;
+		struct u64_stats_sync *syncp;
+		u64 v_bh, v_user;
+		unsigned int start;
+
+		/* first mib used by softirq context, we must use _bh() accessors */
+		bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu);
+		syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
+		do {
+			start = u64_stats_fetch_begin_bh(syncp);
+			v_bh = *(((u64 *) bhptr) + offt);
+		} while (u64_stats_fetch_retry_bh(syncp, start));
+
+		/* second mib used in USER context */
+		userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu);
+		syncp = (struct u64_stats_sync *)(userptr + syncp_offset);
+		do {
+			start = u64_stats_fetch_begin(syncp);
+			v_user = *(((u64 *) userptr) + offt);
+		} while (u64_stats_fetch_retry(syncp, start));
+
+		res += v_bh + v_user;
+	}
+	return res;
+}
+EXPORT_SYMBOL_GPL(snmp_fold_field64);
+#endif
+
 int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
 {
 	BUG_ON(ptr == NULL);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index e320ca6b3ef..4ae1f203f7c 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -343,10 +343,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 		   IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
 		   sysctl_ip_default_ttl);
 
+	BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
-		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.ip_statistics,
-					   snmp4_ipstats_list[i].entry));
+		seq_printf(seq, " %llu",
+			   snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+					     snmp4_ipstats_list[i].entry,
+					     offsetof(struct ipstats_mib, syncp)));
 
 	icmp_put(seq);	/* RFC 2011 compatibility */
 	icmpmsg_put(seq);
@@ -432,9 +434,10 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 
 	seq_puts(seq, "\nIpExt:");
 	for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
-		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.ip_statistics,
-					   snmp4_ipextstats_list[i].entry));
+		seq_printf(seq, " %llu",
+			   snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+					     snmp4_ipextstats_list[i].entry,
+					     offsetof(struct ipstats_mib, syncp)));
 
 	seq_putc(seq, '\n');
 	return 0;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2514adf5251..e81155d2f25 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3862,12 +3862,28 @@ static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
 	memset(&stats[items], 0, pad);
 }
 
+static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib,
+				      int items, int bytes, size_t syncpoff)
+{
+	int i;
+	int pad = bytes - sizeof(u64) * items;
+	BUG_ON(pad < 0);
+
+	/* Use put_unaligned() because stats may not be aligned for u64. */
+	put_unaligned(items, &stats[0]);
+	for (i = 1; i < items; i++)
+		put_unaligned(snmp_fold_field64(mib, i, syncpoff), &stats[i]);
+
+	memset(&stats[items], 0, pad);
+}
+
 static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
 			     int bytes)
 {
 	switch (attrtype) {
 	case IFLA_INET6_STATS:
-		__snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
+		__snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6,
+				     IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
 		break;
 	case IFLA_INET6_ICMP6STATS:
 		__snmp6_fill_stats(stats, (void __percpu **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 566798d69f3..d082eaeefa2 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -174,17 +174,28 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **mib,
 				const struct snmp_mib *itemlist)
 {
 	int i;
-	for (i=0; itemlist[i].name; i++)
+
+	for (i = 0; itemlist[i].name; i++)
 		seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
 			   snmp_fold_field(mib, itemlist[i].entry));
 }
 
+static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib,
+				  const struct snmp_mib *itemlist, size_t syncpoff)
+{
+	int i;
+
+	for (i = 0; itemlist[i].name; i++)
+		seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name,
+			   snmp_fold_field64(mib, itemlist[i].entry, syncpoff));
+}
+
 static int snmp6_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = (struct net *)seq->private;
 
-	snmp6_seq_show_item(seq, (void __percpu **)net->mib.ipv6_statistics,
-			    snmp6_ipstats_list);
+	snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics,
+			    snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
 	snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics,
 			    snmp6_icmp6_list);
 	snmp6_seq_show_icmpv6msg(seq,
-- 
cgit v1.2.3-70-g09d2


From d6bebca92c663fb216c072193945946f3807ca7f Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 29 Jun 2010 04:39:37 +0000
Subject: fragment: add fast path for in-order fragments

add fast path for in-order fragments

As fragments are sent in order by most OSes, such as Windows, Darwin and
FreeBSD, a new fragment is likely to belong at the end of the inet_frag_queue.
In the fast path, we check whether the skb at the end of the inet_frag_queue is
the prev we expect.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/inet_frag.h |    1 +
 net/ipv4/ip_fragment.c  |   12 ++++++++++++
 net/ipv6/reassembly.c   |   11 +++++++++++
 3 files changed, 24 insertions(+)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h |  1 +
 net/ipv4/ip_fragment.c  | 12 ++++++++++++
 net/ipv6/reassembly.c   | 11 +++++++++++
 3 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 39f2dc94390..16ff29a7bb3 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -20,6 +20,7 @@ struct inet_frag_queue {
 	atomic_t		refcnt;
 	struct timer_list	timer;      /* when will this queue expire? */
 	struct sk_buff		*fragments; /* list of received fragments */
+	struct sk_buff		*fragments_tail;
 	ktime_t			stamp;
 	int			len;        /* total length of orig datagram */
 	int			meat;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 858d34648ee..dd0dbf0c6b7 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -314,6 +314,7 @@ static int ip_frag_reinit(struct ipq *qp)
 	qp->q.len = 0;
 	qp->q.meat = 0;
 	qp->q.fragments = NULL;
+	qp->q.fragments_tail = NULL;
 	qp->iif = 0;
 
 	return 0;
@@ -386,6 +387,11 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	 * in the chain of fragments so far.  We must know where to put
 	 * this fragment, right?
 	 */
+	prev = qp->q.fragments_tail;
+	if (!prev || FRAG_CB(prev)->offset < offset) {
+		next = NULL;
+		goto found;
+	}
 	prev = NULL;
 	for (next = qp->q.fragments; next != NULL; next = next->next) {
 		if (FRAG_CB(next)->offset >= offset)
@@ -393,6 +399,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		prev = next;
 	}
 
+found:
 	/* We found where to put this one.  Check for overlap with
 	 * preceding fragment, and, if needed, align things so that
 	 * any overlaps are eliminated.
@@ -451,6 +458,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 
 	/* Insert this fragment in the chain of fragments. */
 	skb->next = next;
+	if (!next)
+		qp->q.fragments_tail = skb;
 	if (prev)
 		prev->next = skb;
 	else
@@ -504,6 +513,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 			goto out_nomem;
 
 		fp->next = head->next;
+		if (!fp->next)
+			qp->q.fragments_tail = fp;
 		prev->next = fp;
 
 		skb_morph(head, qp->q.fragments);
@@ -574,6 +585,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	iph->tot_len = htons(len);
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
 	qp->q.fragments = NULL;
+	qp->q.fragments_tail = NULL;
 	return 0;
 
 out_nomem:
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 0b97230a325..545c4141b75 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -333,6 +333,11 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	 * in the chain of fragments so far.  We must know where to put
 	 * this fragment, right?
 	 */
+	prev = fq->q.fragments_tail;
+	if (!prev || FRAG6_CB(prev)->offset < offset) {
+		next = NULL;
+		goto found;
+	}
 	prev = NULL;
 	for(next = fq->q.fragments; next != NULL; next = next->next) {
 		if (FRAG6_CB(next)->offset >= offset)
@@ -340,6 +345,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 		prev = next;
 	}
 
+found:
 	/* We found where to put this one.  Check for overlap with
 	 * preceding fragment, and, if needed, align things so that
 	 * any overlaps are eliminated.
@@ -397,6 +403,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 
 	/* Insert this fragment in the chain of fragments. */
 	skb->next = next;
+	if (!next)
+		fq->q.fragments_tail = skb;
 	if (prev)
 		prev->next = skb;
 	else
@@ -463,6 +471,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 			goto out_oom;
 
 		fp->next = head->next;
+		if (!fp->next)
+			fq->q.fragments_tail = fp;
 		prev->next = fp;
 
 		skb_morph(head, fq->q.fragments);
@@ -549,6 +559,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
 	rcu_read_unlock();
 	fq->q.fragments = NULL;
+	fq->q.fragments_tail = NULL;
 	return 1;
 
 out_oversize:
-- 
cgit v1.2.3-70-g09d2


From 1437ce3983bcbc0447a0dedcd644c14fe833d266 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 30 Jun 2010 02:44:32 +0000
Subject: ethtool: Change ethtool_op_set_flags to validate flags

ethtool_op_set_flags() does not check for unsupported flags, and has
no way of doing so.  This means it is not suitable for use as a
default implementation of ethtool_ops::set_flags.

Add a 'supported' parameter specifying the flags that the driver and
hardware support, validate the requested flags against this, and
change all current callers to pass this parameter.

Change some other trivial implementations of ethtool_ops::set_flags to
call ethtool_op_set_flags().

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Reviewed-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/cxgb4/cxgb4_main.c    |  9 +--------
 drivers/net/enic/enic_main.c      |  1 -
 drivers/net/ixgbe/ixgbe_ethtool.c |  5 ++++-
 drivers/net/mv643xx_eth.c         |  7 ++++++-
 drivers/net/myri10ge/myri10ge.c   | 10 +++++++---
 drivers/net/niu.c                 |  9 +--------
 drivers/net/sfc/ethtool.c         |  5 +----
 drivers/net/sky2.c                | 16 ++++++----------
 include/linux/ethtool.h           |  2 +-
 net/core/ethtool.c                | 28 +++++-----------------------
 10 files changed, 32 insertions(+), 60 deletions(-)

(limited to 'include')

diff --git a/drivers/net/cxgb4/cxgb4_main.c b/drivers/net/cxgb4/cxgb4_main.c
index 65281674de9..55a720e4abd 100644
--- a/drivers/net/cxgb4/cxgb4_main.c
+++ b/drivers/net/cxgb4/cxgb4_main.c
@@ -1799,14 +1799,7 @@ static int set_tso(struct net_device *dev, u32 value)
 
 static int set_flags(struct net_device *dev, u32 flags)
 {
-	if (flags & ~ETH_FLAG_RXHASH)
-		return -EOPNOTSUPP;
-
-	if (flags & ETH_FLAG_RXHASH)
-		dev->features |= NETIF_F_RXHASH;
-	else
-		dev->features &= ~NETIF_F_RXHASH;
-	return 0;
+	return ethtool_op_set_flags(dev, flags, ETH_FLAG_RXHASH);
 }
 
 static struct ethtool_ops cxgb_ethtool_ops = {
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 6c6795b90fa..77a7f87d498 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -365,7 +365,6 @@ static const struct ethtool_ops enic_ethtool_ops = {
 	.get_coalesce = enic_get_coalesce,
 	.set_coalesce = enic_set_coalesce,
 	.get_flags = ethtool_op_get_flags,
-	.set_flags = ethtool_op_set_flags,
 };
 
 static void enic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf)
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 873b45efca4..7d2e5ea2deb 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2205,8 +2205,11 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	bool need_reset = false;
+	int rc;
 
-	ethtool_op_set_flags(netdev, data);
+	rc = ethtool_op_set_flags(netdev, data, ETH_FLAG_LRO | ETH_FLAG_NTUPLE);
+	if (rc)
+		return rc;
 
 	/* if state changes we need to update adapter->flags and reset */
 	if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) {
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index e345ec8cb47..82b720f29c7 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -1636,6 +1636,11 @@ static void mv643xx_eth_get_ethtool_stats(struct net_device *dev,
 	}
 }
 
+static int mv643xx_eth_set_flags(struct net_device *dev, u32 data)
+{
+	return ethtool_op_set_flags(dev, data, ETH_FLAG_LRO);
+}
+
 static int mv643xx_eth_get_sset_count(struct net_device *dev, int sset)
 {
 	if (sset == ETH_SS_STATS)
@@ -1661,7 +1666,7 @@ static const struct ethtool_ops mv643xx_eth_ethtool_ops = {
 	.get_strings		= mv643xx_eth_get_strings,
 	.get_ethtool_stats	= mv643xx_eth_get_ethtool_stats,
 	.get_flags		= ethtool_op_get_flags,
-	.set_flags		= ethtool_op_set_flags,
+	.set_flags		= mv643xx_eth_set_flags,
 	.get_sset_count		= mv643xx_eth_get_sset_count,
 };
 
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index e0b47cc8a86..d771d1650d6 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -1730,8 +1730,7 @@ static int myri10ge_set_rx_csum(struct net_device *netdev, u32 csum_enabled)
 	if (csum_enabled)
 		mgp->csum_flag = MXGEFW_FLAGS_CKSUM;
 	else {
-		u32 flags = ethtool_op_get_flags(netdev);
-		err = ethtool_op_set_flags(netdev, (flags & ~ETH_FLAG_LRO));
+		netdev->features &= ~NETIF_F_LRO;
 		mgp->csum_flag = 0;
 
 	}
@@ -1900,6 +1899,11 @@ static u32 myri10ge_get_msglevel(struct net_device *netdev)
 	return mgp->msg_enable;
 }
 
+static int myri10ge_set_flags(struct net_device *netdev, u32 value)
+{
+	return ethtool_op_set_flags(netdev, value, ETH_FLAG_LRO);
+}
+
 static const struct ethtool_ops myri10ge_ethtool_ops = {
 	.get_settings = myri10ge_get_settings,
 	.get_drvinfo = myri10ge_get_drvinfo,
@@ -1920,7 +1924,7 @@ static const struct ethtool_ops myri10ge_ethtool_ops = {
 	.set_msglevel = myri10ge_set_msglevel,
 	.get_msglevel = myri10ge_get_msglevel,
 	.get_flags = ethtool_op_get_flags,
-	.set_flags = ethtool_op_set_flags
+	.set_flags = myri10ge_set_flags
 };
 
 static int myri10ge_allocate_rings(struct myri10ge_slice_state *ss)
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 63e8e3893bd..3d523cb7975 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -7920,14 +7920,7 @@ static int niu_phys_id(struct net_device *dev, u32 data)
 
 static int niu_set_flags(struct net_device *dev, u32 data)
 {
-	if (data & (ETH_FLAG_LRO | ETH_FLAG_NTUPLE))
-		return -EOPNOTSUPP;
-
-	if (data & ETH_FLAG_RXHASH)
-		dev->features |= NETIF_F_RXHASH;
-	else
-		dev->features &= ~NETIF_F_RXHASH;
-	return 0;
+	return ethtool_op_set_flags(dev, data, ETH_FLAG_RXHASH);
 }
 
 static const struct ethtool_ops niu_ethtool_ops = {
diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c
index 7693cfbf9cf..23372bf5cd5 100644
--- a/drivers/net/sfc/ethtool.c
+++ b/drivers/net/sfc/ethtool.c
@@ -551,10 +551,7 @@ static int efx_ethtool_set_flags(struct net_device *net_dev, u32 data)
 	struct efx_nic *efx = netdev_priv(net_dev);
 	u32 supported = efx->type->offload_features & ETH_FLAG_RXHASH;
 
-	if (data & ~supported)
-		return -EOPNOTSUPP;
-
-	return ethtool_op_set_flags(net_dev, data);
+	return ethtool_op_set_flags(net_dev, data, supported);
 }
 
 static void efx_ethtool_self_test(struct net_device *net_dev,
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 7985165e84f..c762c6ac055 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -4188,17 +4188,13 @@ static int sky2_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom
 static int sky2_set_flags(struct net_device *dev, u32 data)
 {
 	struct sky2_port *sky2 = netdev_priv(dev);
+	u32 supported =
+		(sky2->hw->flags & SKY2_HW_RSS_BROKEN) ? 0 : ETH_FLAG_RXHASH;
+	int rc;
 
-	if (data & ~ETH_FLAG_RXHASH)
-		return -EOPNOTSUPP;
-
-	if (data & ETH_FLAG_RXHASH) {
-		if (sky2->hw->flags & SKY2_HW_RSS_BROKEN)
-			return -EINVAL;
-
-		dev->features |= NETIF_F_RXHASH;
-	} else
-		dev->features &= ~NETIF_F_RXHASH;
+	rc = ethtool_op_set_flags(dev, data, supported);
+	if (rc)
+		return rc;
 
 	rx_set_rss(dev);
 
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 2c8af093d8b..084ddb3c803 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -457,7 +457,7 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data);
 u32 ethtool_op_get_ufo(struct net_device *dev);
 int ethtool_op_set_ufo(struct net_device *dev, u32 data);
 u32 ethtool_op_get_flags(struct net_device *dev);
-int ethtool_op_set_flags(struct net_device *dev, u32 data);
+int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported);
 void ethtool_ntuple_flush(struct net_device *dev);
 
 /**
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a0f4964033d..5d42fae520d 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -144,31 +144,13 @@ u32 ethtool_op_get_flags(struct net_device *dev)
 }
 EXPORT_SYMBOL(ethtool_op_get_flags);
 
-int ethtool_op_set_flags(struct net_device *dev, u32 data)
+int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
 {
-	const struct ethtool_ops *ops = dev->ethtool_ops;
-	unsigned long features = dev->features;
-
-	if (data & ETH_FLAG_LRO)
-		features |= NETIF_F_LRO;
-	else
-		features &= ~NETIF_F_LRO;
-
-	if (data & ETH_FLAG_NTUPLE) {
-		if (!ops->set_rx_ntuple)
-			return -EOPNOTSUPP;
-		features |= NETIF_F_NTUPLE;
-	} else {
-		/* safe to clear regardless */
-		features &= ~NETIF_F_NTUPLE;
-	}
-
-	if (data & ETH_FLAG_RXHASH)
-		features |= NETIF_F_RXHASH;
-	else
-		features &= ~NETIF_F_RXHASH;
+	if (data & ~supported)
+		return -EINVAL;
 
-	dev->features = features;
+	dev->features = ((dev->features & ~flags_dup_features) |
+			 (data & flags_dup_features));
 	return 0;
 }
 EXPORT_SYMBOL(ethtool_op_set_flags);
-- 
cgit v1.2.3-70-g09d2


From a5b6ee291e39e285e021cf251dbcf770c83cd74e Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 30 Jun 2010 05:05:23 +0000
Subject: ethtool: Add support for control of RX flow hash indirection

Many NICs use an indirection table to map an RX flow hash value to one
of an arbitrary number of queues (not necessarily a power of 2).  It
can be useful to remove some queues from this indirection table so
that they are only used for flows that are specifically filtered
there.  It may also be useful to weight the mapping to account for
user processes with the same CPU-affinity as the RX interrupts.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 15 ++++++++++
 net/core/ethtool.c      | 80 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 084ddb3c803..c1be61f3938 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -384,6 +384,15 @@ struct ethtool_rxnfc {
 	__u32				rule_locs[0];
 };
 
+struct ethtool_rxfh_indir {
+	__u32	cmd;
+	/* On entry, this is the array size of the user buffer.  On
+	 * return from ETHTOOL_GRXFHINDIR, this is the array size of
+	 * the hardware indirection table. */
+	__u32	size;
+	__u32	ring_index[0];	/* ring/queue index for each hash value */
+};
+
 struct ethtool_rx_ntuple_flow_spec {
 	__u32		 flow_type;
 	union {
@@ -576,6 +585,10 @@ struct ethtool_ops {
 	int	(*set_rx_ntuple)(struct net_device *,
 				 struct ethtool_rx_ntuple *);
 	int	(*get_rx_ntuple)(struct net_device *, u32 stringset, void *);
+	int	(*get_rxfh_indir)(struct net_device *,
+				  struct ethtool_rxfh_indir *);
+	int	(*set_rxfh_indir)(struct net_device *,
+				  const struct ethtool_rxfh_indir *);
 };
 #endif /* __KERNEL__ */
 
@@ -637,6 +650,8 @@ struct ethtool_ops {
 #define ETHTOOL_SRXNTUPLE	0x00000035 /* Add an n-tuple filter to device */
 #define ETHTOOL_GRXNTUPLE	0x00000036 /* Get n-tuple filters from device */
 #define ETHTOOL_GSSET_INFO	0x00000037 /* Get string set info */
+#define ETHTOOL_GRXFHINDIR	0x00000038 /* Get RX flow hash indir'n table */
+#define ETHTOOL_SRXFHINDIR	0x00000039 /* Set RX flow hash indir'n table */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 5d42fae520d..072d1d3796c 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -358,6 +358,80 @@ err_out:
 	return ret;
 }
 
+static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
+						     void __user *useraddr)
+{
+	struct ethtool_rxfh_indir *indir;
+	u32 table_size;
+	size_t full_size;
+	int ret;
+
+	if (!dev->ethtool_ops->get_rxfh_indir)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&table_size,
+			   useraddr + offsetof(struct ethtool_rxfh_indir, size),
+			   sizeof(table_size)))
+		return -EFAULT;
+
+	if (table_size >
+	    (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
+		return -ENOMEM;
+	full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
+	indir = kmalloc(full_size, GFP_USER);
+	if (!indir)
+		return -ENOMEM;
+
+	indir->cmd = ETHTOOL_GRXFHINDIR;
+	indir->size = table_size;
+	ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
+	if (ret)
+		goto out;
+
+	if (copy_to_user(useraddr, indir, full_size))
+		ret = -EFAULT;
+
+out:
+	kfree(indir);
+	return ret;
+}
+
+static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
+						     void __user *useraddr)
+{
+	struct ethtool_rxfh_indir *indir;
+	u32 table_size;
+	size_t full_size;
+	int ret;
+
+	if (!dev->ethtool_ops->set_rxfh_indir)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&table_size,
+			   useraddr + offsetof(struct ethtool_rxfh_indir, size),
+			   sizeof(table_size)))
+		return -EFAULT;
+
+	if (table_size >
+	    (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
+		return -ENOMEM;
+	full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
+	indir = kmalloc(full_size, GFP_USER);
+	if (!indir)
+		return -ENOMEM;
+
+	if (copy_from_user(indir, useraddr, full_size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = dev->ethtool_ops->set_rxfh_indir(dev, indir);
+
+out:
+	kfree(indir);
+	return ret;
+}
+
 static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
 			struct ethtool_rx_ntuple_flow_spec *spec,
 			struct ethtool_rx_ntuple_flow_spec_container *fsc)
@@ -1526,6 +1600,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GSSET_INFO:
 		rc = ethtool_get_sset_info(dev, useraddr);
 		break;
+	case ETHTOOL_GRXFHINDIR:
+		rc = ethtool_get_rxfh_indir(dev, useraddr);
+		break;
+	case ETHTOOL_SRXFHINDIR:
+		rc = ethtool_set_rxfh_indir(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3-70-g09d2


From a53f4b61a76a7e95139b8e8abba02e9bfe87a58a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 1 Jul 2010 12:45:34 -0700
Subject: Revert "net: Make accesses to ->br_port safe for sparse RCU"

This reverts commit 81bdf5bd7349bd4523538cbd7878f334bc2bfe14, which is
obsoleted by commit f350a0a87374 from the net tree.
---
 include/linux/if_bridge.h           | 3 ---
 net/bridge/br_fdb.c                 | 2 +-
 net/bridge/br_private.h             | 5 -----
 net/bridge/netfilter/ebt_redirect.c | 2 +-
 net/bridge/netfilter/ebt_ulog.c     | 4 ++--
 net/bridge/netfilter/ebtables.c     | 4 ++--
 net/netfilter/nfnetlink_log.c       | 4 ++--
 net/netfilter/nfnetlink_queue.c     | 4 ++--
 8 files changed, 10 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index d001d782922..938b7e81df9 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -101,9 +101,6 @@ struct __fdb_entry {
 
 #include <linux/netdevice.h>
 
-/* br_handle_frame_hook() needs the following forward declaration. */
-struct net_bridge_port;
-
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
 extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
 					       struct sk_buff *skb);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 09c479e0562..b01dde35a69 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -244,7 +244,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
 		return 0;
 
 	rcu_read_lock();
-	fdb = __br_fdb_get(br_port(dev)->br, addr);
+	fdb = __br_fdb_get(dev->br_port->br, addr);
 	ret = fdb && fdb->dst->dev != dev &&
 		fdb->dst->state == BR_STATE_FORWARDING;
 	rcu_read_unlock();
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 3255188355b..0f4a74bc6a9 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -268,11 +268,6 @@ static inline int br_is_root_bridge(const struct net_bridge *br)
 	return !memcmp(&br->bridge_id, &br->designated_root, 8);
 }
 
-static inline struct net_bridge_port *br_port(const struct net_device *dev)
-{
-	return rcu_dereference(dev->br_port);
-}
-
 /* br_device.c */
 extern void br_dev_setup(struct net_device *dev);
 extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index a39df0ae0f8..9e19166ba45 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -25,7 +25,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 
 	if (par->hooknum != NF_BR_BROUTING)
 		memcpy(eth_hdr(skb)->h_dest,
-		       br_port(par->in)->br->dev->dev_addr, ETH_ALEN);
+		       par->in->br_port->br->dev->dev_addr, ETH_ALEN);
 	else
 		memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN);
 	skb->pkt_type = PACKET_HOST;
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 5a4996bbb09..ae3c7cef148 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -178,7 +178,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 		strcpy(pm->physindev, in->name);
 		/* If in isn't a bridge, then physindev==indev */
 		if (in->br_port)
-			strcpy(pm->indev, br_port(in)->br->dev->name);
+			strcpy(pm->indev, in->br_port->br->dev->name);
 		else
 			strcpy(pm->indev, in->name);
 	} else
@@ -187,7 +187,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 	if (out) {
 		/* If out exists, then out is a bridge port */
 		strcpy(pm->physoutdev, out->name);
-		strcpy(pm->outdev, br_port(out)->br->dev->name);
+		strcpy(pm->outdev, out->br_port->br->dev->name);
 	} else
 		pm->outdev[0] = pm->physoutdev[0] = '\0';
 
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 4c2aab8cbfc..59ca00e40de 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -141,10 +141,10 @@ ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h,
 	if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT))
 		return 1;
 	if ((!in || !in->br_port) ? 0 : FWINV2(ebt_dev_check(
-	   e->logical_in, br_port(in)->br->dev), EBT_ILOGICALIN))
+	   e->logical_in, in->br_port->br->dev), EBT_ILOGICALIN))
 		return 1;
 	if ((!out || !out->br_port) ? 0 : FWINV2(ebt_dev_check(
-	   e->logical_out, br_port(out)->br->dev), EBT_ILOGICALOUT))
+	   e->logical_out, out->br_port->br->dev), EBT_ILOGICALOUT))
 		return 1;
 
 	if (e->bitmask & EBT_SOURCEMAC) {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 78957cfa3bd..fc9a211e629 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -404,7 +404,7 @@ __build_packet_message(struct nfulnl_instance *inst,
 				     htonl(indev->ifindex));
 			/* this is the bridge group "brX" */
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
-				     htonl(br_port(indev)->br->dev->ifindex));
+				     htonl(indev->br_port->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is bridge group, we need to look for
 			 * physical device (when called from ipv4) */
@@ -431,7 +431,7 @@ __build_packet_message(struct nfulnl_instance *inst,
 				     htonl(outdev->ifindex));
 			/* this is the bridge group "brX" */
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
-				     htonl(br_port(outdev)->br->dev->ifindex));
+				     htonl(outdev->br_port->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is a bridge group, we need to look
 			 * for physical device (when called from ipv4) */
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index c3c17498298..12e1ab37fcd 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -297,7 +297,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 				     htonl(indev->ifindex));
 			/* this is the bridge group "brX" */
 			NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
-				     htonl(br_port(indev)->br->dev->ifindex));
+				     htonl(indev->br_port->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is bridge group, we need to look for
 			 * physical device (when called from ipv4) */
@@ -322,7 +322,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 				     htonl(outdev->ifindex));
 			/* this is the bridge group "brX" */
 			NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
-				     htonl(br_port(outdev)->br->dev->ifindex));
+				     htonl(outdev->br_port->br->dev->ifindex));
 		} else {
 			/* Case 2: outdev is bridge group, we need to look for
 			 * physical output device (when called from ipv4) */
-- 
cgit v1.2.3-70-g09d2


From b9c2c9ae882f058084e13e339925dbf8d2d20271 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Thu, 1 Jul 2010 16:48:09 -0700
Subject: drm: add per-event vblank event trace points

Allows us to track each process that requests and completes events.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_irq.c   |  8 +++++++
 drivers/gpu/drm/drm_trace.h | 57 ++++++++++++++++++++++++++++++++++-----------
 include/drm/drmP.h          |  2 ++
 3 files changed, 53 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 6d201a89441..c2ecb3ed009 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -588,6 +588,7 @@ static int drm_queue_vblank_event(struct drm_device *dev, int pipe,
 		return -ENOMEM;
 
 	e->pipe = pipe;
+	e->base.pid = current->pid;
 	e->event.base.type = DRM_EVENT_VBLANK;
 	e->event.base.length = sizeof e->event;
 	e->event.user_data = vblwait->request.signal;
@@ -615,6 +616,9 @@ static int drm_queue_vblank_event(struct drm_device *dev, int pipe,
 	DRM_DEBUG("event on vblank count %d, current %d, crtc %d\n",
 		  vblwait->request.sequence, seq, pipe);
 
+	trace_drm_vblank_event_queued(current->pid, pipe,
+				      vblwait->request.sequence);
+
 	e->event.sequence = vblwait->request.sequence;
 	if ((seq - vblwait->request.sequence) <= (1 << 23)) {
 		e->event.tv_sec = now.tv_sec;
@@ -622,6 +626,8 @@ static int drm_queue_vblank_event(struct drm_device *dev, int pipe,
 		drm_vblank_put(dev, e->pipe);
 		list_add_tail(&e->base.link, &e->base.file_priv->event_list);
 		wake_up_interruptible(&e->base.file_priv->event_wait);
+		trace_drm_vblank_event_delivered(current->pid, pipe,
+						 vblwait->request.sequence);
 	} else {
 		list_add_tail(&e->base.link, &dev->vblank_event_list);
 	}
@@ -752,6 +758,8 @@ void drm_handle_vblank_events(struct drm_device *dev, int crtc)
 		drm_vblank_put(dev, e->pipe);
 		list_move_tail(&e->base.link, &e->base.file_priv->event_list);
 		wake_up_interruptible(&e->base.file_priv->event_wait);
+		trace_drm_vblank_event_delivered(e->base.pid, e->pipe,
+						 e->event.sequence);
 	}
 
 	spin_unlock_irqrestore(&dev->event_lock, flags);
diff --git a/drivers/gpu/drm/drm_trace.h b/drivers/gpu/drm/drm_trace.h
index 8a92683f14e..03ea964aa60 100644
--- a/drivers/gpu/drm/drm_trace.h
+++ b/drivers/gpu/drm/drm_trace.h
@@ -11,22 +11,51 @@
 #define TRACE_INCLUDE_FILE drm_trace
 
 TRACE_EVENT(drm_vblank_event,
+	    TP_PROTO(int crtc, unsigned int seq),
+	    TP_ARGS(crtc, seq),
+	    TP_STRUCT__entry(
+		    __field(int, crtc)
+		    __field(unsigned int, seq)
+		    ),
+	    TP_fast_assign(
+		    __entry->crtc = crtc;
+		    __entry->seq = seq;
+		    ),
+	    TP_printk("crtc=%d, seq=%d", __entry->crtc, __entry->seq)
+);
 
-	TP_PROTO(int crtc, unsigned int seq),
-
-	TP_ARGS(crtc, seq),
-
-	TP_STRUCT__entry(
-		__field(int, crtc)
-		__field(unsigned int, seq)
-		),
-
-	TP_fast_assign(
-		__entry->crtc = crtc;
-		__entry->seq = seq;
-		),
+TRACE_EVENT(drm_vblank_event_queued,
+	    TP_PROTO(pid_t pid, int crtc, unsigned int seq),
+	    TP_ARGS(pid, crtc, seq),
+	    TP_STRUCT__entry(
+		    __field(pid_t, pid)
+		    __field(int, crtc)
+		    __field(unsigned int, seq)
+		    ),
+	    TP_fast_assign(
+		    __entry->pid = pid;
+		    __entry->crtc = crtc;
+		    __entry->seq = seq;
+		    ),
+	    TP_printk("pid=%d, crtc=%d, seq=%d", __entry->pid, __entry->crtc, \
+		      __entry->seq)
+);
 
-	TP_printk("crtc=%d, seq=%d", __entry->crtc, __entry->seq)
+TRACE_EVENT(drm_vblank_event_delivered,
+	    TP_PROTO(pid_t pid, int crtc, unsigned int seq),
+	    TP_ARGS(pid, crtc, seq),
+	    TP_STRUCT__entry(
+		    __field(pid_t, pid)
+		    __field(int, crtc)
+		    __field(unsigned int, seq)
+		    ),
+	    TP_fast_assign(
+		    __entry->pid = pid;
+		    __entry->crtc = crtc;
+		    __entry->seq = seq;
+		    ),
+	    TP_printk("pid=%d, crtc=%d, seq=%d", __entry->pid, __entry->crtc, \
+		      __entry->seq)
 );
 
 #endif /* _DRM_TRACE_H_ */
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index c1b987158df..8364a705f12 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -403,6 +403,8 @@ struct drm_pending_event {
 	struct drm_event *event;
 	struct list_head link;
 	struct drm_file *file_priv;
+	pid_t pid; /* pid of requester, no guarantee it's valid by the time
+		      we deliver the event, for tracing only */
 	void (*destroy)(struct drm_pending_event *event);
 };
 
-- 
cgit v1.2.3-70-g09d2


From ad72cf9885c536e3adae03f8337557ac9dd1e4bb Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 2 Jul 2010 10:03:52 +0200
Subject: libata: take advantage of cmwq and remove concurrency limitations

libata has two concurrency related limitations.

a. ata_wq, which is used for polling PIO, has a single thread per CPU.
   If there are multiple devices doing polling PIO on the same CPU,
   they can't be executed simultaneously.

b. ata_aux_wq, which is used for SCSI probing, has a single thread.  In
   cases where SCSI probing is stalled for an extended period of time,
   which is possible for ATAPI devices, this will stall all probing.

#a is solved by increasing maximum concurrency of ata_wq.  Please note
that polling PIO might be used under allocation path and thus needs to
be served by a separate wq with a rescuer.

#b is solved by using the default wq instead and achieving exclusion
via per-port mutex.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Jeff Garzik <jgarzik@pobox.com>
---
 drivers/ata/libata-core.c | 20 +++++---------------
 drivers/ata/libata-eh.c   |  4 ++--
 drivers/ata/libata-scsi.c | 10 ++++++----
 drivers/ata/libata-sff.c  |  9 +--------
 drivers/ata/libata.h      |  1 -
 include/linux/libata.h    |  1 +
 6 files changed, 15 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index ddf8e486278..4f78741692d 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -98,8 +98,6 @@ static unsigned long ata_dev_blacklisted(const struct ata_device *dev);
 
 unsigned int ata_print_id = 1;
 
-struct workqueue_struct *ata_aux_wq;
-
 struct ata_force_param {
 	const char	*name;
 	unsigned int	cbl;
@@ -5611,6 +5609,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
 	ap->msg_enable = ATA_MSG_DRV | ATA_MSG_ERR | ATA_MSG_WARN;
 #endif
 
+	mutex_init(&ap->scsi_scan_mutex);
 	INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
 	INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
 	INIT_LIST_HEAD(&ap->eh_done_q);
@@ -6549,29 +6548,20 @@ static int __init ata_init(void)
 
 	ata_parse_force_param();
 
-	ata_aux_wq = create_singlethread_workqueue("ata_aux");
-	if (!ata_aux_wq)
-		goto fail;
-
 	rc = ata_sff_init();
-	if (rc)
-		goto fail;
+	if (rc) {
+		kfree(ata_force_tbl);
+		return rc;
+	}
 
 	printk(KERN_DEBUG "libata version " DRV_VERSION " loaded.\n");
 	return 0;
-
-fail:
-	kfree(ata_force_tbl);
-	if (ata_aux_wq)
-		destroy_workqueue(ata_aux_wq);
-	return rc;
 }
 
 static void __exit ata_exit(void)
 {
 	ata_sff_exit();
 	kfree(ata_force_tbl);
-	destroy_workqueue(ata_aux_wq);
 }
 
 subsys_initcall(ata_init);
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index f77a67303f8..4d2af824dd2 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -727,7 +727,7 @@ void ata_scsi_error(struct Scsi_Host *host)
 	if (ap->pflags & ATA_PFLAG_LOADING)
 		ap->pflags &= ~ATA_PFLAG_LOADING;
 	else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
-		queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0);
+		schedule_delayed_work(&ap->hotplug_task, 0);
 
 	if (ap->pflags & ATA_PFLAG_RECOVERED)
 		ata_port_printk(ap, KERN_INFO, "EH complete\n");
@@ -2944,7 +2944,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
 			ehc->i.flags |= ATA_EHI_SETMODE;
 
 			/* schedule the scsi_rescan_device() here */
-			queue_work(ata_aux_wq, &(ap->scsi_rescan_task));
+			schedule_work(&(ap->scsi_rescan_task));
 		} else if (dev->class == ATA_DEV_UNKNOWN &&
 			   ehc->tries[dev->devno] &&
 			   ata_class_enabled(ehc->classes[dev->devno])) {
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index a54273d2c3c..d75c9c479d1 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3435,7 +3435,7 @@ void ata_scsi_scan_host(struct ata_port *ap, int sync)
 				"                  switching to async\n");
 	}
 
-	queue_delayed_work(ata_aux_wq, &ap->hotplug_task,
+	queue_delayed_work(system_long_wq, &ap->hotplug_task,
 			   round_jiffies_relative(HZ));
 }
 
@@ -3582,6 +3582,7 @@ void ata_scsi_hotplug(struct work_struct *work)
 	}
 
 	DPRINTK("ENTER\n");
+	mutex_lock(&ap->scsi_scan_mutex);
 
 	/* Unplug detached devices.  We cannot use link iterator here
 	 * because PMP links have to be scanned even if PMP is
@@ -3595,6 +3596,7 @@ void ata_scsi_hotplug(struct work_struct *work)
 	/* scan for new ones */
 	ata_scsi_scan_host(ap, 0);
 
+	mutex_unlock(&ap->scsi_scan_mutex);
 	DPRINTK("EXIT\n");
 }
 
@@ -3673,9 +3675,7 @@ static int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
  *	@work: Pointer to ATA port to perform scsi_rescan_device()
  *
  *	After ATA pass thru (SAT) commands are executed successfully,
- *	libata need to propagate the changes to SCSI layer.  This
- *	function must be executed from ata_aux_wq such that sdev
- *	attach/detach don't race with rescan.
+ *	libata need to propagate the changes to SCSI layer.
  *
  *	LOCKING:
  *	Kernel thread context (may sleep).
@@ -3688,6 +3688,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
 	struct ata_device *dev;
 	unsigned long flags;
 
+	mutex_lock(&ap->scsi_scan_mutex);
 	spin_lock_irqsave(ap->lock, flags);
 
 	ata_for_each_link(link, ap, EDGE) {
@@ -3707,6 +3708,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
 	}
 
 	spin_unlock_irqrestore(ap->lock, flags);
+	mutex_unlock(&ap->scsi_scan_mutex);
 }
 
 /**
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index efa4a18cfb9..674c1436491 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -3318,14 +3318,7 @@ void ata_sff_port_init(struct ata_port *ap)
 
 int __init ata_sff_init(void)
 {
-	/*
-	 * FIXME: In UP case, there is only one workqueue thread and if you
-	 * have more than one PIO device, latency is bloody awful, with
-	 * occasional multi-second "hiccups" as one PIO device waits for
-	 * another.  It's an ugly wart that users DO occasionally complain
-	 * about; luckily most users have at most one PIO polled device.
-	 */
-	ata_sff_wq = create_workqueue("ata_sff");
+	ata_sff_wq = alloc_workqueue("ata_sff", WQ_RESCUER, WQ_MAX_ACTIVE);
 	if (!ata_sff_wq)
 		return -ENOMEM;
 
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 4b84ed60324..9ce1ecc63e3 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -54,7 +54,6 @@ enum {
 };
 
 extern unsigned int ata_print_id;
-extern struct workqueue_struct *ata_aux_wq;
 extern int atapi_passthru16;
 extern int libata_fua;
 extern int libata_noacpi;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index b85f3ff34d7..f010f18a0f8 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -751,6 +751,7 @@ struct ata_port {
 	struct ata_host		*host;
 	struct device 		*dev;
 
+	struct mutex		scsi_scan_mutex;
 	struct delayed_work	hotplug_task;
 	struct work_struct	scsi_rescan_task;
 
-- 
cgit v1.2.3-70-g09d2


From bdbc5dd7de5d07d6c9d3536e598956165a031d4c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 2 Jul 2010 10:03:51 +0200
Subject: workqueue: prepare for WQ_UNBOUND implementation

In preparation of WQ_UNBOUND addition, make the following changes.

* Add WORK_CPU_* constants for pseudo cpu id numbers used (currently
  only WORK_CPU_NONE) and use them instead of NR_CPUS.  This is to
  allow another pseudo cpu id for unbound cpu.

* Reorder WQ_* flags.

* Make workqueue_struct->cpu_wq a union which contains a percpu
  pointer, regular pointer and an unsigned long value and use
  kzalloc/kfree() in UP allocation path.  This will be used to
  implement unbound workqueues which will use only one cwq on SMPs.

* Move alloc_cwqs() allocation after initialization of wq fields, so
  that alloc_cwqs() has access to wq->flags.

* Trivial relocation of wq local variables in freeze functions.

These changes don't cause any functional change.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 10 ++++--
 kernel/workqueue.c        | 83 ++++++++++++++++++++++++-----------------------
 2 files changed, 50 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 3f36d37ac5b..139069a6286 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -50,6 +50,10 @@ enum {
 	WORK_NR_COLORS		= (1 << WORK_STRUCT_COLOR_BITS) - 1,
 	WORK_NO_COLOR		= WORK_NR_COLORS,
 
+	/* special cpu IDs */
+	WORK_CPU_NONE		= NR_CPUS,
+	WORK_CPU_LAST		= WORK_CPU_NONE,
+
 	/*
 	 * Reserve 6 bits off of cwq pointer w/ debugobjects turned
 	 * off.  This makes cwqs aligned to 64 bytes which isn't too
@@ -60,7 +64,7 @@ enum {
 
 	WORK_STRUCT_FLAG_MASK	= (1UL << WORK_STRUCT_FLAG_BITS) - 1,
 	WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
-	WORK_STRUCT_NO_CPU	= NR_CPUS << WORK_STRUCT_FLAG_BITS,
+	WORK_STRUCT_NO_CPU	= WORK_CPU_NONE << WORK_STRUCT_FLAG_BITS,
 
 	/* bit mask for work_busy() return values */
 	WORK_BUSY_PENDING	= 1 << 0,
@@ -227,9 +231,9 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 	clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))
 
 enum {
-	WQ_FREEZEABLE		= 1 << 0, /* freeze during suspend */
+	WQ_NON_REENTRANT	= 1 << 0, /* guarantee non-reentrance */
 	WQ_SINGLE_CPU		= 1 << 1, /* only single cpu at a time */
-	WQ_NON_REENTRANT	= 1 << 2, /* guarantee non-reentrance */
+	WQ_FREEZEABLE		= 1 << 2, /* freeze during suspend */
 	WQ_RESCUER		= 1 << 3, /* has an rescue worker */
 	WQ_HIGHPRI		= 1 << 4, /* high priority */
 	WQ_CPU_INTENSIVE	= 1 << 5, /* cpu instensive workqueue */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2eb9fbddf5c..a105ddf55f7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -190,7 +190,11 @@ struct wq_flusher {
  */
 struct workqueue_struct {
 	unsigned int		flags;		/* I: WQ_* flags */
-	struct cpu_workqueue_struct *cpu_wq;	/* I: cwq's */
+	union {
+		struct cpu_workqueue_struct __percpu	*pcpu;
+		struct cpu_workqueue_struct		*single;
+		unsigned long				v;
+	} cpu_wq;				/* I: cwq's */
 	struct list_head	list;		/* W: list of all workqueues */
 
 	struct mutex		flush_mutex;	/* protects wq flushing */
@@ -362,7 +366,11 @@ static atomic_t *get_gcwq_nr_running(unsigned int cpu)
 static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
 					    struct workqueue_struct *wq)
 {
-	return per_cpu_ptr(wq->cpu_wq, cpu);
+#ifndef CONFIG_SMP
+	return wq->cpu_wq.single;
+#else
+	return per_cpu_ptr(wq->cpu_wq.pcpu, cpu);
+#endif
 }
 
 static unsigned int work_color_to_flags(int color)
@@ -442,7 +450,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
 		return ((struct cpu_workqueue_struct *)data)->gcwq;
 
 	cpu = data >> WORK_STRUCT_FLAG_BITS;
-	if (cpu == NR_CPUS)
+	if (cpu == WORK_CPU_NONE)
 		return NULL;
 
 	BUG_ON(cpu >= nr_cpu_ids);
@@ -846,7 +854,7 @@ static void cwq_unbind_single_cpu(struct cpu_workqueue_struct *cwq)
 	 */
 	if (likely(!(gcwq->flags & GCWQ_FREEZING))) {
 		smp_wmb();	/* paired with cmpxchg() in __queue_work() */
-		wq->single_cpu = NR_CPUS;
+		wq->single_cpu = WORK_CPU_NONE;
 	}
 }
 
@@ -904,7 +912,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 		 */
 	retry:
 		cpu = wq->single_cpu;
-		arbitrate = cpu == NR_CPUS;
+		arbitrate = cpu == WORK_CPU_NONE;
 		if (arbitrate)
 			cpu = req_cpu;
 
@@ -918,7 +926,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 		 * visible on the new cpu after this point.
 		 */
 		if (arbitrate)
-			cmpxchg(&wq->single_cpu, NR_CPUS, cpu);
+			cmpxchg(&wq->single_cpu, WORK_CPU_NONE, cpu);
 
 		if (unlikely(wq->single_cpu != cpu)) {
 			spin_unlock_irqrestore(&gcwq->lock, flags);
@@ -2572,7 +2580,7 @@ int keventd_up(void)
 	return system_wq != NULL;
 }
 
-static struct cpu_workqueue_struct *alloc_cwqs(void)
+static int alloc_cwqs(struct workqueue_struct *wq)
 {
 	/*
 	 * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
@@ -2582,40 +2590,36 @@ static struct cpu_workqueue_struct *alloc_cwqs(void)
 	const size_t size = sizeof(struct cpu_workqueue_struct);
 	const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
 				   __alignof__(unsigned long long));
-	struct cpu_workqueue_struct *cwqs;
 #ifndef CONFIG_SMP
 	void *ptr;
 
 	/*
-	 * On UP, percpu allocator doesn't honor alignment parameter
-	 * and simply uses arch-dependent default.  Allocate enough
-	 * room to align cwq and put an extra pointer at the end
-	 * pointing back to the originally allocated pointer which
-	 * will be used for free.
-	 *
-	 * FIXME: This really belongs to UP percpu code.  Update UP
-	 * percpu code to honor alignment and remove this ugliness.
+	 * Allocate enough room to align cwq and put an extra pointer
+	 * at the end pointing back to the originally allocated
+	 * pointer which will be used for free.
 	 */
-	ptr = __alloc_percpu(size + align + sizeof(void *), 1);
-	cwqs = PTR_ALIGN(ptr, align);
-	*(void **)per_cpu_ptr(cwqs + 1, 0) = ptr;
+	ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL);
+	if (ptr) {
+		wq->cpu_wq.single = PTR_ALIGN(ptr, align);
+		*(void **)(wq->cpu_wq.single + 1) = ptr;
+	}
 #else
-	/* On SMP, percpu allocator can do it itself */
-	cwqs = __alloc_percpu(size, align);
+	/* On SMP, percpu allocator can align itself */
+	wq->cpu_wq.pcpu = __alloc_percpu(size, align);
 #endif
 	/* just in case, make sure it's actually aligned */
-	BUG_ON(!IS_ALIGNED((unsigned long)cwqs, align));
-	return cwqs;
+	BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align));
+	return wq->cpu_wq.v ? 0 : -ENOMEM;
 }
 
-static void free_cwqs(struct cpu_workqueue_struct *cwqs)
+static void free_cwqs(struct workqueue_struct *wq)
 {
 #ifndef CONFIG_SMP
 	/* on UP, the pointer to free is stored right after the cwq */
-	if (cwqs)
-		free_percpu(*(void **)per_cpu_ptr(cwqs + 1, 0));
+	if (wq->cpu_wq.single)
+		kfree(*(void **)(wq->cpu_wq.single + 1));
 #else
-	free_percpu(cwqs);
+	free_percpu(wq->cpu_wq.pcpu);
 #endif
 }
 
@@ -2645,22 +2649,21 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
 	if (!wq)
 		goto err;
 
-	wq->cpu_wq = alloc_cwqs();
-	if (!wq->cpu_wq)
-		goto err;
-
 	wq->flags = flags;
 	wq->saved_max_active = max_active;
 	mutex_init(&wq->flush_mutex);
 	atomic_set(&wq->nr_cwqs_to_flush, 0);
 	INIT_LIST_HEAD(&wq->flusher_queue);
 	INIT_LIST_HEAD(&wq->flusher_overflow);
-	wq->single_cpu = NR_CPUS;
+	wq->single_cpu = WORK_CPU_NONE;
 
 	wq->name = name;
 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
 	INIT_LIST_HEAD(&wq->list);
 
+	if (alloc_cwqs(wq) < 0)
+		goto err;
+
 	for_each_possible_cpu(cpu) {
 		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
 		struct global_cwq *gcwq = get_gcwq(cpu);
@@ -2710,7 +2713,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
 	return wq;
 err:
 	if (wq) {
-		free_cwqs(wq->cpu_wq);
+		free_cwqs(wq);
 		free_cpumask_var(wq->mayday_mask);
 		kfree(wq->rescuer);
 		kfree(wq);
@@ -2755,7 +2758,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 		free_cpumask_var(wq->mayday_mask);
 	}
 
-	free_cwqs(wq->cpu_wq);
+	free_cwqs(wq);
 	kfree(wq);
 }
 EXPORT_SYMBOL_GPL(destroy_workqueue);
@@ -2821,13 +2824,13 @@ EXPORT_SYMBOL_GPL(workqueue_congested);
  * @work: the work of interest
  *
  * RETURNS:
- * CPU number if @work was ever queued.  NR_CPUS otherwise.
+ * CPU number if @work was ever queued.  WORK_CPU_NONE otherwise.
  */
 unsigned int work_cpu(struct work_struct *work)
 {
 	struct global_cwq *gcwq = get_work_gcwq(work);
 
-	return gcwq ? gcwq->cpu : NR_CPUS;
+	return gcwq ? gcwq->cpu : WORK_CPU_NONE;
 }
 EXPORT_SYMBOL_GPL(work_cpu);
 
@@ -3300,7 +3303,6 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
  */
 void freeze_workqueues_begin(void)
 {
-	struct workqueue_struct *wq;
 	unsigned int cpu;
 
 	spin_lock(&workqueue_lock);
@@ -3310,6 +3312,7 @@ void freeze_workqueues_begin(void)
 
 	for_each_possible_cpu(cpu) {
 		struct global_cwq *gcwq = get_gcwq(cpu);
+		struct workqueue_struct *wq;
 
 		spin_lock_irq(&gcwq->lock);
 
@@ -3344,7 +3347,6 @@ void freeze_workqueues_begin(void)
  */
 bool freeze_workqueues_busy(void)
 {
-	struct workqueue_struct *wq;
 	unsigned int cpu;
 	bool busy = false;
 
@@ -3353,6 +3355,7 @@ bool freeze_workqueues_busy(void)
 	BUG_ON(!workqueue_freezing);
 
 	for_each_possible_cpu(cpu) {
+		struct workqueue_struct *wq;
 		/*
 		 * nr_active is monotonically decreasing.  It's safe
 		 * to peek without lock.
@@ -3386,7 +3389,6 @@ out_unlock:
  */
 void thaw_workqueues(void)
 {
-	struct workqueue_struct *wq;
 	unsigned int cpu;
 
 	spin_lock(&workqueue_lock);
@@ -3396,6 +3398,7 @@ void thaw_workqueues(void)
 
 	for_each_possible_cpu(cpu) {
 		struct global_cwq *gcwq = get_gcwq(cpu);
+		struct workqueue_struct *wq;
 
 		spin_lock_irq(&gcwq->lock);
 
@@ -3443,7 +3446,7 @@ void __init init_workqueues(void)
 	 * sure cpu number won't overflow into kernel pointer area so
 	 * that they can be distinguished.
 	 */
-	BUILD_BUG_ON(NR_CPUS << WORK_STRUCT_FLAG_BITS >= PAGE_OFFSET);
+	BUILD_BUG_ON(WORK_CPU_LAST << WORK_STRUCT_FLAG_BITS >= PAGE_OFFSET);
 
 	hotcpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE);
 
-- 
cgit v1.2.3-70-g09d2


From f34217977d717385a3e9fd7018ac39fade3964c0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 2 Jul 2010 10:03:51 +0200
Subject: workqueue: implement unbound workqueue

This patch implements unbound workqueue which can be specified with
WQ_UNBOUND flag on creation.  An unbound workqueue has the following
properties.

* It uses a dedicated gcwq with a pseudo CPU number WORK_CPU_UNBOUND.
  This gcwq is always online and disassociated.

* Workers are not bound to any CPU and not concurrency managed.  Works
  are dispatched to workers as soon as possible and the only applied
  limitation is @max_active.  IOW, all unbound workqueues are
  implicitly high priority.

Unbound workqueues can be used as a simple execution context provider.
Contexts unbound to any cpu are served as soon as possible.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: David Howells <dhowells@redhat.com>
---
 include/linux/workqueue.h |  15 +++-
 kernel/workqueue.c        | 218 +++++++++++++++++++++++++++++++++-------------
 2 files changed, 173 insertions(+), 60 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 139069a6286..67ce734747f 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -51,7 +51,8 @@ enum {
 	WORK_NO_COLOR		= WORK_NR_COLORS,
 
 	/* special cpu IDs */
-	WORK_CPU_NONE		= NR_CPUS,
+	WORK_CPU_UNBOUND	= NR_CPUS,
+	WORK_CPU_NONE		= NR_CPUS + 1,
 	WORK_CPU_LAST		= WORK_CPU_NONE,
 
 	/*
@@ -237,11 +238,17 @@ enum {
 	WQ_RESCUER		= 1 << 3, /* has an rescue worker */
 	WQ_HIGHPRI		= 1 << 4, /* high priority */
 	WQ_CPU_INTENSIVE	= 1 << 5, /* cpu instensive workqueue */
+	WQ_UNBOUND		= 1 << 6, /* not bound to any cpu */
 
 	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
+	WQ_MAX_UNBOUND_PER_CPU	= 4,	  /* 4 * #cpus for unbound wq */
 	WQ_DFL_ACTIVE		= WQ_MAX_ACTIVE / 2,
 };
 
+/* unbound wq's aren't per-cpu, scale max_active according to #cpus */
+#define WQ_UNBOUND_MAX_ACTIVE	\
+	max_t(int, WQ_MAX_ACTIVE, num_possible_cpus() * WQ_MAX_UNBOUND_PER_CPU)
+
 /*
  * System-wide workqueues which are always present.
  *
@@ -256,10 +263,16 @@ enum {
  * system_nrt_wq is non-reentrant and guarantees that any given work
  * item is never executed in parallel by multiple CPUs.  Queue
  * flushing might take relatively long.
+ *
+ * system_unbound_wq is unbound workqueue.  Workers are not bound to
+ * any specific CPU, not concurrency managed, and all queued works are
+ * executed immediately as long as max_active limit is not reached and
+ * resources are available.
  */
 extern struct workqueue_struct *system_wq;
 extern struct workqueue_struct *system_long_wq;
 extern struct workqueue_struct *system_nrt_wq;
+extern struct workqueue_struct *system_unbound_wq;
 
 extern struct workqueue_struct *
 __alloc_workqueue_key(const char *name, unsigned int flags, int max_active,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a105ddf55f7..4608563cdd6 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -53,9 +53,10 @@ enum {
 	WORKER_ROGUE		= 1 << 4,	/* not bound to any cpu */
 	WORKER_REBIND		= 1 << 5,	/* mom is home, come back */
 	WORKER_CPU_INTENSIVE	= 1 << 6,	/* cpu intensive */
+	WORKER_UNBOUND		= 1 << 7,	/* worker is unbound */
 
 	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_ROGUE | WORKER_REBIND |
-				  WORKER_CPU_INTENSIVE,
+				  WORKER_CPU_INTENSIVE | WORKER_UNBOUND,
 
 	/* gcwq->trustee_state */
 	TRUSTEE_START		= 0,		/* start */
@@ -96,7 +97,7 @@ enum {
  * X: During normal operation, modification requires gcwq->lock and
  *    should be done only from local cpu.  Either disabling preemption
  *    on local cpu or grabbing gcwq->lock is enough for read access.
- *    While trustee is in charge, it's identical to L.
+ *    If GCWQ_DISASSOCIATED is set, it's identical to L.
  *
  * F: wq->flush_mutex protected.
  *
@@ -220,14 +221,52 @@ struct workqueue_struct {
 struct workqueue_struct *system_wq __read_mostly;
 struct workqueue_struct *system_long_wq __read_mostly;
 struct workqueue_struct *system_nrt_wq __read_mostly;
+struct workqueue_struct *system_unbound_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_wq);
 EXPORT_SYMBOL_GPL(system_long_wq);
 EXPORT_SYMBOL_GPL(system_nrt_wq);
+EXPORT_SYMBOL_GPL(system_unbound_wq);
 
 #define for_each_busy_worker(worker, i, pos, gcwq)			\
 	for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)			\
 		hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)
 
+static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask,
+				  unsigned int sw)
+{
+	if (cpu < nr_cpu_ids) {
+		if (sw & 1) {
+			cpu = cpumask_next(cpu, mask);
+			if (cpu < nr_cpu_ids)
+				return cpu;
+		}
+		if (sw & 2)
+			return WORK_CPU_UNBOUND;
+	}
+	return WORK_CPU_NONE;
+}
+
+static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
+				struct workqueue_struct *wq)
+{
+	return __next_gcwq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2);
+}
+
+#define for_each_gcwq_cpu(cpu)						\
+	for ((cpu) = __next_gcwq_cpu(-1, cpu_possible_mask, 3);		\
+	     (cpu) < WORK_CPU_NONE;					\
+	     (cpu) = __next_gcwq_cpu((cpu), cpu_possible_mask, 3))
+
+#define for_each_online_gcwq_cpu(cpu)					\
+	for ((cpu) = __next_gcwq_cpu(-1, cpu_online_mask, 3);		\
+	     (cpu) < WORK_CPU_NONE;					\
+	     (cpu) = __next_gcwq_cpu((cpu), cpu_online_mask, 3))
+
+#define for_each_cwq_cpu(cpu, wq)					\
+	for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, (wq));	\
+	     (cpu) < WORK_CPU_NONE;					\
+	     (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq)))
+
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 
 static struct debug_obj_descr work_debug_descr;
@@ -351,26 +390,46 @@ static bool workqueue_freezing;		/* W: have wqs started freezing? */
 static DEFINE_PER_CPU(struct global_cwq, global_cwq);
 static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, gcwq_nr_running);
 
+/*
+ * Global cpu workqueue and nr_running counter for unbound gcwq.  The
+ * gcwq is always online, has GCWQ_DISASSOCIATED set, and all its
+ * workers have WORKER_UNBOUND set.
+ */
+static struct global_cwq unbound_global_cwq;
+static atomic_t unbound_gcwq_nr_running = ATOMIC_INIT(0);	/* always 0 */
+
 static int worker_thread(void *__worker);
 
 static struct global_cwq *get_gcwq(unsigned int cpu)
 {
-	return &per_cpu(global_cwq, cpu);
+	if (cpu != WORK_CPU_UNBOUND)
+		return &per_cpu(global_cwq, cpu);
+	else
+		return &unbound_global_cwq;
 }
 
 static atomic_t *get_gcwq_nr_running(unsigned int cpu)
 {
-	return &per_cpu(gcwq_nr_running, cpu);
+	if (cpu != WORK_CPU_UNBOUND)
+		return &per_cpu(gcwq_nr_running, cpu);
+	else
+		return &unbound_gcwq_nr_running;
 }
 
 static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
 					    struct workqueue_struct *wq)
 {
-#ifndef CONFIG_SMP
-	return wq->cpu_wq.single;
+	if (!(wq->flags & WQ_UNBOUND)) {
+		if (likely(cpu < nr_cpu_ids)) {
+#ifdef CONFIG_SMP
+			return per_cpu_ptr(wq->cpu_wq.pcpu, cpu);
 #else
-	return per_cpu_ptr(wq->cpu_wq.pcpu, cpu);
+			return wq->cpu_wq.single;
 #endif
+		}
+	} else if (likely(cpu == WORK_CPU_UNBOUND))
+		return wq->cpu_wq.single;
+	return NULL;
 }
 
 static unsigned int work_color_to_flags(int color)
@@ -453,7 +512,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
 	if (cpu == WORK_CPU_NONE)
 		return NULL;
 
-	BUG_ON(cpu >= nr_cpu_ids);
+	BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND);
 	return get_gcwq(cpu);
 }
 
@@ -869,11 +928,14 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 
 	debug_work_activate(work);
 
+	if (unlikely(cpu == WORK_CPU_UNBOUND))
+		cpu = raw_smp_processor_id();
+
 	/*
 	 * Determine gcwq to use.  SINGLE_CPU is inherently
 	 * NON_REENTRANT, so test it first.
 	 */
-	if (!(wq->flags & WQ_SINGLE_CPU)) {
+	if (!(wq->flags & (WQ_SINGLE_CPU | WQ_UNBOUND))) {
 		struct global_cwq *last_gcwq;
 
 		/*
@@ -900,7 +962,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 			}
 		} else
 			spin_lock_irqsave(&gcwq->lock, flags);
-	} else {
+	} else if (!(wq->flags & WQ_UNBOUND)) {
 		unsigned int req_cpu = cpu;
 
 		/*
@@ -932,6 +994,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 			spin_unlock_irqrestore(&gcwq->lock, flags);
 			goto retry;
 		}
+	} else {
+		gcwq = get_gcwq(WORK_CPU_UNBOUND);
+		spin_lock_irqsave(&gcwq->lock, flags);
 	}
 
 	/* gcwq determined, get cwq and queue */
@@ -1166,7 +1231,8 @@ static bool worker_maybe_bind_and_lock(struct worker *worker)
 		 * it races with cpu hotunplug operation.  Verify
 		 * against GCWQ_DISASSOCIATED.
 		 */
-		set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu));
+		if (!(gcwq->flags & GCWQ_DISASSOCIATED))
+			set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu));
 
 		spin_lock_irq(&gcwq->lock);
 		if (gcwq->flags & GCWQ_DISASSOCIATED)
@@ -1231,8 +1297,9 @@ static struct worker *alloc_worker(void)
  */
 static struct worker *create_worker(struct global_cwq *gcwq, bool bind)
 {
-	int id = -1;
+	bool on_unbound_cpu = gcwq->cpu == WORK_CPU_UNBOUND;
 	struct worker *worker = NULL;
+	int id = -1;
 
 	spin_lock_irq(&gcwq->lock);
 	while (ida_get_new(&gcwq->worker_ida, &id)) {
@@ -1250,8 +1317,12 @@ static struct worker *create_worker(struct global_cwq *gcwq, bool bind)
 	worker->gcwq = gcwq;
 	worker->id = id;
 
-	worker->task = kthread_create(worker_thread, worker, "kworker/%u:%d",
-				      gcwq->cpu, id);
+	if (!on_unbound_cpu)
+		worker->task = kthread_create(worker_thread, worker,
+					      "kworker/%u:%d", gcwq->cpu, id);
+	else
+		worker->task = kthread_create(worker_thread, worker,
+					      "kworker/u:%d", id);
 	if (IS_ERR(worker->task))
 		goto fail;
 
@@ -1260,10 +1331,13 @@ static struct worker *create_worker(struct global_cwq *gcwq, bool bind)
 	 * online later on.  Make sure every worker has
 	 * PF_THREAD_BOUND set.
 	 */
-	if (bind)
+	if (bind && !on_unbound_cpu)
 		kthread_bind(worker->task, gcwq->cpu);
-	else
+	else {
 		worker->task->flags |= PF_THREAD_BOUND;
+		if (on_unbound_cpu)
+			worker->flags |= WORKER_UNBOUND;
+	}
 
 	return worker;
 fail:
@@ -1358,12 +1432,17 @@ static bool send_mayday(struct work_struct *work)
 {
 	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
 	struct workqueue_struct *wq = cwq->wq;
+	unsigned int cpu;
 
 	if (!(wq->flags & WQ_RESCUER))
 		return false;
 
 	/* mayday mayday mayday */
-	if (!cpumask_test_and_set_cpu(cwq->gcwq->cpu, wq->mayday_mask))
+	cpu = cwq->gcwq->cpu;
+	/* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */
+	if (cpu == WORK_CPU_UNBOUND)
+		cpu = 0;
+	if (!cpumask_test_and_set_cpu(cpu, wq->mayday_mask))
 		wake_up_process(wq->rescuer->task);
 	return true;
 }
@@ -1882,6 +1961,7 @@ static int rescuer_thread(void *__wq)
 	struct workqueue_struct *wq = __wq;
 	struct worker *rescuer = wq->rescuer;
 	struct list_head *scheduled = &rescuer->scheduled;
+	bool is_unbound = wq->flags & WQ_UNBOUND;
 	unsigned int cpu;
 
 	set_user_nice(current, RESCUER_NICE_LEVEL);
@@ -1891,8 +1971,13 @@ repeat:
 	if (kthread_should_stop())
 		return 0;
 
+	/*
+	 * See whether any cpu is asking for help.  Unbounded
+	 * workqueues use cpu 0 in mayday_mask for CPU_UNBOUND.
+	 */
 	for_each_cpu(cpu, wq->mayday_mask) {
-		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+		unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu;
+		struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq);
 		struct global_cwq *gcwq = cwq->gcwq;
 		struct work_struct *work, *n;
 
@@ -2034,7 +2119,7 @@ static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq,
 		atomic_set(&wq->nr_cwqs_to_flush, 1);
 	}
 
-	for_each_possible_cpu(cpu) {
+	for_each_cwq_cpu(cpu, wq) {
 		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
 		struct global_cwq *gcwq = cwq->gcwq;
 
@@ -2344,7 +2429,7 @@ static void wait_on_work(struct work_struct *work)
 	lock_map_acquire(&work->lockdep_map);
 	lock_map_release(&work->lockdep_map);
 
-	for_each_possible_cpu(cpu)
+	for_each_gcwq_cpu(cpu)
 		wait_on_cpu_work(get_gcwq(cpu), work);
 }
 
@@ -2590,23 +2675,25 @@ static int alloc_cwqs(struct workqueue_struct *wq)
 	const size_t size = sizeof(struct cpu_workqueue_struct);
 	const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
 				   __alignof__(unsigned long long));
-#ifndef CONFIG_SMP
-	void *ptr;
 
-	/*
-	 * Allocate enough room to align cwq and put an extra pointer
-	 * at the end pointing back to the originally allocated
-	 * pointer which will be used for free.
-	 */
-	ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL);
-	if (ptr) {
-		wq->cpu_wq.single = PTR_ALIGN(ptr, align);
-		*(void **)(wq->cpu_wq.single + 1) = ptr;
+	if (CONFIG_SMP && !(wq->flags & WQ_UNBOUND)) {
+		/* on SMP, percpu allocator can align itself */
+		wq->cpu_wq.pcpu = __alloc_percpu(size, align);
+	} else {
+		void *ptr;
+
+		/*
+		 * Allocate enough room to align cwq and put an extra
+		 * pointer at the end pointing back to the originally
+		 * allocated pointer which will be used for free.
+		 */
+		ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL);
+		if (ptr) {
+			wq->cpu_wq.single = PTR_ALIGN(ptr, align);
+			*(void **)(wq->cpu_wq.single + 1) = ptr;
+		}
 	}
-#else
-	/* On SMP, percpu allocator can align itself */
-	wq->cpu_wq.pcpu = __alloc_percpu(size, align);
-#endif
+
 	/* just in case, make sure it's actually aligned */
 	BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align));
 	return wq->cpu_wq.v ? 0 : -ENOMEM;
@@ -2614,23 +2701,25 @@ static int alloc_cwqs(struct workqueue_struct *wq)
 
 static void free_cwqs(struct workqueue_struct *wq)
 {
-#ifndef CONFIG_SMP
-	/* on UP, the pointer to free is stored right after the cwq */
-	if (wq->cpu_wq.single)
+	if (CONFIG_SMP && !(wq->flags & WQ_UNBOUND))
+		free_percpu(wq->cpu_wq.pcpu);
+	else if (wq->cpu_wq.single) {
+		/* the pointer to free is stored right after the cwq */
 		kfree(*(void **)(wq->cpu_wq.single + 1));
-#else
-	free_percpu(wq->cpu_wq.pcpu);
-#endif
+	}
 }
 
-static int wq_clamp_max_active(int max_active, const char *name)
+static int wq_clamp_max_active(int max_active, unsigned int flags,
+			       const char *name)
 {
-	if (max_active < 1 || max_active > WQ_MAX_ACTIVE)
+	int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
+
+	if (max_active < 1 || max_active > lim)
 		printk(KERN_WARNING "workqueue: max_active %d requested for %s "
 		       "is out of range, clamping between %d and %d\n",
-		       max_active, name, 1, WQ_MAX_ACTIVE);
+		       max_active, name, 1, lim);
 
-	return clamp_val(max_active, 1, WQ_MAX_ACTIVE);
+	return clamp_val(max_active, 1, lim);
 }
 
 struct workqueue_struct *__alloc_workqueue_key(const char *name,
@@ -2642,8 +2731,15 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
 	struct workqueue_struct *wq;
 	unsigned int cpu;
 
+	/*
+	 * Unbound workqueues aren't concurrency managed and should be
+	 * dispatched to workers immediately.
+	 */
+	if (flags & WQ_UNBOUND)
+		flags |= WQ_HIGHPRI;
+
 	max_active = max_active ?: WQ_DFL_ACTIVE;
-	max_active = wq_clamp_max_active(max_active, name);
+	max_active = wq_clamp_max_active(max_active, flags, name);
 
 	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
 	if (!wq)
@@ -2664,7 +2760,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
 	if (alloc_cwqs(wq) < 0)
 		goto err;
 
-	for_each_possible_cpu(cpu) {
+	for_each_cwq_cpu(cpu, wq) {
 		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
 		struct global_cwq *gcwq = get_gcwq(cpu);
 
@@ -2703,7 +2799,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
 	spin_lock(&workqueue_lock);
 
 	if (workqueue_freezing && wq->flags & WQ_FREEZEABLE)
-		for_each_possible_cpu(cpu)
+		for_each_cwq_cpu(cpu, wq)
 			get_cwq(cpu, wq)->max_active = 0;
 
 	list_add(&wq->list, &workqueues);
@@ -2743,7 +2839,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 	spin_unlock(&workqueue_lock);
 
 	/* sanity check */
-	for_each_possible_cpu(cpu) {
+	for_each_cwq_cpu(cpu, wq) {
 		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
 		int i;
 
@@ -2777,13 +2873,13 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
 {
 	unsigned int cpu;
 
-	max_active = wq_clamp_max_active(max_active, wq->name);
+	max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
 
 	spin_lock(&workqueue_lock);
 
 	wq->saved_max_active = max_active;
 
-	for_each_possible_cpu(cpu) {
+	for_each_cwq_cpu(cpu, wq) {
 		struct global_cwq *gcwq = get_gcwq(cpu);
 
 		spin_lock_irq(&gcwq->lock);
@@ -3310,7 +3406,7 @@ void freeze_workqueues_begin(void)
 	BUG_ON(workqueue_freezing);
 	workqueue_freezing = true;
 
-	for_each_possible_cpu(cpu) {
+	for_each_gcwq_cpu(cpu) {
 		struct global_cwq *gcwq = get_gcwq(cpu);
 		struct workqueue_struct *wq;
 
@@ -3322,7 +3418,7 @@ void freeze_workqueues_begin(void)
 		list_for_each_entry(wq, &workqueues, list) {
 			struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
 
-			if (wq->flags & WQ_FREEZEABLE)
+			if (cwq && wq->flags & WQ_FREEZEABLE)
 				cwq->max_active = 0;
 		}
 
@@ -3354,7 +3450,7 @@ bool freeze_workqueues_busy(void)
 
 	BUG_ON(!workqueue_freezing);
 
-	for_each_possible_cpu(cpu) {
+	for_each_gcwq_cpu(cpu) {
 		struct workqueue_struct *wq;
 		/*
 		 * nr_active is monotonically decreasing.  It's safe
@@ -3363,7 +3459,7 @@ bool freeze_workqueues_busy(void)
 		list_for_each_entry(wq, &workqueues, list) {
 			struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
 
-			if (!(wq->flags & WQ_FREEZEABLE))
+			if (!cwq || !(wq->flags & WQ_FREEZEABLE))
 				continue;
 
 			BUG_ON(cwq->nr_active < 0);
@@ -3396,7 +3492,7 @@ void thaw_workqueues(void)
 	if (!workqueue_freezing)
 		goto out_unlock;
 
-	for_each_possible_cpu(cpu) {
+	for_each_gcwq_cpu(cpu) {
 		struct global_cwq *gcwq = get_gcwq(cpu);
 		struct workqueue_struct *wq;
 
@@ -3408,7 +3504,7 @@ void thaw_workqueues(void)
 		list_for_each_entry(wq, &workqueues, list) {
 			struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
 
-			if (!(wq->flags & WQ_FREEZEABLE))
+			if (!cwq || !(wq->flags & WQ_FREEZEABLE))
 				continue;
 
 			/* restore max_active and repopulate worklist */
@@ -3451,12 +3547,14 @@ void __init init_workqueues(void)
 	hotcpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE);
 
 	/* initialize gcwqs */
-	for_each_possible_cpu(cpu) {
+	for_each_gcwq_cpu(cpu) {
 		struct global_cwq *gcwq = get_gcwq(cpu);
 
 		spin_lock_init(&gcwq->lock);
 		INIT_LIST_HEAD(&gcwq->worklist);
 		gcwq->cpu = cpu;
+		if (cpu == WORK_CPU_UNBOUND)
+			gcwq->flags |= GCWQ_DISASSOCIATED;
 
 		INIT_LIST_HEAD(&gcwq->idle_list);
 		for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
@@ -3476,7 +3574,7 @@ void __init init_workqueues(void)
 	}
 
 	/* create the initial worker */
-	for_each_online_cpu(cpu) {
+	for_each_online_gcwq_cpu(cpu) {
 		struct global_cwq *gcwq = get_gcwq(cpu);
 		struct worker *worker;
 
@@ -3490,5 +3588,7 @@ void __init init_workqueues(void)
 	system_wq = alloc_workqueue("events", 0, 0);
 	system_long_wq = alloc_workqueue("events_long", 0, 0);
 	system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
+	system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
+					    WQ_UNBOUND_MAX_ACTIVE);
 	BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq);
 }
-- 
cgit v1.2.3-70-g09d2


From c7fc77f78f16d138ca997ce096a62f46e2e9420a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 2 Jul 2010 10:03:51 +0200
Subject: workqueue: remove WQ_SINGLE_CPU and use WQ_UNBOUND instead

WQ_SINGLE_CPU combined with @max_active of 1 is used to achieve full
ordering among works queued to a workqueue.  The same can be achieved
using WQ_UNBOUND as unbound workqueues always use the gcwq for
WORK_CPU_UNBOUND.  As @max_active is always one and the benefits of cpu
locality aren't attainable anyway, serving them with unbound workqueues
should be fine.

Drop WQ_SINGLE_CPU support and use WQ_UNBOUND instead.  Note that most
single thread workqueue users will be converted to use multithread or
non-reentrant instead and only the ones which require strict ordering
will keep using WQ_UNBOUND + @max_active of 1.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |   7 ++--
 kernel/workqueue.c        | 100 +++++++++-------------------------------------
 2 files changed, 21 insertions(+), 86 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 67ce734747f..d74a529ed13 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -233,12 +233,11 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 
 enum {
 	WQ_NON_REENTRANT	= 1 << 0, /* guarantee non-reentrance */
-	WQ_SINGLE_CPU		= 1 << 1, /* only single cpu at a time */
+	WQ_UNBOUND		= 1 << 1, /* not bound to any cpu */
 	WQ_FREEZEABLE		= 1 << 2, /* freeze during suspend */
 	WQ_RESCUER		= 1 << 3, /* has an rescue worker */
 	WQ_HIGHPRI		= 1 << 4, /* high priority */
 	WQ_CPU_INTENSIVE	= 1 << 5, /* cpu instensive workqueue */
-	WQ_UNBOUND		= 1 << 6, /* not bound to any cpu */
 
 	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
 	WQ_MAX_UNBOUND_PER_CPU	= 4,	  /* 4 * #cpus for unbound wq */
@@ -300,9 +299,9 @@ __alloc_workqueue_key(const char *name, unsigned int flags, int max_active,
 #define create_workqueue(name)					\
 	alloc_workqueue((name), WQ_RESCUER, 1)
 #define create_freezeable_workqueue(name)			\
-	alloc_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_CPU | WQ_RESCUER, 1)
+	alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_RESCUER, 1)
 #define create_singlethread_workqueue(name)			\
-	alloc_workqueue((name), WQ_SINGLE_CPU | WQ_RESCUER, 1)
+	alloc_workqueue((name), WQ_UNBOUND | WQ_RESCUER, 1)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4608563cdd6..20d6237d749 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -206,8 +206,6 @@ struct workqueue_struct {
 	struct list_head	flusher_queue;	/* F: flush waiters */
 	struct list_head	flusher_overflow; /* F: flush overflow list */
 
-	unsigned long		single_cpu;	/* cpu for single cpu wq */
-
 	cpumask_var_t		mayday_mask;	/* cpus requesting rescue */
 	struct worker		*rescuer;	/* I: rescue worker */
 
@@ -889,34 +887,6 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
 		wake_up_worker(gcwq);
 }
 
-/**
- * cwq_unbind_single_cpu - unbind cwq from single cpu workqueue processing
- * @cwq: cwq to unbind
- *
- * Try to unbind @cwq from single cpu workqueue processing.  If
- * @cwq->wq is frozen, unbind is delayed till the workqueue is thawed.
- *
- * CONTEXT:
- * spin_lock_irq(gcwq->lock).
- */
-static void cwq_unbind_single_cpu(struct cpu_workqueue_struct *cwq)
-{
-	struct workqueue_struct *wq = cwq->wq;
-	struct global_cwq *gcwq = cwq->gcwq;
-
-	BUG_ON(wq->single_cpu != gcwq->cpu);
-	/*
-	 * Unbind from workqueue if @cwq is not frozen.  If frozen,
-	 * thaw_workqueues() will either restart processing on this
-	 * cpu or unbind if empty.  This keeps works queued while
-	 * frozen fully ordered and flushable.
-	 */
-	if (likely(!(gcwq->flags & GCWQ_FREEZING))) {
-		smp_wmb();	/* paired with cmpxchg() in __queue_work() */
-		wq->single_cpu = WORK_CPU_NONE;
-	}
-}
-
 static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 			 struct work_struct *work)
 {
@@ -924,20 +894,16 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 	struct cpu_workqueue_struct *cwq;
 	struct list_head *worklist;
 	unsigned long flags;
-	bool arbitrate;
 
 	debug_work_activate(work);
 
-	if (unlikely(cpu == WORK_CPU_UNBOUND))
-		cpu = raw_smp_processor_id();
-
-	/*
-	 * Determine gcwq to use.  SINGLE_CPU is inherently
-	 * NON_REENTRANT, so test it first.
-	 */
-	if (!(wq->flags & (WQ_SINGLE_CPU | WQ_UNBOUND))) {
+	/* determine gcwq to use */
+	if (!(wq->flags & WQ_UNBOUND)) {
 		struct global_cwq *last_gcwq;
 
+		if (unlikely(cpu == WORK_CPU_UNBOUND))
+			cpu = raw_smp_processor_id();
+
 		/*
 		 * It's multi cpu.  If @wq is non-reentrant and @work
 		 * was previously on a different cpu, it might still
@@ -962,38 +928,6 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 			}
 		} else
 			spin_lock_irqsave(&gcwq->lock, flags);
-	} else if (!(wq->flags & WQ_UNBOUND)) {
-		unsigned int req_cpu = cpu;
-
-		/*
-		 * It's a bit more complex for single cpu workqueues.
-		 * We first need to determine which cpu is going to be
-		 * used.  If no cpu is currently serving this
-		 * workqueue, arbitrate using atomic accesses to
-		 * wq->single_cpu; otherwise, use the current one.
-		 */
-	retry:
-		cpu = wq->single_cpu;
-		arbitrate = cpu == WORK_CPU_NONE;
-		if (arbitrate)
-			cpu = req_cpu;
-
-		gcwq = get_gcwq(cpu);
-		spin_lock_irqsave(&gcwq->lock, flags);
-
-		/*
-		 * The following cmpxchg() is a full barrier paired
-		 * with smp_wmb() in cwq_unbind_single_cpu() and
-		 * guarantees that all changes to wq->st_* fields are
-		 * visible on the new cpu after this point.
-		 */
-		if (arbitrate)
-			cmpxchg(&wq->single_cpu, WORK_CPU_NONE, cpu);
-
-		if (unlikely(wq->single_cpu != cpu)) {
-			spin_unlock_irqrestore(&gcwq->lock, flags);
-			goto retry;
-		}
 	} else {
 		gcwq = get_gcwq(WORK_CPU_UNBOUND);
 		spin_lock_irqsave(&gcwq->lock, flags);
@@ -1105,19 +1039,30 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 	struct work_struct *work = &dwork->work;
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
-		struct global_cwq *gcwq = get_work_gcwq(work);
-		unsigned int lcpu = gcwq ? gcwq->cpu : raw_smp_processor_id();
+		unsigned int lcpu;
 
 		BUG_ON(timer_pending(timer));
 		BUG_ON(!list_empty(&work->entry));
 
 		timer_stats_timer_set_start_info(&dwork->timer);
+
 		/*
 		 * This stores cwq for the moment, for the timer_fn.
 		 * Note that the work's gcwq is preserved to allow
 		 * reentrance detection for delayed works.
 		 */
+		if (!(wq->flags & WQ_UNBOUND)) {
+			struct global_cwq *gcwq = get_work_gcwq(work);
+
+			if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND)
+				lcpu = gcwq->cpu;
+			else
+				lcpu = raw_smp_processor_id();
+		} else
+			lcpu = WORK_CPU_UNBOUND;
+
 		set_work_cwq(work, get_cwq(lcpu, wq), 0);
+
 		timer->expires = jiffies + delay;
 		timer->data = (unsigned long)dwork;
 		timer->function = delayed_work_timer_fn;
@@ -1696,9 +1641,6 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
 		/* one down, submit a delayed one */
 		if (cwq->nr_active < cwq->max_active)
 			cwq_activate_first_delayed(cwq);
-	} else if (!cwq->nr_active && cwq->wq->flags & WQ_SINGLE_CPU) {
-		/* this was the last work, unbind from single cpu */
-		cwq_unbind_single_cpu(cwq);
 	}
 
 	/* is flush in progress and are we at the flushing tip? */
@@ -2751,7 +2693,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
 	atomic_set(&wq->nr_cwqs_to_flush, 0);
 	INIT_LIST_HEAD(&wq->flusher_queue);
 	INIT_LIST_HEAD(&wq->flusher_overflow);
-	wq->single_cpu = WORK_CPU_NONE;
 
 	wq->name = name;
 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
@@ -3513,11 +3454,6 @@ void thaw_workqueues(void)
 			while (!list_empty(&cwq->delayed_works) &&
 			       cwq->nr_active < cwq->max_active)
 				cwq_activate_first_delayed(cwq);
-
-			/* perform delayed unbind from single cpu if empty */
-			if (wq->single_cpu == gcwq->cpu &&
-			    !cwq->nr_active && list_empty(&cwq->delayed_works))
-				cwq_unbind_single_cpu(cwq);
 		}
 
 		wake_up_worker(gcwq);
-- 
cgit v1.2.3-70-g09d2


From 0f622bf465e78c390e13c5f4a14d0b3f8fb7c7e5 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 1 Jul 2010 09:01:50 -0700
Subject: Input: ads7846 - do not allow altering platform data

Tested-by: Anatolij Gustschin <agust@denx.de>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/ads7846.c | 35 +++++++++++++++++++----------------
 include/linux/spi/ads7846.h         |  2 +-
 2 files changed, 20 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index 69210cb56c5..a3771607ead 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -878,14 +878,15 @@ static int __devinit setup_pendown(struct spi_device *spi, struct ads7846 *ts)
 
 static int __devinit ads7846_probe(struct spi_device *spi)
 {
-	struct ads7846			*ts;
-	struct ads7846_packet		*packet;
-	struct input_dev		*input_dev;
-	struct ads7846_platform_data	*pdata = spi->dev.platform_data;
-	struct spi_message		*m;
-	struct spi_transfer		*x;
-	int				vref;
-	int				err;
+	struct ads7846 *ts;
+	struct ads7846_packet *packet;
+	struct input_dev *input_dev;
+	const struct ads7846_platform_data *pdata = spi->dev.platform_data;
+	struct spi_message *m;
+	struct spi_transfer *x;
+	unsigned long irq_flags;
+	int vref;
+	int err;
 
 	if (!spi->irq) {
 		dev_dbg(&spi->dev, "no IRQ?\n");
@@ -1174,20 +1175,22 @@ static int __devinit ads7846_probe(struct spi_device *spi)
 		goto err_put_regulator;
 	}
 
-	if (!pdata->irq_flags)
-		pdata->irq_flags = IRQF_TRIGGER_FALLING;
+	irq_flags = pdata->irq_flags ? : IRQF_TRIGGER_FALLING;
 
-	if (request_irq(spi->irq, ads7846_irq, pdata->irq_flags,
-			spi->dev.driver->name, ts)) {
+	err = request_irq(spi->irq, ads7846_irq, irq_flags,
+			  spi->dev.driver->name, ts);
+
+	if (err && !pdata->irq_flags) {
 		dev_info(&spi->dev,
 			"trying pin change workaround on irq %d\n", spi->irq);
 		err = request_irq(spi->irq, ads7846_irq,
 				  IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
 				  spi->dev.driver->name, ts);
-		if (err) {
-			dev_dbg(&spi->dev, "irq %d busy?\n", spi->irq);
-			goto err_disable_regulator;
-		}
+	}
+
+	if (err) {
+		dev_dbg(&spi->dev, "irq %d busy?\n", spi->irq);
+		goto err_disable_regulator;
 	}
 
 	err = ads784x_hwmon_register(spi, ts);
diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h
index 95d36bfb34b..92bd0839d5b 100644
--- a/include/linux/spi/ads7846.h
+++ b/include/linux/spi/ads7846.h
@@ -48,7 +48,7 @@ struct ads7846_platform_data {
 					 * state if get_pendown_state == NULL
 					 */
 	int	(*get_pendown_state)(void);
-	int	(*filter_init)	(struct ads7846_platform_data *pdata,
+	int	(*filter_init)	(const struct ads7846_platform_data *pdata,
 				 void **filter_data);
 	int	(*filter)	(void *filter_data, int data_idx, int *val);
 	void	(*filter_cleanup)(void *filter_data);
-- 
cgit v1.2.3-70-g09d2


From 312e8e8a9e2471b0ada7366497fffb3ff1a40e2c Mon Sep 17 00:00:00 2001
From: Joonyoung Shim <jy0922.shim@samsung.com>
Date: Sun, 4 Jul 2010 01:21:25 -0700
Subject: Input: mcs - Add MCS touchkey driver

This adds support for MELFAS MCS5000/MCS5080 touch key controllers.

Signed-off-by: Joonyoung Shim <jy0922.shim@samsung.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/Kconfig         |  12 ++
 drivers/input/keyboard/Makefile        |   1 +
 drivers/input/keyboard/mcs_touchkey.c  | 239 +++++++++++++++++++++++++++++++++
 drivers/input/touchscreen/mcs5000_ts.c |   6 +-
 include/linux/i2c/mcs.h                |  34 +++++
 include/linux/i2c/mcs5000_ts.h         |  24 ----
 6 files changed, 289 insertions(+), 27 deletions(-)
 create mode 100644 drivers/input/keyboard/mcs_touchkey.c
 create mode 100644 include/linux/i2c/mcs.h
 delete mode 100644 include/linux/i2c/mcs5000_ts.h

(limited to 'include')

diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index d8fa5d724c5..c7480934cee 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -297,6 +297,18 @@ config KEYBOARD_MAX7359
 	  To compile this driver as a module, choose M here: the
 	  module will be called max7359_keypad.
 
+config KEYBOARD_MCS
+	tristate "MELFAS MCS Touchkey"
+	depends on I2C
+	help
+	  Say Y here if you have the MELFAS MCS5000/5080 touchkey controller
+	  chip in your system.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called mcs_touchkey.
+
 config KEYBOARD_IMX
 	tristate "IMX keypad support"
 	depends on ARCH_MXC
diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile
index 4596d0c6f92..0a16674ed3d 100644
--- a/drivers/input/keyboard/Makefile
+++ b/drivers/input/keyboard/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_KEYBOARD_LOCOMO)		+= locomokbd.o
 obj-$(CONFIG_KEYBOARD_MAPLE)		+= maple_keyb.o
 obj-$(CONFIG_KEYBOARD_MATRIX)		+= matrix_keypad.o
 obj-$(CONFIG_KEYBOARD_MAX7359)		+= max7359_keypad.o
+obj-$(CONFIG_KEYBOARD_MCS)		+= mcs_touchkey.o
 obj-$(CONFIG_KEYBOARD_NEWTON)		+= newtonkbd.o
 obj-$(CONFIG_KEYBOARD_OMAP)		+= omap-keypad.o
 obj-$(CONFIG_KEYBOARD_OPENCORES)	+= opencores-kbd.o
diff --git a/drivers/input/keyboard/mcs_touchkey.c b/drivers/input/keyboard/mcs_touchkey.c
new file mode 100644
index 00000000000..63b849d7e90
--- /dev/null
+++ b/drivers/input/keyboard/mcs_touchkey.c
@@ -0,0 +1,239 @@
+/*
+ * mcs_touchkey.c - Touchkey driver for MELFAS MCS5000/5080 controller
+ *
+ * Copyright (C) 2010 Samsung Electronics Co.Ltd
+ * Author: HeungJun Kim <riverful.kim@samsung.com>
+ * Author: Joonyoung Shim <jy0922.shim@samsung.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/i2c/mcs.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+#include <linux/irq.h>
+#include <linux/slab.h>
+
+/* MCS5000 Touchkey */
+#define MCS5000_TOUCHKEY_STATUS		0x04
+#define MCS5000_TOUCHKEY_STATUS_PRESS	7
+#define MCS5000_TOUCHKEY_FW		0x0a
+#define MCS5000_TOUCHKEY_BASE_VAL	0x61
+
+/* MCS5080 Touchkey */
+#define MCS5080_TOUCHKEY_STATUS		0x00
+#define MCS5080_TOUCHKEY_STATUS_PRESS	3
+#define MCS5080_TOUCHKEY_FW		0x01
+#define MCS5080_TOUCHKEY_BASE_VAL	0x1
+
+enum mcs_touchkey_type {
+	MCS5000_TOUCHKEY,
+	MCS5080_TOUCHKEY,
+};
+
+struct mcs_touchkey_chip {
+	unsigned int status_reg;
+	unsigned int pressbit;
+	unsigned int press_invert;
+	unsigned int baseval;
+};
+
+struct mcs_touchkey_data {
+	struct i2c_client *client;
+	struct input_dev *input_dev;
+	struct mcs_touchkey_chip chip;
+	unsigned int key_code;
+	unsigned int key_val;
+	unsigned short keycodes[];
+};
+
+static irqreturn_t mcs_touchkey_interrupt(int irq, void *dev_id)
+{
+	struct mcs_touchkey_data *data = dev_id;
+	struct mcs_touchkey_chip *chip = &data->chip;
+	struct i2c_client *client = data->client;
+	struct input_dev *input = data->input_dev;
+	unsigned int key_val;
+	unsigned int pressed;
+	int val;
+
+	val = i2c_smbus_read_byte_data(client, chip->status_reg);
+	if (val < 0) {
+		dev_err(&client->dev, "i2c read error [%d]\n", val);
+		goto out;
+	}
+
+	pressed = (val & (1 << chip->pressbit)) >> chip->pressbit;
+	if (chip->press_invert)
+		pressed ^= chip->press_invert;
+
+	/* key_val is 0 when released, so we should use key_val of press. */
+	if (pressed) {
+		key_val = val & (0xff >> (8 - chip->pressbit));
+		if (!key_val)
+			goto out;
+		key_val -= chip->baseval;
+		data->key_code = data->keycodes[key_val];
+		data->key_val = key_val;
+	}
+
+	input_event(input, EV_MSC, MSC_SCAN, data->key_val);
+	input_report_key(input, data->key_code, pressed);
+	input_sync(input);
+
+	dev_dbg(&client->dev, "key %d %d %s\n", data->key_val, data->key_code,
+		pressed ? "pressed" : "released");
+
+ out:
+	return IRQ_HANDLED;
+}
+
+static int __devinit mcs_touchkey_probe(struct i2c_client *client,
+		const struct i2c_device_id *id)
+{
+	const struct mcs_platform_data *pdata;
+	struct mcs_touchkey_data *data;
+	struct input_dev *input_dev;
+	unsigned int fw_reg;
+	int fw_ver;
+	int error;
+	int i;
+
+	pdata = client->dev.platform_data;
+	if (!pdata) {
+		dev_err(&client->dev, "no platform data defined\n");
+		return -EINVAL;
+	}
+
+	data = kzalloc(sizeof(struct mcs_touchkey_data) +
+			sizeof(data->keycodes[0]) * (pdata->key_maxval + 1),
+			GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!data || !input_dev) {
+		dev_err(&client->dev, "Failed to allocate memory\n");
+		error = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	data->client = client;
+	data->input_dev = input_dev;
+
+	if (id->driver_data == MCS5000_TOUCHKEY) {
+		data->chip.status_reg = MCS5000_TOUCHKEY_STATUS;
+		data->chip.pressbit = MCS5000_TOUCHKEY_STATUS_PRESS;
+		data->chip.baseval = MCS5000_TOUCHKEY_BASE_VAL;
+		fw_reg = MCS5000_TOUCHKEY_FW;
+	} else {
+		data->chip.status_reg = MCS5080_TOUCHKEY_STATUS;
+		data->chip.pressbit = MCS5080_TOUCHKEY_STATUS_PRESS;
+		data->chip.press_invert = 1;
+		data->chip.baseval = MCS5080_TOUCHKEY_BASE_VAL;
+		fw_reg = MCS5080_TOUCHKEY_FW;
+	}
+
+	fw_ver = i2c_smbus_read_byte_data(client, fw_reg);
+	if (fw_ver < 0) {
+		error = fw_ver;
+		dev_err(&client->dev, "i2c read error[%d]\n", error);
+		goto err_free_mem;
+	}
+	dev_info(&client->dev, "Firmware version: %d\n", fw_ver);
+
+	input_dev->name = "MELPAS MCS Touchkey";
+	input_dev->id.bustype = BUS_I2C;
+	input_dev->dev.parent = &client->dev;
+	input_dev->evbit[0] = BIT_MASK(EV_KEY);
+	if (!pdata->no_autorepeat)
+		input_dev->evbit[0] |= BIT_MASK(EV_REP);
+	input_dev->keycode = data->keycodes;
+	input_dev->keycodesize = sizeof(data->keycodes[0]);
+	input_dev->keycodemax = pdata->key_maxval + 1;
+
+	for (i = 0; i < pdata->keymap_size; i++) {
+		unsigned int val = MCS_KEY_VAL(pdata->keymap[i]);
+		unsigned int code = MCS_KEY_CODE(pdata->keymap[i]);
+
+		data->keycodes[val] = code;
+		__set_bit(code, input_dev->keybit);
+	}
+
+	input_set_capability(input_dev, EV_MSC, MSC_SCAN);
+	input_set_drvdata(input_dev, data);
+
+	if (pdata->cfg_pin)
+		pdata->cfg_pin();
+
+	error = request_threaded_irq(client->irq, NULL, mcs_touchkey_interrupt,
+			IRQF_TRIGGER_FALLING, client->dev.driver->name, data);
+	if (error) {
+		dev_err(&client->dev, "Failed to register interrupt\n");
+		goto err_free_mem;
+	}
+
+	error = input_register_device(input_dev);
+	if (error)
+		goto err_free_irq;
+
+	i2c_set_clientdata(client, data);
+	return 0;
+
+err_free_irq:
+	free_irq(client->irq, data);
+err_free_mem:
+	input_free_device(input_dev);
+	kfree(data);
+	return error;
+}
+
+static int __devexit mcs_touchkey_remove(struct i2c_client *client)
+{
+	struct mcs_touchkey_data *data = i2c_get_clientdata(client);
+
+	free_irq(client->irq, data);
+	input_unregister_device(data->input_dev);
+	kfree(data);
+
+	return 0;
+}
+
+static const struct i2c_device_id mcs_touchkey_id[] = {
+	{ "mcs5000_touchkey", MCS5000_TOUCHKEY },
+	{ "mcs5080_touchkey", MCS5080_TOUCHKEY },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, mcs_touchkey_id);
+
+static struct i2c_driver mcs_touchkey_driver = {
+	.driver = {
+		.name	= "mcs_touchkey",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= mcs_touchkey_probe,
+	.remove		= __devexit_p(mcs_touchkey_remove),
+	.id_table	= mcs_touchkey_id,
+};
+
+static int __init mcs_touchkey_init(void)
+{
+	return i2c_add_driver(&mcs_touchkey_driver);
+}
+
+static void __exit mcs_touchkey_exit(void)
+{
+	i2c_del_driver(&mcs_touchkey_driver);
+}
+
+module_init(mcs_touchkey_init);
+module_exit(mcs_touchkey_exit);
+
+/* Module information */
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("HeungJun Kim <riverful.kim@samsung.com>");
+MODULE_DESCRIPTION("Touchkey driver for MELFAS MCS5000/5080 controller");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/touchscreen/mcs5000_ts.c b/drivers/input/touchscreen/mcs5000_ts.c
index 1fb0c2f06a4..6ee9940aaf5 100644
--- a/drivers/input/touchscreen/mcs5000_ts.c
+++ b/drivers/input/touchscreen/mcs5000_ts.c
@@ -16,7 +16,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/i2c.h>
-#include <linux/i2c/mcs5000_ts.h>
+#include <linux/i2c/mcs.h>
 #include <linux/interrupt.h>
 #include <linux/input.h>
 #include <linux/irq.h>
@@ -105,7 +105,7 @@ enum mcs5000_ts_read_offset {
 struct mcs5000_ts_data {
 	struct i2c_client *client;
 	struct input_dev *input_dev;
-	const struct mcs5000_ts_platform_data *platform_data;
+	const struct mcs_platform_data *platform_data;
 };
 
 static irqreturn_t mcs5000_ts_interrupt(int irq, void *dev_id)
@@ -164,7 +164,7 @@ static irqreturn_t mcs5000_ts_interrupt(int irq, void *dev_id)
 
 static void mcs5000_ts_phys_init(struct mcs5000_ts_data *data)
 {
-	const struct mcs5000_ts_platform_data *platform_data =
+	const struct mcs_platform_data *platform_data =
 		data->platform_data;
 	struct i2c_client *client = data->client;
 
diff --git a/include/linux/i2c/mcs.h b/include/linux/i2c/mcs.h
new file mode 100644
index 00000000000..725ae7c313f
--- /dev/null
+++ b/include/linux/i2c/mcs.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2009 - 2010 Samsung Electronics Co.Ltd
+ * Author: Joonyoung Shim <jy0922.shim@samsung.com>
+ * Author: HeungJun Kim <riverful.kim@samsung.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __LINUX_MCS_H
+#define __LINUX_MCS_H
+
+#define MCS_KEY_MAP(v, c)	((((v) & 0xff) << 16) | ((c) & 0xffff))
+#define MCS_KEY_VAL(v)		(((v) >> 16) & 0xff)
+#define MCS_KEY_CODE(v)		((v) & 0xffff)
+
+struct mcs_platform_data {
+	void (*cfg_pin)(void);
+
+	/* touchscreen */
+	unsigned int x_size;
+	unsigned int y_size;
+
+	/* touchkey */
+	const u32 *keymap;
+	unsigned int keymap_size;
+	unsigned int key_maxval;
+	bool no_autorepeat;
+};
+
+#endif	/* __LINUX_MCS_H */
diff --git a/include/linux/i2c/mcs5000_ts.h b/include/linux/i2c/mcs5000_ts.h
deleted file mode 100644
index 5a117b5ca15..00000000000
--- a/include/linux/i2c/mcs5000_ts.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * mcs5000_ts.h
- *
- * Copyright (C) 2009 Samsung Electronics Co.Ltd
- * Author: Joonyoung Shim <jy0922.shim@samsung.com>
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- */
-
-#ifndef __LINUX_MCS5000_TS_H
-#define __LINUX_MCS5000_TS_H
-
-/* platform data for the MELFAS MCS-5000 touchscreen driver */
-struct mcs5000_ts_platform_data {
-	void (*cfg_pin)(void);
-	int x_size;
-	int y_size;
-};
-
-#endif	/* __LINUX_MCS5000_TS_H */
-- 
cgit v1.2.3-70-g09d2


From 7adde04a2f5a798f04a556dfb3b69bff388e5dc4 Mon Sep 17 00:00:00 2001
From: Xiaotian Feng <dfeng@redhat.com>
Date: Wed, 30 Jun 2010 17:57:22 +0800
Subject: slab: fix caller tracking on !CONFIG_DEBUG_SLAB && CONFIG_TRACING

In slab, all __xxx_track_caller functions are defined when CONFIG_DEBUG_SLAB ||
CONFIG_TRACING, so caller tracking should also work for CONFIG_TRACING. But if
CONFIG_DEBUG_SLAB is not set, include/linux/slab.h defines xxx_track_caller as
__xxx() without considering CONFIG_TRACING, which breaks the caller tracking
behaviour.

Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Dmitry Monakhov <dmonakhov@openvz.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Xiaotian Feng <dfeng@redhat.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/slab.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 49d1247cd6d..59260e21bdf 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -268,7 +268,8 @@ static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep,
  * allocator where we care about the real place the memory allocation
  * request comes from.
  */
-#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) || \
+	(defined(CONFIG_SLAB) && defined(CONFIG_TRACING))
 extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long);
 #define kmalloc_track_caller(size, flags) \
 	__kmalloc_track_caller(size, flags, _RET_IP_)
@@ -286,7 +287,8 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long);
  * standard allocator where we care about the real place the memory
  * allocation request comes from.
  */
-#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) || \
+	(defined(CONFIG_SLAB) && defined(CONFIG_TRACING))
 extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
 #define kmalloc_node_track_caller(size, flags, node) \
 	__kmalloc_node_track_caller(size, flags, node, \
-- 
cgit v1.2.3-70-g09d2


From 7db6f5fb65a82af03229eef104dc9899c5eecf33 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 27 Jun 2010 01:02:33 +0000
Subject: vsprintf: Recursive vsnprintf: Add "%pV", struct va_format

Add the ability to print a format and va_list from a structure pointer

Allows __dev_printk to be implemented as a single printk while
minimizing string space duplication.

%pV should not be used without some mechanism to verify the
format and argument use, a la __attribute__((format(printf, ...))).

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/kernel.h | 5 +++++
 lib/vsprintf.c         | 9 +++++++++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 8317ec4b9f3..01dfc05ef4a 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -171,6 +171,11 @@ static inline void might_fault(void)
 }
 #endif
 
+struct va_format {
+	const char *fmt;
+	va_list *va;
+};
+
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(long time);
 NORET_TYPE void panic(const char * fmt, ...)
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index b8a2f549ab0..4ee19d0d391 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -980,6 +980,11 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
  *             [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15]
  *           little endian output byte order is:
  *             [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15]
+ * - 'V' For a struct va_format which contains a format string * and va_list *,
+ *       call vsnprintf(->fmt, *->va).
+ *       Implements a "recursive vsnprintf".
+ *       Do not use this feature without some mechanism to verify the
+ *       correctness of the format string and va_list arguments.
  *
  * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
  * function pointers are really function descriptors, which contain a
@@ -1025,6 +1030,10 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 		break;
 	case 'U':
 		return uuid_string(buf, end, ptr, spec, fmt);
+	case 'V':
+		return buf + vsnprintf(buf, end - buf,
+				       ((struct va_format *)ptr)->fmt,
+				       *(((struct va_format *)ptr)->va));
 	}
 	spec.flags |= SMALL;
 	if (spec.field_width == -1) {
-- 
cgit v1.2.3-70-g09d2


From 99bcf217183e02ebae46373896fba7f12d588001 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 27 Jun 2010 01:02:34 +0000
Subject: device.h drivers/base/core.c Convert dev_<level> logging macros to
 functions

Reduces an x86 defconfig's text and data by ~55k (0.6% smaller).

$ size vmlinux*
   text	   data	    bss	    dec	    hex	filename
7205273	 716016	1366288	9287577	 8db799	vmlinux
7258890	 719768	1366288	9344946	 8e97b2	vmlinux.master

Uses %pV and struct va_format
Format arguments are verified before printk

The dev_info macro is converted to _dev_info because there are
existing uses of variables named dev_info in the kernel tree
like drivers/net/pcmcia/pcnet_cs.c

A dev_info macro is created to call _dev_info

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/base/core.c    |  64 ++++++++++++++++++++++++++++
 include/linux/device.h | 112 +++++++++++++++++++++++++++++++++++++------------
 2 files changed, 150 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 9630fbdf4e6..38bbbd02930 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1819,3 +1819,67 @@ void device_shutdown(void)
 	spin_unlock(&devices_kset->list_lock);
 	async_synchronize_full();
 }
+
+/*
+ * Device logging functions
+ */
+
+#ifdef CONFIG_PRINTK
+
+static int __dev_printk(const char *level, const struct device *dev,
+			struct va_format *vaf)
+{
+	if (!dev)
+		return printk("%s(NULL device *): %pV", level, vaf);
+
+	return printk("%s%s %s: %pV",
+		      level, dev_driver_string(dev), dev_name(dev), vaf);
+}
+
+int dev_printk(const char *level, const struct device *dev,
+	       const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+	int r;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	r = __dev_printk(level, dev, &vaf);
+	va_end(args);
+
+	return r;
+}
+EXPORT_SYMBOL(dev_printk);
+
+#define define_dev_printk_level(func, kern_level)		\
+int func(const struct device *dev, const char *fmt, ...)	\
+{								\
+	struct va_format vaf;					\
+	va_list args;						\
+	int r;							\
+								\
+	va_start(args, fmt);					\
+								\
+	vaf.fmt = fmt;						\
+	vaf.va = &args;						\
+								\
+	r = __dev_printk(kern_level, dev, &vaf);		\
+	va_end(args);						\
+								\
+	return r;						\
+}								\
+EXPORT_SYMBOL(func);
+
+define_dev_printk_level(dev_emerg, KERN_EMERG);
+define_dev_printk_level(dev_alert, KERN_ALERT);
+define_dev_printk_level(dev_crit, KERN_CRIT);
+define_dev_printk_level(dev_err, KERN_ERR);
+define_dev_printk_level(dev_warn, KERN_WARNING);
+define_dev_printk_level(dev_notice, KERN_NOTICE);
+define_dev_printk_level(_dev_info, KERN_INFO);
+
+#endif
diff --git a/include/linux/device.h b/include/linux/device.h
index 0713e10571d..6a8276f683b 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -638,43 +638,103 @@ extern void sysdev_shutdown(void);
 
 /* debugging and troubleshooting/diagnostic helpers. */
 extern const char *dev_driver_string(const struct device *dev);
-#define dev_printk(level, dev, format, arg...)	\
-	printk(level "%s %s: " format , dev_driver_string(dev) , \
-	       dev_name(dev) , ## arg)
-
-#define dev_emerg(dev, format, arg...)		\
-	dev_printk(KERN_EMERG , dev , format , ## arg)
-#define dev_alert(dev, format, arg...)		\
-	dev_printk(KERN_ALERT , dev , format , ## arg)
-#define dev_crit(dev, format, arg...)		\
-	dev_printk(KERN_CRIT , dev , format , ## arg)
-#define dev_err(dev, format, arg...)		\
-	dev_printk(KERN_ERR , dev , format , ## arg)
-#define dev_warn(dev, format, arg...)		\
-	dev_printk(KERN_WARNING , dev , format , ## arg)
-#define dev_notice(dev, format, arg...)		\
-	dev_printk(KERN_NOTICE , dev , format , ## arg)
-#define dev_info(dev, format, arg...)		\
-	dev_printk(KERN_INFO , dev , format , ## arg)
+
+
+#ifdef CONFIG_PRINTK
+
+extern int dev_printk(const char *level, const struct device *dev,
+		      const char *fmt, ...)
+	__attribute__ ((format (printf, 3, 4)));
+extern int dev_emerg(const struct device *dev, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int dev_alert(const struct device *dev, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int dev_crit(const struct device *dev, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int dev_err(const struct device *dev, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int dev_warn(const struct device *dev, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int dev_notice(const struct device *dev, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int _dev_info(const struct device *dev, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+
+#else
+
+static inline int dev_printk(const char *level, const struct device *dev,
+		      const char *fmt, ...)
+	__attribute__ ((format (printf, 3, 4)));
+static inline int dev_printk(const char *level, const struct device *dev,
+		      const char *fmt, ...)
+	 { return 0; }
+
+static inline int dev_emerg(const struct device *dev, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+static inline int dev_emerg(const struct device *dev, const char *fmt, ...)
+	{ return 0; }
+static inline int dev_crit(const struct device *dev, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+static inline int dev_crit(const struct device *dev, const char *fmt, ...)
+	{ return 0; }
+static inline int dev_alert(const struct device *dev, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+static inline int dev_alert(const struct device *dev, const char *fmt, ...)
+	{ return 0; }
+static inline int dev_err(const struct device *dev, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+static inline int dev_err(const struct device *dev, const char *fmt, ...)
+	{ return 0; }
+static inline int dev_warn(const struct device *dev, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+static inline int dev_warn(const struct device *dev, const char *fmt, ...)
+	{ return 0; }
+static inline int dev_notice(const struct device *dev, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+static inline int dev_notice(const struct device *dev, const char *fmt, ...)
+	{ return 0; }
+static inline int _dev_info(const struct device *dev, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+static inline int _dev_info(const struct device *dev, const char *fmt, ...)
+	{ return 0; }
+
+#endif
+
+/*
+ * Stupid hackaround for existing non-printk uses of dev_info
+ *
+ * Note that the definition of dev_info below is actually _dev_info
+ * and a macro is used to avoid redefining dev_info
+ */
+
+#define dev_info(dev, fmt, arg...) _dev_info(dev, fmt, ##arg)
 
 #if defined(DEBUG)
 #define dev_dbg(dev, format, arg...)		\
-	dev_printk(KERN_DEBUG , dev , format , ## arg)
+	dev_printk(KERN_DEBUG, dev, format, ##arg)
 #elif defined(CONFIG_DYNAMIC_DEBUG)
-#define dev_dbg(dev, format, ...) do { \
+#define dev_dbg(dev, format, ...)		     \
+do {						     \
 	dynamic_dev_dbg(dev, format, ##__VA_ARGS__); \
-	} while (0)
+} while (0)
 #else
-#define dev_dbg(dev, format, arg...)		\
-	({ if (0) dev_printk(KERN_DEBUG, dev, format, ##arg); 0; })
+#define dev_dbg(dev, format, arg...)				\
+({								\
+	if (0)							\
+		dev_printk(KERN_DEBUG, dev, format, ##arg);	\
+	0;							\
+})
 #endif
 
 #ifdef VERBOSE_DEBUG
 #define dev_vdbg	dev_dbg
 #else
-
-#define dev_vdbg(dev, format, arg...)		\
-	({ if (0) dev_printk(KERN_DEBUG, dev, format, ##arg); 0; })
+#define dev_vdbg(dev, format, arg...)				\
+({								\
+	if (0)							\
+		dev_printk(KERN_DEBUG, dev, format, ##arg);	\
+	0;							\
+})
 #endif
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 256df2f3879efdb2e9808bdb1b54b16fbb11fa38 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 27 Jun 2010 01:02:35 +0000
Subject: netdevice.h net/core/dev.c: Convert netdev_<level> logging macros to
 functions

Reduces an x86 defconfig's text and data by ~2k.
text is smaller, data is larger.

$ size vmlinux*
   text	   data	    bss	    dec	    hex	filename
7198862	 720112	1366288	9285262	 8dae8e	vmlinux
7205273	 716016	1366288	9287577	 8db799	vmlinux.device_h

Uses %pV and struct va_format
Format arguments are verified before printk

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 36 +++++++++++++--------------
 net/core/dev.c            | 62 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8fa5e5aa879..0183901ea47 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2250,25 +2250,23 @@ static inline const char *netdev_name(const struct net_device *dev)
 	return dev->name;
 }
 
-#define netdev_printk(level, netdev, format, args...)		\
-	dev_printk(level, (netdev)->dev.parent,			\
-		   "%s: " format,				\
-		   netdev_name(netdev), ##args)
-
-#define netdev_emerg(dev, format, args...)			\
-	netdev_printk(KERN_EMERG, dev, format, ##args)
-#define netdev_alert(dev, format, args...)			\
-	netdev_printk(KERN_ALERT, dev, format, ##args)
-#define netdev_crit(dev, format, args...)			\
-	netdev_printk(KERN_CRIT, dev, format, ##args)
-#define netdev_err(dev, format, args...)			\
-	netdev_printk(KERN_ERR, dev, format, ##args)
-#define netdev_warn(dev, format, args...)			\
-	netdev_printk(KERN_WARNING, dev, format, ##args)
-#define netdev_notice(dev, format, args...)			\
-	netdev_printk(KERN_NOTICE, dev, format, ##args)
-#define netdev_info(dev, format, args...)			\
-	netdev_printk(KERN_INFO, dev, format, ##args)
+extern int netdev_printk(const char *level, const struct net_device *dev,
+			 const char *format, ...)
+	__attribute__ ((format (printf, 3, 4)));
+extern int netdev_emerg(const struct net_device *dev, const char *format, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int netdev_alert(const struct net_device *dev, const char *format, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int netdev_crit(const struct net_device *dev, const char *format, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int netdev_err(const struct net_device *dev, const char *format, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int netdev_warn(const struct net_device *dev, const char *format, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int netdev_notice(const struct net_device *dev, const char *format, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int netdev_info(const struct net_device *dev, const char *format, ...)
+	__attribute__ ((format (printf, 2, 3)));
 
 #if defined(DEBUG)
 #define netdev_dbg(__dev, format, args...)			\
diff --git a/net/core/dev.c b/net/core/dev.c
index e85cc5fa3c4..93b8929fa21 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5802,6 +5802,68 @@ char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
 	return buffer;
 }
 
+static int __netdev_printk(const char *level, const struct net_device *dev,
+			   struct va_format *vaf)
+{
+	int r;
+
+	if (dev && dev->dev.parent)
+		r = dev_printk(level, dev->dev.parent, "%s: %pV",
+			       netdev_name(dev), vaf);
+	else if (dev)
+		r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
+	else
+		r = printk("%s(NULL net_device): %pV", level, vaf);
+
+	return r;
+}
+
+int netdev_printk(const char *level, const struct net_device *dev,
+		  const char *format, ...)
+{
+	struct va_format vaf;
+	va_list args;
+	int r;
+
+	va_start(args, format);
+
+	vaf.fmt = format;
+	vaf.va = &args;
+
+	r = __netdev_printk(level, dev, &vaf);
+	va_end(args);
+
+	return r;
+}
+EXPORT_SYMBOL(netdev_printk);
+
+#define define_netdev_printk_level(func, level)			\
+int func(const struct net_device *dev, const char *fmt, ...)	\
+{								\
+	int r;							\
+	struct va_format vaf;					\
+	va_list args;						\
+								\
+	va_start(args, fmt);					\
+								\
+	vaf.fmt = fmt;						\
+	vaf.va = &args;						\
+								\
+	r = __netdev_printk(level, dev, &vaf);			\
+	va_end(args);						\
+								\
+	return r;						\
+}								\
+EXPORT_SYMBOL(func);
+
+define_netdev_printk_level(netdev_emerg, KERN_EMERG);
+define_netdev_printk_level(netdev_alert, KERN_ALERT);
+define_netdev_printk_level(netdev_crit, KERN_CRIT);
+define_netdev_printk_level(netdev_err, KERN_ERR);
+define_netdev_printk_level(netdev_warn, KERN_WARNING);
+define_netdev_printk_level(netdev_notice, KERN_NOTICE);
+define_netdev_printk_level(netdev_info, KERN_INFO);
+
 static void __net_exit netdev_exit(struct net *net)
 {
 	kfree(net->dev_name_head);
-- 
cgit v1.2.3-70-g09d2


From f45f4321d2c977c9eff77e5a5225f3cd2140eb20 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 27 Jun 2010 01:02:36 +0000
Subject: netdevice.h: Change netif_<level> macros to call netdev_<level>
 functions

Reduces text by ~300 bytes (woohoo!) in an x86 defconfig

$ size vmlinux*
   text	   data	    bss	    dec	    hex	filename
7198526	 720112	1366288	9284926	 8dad3e	vmlinux
7198862	 720112	1366288	9285262	 8dae8e	vmlinux.netdev

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0183901ea47..4d27368674d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2314,20 +2314,26 @@ do {					  			\
 		netdev_printk(level, (dev), fmt, ##args);	\
 } while (0)
 
+#define netif_level(level, priv, type, dev, fmt, args...)	\
+do {								\
+	if (netif_msg_##type(priv))				\
+		netdev_##level(dev, fmt, ##args);		\
+} while (0)
+
 #define netif_emerg(priv, type, dev, fmt, args...)		\
-	netif_printk(priv, type, KERN_EMERG, dev, fmt, ##args)
+	netif_level(emerg, priv, type, dev, fmt, ##args)
 #define netif_alert(priv, type, dev, fmt, args...)		\
-	netif_printk(priv, type, KERN_ALERT, dev, fmt, ##args)
+	netif_level(alert, priv, type, dev, fmt, ##args)
 #define netif_crit(priv, type, dev, fmt, args...)		\
-	netif_printk(priv, type, KERN_CRIT, dev, fmt, ##args)
+	netif_level(crit, priv, type, dev, fmt, ##args)
 #define netif_err(priv, type, dev, fmt, args...)		\
-	netif_printk(priv, type, KERN_ERR, dev, fmt, ##args)
+	netif_level(err, priv, type, dev, fmt, ##args)
 #define netif_warn(priv, type, dev, fmt, args...)		\
-	netif_printk(priv, type, KERN_WARNING, dev, fmt, ##args)
+	netif_level(warn, priv, type, dev, fmt, ##args)
 #define netif_notice(priv, type, dev, fmt, args...)		\
-	netif_printk(priv, type, KERN_NOTICE, dev, fmt, ##args)
+	netif_level(notice, priv, type, dev, fmt, ##args)
 #define netif_info(priv, type, dev, fmt, args...)		\
-	netif_printk(priv, type, KERN_INFO, (dev), fmt, ##args)
+	netif_level(info, priv, type, dev, fmt, ##args)
 
 #if defined(DEBUG)
 #define netif_dbg(priv, type, dev, format, args...)		\
-- 
cgit v1.2.3-70-g09d2


From 7dc2e1134a22dc242175d5321c0c9e97d16eb87b Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 8 Jun 2010 07:48:06 -0600
Subject: of/irq: merge irq mapping code

Merge common irq mapping code between PowerPC and Microblaze.

This patch merges of_irq_find_parent(), of_irq_map_raw() and
of_irq_map_one().  The functions are dependent on one another, so all
three are merged in a single patch.  Other than cosmetic differences
(i.e. DBG() vs. pr_debug()), the implementations are identical.

of_irq_to_resource() is also merged, but in this case the
implementations are different.  This patch drops the microblaze version
and uses the powerpc implementation unchanged.  The microblaze version
essentially open-coded irq_of_parse_and_map() which it does not need
to do.  Therefore the powerpc version is safe to adopt.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
CC: Michal Simek <monstr@monstr.eu>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/microblaze/include/asm/prom.h  |  31 ----
 arch/microblaze/kernel/prom_parse.c | 290 ----------------------------------
 arch/powerpc/include/asm/prom.h     |  47 ------
 arch/powerpc/kernel/prom_parse.c    | 266 -------------------------------
 drivers/of/irq.c                    | 301 ++++++++++++++++++++++++++++++++++++
 include/linux/of_irq.h              |  29 ++++
 6 files changed, 330 insertions(+), 634 deletions(-)

(limited to 'include')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 4f34bc5baa8..5fbdfe76fe7 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -89,34 +89,6 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
 /* Get the MAC address */
 extern const void *of_get_mac_address(struct device_node *np);
 
-/*
- * OF interrupt mapping
- */
-
-#define OF_IMAP_OLDWORLD_MAC	0x00000001
-#define OF_IMAP_NO_PHANDLE	0x00000002
-
-/**
- * of_irq_map_raw - Low level interrupt tree parsing
- * @parent:	the device interrupt parent
- * @intspec:	interrupt specifier ("interrupts" property of the device)
- * @ointsize:	size of the passed in interrupt specifier
- * @addr:	address specifier (start of "reg" property of the device)
- * @out_irq:	structure of_irq filled by this function
- *
- * Returns 0 on success and a negative number on error
- *
- * This function is a low-level interrupt tree walking function. It
- * can be used to do a partial walk with synthetized reg and interrupts
- * properties, for example when resolving PCI interrupts when no device
- * node exist for the parent.
- *
- */
-
-extern int of_irq_map_raw(struct device_node *parent, const u32 *intspec,
-			u32 ointsize, const u32 *addr,
-			struct of_irq *out_irq);
-
 /**
  * of_irq_map_pci - Resolve the interrupt for a PCI device
  * @pdev:	the device whose interrupt is to be resolved
@@ -131,9 +103,6 @@ extern int of_irq_map_raw(struct device_node *parent, const u32 *intspec,
 struct pci_dev;
 extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
 
-extern int of_irq_to_resource(struct device_node *dev, int index,
-			struct resource *r);
-
 /**
  * of_iomap - Maps the memory mapped IO for a given device_node
  * @device:	the device whose io range will be mapped
diff --git a/arch/microblaze/kernel/prom_parse.c b/arch/microblaze/kernel/prom_parse.c
index cba05812ab4..e28968fa34c 100644
--- a/arch/microblaze/kernel/prom_parse.c
+++ b/arch/microblaze/kernel/prom_parse.c
@@ -644,267 +644,6 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
 	*size = of_read_number(dma_window, cells);
 }
 
-/*
- * Interrupt remapper
- */
-
-static unsigned int of_irq_workarounds;
-static struct device_node *of_irq_dflt_pic;
-
-static struct device_node *of_irq_find_parent(struct device_node *child)
-{
-	struct device_node *p;
-	const phandle *parp;
-
-	if (!of_node_get(child))
-		return NULL;
-
-	do {
-		parp = of_get_property(child, "interrupt-parent", NULL);
-		if (parp == NULL)
-			p = of_get_parent(child);
-		else {
-			if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
-				p = of_node_get(of_irq_dflt_pic);
-			else
-				p = of_find_node_by_phandle(*parp);
-		}
-		of_node_put(child);
-		child = p;
-	} while (p && of_get_property(p, "#interrupt-cells", NULL) == NULL);
-
-	return p;
-}
-
-int of_irq_map_raw(struct device_node *parent, const u32 *intspec, u32 ointsize,
-		const u32 *addr, struct of_irq *out_irq)
-{
-	struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL;
-	const u32 *tmp, *imap, *imask;
-	u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0;
-	int imaplen, match, i;
-
-	pr_debug("of_irq_map_raw: par=%s,intspec=[0x%08x 0x%08x...],"
-		"ointsize=%d\n",
-		parent->full_name, intspec[0], intspec[1], ointsize);
-
-	ipar = of_node_get(parent);
-
-	/* First get the #interrupt-cells property of the current cursor
-	 * that tells us how to interpret the passed-in intspec. If there
-	 * is none, we are nice and just walk up the tree
-	 */
-	do {
-		tmp = of_get_property(ipar, "#interrupt-cells", NULL);
-		if (tmp != NULL) {
-			intsize = *tmp;
-			break;
-		}
-		tnode = ipar;
-		ipar = of_irq_find_parent(ipar);
-		of_node_put(tnode);
-	} while (ipar);
-	if (ipar == NULL) {
-		pr_debug(" -> no parent found !\n");
-		goto fail;
-	}
-
-	pr_debug("of_irq_map_raw: ipar=%s, size=%d\n",
-			ipar->full_name, intsize);
-
-	if (ointsize != intsize)
-		return -EINVAL;
-
-	/* Look for this #address-cells. We have to implement the old linux
-	 * trick of looking for the parent here as some device-trees rely on it
-	 */
-	old = of_node_get(ipar);
-	do {
-		tmp = of_get_property(old, "#address-cells", NULL);
-		tnode = of_get_parent(old);
-		of_node_put(old);
-		old = tnode;
-	} while (old && tmp == NULL);
-	of_node_put(old);
-	old = NULL;
-	addrsize = (tmp == NULL) ? 2 : *tmp;
-
-	pr_debug(" -> addrsize=%d\n", addrsize);
-
-	/* Now start the actual "proper" walk of the interrupt tree */
-	while (ipar != NULL) {
-		/* Now check if cursor is an interrupt-controller and if it is
-		 * then we are done
-		 */
-		if (of_get_property(ipar, "interrupt-controller", NULL) !=
-				NULL) {
-			pr_debug(" -> got it !\n");
-			memcpy(out_irq->specifier, intspec,
-				intsize * sizeof(u32));
-			out_irq->size = intsize;
-			out_irq->controller = ipar;
-			of_node_put(old);
-			return 0;
-		}
-
-		/* Now look for an interrupt-map */
-		imap = of_get_property(ipar, "interrupt-map", &imaplen);
-		/* No interrupt map, check for an interrupt parent */
-		if (imap == NULL) {
-			pr_debug(" -> no map, getting parent\n");
-			newpar = of_irq_find_parent(ipar);
-			goto skiplevel;
-		}
-		imaplen /= sizeof(u32);
-
-		/* Look for a mask */
-		imask = of_get_property(ipar, "interrupt-map-mask", NULL);
-
-		/* If we were passed no "reg" property and we attempt to parse
-		 * an interrupt-map, then #address-cells must be 0.
-		 * Fail if it's not.
-		 */
-		if (addr == NULL && addrsize != 0) {
-			pr_debug(" -> no reg passed in when needed !\n");
-			goto fail;
-		}
-
-		/* Parse interrupt-map */
-		match = 0;
-		while (imaplen > (addrsize + intsize + 1) && !match) {
-			/* Compare specifiers */
-			match = 1;
-			for (i = 0; i < addrsize && match; ++i) {
-				u32 mask = imask ? imask[i] : 0xffffffffu;
-				match = ((addr[i] ^ imap[i]) & mask) == 0;
-			}
-			for (; i < (addrsize + intsize) && match; ++i) {
-				u32 mask = imask ? imask[i] : 0xffffffffu;
-				match =
-					((intspec[i-addrsize] ^ imap[i])
-						& mask) == 0;
-			}
-			imap += addrsize + intsize;
-			imaplen -= addrsize + intsize;
-
-			pr_debug(" -> match=%d (imaplen=%d)\n", match, imaplen);
-
-			/* Get the interrupt parent */
-			if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
-				newpar = of_node_get(of_irq_dflt_pic);
-			else
-				newpar =
-					of_find_node_by_phandle((phandle)*imap);
-			imap++;
-			--imaplen;
-
-			/* Check if not found */
-			if (newpar == NULL) {
-				pr_debug(" -> imap parent not found !\n");
-				goto fail;
-			}
-
-			/* Get #interrupt-cells and #address-cells of new
-			 * parent
-			 */
-			tmp = of_get_property(newpar, "#interrupt-cells", NULL);
-			if (tmp == NULL) {
-				pr_debug(" -> parent lacks "
-						"#interrupt-cells!\n");
-				goto fail;
-			}
-			newintsize = *tmp;
-			tmp = of_get_property(newpar, "#address-cells", NULL);
-			newaddrsize = (tmp == NULL) ? 0 : *tmp;
-
-			pr_debug(" -> newintsize=%d, newaddrsize=%d\n",
-				newintsize, newaddrsize);
-
-			/* Check for malformed properties */
-			if (imaplen < (newaddrsize + newintsize))
-				goto fail;
-
-			imap += newaddrsize + newintsize;
-			imaplen -= newaddrsize + newintsize;
-
-			pr_debug(" -> imaplen=%d\n", imaplen);
-		}
-		if (!match)
-			goto fail;
-
-		of_node_put(old);
-		old = of_node_get(newpar);
-		addrsize = newaddrsize;
-		intsize = newintsize;
-		intspec = imap - intsize;
-		addr = intspec - addrsize;
-
-skiplevel:
-		/* Iterate again with new parent */
-		pr_debug(" -> new parent: %s\n",
-				newpar ? newpar->full_name : "<>");
-		of_node_put(ipar);
-		ipar = newpar;
-		newpar = NULL;
-	}
-fail:
-	of_node_put(ipar);
-	of_node_put(old);
-	of_node_put(newpar);
-
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(of_irq_map_raw);
-
-int of_irq_map_one(struct device_node *device,
-			int index, struct of_irq *out_irq)
-{
-	struct device_node *p;
-	const u32 *intspec, *tmp, *addr;
-	u32 intsize, intlen;
-	int res;
-
-	pr_debug("of_irq_map_one: dev=%s, index=%d\n",
-			device->full_name, index);
-
-	/* Get the interrupts property */
-	intspec = of_get_property(device, "interrupts", (int *) &intlen);
-	if (intspec == NULL)
-		return -EINVAL;
-	intlen /= sizeof(u32);
-
-	pr_debug(" intspec=%d intlen=%d\n", *intspec, intlen);
-
-	/* Get the reg property (if any) */
-	addr = of_get_property(device, "reg", NULL);
-
-	/* Look for the interrupt parent. */
-	p = of_irq_find_parent(device);
-	if (p == NULL)
-		return -EINVAL;
-
-	/* Get size of interrupt specifier */
-	tmp = of_get_property(p, "#interrupt-cells", NULL);
-	if (tmp == NULL) {
-		of_node_put(p);
-		return -EINVAL;
-	}
-	intsize = *tmp;
-
-	pr_debug(" intsize=%d intlen=%d\n", intsize, intlen);
-
-	/* Check index */
-	if ((index + 1) * intsize > intlen)
-		return -EINVAL;
-
-	/* Get new specifier and map it */
-	res = of_irq_map_raw(p, intspec + index * intsize, intsize,
-				addr, out_irq);
-	of_node_put(p);
-	return res;
-}
-EXPORT_SYMBOL_GPL(of_irq_map_one);
-
 /**
  * Search the device tree for the best MAC address to use.  'mac-address' is
  * checked first, because that is supposed to contain to "most recent" MAC
@@ -943,35 +682,6 @@ const void *of_get_mac_address(struct device_node *np)
 }
 EXPORT_SYMBOL(of_get_mac_address);
 
-int of_irq_to_resource(struct device_node *dev, int index, struct resource *r)
-{
-	struct of_irq out_irq;
-	int irq;
-	int res;
-
-	res = of_irq_map_one(dev, index, &out_irq);
-
-	/* Get irq for the device */
-	if (res) {
-		pr_debug("IRQ not found... code = %d", res);
-		return NO_IRQ;
-	}
-	/* Assuming single interrupt controller... */
-	irq = out_irq.specifier[0];
-
-	pr_debug("IRQ found = %d", irq);
-
-	/* Only dereference the resource if both the
-	 * resource and the irq are valid. */
-	if (r && irq != NO_IRQ) {
-		r->start = r->end = irq;
-		r->flags = IORESOURCE_IRQ;
-	}
-
-	return irq;
-}
-EXPORT_SYMBOL_GPL(of_irq_to_resource);
-
 void __iomem *of_iomap(struct device_node *np, int index)
 {
 	struct resource res;
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 4486765db6e..10d5ee55670 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -105,50 +105,6 @@ struct device_node *of_find_next_cache_node(struct device_node *np);
 /* Get the MAC address */
 extern const void *of_get_mac_address(struct device_node *np);
 
-/*
- * OF interrupt mapping
- */
-
-#define OF_IMAP_OLDWORLD_MAC	0x00000001
-#define OF_IMAP_NO_PHANDLE	0x00000002
-
-#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
-/* Workarounds only needed for 32bit powermac machines */
-extern unsigned int of_irq_workarounds;
-extern struct device_node *of_irq_dflt_pic;
-extern int of_irq_map_oldworld(struct device_node *device, int index,
-			       struct of_irq *out_irq);
-#else
-#define of_irq_workarounds (0)
-#define of_irq_dflt_pic (NULL)
-static inline int of_irq_map_oldworld(struct device_node *device, int index,
-				      struct of_irq *out_irq)
-{
-	return -EINVAL;
-}
-#endif
-
-/**
- * of_irq_map_raw - Low level interrupt tree parsing
- * @parent:	the device interrupt parent
- * @intspec:	interrupt specifier ("interrupts" property of the device)
- * @ointsize:   size of the passed in interrupt specifier
- * @addr:	address specifier (start of "reg" property of the device)
- * @out_irq:	structure of_irq filled by this function
- *
- * Returns 0 on success and a negative number on error
- *
- * This function is a low-level interrupt tree walking function. It
- * can be used to do a partial walk with synthetized reg and interrupts
- * properties, for example when resolving PCI interrupts when no device
- * node exist for the parent.
- *
- */
-
-extern int of_irq_map_raw(struct device_node *parent, const u32 *intspec,
-			  u32 ointsize, const u32 *addr,
-			  struct of_irq *out_irq);
-
 /**
  * of_irq_map_pci - Resolve the interrupt for a PCI device
  * @pdev:	the device whose interrupt is to be resolved
@@ -163,9 +119,6 @@ extern int of_irq_map_raw(struct device_node *parent, const u32 *intspec,
 struct pci_dev;
 extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
 
-extern int of_irq_to_resource(struct device_node *dev, int index,
-			struct resource *r);
-
 /**
  * of_iomap - Maps the memory mapped IO for a given device_node
  * @device:	the device whose io range will be mapped
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index dfa6de6572b..d61a5c5fe69 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -678,257 +678,6 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
 	*size = of_read_number(dma_window, cells);
 }
 
-/*
- * Interrupt remapper
- */
-
-static struct device_node *of_irq_find_parent(struct device_node *child)
-{
-	struct device_node *p;
-	const phandle *parp;
-
-	if (!of_node_get(child))
-		return NULL;
-
-	do {
-		parp = of_get_property(child, "interrupt-parent", NULL);
-		if (parp == NULL)
-			p = of_get_parent(child);
-		else {
-			if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
-				p = of_node_get(of_irq_dflt_pic);
-			else
-				p = of_find_node_by_phandle(*parp);
-		}
-		of_node_put(child);
-		child = p;
-	} while (p && of_get_property(p, "#interrupt-cells", NULL) == NULL);
-
-	return p;
-}
-
-int of_irq_map_raw(struct device_node *parent, const u32 *intspec, u32 ointsize,
-		const u32 *addr, struct of_irq *out_irq)
-{
-	struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL;
-	const u32 *tmp, *imap, *imask;
-	u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0;
-	int imaplen, match, i;
-
-	DBG("of_irq_map_raw: par=%s,intspec=[0x%08x 0x%08x...],ointsize=%d\n",
-	    parent->full_name, intspec[0], intspec[1], ointsize);
-
-	ipar = of_node_get(parent);
-
-	/* First get the #interrupt-cells property of the current cursor
-	 * that tells us how to interpret the passed-in intspec. If there
-	 * is none, we are nice and just walk up the tree
-	 */
-	do {
-		tmp = of_get_property(ipar, "#interrupt-cells", NULL);
-		if (tmp != NULL) {
-			intsize = *tmp;
-			break;
-		}
-		tnode = ipar;
-		ipar = of_irq_find_parent(ipar);
-		of_node_put(tnode);
-	} while (ipar);
-	if (ipar == NULL) {
-		DBG(" -> no parent found !\n");
-		goto fail;
-	}
-
-	DBG("of_irq_map_raw: ipar=%s, size=%d\n", ipar->full_name, intsize);
-
-	if (ointsize != intsize)
-		return -EINVAL;
-
-	/* Look for this #address-cells. We have to implement the old linux
-	 * trick of looking for the parent here as some device-trees rely on it
-	 */
-	old = of_node_get(ipar);
-	do {
-		tmp = of_get_property(old, "#address-cells", NULL);
-		tnode = of_get_parent(old);
-		of_node_put(old);
-		old = tnode;
-	} while(old && tmp == NULL);
-	of_node_put(old);
-	old = NULL;
-	addrsize = (tmp == NULL) ? 2 : *tmp;
-
-	DBG(" -> addrsize=%d\n", addrsize);
-
-	/* Now start the actual "proper" walk of the interrupt tree */
-	while (ipar != NULL) {
-		/* Now check if cursor is an interrupt-controller and if it is
-		 * then we are done
-		 */
-		if (of_get_property(ipar, "interrupt-controller", NULL) !=
-				NULL) {
-			DBG(" -> got it !\n");
-			memcpy(out_irq->specifier, intspec,
-			       intsize * sizeof(u32));
-			out_irq->size = intsize;
-			out_irq->controller = ipar;
-			of_node_put(old);
-			return 0;
-		}
-
-		/* Now look for an interrupt-map */
-		imap = of_get_property(ipar, "interrupt-map", &imaplen);
-		/* No interrupt map, check for an interrupt parent */
-		if (imap == NULL) {
-			DBG(" -> no map, getting parent\n");
-			newpar = of_irq_find_parent(ipar);
-			goto skiplevel;
-		}
-		imaplen /= sizeof(u32);
-
-		/* Look for a mask */
-		imask = of_get_property(ipar, "interrupt-map-mask", NULL);
-
-		/* If we were passed no "reg" property and we attempt to parse
-		 * an interrupt-map, then #address-cells must be 0.
-		 * Fail if it's not.
-		 */
-		if (addr == NULL && addrsize != 0) {
-			DBG(" -> no reg passed in when needed !\n");
-			goto fail;
-		}
-
-		/* Parse interrupt-map */
-		match = 0;
-		while (imaplen > (addrsize + intsize + 1) && !match) {
-			/* Compare specifiers */
-			match = 1;
-			for (i = 0; i < addrsize && match; ++i) {
-				u32 mask = imask ? imask[i] : 0xffffffffu;
-				match = ((addr[i] ^ imap[i]) & mask) == 0;
-			}
-			for (; i < (addrsize + intsize) && match; ++i) {
-				u32 mask = imask ? imask[i] : 0xffffffffu;
-				match =
-				   ((intspec[i-addrsize] ^ imap[i]) & mask) == 0;
-			}
-			imap += addrsize + intsize;
-			imaplen -= addrsize + intsize;
-
-			DBG(" -> match=%d (imaplen=%d)\n", match, imaplen);
-
-			/* Get the interrupt parent */
-			if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
-				newpar = of_node_get(of_irq_dflt_pic);
-			else
-				newpar = of_find_node_by_phandle((phandle)*imap);
-			imap++;
-			--imaplen;
-
-			/* Check if not found */
-			if (newpar == NULL) {
-				DBG(" -> imap parent not found !\n");
-				goto fail;
-			}
-
-			/* Get #interrupt-cells and #address-cells of new
-			 * parent
-			 */
-			tmp = of_get_property(newpar, "#interrupt-cells", NULL);
-			if (tmp == NULL) {
-				DBG(" -> parent lacks #interrupt-cells !\n");
-				goto fail;
-			}
-			newintsize = *tmp;
-			tmp = of_get_property(newpar, "#address-cells", NULL);
-			newaddrsize = (tmp == NULL) ? 0 : *tmp;
-
-			DBG(" -> newintsize=%d, newaddrsize=%d\n",
-			    newintsize, newaddrsize);
-
-			/* Check for malformed properties */
-			if (imaplen < (newaddrsize + newintsize))
-				goto fail;
-
-			imap += newaddrsize + newintsize;
-			imaplen -= newaddrsize + newintsize;
-
-			DBG(" -> imaplen=%d\n", imaplen);
-		}
-		if (!match)
-			goto fail;
-
-		of_node_put(old);
-		old = of_node_get(newpar);
-		addrsize = newaddrsize;
-		intsize = newintsize;
-		intspec = imap - intsize;
-		addr = intspec - addrsize;
-
-	skiplevel:
-		/* Iterate again with new parent */
-		DBG(" -> new parent: %s\n", newpar ? newpar->full_name : "<>");
-		of_node_put(ipar);
-		ipar = newpar;
-		newpar = NULL;
-	}
- fail:
-	of_node_put(ipar);
-	of_node_put(old);
-	of_node_put(newpar);
-
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(of_irq_map_raw);
-
-int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq)
-{
-	struct device_node *p;
-	const u32 *intspec, *tmp, *addr;
-	u32 intsize, intlen;
-	int res = -EINVAL;
-
-	DBG("of_irq_map_one: dev=%s, index=%d\n", device->full_name, index);
-
-	/* OldWorld mac stuff is "special", handle out of line */
-	if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC)
-		return of_irq_map_oldworld(device, index, out_irq);
-
-	/* Get the interrupts property */
-	intspec = of_get_property(device, "interrupts", &intlen);
-	if (intspec == NULL)
-		return -EINVAL;
-	intlen /= sizeof(u32);
-
-	/* Get the reg property (if any) */
-	addr = of_get_property(device, "reg", NULL);
-
-	/* Look for the interrupt parent. */
-	p = of_irq_find_parent(device);
-	if (p == NULL)
-		return -EINVAL;
-
-	/* Get size of interrupt specifier */
-	tmp = of_get_property(p, "#interrupt-cells", NULL);
-	if (tmp == NULL)
-		goto out;
-	intsize = *tmp;
-
-	DBG(" intsize=%d intlen=%d\n", intsize, intlen);
-
-	/* Check index */
-	if ((index + 1) * intsize > intlen)
-		goto out;
-
-	/* Get new specifier and map it */
-	res = of_irq_map_raw(p, intspec + index * intsize, intsize,
-			     addr, out_irq);
-out:
-	of_node_put(p);
-	return res;
-}
-EXPORT_SYMBOL_GPL(of_irq_map_one);
-
 /**
  * Search the device tree for the best MAC address to use.  'mac-address' is
  * checked first, because that is supposed to contain to "most recent" MAC
@@ -967,21 +716,6 @@ const void *of_get_mac_address(struct device_node *np)
 }
 EXPORT_SYMBOL(of_get_mac_address);
 
-int of_irq_to_resource(struct device_node *dev, int index, struct resource *r)
-{
-	int irq = irq_of_parse_and_map(dev, index);
-
-	/* Only dereference the resource if both the
-	 * resource and the irq are valid. */
-	if (r && irq != NO_IRQ) {
-		r->start = r->end = irq;
-		r->flags = IORESOURCE_IRQ;
-	}
-
-	return irq;
-}
-EXPORT_SYMBOL_GPL(of_irq_to_resource);
-
 void __iomem *of_iomap(struct device_node *np, int index)
 {
 	struct resource res;
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 9b3397c2709..598454fbdd1 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -43,3 +43,304 @@ unsigned int irq_of_parse_and_map(struct device_node *dev, int index)
 				     oirq.size);
 }
 EXPORT_SYMBOL_GPL(irq_of_parse_and_map);
+
+/**
+ * of_irq_find_parent - Given a device node, find its interrupt parent node
+ * @child: pointer to device node
+ *
+ * Returns a pointer to the interrupt parent node, or NULL if the interrupt
+ * parent could not be determined.
+ */
+static struct device_node *of_irq_find_parent(struct device_node *child)
+{
+	struct device_node *p;
+	const phandle *parp;
+
+	if (!of_node_get(child))
+		return NULL;
+
+	do {
+		parp = of_get_property(child, "interrupt-parent", NULL);
+		if (parp == NULL)
+			p = of_get_parent(child);
+		else {
+			if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
+				p = of_node_get(of_irq_dflt_pic);
+			else
+				p = of_find_node_by_phandle(*parp);
+		}
+		of_node_put(child);
+		child = p;
+	} while (p && of_get_property(p, "#interrupt-cells", NULL) == NULL);
+
+	return p;
+}
+
+/**
+ * of_irq_map_raw - Low level interrupt tree parsing
+ * @parent:	the device interrupt parent
+ * @intspec:	interrupt specifier ("interrupts" property of the device)
+ * @ointsize:   size of the passed in interrupt specifier
+ * @addr:	address specifier (start of "reg" property of the device)
+ * @out_irq:	structure of_irq filled by this function
+ *
+ * Returns 0 on success and a negative number on error
+ *
+ * This function is a low-level interrupt tree walking function. It
+ * can be used to do a partial walk with synthetized reg and interrupts
+ * properties, for example when resolving PCI interrupts when no device
+ * node exist for the parent.
+ */
+int of_irq_map_raw(struct device_node *parent, const u32 *intspec, u32 ointsize,
+		const u32 *addr, struct of_irq *out_irq)
+{
+	struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL;
+	const u32 *tmp, *imap, *imask;
+	u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0;
+	int imaplen, match, i;
+
+	pr_debug("of_irq_map_raw: par=%s,intspec=[0x%08x 0x%08x...],ointsize=%d\n",
+	    parent->full_name, intspec[0], intspec[1], ointsize);
+
+	ipar = of_node_get(parent);
+
+	/* First get the #interrupt-cells property of the current cursor
+	 * that tells us how to interpret the passed-in intspec. If there
+	 * is none, we are nice and just walk up the tree
+	 */
+	do {
+		tmp = of_get_property(ipar, "#interrupt-cells", NULL);
+		if (tmp != NULL) {
+			intsize = *tmp;
+			break;
+		}
+		tnode = ipar;
+		ipar = of_irq_find_parent(ipar);
+		of_node_put(tnode);
+	} while (ipar);
+	if (ipar == NULL) {
+		pr_debug(" -> no parent found !\n");
+		goto fail;
+	}
+
+	pr_debug("of_irq_map_raw: ipar=%s, size=%d\n", ipar->full_name, intsize);
+
+	if (ointsize != intsize)
+		return -EINVAL;
+
+	/* Look for this #address-cells. We have to implement the old linux
+	 * trick of looking for the parent here as some device-trees rely on it
+	 */
+	old = of_node_get(ipar);
+	do {
+		tmp = of_get_property(old, "#address-cells", NULL);
+		tnode = of_get_parent(old);
+		of_node_put(old);
+		old = tnode;
+	} while (old && tmp == NULL);
+	of_node_put(old);
+	old = NULL;
+	addrsize = (tmp == NULL) ? 2 : *tmp;
+
+	pr_debug(" -> addrsize=%d\n", addrsize);
+
+	/* Now start the actual "proper" walk of the interrupt tree */
+	while (ipar != NULL) {
+		/* Now check if cursor is an interrupt-controller and if it is
+		 * then we are done
+		 */
+		if (of_get_property(ipar, "interrupt-controller", NULL) !=
+				NULL) {
+			pr_debug(" -> got it !\n");
+			memcpy(out_irq->specifier, intspec,
+			       intsize * sizeof(u32));
+			out_irq->size = intsize;
+			out_irq->controller = ipar;
+			of_node_put(old);
+			return 0;
+		}
+
+		/* Now look for an interrupt-map */
+		imap = of_get_property(ipar, "interrupt-map", &imaplen);
+		/* No interrupt map, check for an interrupt parent */
+		if (imap == NULL) {
+			pr_debug(" -> no map, getting parent\n");
+			newpar = of_irq_find_parent(ipar);
+			goto skiplevel;
+		}
+		imaplen /= sizeof(u32);
+
+		/* Look for a mask */
+		imask = of_get_property(ipar, "interrupt-map-mask", NULL);
+
+		/* If we were passed no "reg" property and we attempt to parse
+		 * an interrupt-map, then #address-cells must be 0.
+		 * Fail if it's not.
+		 */
+		if (addr == NULL && addrsize != 0) {
+			pr_debug(" -> no reg passed in when needed !\n");
+			goto fail;
+		}
+
+		/* Parse interrupt-map */
+		match = 0;
+		while (imaplen > (addrsize + intsize + 1) && !match) {
+			/* Compare specifiers */
+			match = 1;
+			for (i = 0; i < addrsize && match; ++i) {
+				u32 mask = imask ? imask[i] : 0xffffffffu;
+				match = ((addr[i] ^ imap[i]) & mask) == 0;
+			}
+			for (; i < (addrsize + intsize) && match; ++i) {
+				u32 mask = imask ? imask[i] : 0xffffffffu;
+				match =
+				   ((intspec[i-addrsize] ^ imap[i]) & mask) == 0;
+			}
+			imap += addrsize + intsize;
+			imaplen -= addrsize + intsize;
+
+			pr_debug(" -> match=%d (imaplen=%d)\n", match, imaplen);
+
+			/* Get the interrupt parent */
+			if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
+				newpar = of_node_get(of_irq_dflt_pic);
+			else
+				newpar = of_find_node_by_phandle((phandle)*imap);
+			imap++;
+			--imaplen;
+
+			/* Check if not found */
+			if (newpar == NULL) {
+				pr_debug(" -> imap parent not found !\n");
+				goto fail;
+			}
+
+			/* Get #interrupt-cells and #address-cells of new
+			 * parent
+			 */
+			tmp = of_get_property(newpar, "#interrupt-cells", NULL);
+			if (tmp == NULL) {
+				pr_debug(" -> parent lacks #interrupt-cells!\n");
+				goto fail;
+			}
+			newintsize = *tmp;
+			tmp = of_get_property(newpar, "#address-cells", NULL);
+			newaddrsize = (tmp == NULL) ? 0 : *tmp;
+
+			pr_debug(" -> newintsize=%d, newaddrsize=%d\n",
+			    newintsize, newaddrsize);
+
+			/* Check for malformed properties */
+			if (imaplen < (newaddrsize + newintsize))
+				goto fail;
+
+			imap += newaddrsize + newintsize;
+			imaplen -= newaddrsize + newintsize;
+
+			pr_debug(" -> imaplen=%d\n", imaplen);
+		}
+		if (!match)
+			goto fail;
+
+		of_node_put(old);
+		old = of_node_get(newpar);
+		addrsize = newaddrsize;
+		intsize = newintsize;
+		intspec = imap - intsize;
+		addr = intspec - addrsize;
+
+	skiplevel:
+		/* Iterate again with new parent */
+		pr_debug(" -> new parent: %s\n", newpar ? newpar->full_name : "<>");
+		of_node_put(ipar);
+		ipar = newpar;
+		newpar = NULL;
+	}
+ fail:
+	of_node_put(ipar);
+	of_node_put(old);
+	of_node_put(newpar);
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(of_irq_map_raw);
+
+/**
+ * of_irq_map_one - Resolve an interrupt for a device
+ * @device: the device whose interrupt is to be resolved
+ * @index: index of the interrupt to resolve
+ * @out_irq: structure of_irq filled by this function
+ *
+ * This function resolves an interrupt, walking the tree, for a given
+ * device-tree node. It's the high level pendant to of_irq_map_raw().
+ */
+int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq)
+{
+	struct device_node *p;
+	const u32 *intspec, *tmp, *addr;
+	u32 intsize, intlen;
+	int res = -EINVAL;
+
+	pr_debug("of_irq_map_one: dev=%s, index=%d\n", device->full_name, index);
+
+	/* OldWorld mac stuff is "special", handle out of line */
+	if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC)
+		return of_irq_map_oldworld(device, index, out_irq);
+
+	/* Get the interrupts property */
+	intspec = of_get_property(device, "interrupts", &intlen);
+	if (intspec == NULL)
+		return -EINVAL;
+	intlen /= sizeof(u32);
+
+	pr_debug(" intspec=%d intlen=%d\n", *intspec, intlen);
+
+	/* Get the reg property (if any) */
+	addr = of_get_property(device, "reg", NULL);
+
+	/* Look for the interrupt parent. */
+	p = of_irq_find_parent(device);
+	if (p == NULL)
+		return -EINVAL;
+
+	/* Get size of interrupt specifier */
+	tmp = of_get_property(p, "#interrupt-cells", NULL);
+	if (tmp == NULL)
+		goto out;
+	intsize = *tmp;
+
+	pr_debug(" intsize=%d intlen=%d\n", intsize, intlen);
+
+	/* Check index */
+	if ((index + 1) * intsize > intlen)
+		goto out;
+
+	/* Get new specifier and map it */
+	res = of_irq_map_raw(p, intspec + index * intsize, intsize,
+			     addr, out_irq);
+ out:
+	of_node_put(p);
+	return res;
+}
+EXPORT_SYMBOL_GPL(of_irq_map_one);
+
+/**
+ * of_irq_to_resource - Decode a node's IRQ and return it as a resource
+ * @dev: pointer to device tree node
+ * @index: zero-based index of the irq
+ * @r: pointer to resource structure to return result into.
+ */
+int of_irq_to_resource(struct device_node *dev, int index, struct resource *r)
+{
+	int irq = irq_of_parse_and_map(dev, index);
+
+	/* Only dereference the resource if both the
+	 * resource and the irq are valid. */
+	if (r && irq != NO_IRQ) {
+		r->start = r->end = irq;
+		r->flags = IORESOURCE_IRQ;
+	}
+
+	return irq;
+}
+EXPORT_SYMBOL_GPL(of_irq_to_resource);
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 0e37c05b7dd..5929781c104 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -4,6 +4,8 @@
 #if defined(CONFIG_OF)
 struct of_irq;
 #include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
 #include <linux/of.h>
 
 /*
@@ -30,11 +32,38 @@ struct of_irq {
 	u32 specifier[OF_MAX_IRQ_SPEC]; /* Specifier copy */
 };
 
+/*
+ * Workarounds only applied to 32bit powermac machines
+ */
+#define OF_IMAP_OLDWORLD_MAC	0x00000001
+#define OF_IMAP_NO_PHANDLE	0x00000002
+
+#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
+extern unsigned int of_irq_workarounds;
+extern struct device_node *of_irq_dflt_pic;
+extern int of_irq_map_oldworld(struct device_node *device, int index,
+			       struct of_irq *out_irq);
+#else /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
+#define of_irq_workarounds (0)
+#define of_irq_dflt_pic (NULL)
+static inline int of_irq_map_oldworld(struct device_node *device, int index,
+				      struct of_irq *out_irq)
+{
+	return -EINVAL;
+}
+#endif /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
+
+
+extern int of_irq_map_raw(struct device_node *parent, const u32 *intspec,
+			  u32 ointsize, const u32 *addr,
+			  struct of_irq *out_irq);
 extern int of_irq_map_one(struct device_node *device, int index,
 			  struct of_irq *out_irq);
 extern unsigned int irq_create_of_mapping(struct device_node *controller,
 					  const u32 *intspec,
 					  unsigned int intsize);
+extern int of_irq_to_resource(struct device_node *dev, int index,
+			      struct resource *r);
 
 #endif /* CONFIG_OF_IRQ */
 #endif /* CONFIG_OF */
-- 
cgit v1.2.3-70-g09d2


From 6b884a8d50a6eea2fb3dad7befe748f67193073b Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 8 Jun 2010 07:48:09 -0600
Subject: of/address: merge of_iomap()

Merge common code between Microblaze and PowerPC.  This patch creates
new of_address.h and address.c files to contain address translation
and mapping routines.  The first routine to be moved is of_iomap().

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Michal Simek <monstr@monstr.eu>
CC: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/microblaze/include/asm/prom.h  | 10 +---------
 arch/microblaze/kernel/prom_parse.c | 11 -----------
 arch/powerpc/include/asm/prom.h     | 10 +---------
 arch/powerpc/kernel/prom_parse.c    | 11 -----------
 drivers/of/Kconfig                  |  4 ++++
 drivers/of/Makefile                 |  1 +
 drivers/of/address.c                | 22 ++++++++++++++++++++++
 include/linux/of_address.h          |  9 +++++++++
 8 files changed, 38 insertions(+), 40 deletions(-)
 create mode 100644 drivers/of/address.c
 create mode 100644 include/linux/of_address.h

(limited to 'include')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 5fbdfe76fe7..6411c3b3a80 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -20,6 +20,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
+#include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_fdt.h>
 #include <linux/proc_fs.h>
@@ -103,15 +104,6 @@ extern const void *of_get_mac_address(struct device_node *np);
 struct pci_dev;
 extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
 
-/**
- * of_iomap - Maps the memory mapped IO for a given device_node
- * @device:	the device whose io range will be mapped
- * @index:	index of the io range
- *
- * Returns a pointer to the mapped memory
- */
-extern void __iomem *of_iomap(struct device_node *device, int index);
-
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_MICROBLAZE_PROM_H */
diff --git a/arch/microblaze/kernel/prom_parse.c b/arch/microblaze/kernel/prom_parse.c
index e28968fa34c..1159ba52ad4 100644
--- a/arch/microblaze/kernel/prom_parse.c
+++ b/arch/microblaze/kernel/prom_parse.c
@@ -681,14 +681,3 @@ const void *of_get_mac_address(struct device_node *np)
 	return NULL;
 }
 EXPORT_SYMBOL(of_get_mac_address);
-
-void __iomem *of_iomap(struct device_node *np, int index)
-{
-	struct resource res;
-
-	if (of_address_to_resource(np, index, &res))
-		return NULL;
-
-	return ioremap(res.start, 1 + res.end - res.start);
-}
-EXPORT_SYMBOL(of_iomap);
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 10d5ee55670..0abe379c6f3 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -18,6 +18,7 @@
  */
 #include <linux/types.h>
 #include <linux/of_fdt.h>
+#include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/proc_fs.h>
 #include <linux/platform_device.h>
@@ -119,14 +120,5 @@ extern const void *of_get_mac_address(struct device_node *np);
 struct pci_dev;
 extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
 
-/**
- * of_iomap - Maps the memory mapped IO for a given device_node
- * @device:	the device whose io range will be mapped
- * @index:	index of the io range
- *
- * Returns a pointer to the mapped memory
- */
-extern void __iomem *of_iomap(struct device_node *device, int index);
-
 #endif /* __KERNEL__ */
 #endif /* _POWERPC_PROM_H */
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index d61a5c5fe69..1d5d4f6dfef 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -715,14 +715,3 @@ const void *of_get_mac_address(struct device_node *np)
 	return NULL;
 }
 EXPORT_SYMBOL(of_get_mac_address);
-
-void __iomem *of_iomap(struct device_node *np, int index)
-{
-	struct resource res;
-
-	if (of_address_to_resource(np, index, &res))
-		return NULL;
-
-	return ioremap(res.start, 1 + res.end - res.start);
-}
-EXPORT_SYMBOL(of_iomap);
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index b87495efa16..097f42aebe9 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -6,6 +6,10 @@ config OF_DYNAMIC
 	def_bool y
 	depends on OF && PPC_OF
 
+config OF_ADDRESS
+	def_bool y
+	depends on OF && !SPARC
+
 config OF_IRQ
 	def_bool y
 	depends on OF && !SPARC
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 3631a5ea0b4..0052c405463 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -1,5 +1,6 @@
 obj-y = base.o
 obj-$(CONFIG_OF_FLATTREE) += fdt.o
+obj-$(CONFIG_OF_ADDRESS)  += address.o
 obj-$(CONFIG_OF_IRQ)    += irq.o
 obj-$(CONFIG_OF_DEVICE) += device.o platform.o
 obj-$(CONFIG_OF_GPIO)   += gpio.o
diff --git a/drivers/of/address.c b/drivers/of/address.c
new file mode 100644
index 00000000000..258528d6c4f
--- /dev/null
+++ b/drivers/of/address.c
@@ -0,0 +1,22 @@
+
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/of_address.h>
+
+/**
+ * of_iomap - Maps the memory mapped IO for a given device_node
+ * @device:	the device whose io range will be mapped
+ * @index:	index of the io range
+ *
+ * Returns a pointer to the mapped memory
+ */
+void __iomem *of_iomap(struct device_node *np, int index)
+{
+	struct resource res;
+
+	if (of_address_to_resource(np, index, &res))
+		return NULL;
+
+	return ioremap(res.start, 1 + res.end - res.start);
+}
+EXPORT_SYMBOL(of_iomap);
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
new file mode 100644
index 00000000000..570831d7e79
--- /dev/null
+++ b/include/linux/of_address.h
@@ -0,0 +1,9 @@
+#ifndef __OF_ADDRESS_H
+#define __OF_ADDRESS_H
+#include <linux/ioport.h>
+#include <linux/of.h>
+
+extern void __iomem *of_iomap(struct device_node *device, int index);
+
+#endif /* __OF_ADDRESS_H */
+
-- 
cgit v1.2.3-70-g09d2


From 1f5bef30cf6c66f097ea5dfc580a41924df888d1 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 8 Jun 2010 07:48:09 -0600
Subject: of/address: merge of_address_to_resource()

Merge common code between PowerPC and Microblaze.  This patch also
moves the prototype of pci_address_to_pio() out of pci-bridge.h and
into prom.h because the only user of pci_address_to_pio() is
of_address_to_resource().

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Michal Simek <monstr@monstr.eu>
CC: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/microblaze/include/asm/pci-bridge.h |  5 ----
 arch/microblaze/include/asm/prom.h       | 17 ++++++-----
 arch/microblaze/kernel/prom_parse.c      | 46 +---------------------------
 arch/powerpc/include/asm/pci-bridge.h    |  5 ----
 arch/powerpc/include/asm/prom.h          | 17 ++++++-----
 arch/powerpc/kernel/prom_parse.c         | 47 +----------------------------
 drivers/of/address.c                     | 51 ++++++++++++++++++++++++++++++++
 include/linux/of_address.h               |  5 ++++
 8 files changed, 76 insertions(+), 117 deletions(-)

(limited to 'include')

diff --git a/arch/microblaze/include/asm/pci-bridge.h b/arch/microblaze/include/asm/pci-bridge.h
index 0c77cda9f5d..0c68764ab54 100644
--- a/arch/microblaze/include/asm/pci-bridge.h
+++ b/arch/microblaze/include/asm/pci-bridge.h
@@ -172,13 +172,8 @@ static inline int pci_has_flag(int flag)
 
 extern struct list_head hose_list;
 
-extern unsigned long pci_address_to_pio(phys_addr_t address);
 extern int pcibios_vaddr_is_ioport(void __iomem *address);
 #else
-static inline unsigned long pci_address_to_pio(phys_addr_t address)
-{
-	return (unsigned long)-1;
-}
 static inline int pcibios_vaddr_is_ioport(void __iomem *address)
 {
 	return 0;
diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 6411c3b3a80..4e94c0706c5 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -65,17 +65,18 @@ extern const u32 *of_get_address(struct device_node *dev, int index,
 extern const u32 *of_get_pci_address(struct device_node *dev, int bar_no,
 			u64 *size, unsigned int *flags);
 
-/* Get an address as a resource. Note that if your address is
- * a PIO address, the conversion will fail if the physical address
- * can't be internally converted to an IO token with
- * pci_address_to_pio(), that is because it's either called to early
- * or it can't be matched to any host bridge IO space
- */
-extern int of_address_to_resource(struct device_node *dev, int index,
-				struct resource *r);
 extern int of_pci_address_to_resource(struct device_node *dev, int bar,
 				struct resource *r);
 
+#ifdef CONFIG_PCI
+extern unsigned long pci_address_to_pio(phys_addr_t address);
+#else
+static inline unsigned long pci_address_to_pio(phys_addr_t address)
+{
+	return (unsigned long)-1;
+}
+#endif	/* CONFIG_PCI */
+
 /* Parse the ibm,dma-window property of an OF node into the busno, phys and
  * size parameters.
  */
diff --git a/arch/microblaze/kernel/prom_parse.c b/arch/microblaze/kernel/prom_parse.c
index 1159ba52ad4..2f9cdd26ca1 100644
--- a/arch/microblaze/kernel/prom_parse.c
+++ b/arch/microblaze/kernel/prom_parse.c
@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/ioport.h>
 #include <linux/etherdevice.h>
+#include <linux/of_address.h>
 #include <asm/prom.h>
 #include <asm/pci-bridge.h>
 
@@ -17,9 +18,6 @@
 			(ns) > 0)
 
 static struct of_bus *of_match_bus(struct device_node *np);
-static int __of_address_to_resource(struct device_node *dev,
-		const u32 *addrp, u64 size, unsigned int flags,
-		struct resource *r);
 
 /* Debug utility */
 #ifdef DEBUG
@@ -576,48 +574,6 @@ const u32 *of_get_address(struct device_node *dev, int index, u64 *size,
 }
 EXPORT_SYMBOL(of_get_address);
 
-static int __of_address_to_resource(struct device_node *dev, const u32 *addrp,
-				u64 size, unsigned int flags,
-				struct resource *r)
-{
-	u64 taddr;
-
-	if ((flags & (IORESOURCE_IO | IORESOURCE_MEM)) == 0)
-		return -EINVAL;
-	taddr = of_translate_address(dev, addrp);
-	if (taddr == OF_BAD_ADDR)
-		return -EINVAL;
-	memset(r, 0, sizeof(struct resource));
-	if (flags & IORESOURCE_IO) {
-		unsigned long port;
-		port = -1; /* pci_address_to_pio(taddr); */
-		if (port == (unsigned long)-1)
-			return -EINVAL;
-		r->start = port;
-		r->end = port + size - 1;
-	} else {
-		r->start = taddr;
-		r->end = taddr + size - 1;
-	}
-	r->flags = flags;
-	r->name = dev->name;
-	return 0;
-}
-
-int of_address_to_resource(struct device_node *dev, int index,
-			struct resource *r)
-{
-	const u32	*addrp;
-	u64		size;
-	unsigned int	flags;
-
-	addrp = of_get_address(dev, index, &size, &flags);
-	if (addrp == NULL)
-		return -EINVAL;
-	return __of_address_to_resource(dev, addrp, size, flags, r);
-}
-EXPORT_SYMBOL_GPL(of_address_to_resource);
-
 void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
 		unsigned long *busno, unsigned long *phys, unsigned long *size)
 {
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 76e1f313a58..51e9e6f90d1 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -303,13 +303,8 @@ extern void pcibios_free_controller(struct pci_controller *phb);
 extern void pcibios_setup_phb_resources(struct pci_controller *hose);
 
 #ifdef CONFIG_PCI
-extern unsigned long pci_address_to_pio(phys_addr_t address);
 extern int pcibios_vaddr_is_ioport(void __iomem *address);
 #else
-static inline unsigned long pci_address_to_pio(phys_addr_t address)
-{
-	return (unsigned long)-1;
-}
 static inline int pcibios_vaddr_is_ioport(void __iomem *address)
 {
 	return 0;
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 0abe379c6f3..ceace966c51 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -70,14 +70,6 @@ static inline const u32 *of_get_pci_address(struct device_node *dev,
 }
 #endif /* CONFIG_PCI */
 
-/* Get an address as a resource. Note that if your address is
- * a PIO address, the conversion will fail if the physical address
- * can't be internally converted to an IO token with
- * pci_address_to_pio(), that is because it's either called to early
- * or it can't be matched to any host bridge IO space
- */
-extern int of_address_to_resource(struct device_node *dev, int index,
-				  struct resource *r);
 #ifdef CONFIG_PCI
 extern int of_pci_address_to_resource(struct device_node *dev, int bar,
 				      struct resource *r);
@@ -89,6 +81,15 @@ static inline int of_pci_address_to_resource(struct device_node *dev, int bar,
 }
 #endif /* CONFIG_PCI */
 
+#ifdef CONFIG_PCI
+extern unsigned long pci_address_to_pio(phys_addr_t address);
+#else
+static inline unsigned long pci_address_to_pio(phys_addr_t address)
+{
+	return (unsigned long)-1;
+}
+#endif	/* CONFIG_PCI */
+
 /* Parse the ibm,dma-window property of an OF node into the busno, phys and
  * size parameters.
  */
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index 1d5d4f6dfef..1dac535de78 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/ioport.h>
 #include <linux/etherdevice.h>
+#include <linux/of_address.h>
 #include <asm/prom.h>
 #include <asm/pci-bridge.h>
 
@@ -27,10 +28,6 @@
 			(ns) > 0)
 
 static struct of_bus *of_match_bus(struct device_node *np);
-static int __of_address_to_resource(struct device_node *dev,
-		const u32 *addrp, u64 size, unsigned int flags,
-		struct resource *r);
-
 
 /* Debug utility */
 #ifdef DEBUG
@@ -610,48 +607,6 @@ const u32 *of_get_address(struct device_node *dev, int index, u64 *size,
 }
 EXPORT_SYMBOL(of_get_address);
 
-static int __of_address_to_resource(struct device_node *dev, const u32 *addrp,
-				    u64 size, unsigned int flags,
-				    struct resource *r)
-{
-	u64 taddr;
-
-	if ((flags & (IORESOURCE_IO | IORESOURCE_MEM)) == 0)
-		return -EINVAL;
-	taddr = of_translate_address(dev, addrp);
-	if (taddr == OF_BAD_ADDR)
-		return -EINVAL;
-	memset(r, 0, sizeof(struct resource));
-	if (flags & IORESOURCE_IO) {
-		unsigned long port;
-		port = pci_address_to_pio(taddr);
-		if (port == (unsigned long)-1)
-			return -EINVAL;
-		r->start = port;
-		r->end = port + size - 1;
-	} else {
-		r->start = taddr;
-		r->end = taddr + size - 1;
-	}
-	r->flags = flags;
-	r->name = dev->name;
-	return 0;
-}
-
-int of_address_to_resource(struct device_node *dev, int index,
-			   struct resource *r)
-{
-	const u32	*addrp;
-	u64		size;
-	unsigned int	flags;
-
-	addrp = of_get_address(dev, index, &size, &flags);
-	if (addrp == NULL)
-		return -EINVAL;
-	return __of_address_to_resource(dev, addrp, size, flags, r);
-}
-EXPORT_SYMBOL_GPL(of_address_to_resource);
-
 void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
 		unsigned long *busno, unsigned long *phys, unsigned long *size)
 {
diff --git a/drivers/of/address.c b/drivers/of/address.c
index 258528d6c4f..c3819550f90 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -3,6 +3,57 @@
 #include <linux/ioport.h>
 #include <linux/of_address.h>
 
+int __of_address_to_resource(struct device_node *dev, const u32 *addrp,
+			     u64 size, unsigned int flags,
+			     struct resource *r)
+{
+	u64 taddr;
+
+	if ((flags & (IORESOURCE_IO | IORESOURCE_MEM)) == 0)
+		return -EINVAL;
+	taddr = of_translate_address(dev, addrp);
+	if (taddr == OF_BAD_ADDR)
+		return -EINVAL;
+	memset(r, 0, sizeof(struct resource));
+	if (flags & IORESOURCE_IO) {
+		unsigned long port;
+		port = pci_address_to_pio(taddr);
+		if (port == (unsigned long)-1)
+			return -EINVAL;
+		r->start = port;
+		r->end = port + size - 1;
+	} else {
+		r->start = taddr;
+		r->end = taddr + size - 1;
+	}
+	r->flags = flags;
+	r->name = dev->name;
+	return 0;
+}
+
+/**
+ * of_address_to_resource - Translate device tree address and return as resource
+ *
+ * Note that if your address is a PIO address, the conversion will fail if
+ * the physical address can't be internally converted to an IO token with
+ * pci_address_to_pio(), that is because it's either called too early or it
+ * can't be matched to any host bridge IO space
+ */
+int of_address_to_resource(struct device_node *dev, int index,
+			   struct resource *r)
+{
+	const u32	*addrp;
+	u64		size;
+	unsigned int	flags;
+
+	addrp = of_get_address(dev, index, &size, &flags);
+	if (addrp == NULL)
+		return -EINVAL;
+	return __of_address_to_resource(dev, addrp, size, flags, r);
+}
+EXPORT_SYMBOL_GPL(of_address_to_resource);
+
+
 /**
  * of_iomap - Maps the memory mapped IO for a given device_node
  * @device:	the device whose io range will be mapped
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 570831d7e79..474b794ed9d 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -3,6 +3,11 @@
 #include <linux/ioport.h>
 #include <linux/of.h>
 
+extern int __of_address_to_resource(struct device_node *dev, const u32 *addrp,
+				    u64 size, unsigned int flags,
+				    struct resource *r);
+extern int of_address_to_resource(struct device_node *dev, int index,
+				  struct resource *r);
 extern void __iomem *of_iomap(struct device_node *device, int index);
 
 #endif /* __OF_ADDRESS_H */
-- 
cgit v1.2.3-70-g09d2


From dbbdee94734bf6f1db7af42008a53655e77cab8f Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 8 Jun 2010 07:48:10 -0600
Subject: of/address: Merge all of the bus translation code

Microblaze and PowerPC share a large chunk of code for translating
OF device tree data into usable addresses.  Differences between the two
consist of cosmetic differences, and the addition of dma-ranges support
code to powerpc but not microblaze.  This patch moves the powerpc
version into common code and applies many of the cosmetic (non-functional)
changes from the microblaze version.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Michal Simek <monstr@monstr.eu>
CC: Wolfram Sang <w.sang@pengutronix.de>
CC: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/microblaze/include/asm/prom.h  |   4 -
 arch/microblaze/kernel/prom_parse.c | 489 ----------------------------------
 arch/powerpc/include/asm/prom.h     |   4 -
 arch/powerpc/kernel/prom_parse.c    | 515 -----------------------------------
 drivers/of/address.c                | 517 +++++++++++++++++++++++++++++++++++-
 include/linux/of_address.h          |   4 +-
 6 files changed, 515 insertions(+), 1018 deletions(-)

(limited to 'include')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 4e94c0706c5..cb9c3dd9a23 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -52,10 +52,6 @@ extern void pci_create_OF_bus_map(void);
  * OF address retreival & translation
  */
 
-/* Translate an OF address block into a CPU physical address
- */
-extern u64 of_translate_address(struct device_node *np, const u32 *addr);
-
 /* Extract an address from a device, returns the region size and
  * the address space flags too. The PCI version uses a BAR number
  * instead of an absolute index
diff --git a/arch/microblaze/kernel/prom_parse.c b/arch/microblaze/kernel/prom_parse.c
index 2f9cdd26ca1..d33ba17601f 100644
--- a/arch/microblaze/kernel/prom_parse.c
+++ b/arch/microblaze/kernel/prom_parse.c
@@ -10,213 +10,7 @@
 #include <asm/prom.h>
 #include <asm/pci-bridge.h>
 
-#define PRu64	"%llx"
-
-/* Max address size we deal with */
-#define OF_MAX_ADDR_CELLS	4
-#define OF_CHECK_COUNTS(na, ns)	((na) > 0 && (na) <= OF_MAX_ADDR_CELLS && \
-			(ns) > 0)
-
-static struct of_bus *of_match_bus(struct device_node *np);
-
-/* Debug utility */
-#ifdef DEBUG
-static void of_dump_addr(const char *s, const u32 *addr, int na)
-{
-	printk(KERN_INFO "%s", s);
-	while (na--)
-		printk(KERN_INFO " %08x", *(addr++));
-	printk(KERN_INFO "\n");
-}
-#else
-static void of_dump_addr(const char *s, const u32 *addr, int na) { }
-#endif
-
-/* Callbacks for bus specific translators */
-struct of_bus {
-	const char	*name;
-	const char	*addresses;
-	int		(*match)(struct device_node *parent);
-	void		(*count_cells)(struct device_node *child,
-					int *addrc, int *sizec);
-	u64		(*map)(u32 *addr, const u32 *range,
-				int na, int ns, int pna);
-	int		(*translate)(u32 *addr, u64 offset, int na);
-	unsigned int	(*get_flags)(const u32 *addr);
-};
-
-/*
- * Default translator (generic bus)
- */
-
-static void of_bus_default_count_cells(struct device_node *dev,
-					int *addrc, int *sizec)
-{
-	if (addrc)
-		*addrc = of_n_addr_cells(dev);
-	if (sizec)
-		*sizec = of_n_size_cells(dev);
-}
-
-static u64 of_bus_default_map(u32 *addr, const u32 *range,
-		int na, int ns, int pna)
-{
-	u64 cp, s, da;
-
-	cp = of_read_number(range, na);
-	s  = of_read_number(range + na + pna, ns);
-	da = of_read_number(addr, na);
-
-	pr_debug("OF: default map, cp="PRu64", s="PRu64", da="PRu64"\n",
-		cp, s, da);
-
-	if (da < cp || da >= (cp + s))
-		return OF_BAD_ADDR;
-	return da - cp;
-}
-
-static int of_bus_default_translate(u32 *addr, u64 offset, int na)
-{
-	u64 a = of_read_number(addr, na);
-	memset(addr, 0, na * 4);
-	a += offset;
-	if (na > 1)
-		addr[na - 2] = a >> 32;
-	addr[na - 1] = a & 0xffffffffu;
-
-	return 0;
-}
-
-static unsigned int of_bus_default_get_flags(const u32 *addr)
-{
-	return IORESOURCE_MEM;
-}
-
 #ifdef CONFIG_PCI
-/*
- * PCI bus specific translator
- */
-
-static int of_bus_pci_match(struct device_node *np)
-{
-	/* "vci" is for the /chaos bridge on 1st-gen PCI powermacs */
-	return !strcmp(np->type, "pci") || !strcmp(np->type, "vci");
-}
-
-static void of_bus_pci_count_cells(struct device_node *np,
-				int *addrc, int *sizec)
-{
-	if (addrc)
-		*addrc = 3;
-	if (sizec)
-		*sizec = 2;
-}
-
-static u64 of_bus_pci_map(u32 *addr, const u32 *range, int na, int ns, int pna)
-{
-	u64 cp, s, da;
-
-	/* Check address type match */
-	if ((addr[0] ^ range[0]) & 0x03000000)
-		return OF_BAD_ADDR;
-
-	/* Read address values, skipping high cell */
-	cp = of_read_number(range + 1, na - 1);
-	s  = of_read_number(range + na + pna, ns);
-	da = of_read_number(addr + 1, na - 1);
-
-	pr_debug("OF: PCI map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da);
-
-	if (da < cp || da >= (cp + s))
-		return OF_BAD_ADDR;
-	return da - cp;
-}
-
-static int of_bus_pci_translate(u32 *addr, u64 offset, int na)
-{
-	return of_bus_default_translate(addr + 1, offset, na - 1);
-}
-
-static unsigned int of_bus_pci_get_flags(const u32 *addr)
-{
-	unsigned int flags = 0;
-	u32 w = addr[0];
-
-	switch ((w >> 24) & 0x03) {
-	case 0x01:
-		flags |= IORESOURCE_IO;
-		break;
-	case 0x02: /* 32 bits */
-	case 0x03: /* 64 bits */
-		flags |= IORESOURCE_MEM;
-		break;
-	}
-	if (w & 0x40000000)
-		flags |= IORESOURCE_PREFETCH;
-	return flags;
-}
-
-const u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size,
-			unsigned int *flags)
-{
-	const u32 *prop;
-	unsigned int psize;
-	struct device_node *parent;
-	struct of_bus *bus;
-	int onesize, i, na, ns;
-
-	/* Get parent & match bus type */
-	parent = of_get_parent(dev);
-	if (parent == NULL)
-		return NULL;
-	bus = of_match_bus(parent);
-	if (strcmp(bus->name, "pci")) {
-		of_node_put(parent);
-		return NULL;
-	}
-	bus->count_cells(dev, &na, &ns);
-	of_node_put(parent);
-	if (!OF_CHECK_COUNTS(na, ns))
-		return NULL;
-
-	/* Get "reg" or "assigned-addresses" property */
-	prop = of_get_property(dev, bus->addresses, &psize);
-	if (prop == NULL)
-		return NULL;
-	psize /= 4;
-
-	onesize = na + ns;
-	for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++)
-		if ((prop[0] & 0xff) == ((bar_no * 4) + PCI_BASE_ADDRESS_0)) {
-			if (size)
-				*size = of_read_number(prop + na, ns);
-			if (flags)
-				*flags = bus->get_flags(prop);
-			return prop;
-		}
-	return NULL;
-}
-EXPORT_SYMBOL(of_get_pci_address);
-
-int of_pci_address_to_resource(struct device_node *dev, int bar,
-				struct resource *r)
-{
-	const u32	*addrp;
-	u64		size;
-	unsigned int	flags;
-
-	addrp = of_get_pci_address(dev, bar, &size, &flags);
-	if (addrp == NULL)
-		return -EINVAL;
-	return __of_address_to_resource(dev, addrp, size, flags, r);
-}
-EXPORT_SYMBOL_GPL(of_pci_address_to_resource);
-
-static u8 of_irq_pci_swizzle(u8 slot, u8 pin)
-{
-	return (((pin - 1) + slot) % 4) + 1;
-}
-
 int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
 {
 	struct device_node *dn, *ppnode;
@@ -291,289 +85,6 @@ int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
 EXPORT_SYMBOL_GPL(of_irq_map_pci);
 #endif /* CONFIG_PCI */
 
-/*
- * ISA bus specific translator
- */
-
-static int of_bus_isa_match(struct device_node *np)
-{
-	return !strcmp(np->name, "isa");
-}
-
-static void of_bus_isa_count_cells(struct device_node *child,
-				int *addrc, int *sizec)
-{
-	if (addrc)
-		*addrc = 2;
-	if (sizec)
-		*sizec = 1;
-}
-
-static u64 of_bus_isa_map(u32 *addr, const u32 *range, int na, int ns, int pna)
-{
-	u64 cp, s, da;
-
-	/* Check address type match */
-	if ((addr[0] ^ range[0]) & 0x00000001)
-		return OF_BAD_ADDR;
-
-	/* Read address values, skipping high cell */
-	cp = of_read_number(range + 1, na - 1);
-	s  = of_read_number(range + na + pna, ns);
-	da = of_read_number(addr + 1, na - 1);
-
-	pr_debug("OF: ISA map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da);
-
-	if (da < cp || da >= (cp + s))
-		return OF_BAD_ADDR;
-	return da - cp;
-}
-
-static int of_bus_isa_translate(u32 *addr, u64 offset, int na)
-{
-	return of_bus_default_translate(addr + 1, offset, na - 1);
-}
-
-static unsigned int of_bus_isa_get_flags(const u32 *addr)
-{
-	unsigned int flags = 0;
-	u32 w = addr[0];
-
-	if (w & 1)
-		flags |= IORESOURCE_IO;
-	else
-		flags |= IORESOURCE_MEM;
-	return flags;
-}
-
-/*
- * Array of bus specific translators
- */
-
-static struct of_bus of_busses[] = {
-#ifdef CONFIG_PCI
-	/* PCI */
-	{
-		.name = "pci",
-		.addresses = "assigned-addresses",
-		.match = of_bus_pci_match,
-		.count_cells = of_bus_pci_count_cells,
-		.map = of_bus_pci_map,
-		.translate = of_bus_pci_translate,
-		.get_flags = of_bus_pci_get_flags,
-	},
-#endif /* CONFIG_PCI */
-	/* ISA */
-	{
-		.name = "isa",
-		.addresses = "reg",
-		.match = of_bus_isa_match,
-		.count_cells = of_bus_isa_count_cells,
-		.map = of_bus_isa_map,
-		.translate = of_bus_isa_translate,
-		.get_flags = of_bus_isa_get_flags,
-	},
-	/* Default */
-	{
-		.name = "default",
-		.addresses = "reg",
-		.match = NULL,
-		.count_cells = of_bus_default_count_cells,
-		.map = of_bus_default_map,
-		.translate = of_bus_default_translate,
-		.get_flags = of_bus_default_get_flags,
-	},
-};
-
-static struct of_bus *of_match_bus(struct device_node *np)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(of_busses); i++)
-		if (!of_busses[i].match || of_busses[i].match(np))
-			return &of_busses[i];
-	BUG();
-	return NULL;
-}
-
-static int of_translate_one(struct device_node *parent, struct of_bus *bus,
-			struct of_bus *pbus, u32 *addr,
-			int na, int ns, int pna)
-{
-	const u32 *ranges;
-	unsigned int rlen;
-	int rone;
-	u64 offset = OF_BAD_ADDR;
-
-	/* Normally, an absence of a "ranges" property means we are
-	 * crossing a non-translatable boundary, and thus the addresses
-	 * below the current not cannot be converted to CPU physical ones.
-	 * Unfortunately, while this is very clear in the spec, it's not
-	 * what Apple understood, and they do have things like /uni-n or
-	 * /ht nodes with no "ranges" property and a lot of perfectly
-	 * useable mapped devices below them. Thus we treat the absence of
-	 * "ranges" as equivalent to an empty "ranges" property which means
-	 * a 1:1 translation at that level. It's up to the caller not to try
-	 * to translate addresses that aren't supposed to be translated in
-	 * the first place. --BenH.
-	 */
-	ranges = of_get_property(parent, "ranges", (int *) &rlen);
-	if (ranges == NULL || rlen == 0) {
-		offset = of_read_number(addr, na);
-		memset(addr, 0, pna * 4);
-		pr_debug("OF: no ranges, 1:1 translation\n");
-		goto finish;
-	}
-
-	pr_debug("OF: walking ranges...\n");
-
-	/* Now walk through the ranges */
-	rlen /= 4;
-	rone = na + pna + ns;
-	for (; rlen >= rone; rlen -= rone, ranges += rone) {
-		offset = bus->map(addr, ranges, na, ns, pna);
-		if (offset != OF_BAD_ADDR)
-			break;
-	}
-	if (offset == OF_BAD_ADDR) {
-		pr_debug("OF: not found !\n");
-		return 1;
-	}
-	memcpy(addr, ranges + na, 4 * pna);
-
- finish:
-	of_dump_addr("OF: parent translation for:", addr, pna);
-	pr_debug("OF: with offset: "PRu64"\n", offset);
-
-	/* Translate it into parent bus space */
-	return pbus->translate(addr, offset, pna);
-}
-
-/*
- * Translate an address from the device-tree into a CPU physical address,
- * this walks up the tree and applies the various bus mappings on the
- * way.
- *
- * Note: We consider that crossing any level with #size-cells == 0 to mean
- * that translation is impossible (that is we are not dealing with a value
- * that can be mapped to a cpu physical address). This is not really specified
- * that way, but this is traditionally the way IBM at least do things
- */
-u64 of_translate_address(struct device_node *dev, const u32 *in_addr)
-{
-	struct device_node *parent = NULL;
-	struct of_bus *bus, *pbus;
-	u32 addr[OF_MAX_ADDR_CELLS];
-	int na, ns, pna, pns;
-	u64 result = OF_BAD_ADDR;
-
-	pr_debug("OF: ** translation for device %s **\n", dev->full_name);
-
-	/* Increase refcount at current level */
-	of_node_get(dev);
-
-	/* Get parent & match bus type */
-	parent = of_get_parent(dev);
-	if (parent == NULL)
-		goto bail;
-	bus = of_match_bus(parent);
-
-	/* Cound address cells & copy address locally */
-	bus->count_cells(dev, &na, &ns);
-	if (!OF_CHECK_COUNTS(na, ns)) {
-		printk(KERN_ERR "prom_parse: Bad cell count for %s\n",
-			dev->full_name);
-		goto bail;
-	}
-	memcpy(addr, in_addr, na * 4);
-
-	pr_debug("OF: bus is %s (na=%d, ns=%d) on %s\n",
-		bus->name, na, ns, parent->full_name);
-	of_dump_addr("OF: translating address:", addr, na);
-
-	/* Translate */
-	for (;;) {
-		/* Switch to parent bus */
-		of_node_put(dev);
-		dev = parent;
-		parent = of_get_parent(dev);
-
-		/* If root, we have finished */
-		if (parent == NULL) {
-			pr_debug("OF: reached root node\n");
-			result = of_read_number(addr, na);
-			break;
-		}
-
-		/* Get new parent bus and counts */
-		pbus = of_match_bus(parent);
-		pbus->count_cells(dev, &pna, &pns);
-		if (!OF_CHECK_COUNTS(pna, pns)) {
-			printk(KERN_ERR "prom_parse: Bad cell count for %s\n",
-				dev->full_name);
-			break;
-		}
-
-		pr_debug("OF: parent bus is %s (na=%d, ns=%d) on %s\n",
-			pbus->name, pna, pns, parent->full_name);
-
-		/* Apply bus translation */
-		if (of_translate_one(dev, bus, pbus, addr, na, ns, pna))
-			break;
-
-		/* Complete the move up one level */
-		na = pna;
-		ns = pns;
-		bus = pbus;
-
-		of_dump_addr("OF: one level translation:", addr, na);
-	}
- bail:
-	of_node_put(parent);
-	of_node_put(dev);
-
-	return result;
-}
-EXPORT_SYMBOL(of_translate_address);
-
-const u32 *of_get_address(struct device_node *dev, int index, u64 *size,
-			unsigned int *flags)
-{
-	const u32 *prop;
-	unsigned int psize;
-	struct device_node *parent;
-	struct of_bus *bus;
-	int onesize, i, na, ns;
-
-	/* Get parent & match bus type */
-	parent = of_get_parent(dev);
-	if (parent == NULL)
-		return NULL;
-	bus = of_match_bus(parent);
-	bus->count_cells(dev, &na, &ns);
-	of_node_put(parent);
-	if (!OF_CHECK_COUNTS(na, ns))
-		return NULL;
-
-	/* Get "reg" or "assigned-addresses" property */
-	prop = of_get_property(dev, bus->addresses, (int *) &psize);
-	if (prop == NULL)
-		return NULL;
-	psize /= 4;
-
-	onesize = na + ns;
-	for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++)
-		if (i == index) {
-			if (size)
-				*size = of_read_number(prop + na, ns);
-			if (flags)
-				*flags = bus->get_flags(prop);
-			return prop;
-		}
-	return NULL;
-}
-EXPORT_SYMBOL(of_get_address);
-
 void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
 		unsigned long *busno, unsigned long *phys, unsigned long *size)
 {
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index ceace966c51..f864722679e 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -45,10 +45,6 @@ extern void pci_create_OF_bus_map(void);
  * OF address retreival & translation
  */
 
-/* Translate an OF address block into a CPU physical address
- */
-extern u64 of_translate_address(struct device_node *np, const u32 *addr);
-
 /* Translate a DMA address from device space to CPU space */
 extern u64 of_translate_dma_address(struct device_node *dev,
 				    const u32 *in_addr);
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index 1dac535de78..88334af038e 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -10,225 +10,7 @@
 #include <asm/prom.h>
 #include <asm/pci-bridge.h>
 
-#ifdef DEBUG
-#define DBG(fmt...) do { printk(fmt); } while(0)
-#else
-#define DBG(fmt...) do { } while(0)
-#endif
-
-#ifdef CONFIG_PPC64
-#define PRu64	"%lx"
-#else
-#define PRu64	"%llx"
-#endif
-
-/* Max address size we deal with */
-#define OF_MAX_ADDR_CELLS	4
-#define OF_CHECK_COUNTS(na, ns)	((na) > 0 && (na) <= OF_MAX_ADDR_CELLS && \
-			(ns) > 0)
-
-static struct of_bus *of_match_bus(struct device_node *np);
-
-/* Debug utility */
-#ifdef DEBUG
-static void of_dump_addr(const char *s, const u32 *addr, int na)
-{
-	printk("%s", s);
-	while(na--)
-		printk(" %08x", *(addr++));
-	printk("\n");
-}
-#else
-static void of_dump_addr(const char *s, const u32 *addr, int na) { }
-#endif
-
-
-/* Callbacks for bus specific translators */
-struct of_bus {
-	const char	*name;
-	const char	*addresses;
-	int		(*match)(struct device_node *parent);
-	void		(*count_cells)(struct device_node *child,
-				       int *addrc, int *sizec);
-	u64		(*map)(u32 *addr, const u32 *range,
-				int na, int ns, int pna);
-	int		(*translate)(u32 *addr, u64 offset, int na);
-	unsigned int	(*get_flags)(const u32 *addr);
-};
-
-
-/*
- * Default translator (generic bus)
- */
-
-static void of_bus_default_count_cells(struct device_node *dev,
-				       int *addrc, int *sizec)
-{
-	if (addrc)
-		*addrc = of_n_addr_cells(dev);
-	if (sizec)
-		*sizec = of_n_size_cells(dev);
-}
-
-static u64 of_bus_default_map(u32 *addr, const u32 *range,
-		int na, int ns, int pna)
-{
-	u64 cp, s, da;
-
-	cp = of_read_number(range, na);
-	s  = of_read_number(range + na + pna, ns);
-	da = of_read_number(addr, na);
-
-	DBG("OF: default map, cp="PRu64", s="PRu64", da="PRu64"\n",
-	    cp, s, da);
-
-	if (da < cp || da >= (cp + s))
-		return OF_BAD_ADDR;
-	return da - cp;
-}
-
-static int of_bus_default_translate(u32 *addr, u64 offset, int na)
-{
-	u64 a = of_read_number(addr, na);
-	memset(addr, 0, na * 4);
-	a += offset;
-	if (na > 1)
-		addr[na - 2] = a >> 32;
-	addr[na - 1] = a & 0xffffffffu;
-
-	return 0;
-}
-
-static unsigned int of_bus_default_get_flags(const u32 *addr)
-{
-	return IORESOURCE_MEM;
-}
-
-
 #ifdef CONFIG_PCI
-/*
- * PCI bus specific translator
- */
-
-static int of_bus_pci_match(struct device_node *np)
-{
-	/* "vci" is for the /chaos bridge on 1st-gen PCI powermacs */
-	return !strcmp(np->type, "pci") || !strcmp(np->type, "vci");
-}
-
-static void of_bus_pci_count_cells(struct device_node *np,
-				   int *addrc, int *sizec)
-{
-	if (addrc)
-		*addrc = 3;
-	if (sizec)
-		*sizec = 2;
-}
-
-static unsigned int of_bus_pci_get_flags(const u32 *addr)
-{
-	unsigned int flags = 0;
-	u32 w = addr[0];
-
-	switch((w >> 24) & 0x03) {
-	case 0x01:
-		flags |= IORESOURCE_IO;
-		break;
-	case 0x02: /* 32 bits */
-	case 0x03: /* 64 bits */
-		flags |= IORESOURCE_MEM;
-		break;
-	}
-	if (w & 0x40000000)
-		flags |= IORESOURCE_PREFETCH;
-	return flags;
-}
-
-static u64 of_bus_pci_map(u32 *addr, const u32 *range, int na, int ns, int pna)
-{
-	u64 cp, s, da;
-	unsigned int af, rf;
-
-	af = of_bus_pci_get_flags(addr);
-	rf = of_bus_pci_get_flags(range);
-
-	/* Check address type match */
-	if ((af ^ rf) & (IORESOURCE_MEM | IORESOURCE_IO))
-		return OF_BAD_ADDR;
-
-	/* Read address values, skipping high cell */
-	cp = of_read_number(range + 1, na - 1);
-	s  = of_read_number(range + na + pna, ns);
-	da = of_read_number(addr + 1, na - 1);
-
-	DBG("OF: PCI map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da);
-
-	if (da < cp || da >= (cp + s))
-		return OF_BAD_ADDR;
-	return da - cp;
-}
-
-static int of_bus_pci_translate(u32 *addr, u64 offset, int na)
-{
-	return of_bus_default_translate(addr + 1, offset, na - 1);
-}
-
-const u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size,
-			unsigned int *flags)
-{
-	const u32 *prop;
-	unsigned int psize;
-	struct device_node *parent;
-	struct of_bus *bus;
-	int onesize, i, na, ns;
-
-	/* Get parent & match bus type */
-	parent = of_get_parent(dev);
-	if (parent == NULL)
-		return NULL;
-	bus = of_match_bus(parent);
-	if (strcmp(bus->name, "pci")) {
-		of_node_put(parent);
-		return NULL;
-	}
-	bus->count_cells(dev, &na, &ns);
-	of_node_put(parent);
-	if (!OF_CHECK_COUNTS(na, ns))
-		return NULL;
-
-	/* Get "reg" or "assigned-addresses" property */
-	prop = of_get_property(dev, bus->addresses, &psize);
-	if (prop == NULL)
-		return NULL;
-	psize /= 4;
-
-	onesize = na + ns;
-	for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++)
-		if ((prop[0] & 0xff) == ((bar_no * 4) + PCI_BASE_ADDRESS_0)) {
-			if (size)
-				*size = of_read_number(prop + na, ns);
-			if (flags)
-				*flags = bus->get_flags(prop);
-			return prop;
-		}
-	return NULL;
-}
-EXPORT_SYMBOL(of_get_pci_address);
-
-int of_pci_address_to_resource(struct device_node *dev, int bar,
-			       struct resource *r)
-{
-	const u32	*addrp;
-	u64		size;
-	unsigned int	flags;
-
-	addrp = of_get_pci_address(dev, bar, &size, &flags);
-	if (addrp == NULL)
-		return -EINVAL;
-	return __of_address_to_resource(dev, addrp, size, flags, r);
-}
-EXPORT_SYMBOL_GPL(of_pci_address_to_resource);
-
 int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
 {
 	struct device_node *dn, *ppnode;
@@ -310,303 +92,6 @@ int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
 EXPORT_SYMBOL_GPL(of_irq_map_pci);
 #endif /* CONFIG_PCI */
 
-/*
- * ISA bus specific translator
- */
-
-static int of_bus_isa_match(struct device_node *np)
-{
-	return !strcmp(np->name, "isa");
-}
-
-static void of_bus_isa_count_cells(struct device_node *child,
-				   int *addrc, int *sizec)
-{
-	if (addrc)
-		*addrc = 2;
-	if (sizec)
-		*sizec = 1;
-}
-
-static u64 of_bus_isa_map(u32 *addr, const u32 *range, int na, int ns, int pna)
-{
-	u64 cp, s, da;
-
-	/* Check address type match */
-	if ((addr[0] ^ range[0]) & 0x00000001)
-		return OF_BAD_ADDR;
-
-	/* Read address values, skipping high cell */
-	cp = of_read_number(range + 1, na - 1);
-	s  = of_read_number(range + na + pna, ns);
-	da = of_read_number(addr + 1, na - 1);
-
-	DBG("OF: ISA map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da);
-
-	if (da < cp || da >= (cp + s))
-		return OF_BAD_ADDR;
-	return da - cp;
-}
-
-static int of_bus_isa_translate(u32 *addr, u64 offset, int na)
-{
-	return of_bus_default_translate(addr + 1, offset, na - 1);
-}
-
-static unsigned int of_bus_isa_get_flags(const u32 *addr)
-{
-	unsigned int flags = 0;
-	u32 w = addr[0];
-
-	if (w & 1)
-		flags |= IORESOURCE_IO;
-	else
-		flags |= IORESOURCE_MEM;
-	return flags;
-}
-
-
-/*
- * Array of bus specific translators
- */
-
-static struct of_bus of_busses[] = {
-#ifdef CONFIG_PCI
-	/* PCI */
-	{
-		.name = "pci",
-		.addresses = "assigned-addresses",
-		.match = of_bus_pci_match,
-		.count_cells = of_bus_pci_count_cells,
-		.map = of_bus_pci_map,
-		.translate = of_bus_pci_translate,
-		.get_flags = of_bus_pci_get_flags,
-	},
-#endif /* CONFIG_PCI */
-	/* ISA */
-	{
-		.name = "isa",
-		.addresses = "reg",
-		.match = of_bus_isa_match,
-		.count_cells = of_bus_isa_count_cells,
-		.map = of_bus_isa_map,
-		.translate = of_bus_isa_translate,
-		.get_flags = of_bus_isa_get_flags,
-	},
-	/* Default */
-	{
-		.name = "default",
-		.addresses = "reg",
-		.match = NULL,
-		.count_cells = of_bus_default_count_cells,
-		.map = of_bus_default_map,
-		.translate = of_bus_default_translate,
-		.get_flags = of_bus_default_get_flags,
-	},
-};
-
-static struct of_bus *of_match_bus(struct device_node *np)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(of_busses); i ++)
-		if (!of_busses[i].match || of_busses[i].match(np))
-			return &of_busses[i];
-	BUG();
-	return NULL;
-}
-
-static int of_translate_one(struct device_node *parent, struct of_bus *bus,
-			    struct of_bus *pbus, u32 *addr,
-			    int na, int ns, int pna, const char *rprop)
-{
-	const u32 *ranges;
-	unsigned int rlen;
-	int rone;
-	u64 offset = OF_BAD_ADDR;
-
-	/* Normally, an absence of a "ranges" property means we are
-	 * crossing a non-translatable boundary, and thus the addresses
-	 * below the current not cannot be converted to CPU physical ones.
-	 * Unfortunately, while this is very clear in the spec, it's not
-	 * what Apple understood, and they do have things like /uni-n or
-	 * /ht nodes with no "ranges" property and a lot of perfectly
-	 * useable mapped devices below them. Thus we treat the absence of
-	 * "ranges" as equivalent to an empty "ranges" property which means
-	 * a 1:1 translation at that level. It's up to the caller not to try
-	 * to translate addresses that aren't supposed to be translated in
-	 * the first place. --BenH.
-	 */
-	ranges = of_get_property(parent, rprop, &rlen);
-	if (ranges == NULL || rlen == 0) {
-		offset = of_read_number(addr, na);
-		memset(addr, 0, pna * 4);
-		DBG("OF: no ranges, 1:1 translation\n");
-		goto finish;
-	}
-
-	DBG("OF: walking ranges...\n");
-
-	/* Now walk through the ranges */
-	rlen /= 4;
-	rone = na + pna + ns;
-	for (; rlen >= rone; rlen -= rone, ranges += rone) {
-		offset = bus->map(addr, ranges, na, ns, pna);
-		if (offset != OF_BAD_ADDR)
-			break;
-	}
-	if (offset == OF_BAD_ADDR) {
-		DBG("OF: not found !\n");
-		return 1;
-	}
-	memcpy(addr, ranges + na, 4 * pna);
-
- finish:
-	of_dump_addr("OF: parent translation for:", addr, pna);
-	DBG("OF: with offset: "PRu64"\n", offset);
-
-	/* Translate it into parent bus space */
-	return pbus->translate(addr, offset, pna);
-}
-
-
-/*
- * Translate an address from the device-tree into a CPU physical address,
- * this walks up the tree and applies the various bus mappings on the
- * way.
- *
- * Note: We consider that crossing any level with #size-cells == 0 to mean
- * that translation is impossible (that is we are not dealing with a value
- * that can be mapped to a cpu physical address). This is not really specified
- * that way, but this is traditionally the way IBM at least do things
- */
-u64 __of_translate_address(struct device_node *dev, const u32 *in_addr,
-			   const char *rprop)
-{
-	struct device_node *parent = NULL;
-	struct of_bus *bus, *pbus;
-	u32 addr[OF_MAX_ADDR_CELLS];
-	int na, ns, pna, pns;
-	u64 result = OF_BAD_ADDR;
-
-	DBG("OF: ** translation for device %s **\n", dev->full_name);
-
-	/* Increase refcount at current level */
-	of_node_get(dev);
-
-	/* Get parent & match bus type */
-	parent = of_get_parent(dev);
-	if (parent == NULL)
-		goto bail;
-	bus = of_match_bus(parent);
-
-	/* Cound address cells & copy address locally */
-	bus->count_cells(dev, &na, &ns);
-	if (!OF_CHECK_COUNTS(na, ns)) {
-		printk(KERN_ERR "prom_parse: Bad cell count for %s\n",
-		       dev->full_name);
-		goto bail;
-	}
-	memcpy(addr, in_addr, na * 4);
-
-	DBG("OF: bus is %s (na=%d, ns=%d) on %s\n",
-	    bus->name, na, ns, parent->full_name);
-	of_dump_addr("OF: translating address:", addr, na);
-
-	/* Translate */
-	for (;;) {
-		/* Switch to parent bus */
-		of_node_put(dev);
-		dev = parent;
-		parent = of_get_parent(dev);
-
-		/* If root, we have finished */
-		if (parent == NULL) {
-			DBG("OF: reached root node\n");
-			result = of_read_number(addr, na);
-			break;
-		}
-
-		/* Get new parent bus and counts */
-		pbus = of_match_bus(parent);
-		pbus->count_cells(dev, &pna, &pns);
-		if (!OF_CHECK_COUNTS(pna, pns)) {
-			printk(KERN_ERR "prom_parse: Bad cell count for %s\n",
-			       dev->full_name);
-			break;
-		}
-
-		DBG("OF: parent bus is %s (na=%d, ns=%d) on %s\n",
-		    pbus->name, pna, pns, parent->full_name);
-
-		/* Apply bus translation */
-		if (of_translate_one(dev, bus, pbus, addr, na, ns, pna, rprop))
-			break;
-
-		/* Complete the move up one level */
-		na = pna;
-		ns = pns;
-		bus = pbus;
-
-		of_dump_addr("OF: one level translation:", addr, na);
-	}
- bail:
-	of_node_put(parent);
-	of_node_put(dev);
-
-	return result;
-}
-
-u64 of_translate_address(struct device_node *dev, const u32 *in_addr)
-{
-	return __of_translate_address(dev, in_addr, "ranges");
-}
-EXPORT_SYMBOL(of_translate_address);
-
-u64 of_translate_dma_address(struct device_node *dev, const u32 *in_addr)
-{
-	return __of_translate_address(dev, in_addr, "dma-ranges");
-}
-EXPORT_SYMBOL(of_translate_dma_address);
-
-const u32 *of_get_address(struct device_node *dev, int index, u64 *size,
-		    unsigned int *flags)
-{
-	const u32 *prop;
-	unsigned int psize;
-	struct device_node *parent;
-	struct of_bus *bus;
-	int onesize, i, na, ns;
-
-	/* Get parent & match bus type */
-	parent = of_get_parent(dev);
-	if (parent == NULL)
-		return NULL;
-	bus = of_match_bus(parent);
-	bus->count_cells(dev, &na, &ns);
-	of_node_put(parent);
-	if (!OF_CHECK_COUNTS(na, ns))
-		return NULL;
-
-	/* Get "reg" or "assigned-addresses" property */
-	prop = of_get_property(dev, bus->addresses, &psize);
-	if (prop == NULL)
-		return NULL;
-	psize /= 4;
-
-	onesize = na + ns;
-	for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++)
-		if (i == index) {
-			if (size)
-				*size = of_read_number(prop + na, ns);
-			if (flags)
-				*flags = bus->get_flags(prop);
-			return prop;
-		}
-	return NULL;
-}
-EXPORT_SYMBOL(of_get_address);
-
 void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
 		unsigned long *busno, unsigned long *phys, unsigned long *size)
 {
diff --git a/drivers/of/address.c b/drivers/of/address.c
index c3819550f90..2a905d560c1 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -1,11 +1,522 @@
 
 #include <linux/io.h>
 #include <linux/ioport.h>
+#include <linux/module.h>
 #include <linux/of_address.h>
+#include <linux/pci_regs.h>
+#include <linux/string.h>
 
-int __of_address_to_resource(struct device_node *dev, const u32 *addrp,
-			     u64 size, unsigned int flags,
-			     struct resource *r)
+/* Max address size we deal with */
+#define OF_MAX_ADDR_CELLS	4
+#define OF_CHECK_COUNTS(na, ns)	((na) > 0 && (na) <= OF_MAX_ADDR_CELLS && \
+			(ns) > 0)
+
+static struct of_bus *of_match_bus(struct device_node *np);
+static int __of_address_to_resource(struct device_node *dev, const u32 *addrp,
+				    u64 size, unsigned int flags,
+				    struct resource *r);
+
+/* Debug utility */
+#ifdef DEBUG
+static void of_dump_addr(const char *s, const u32 *addr, int na)
+{
+	printk(KERN_DEBUG "%s", s);
+	while (na--)
+		printk(" %08x", *(addr++));
+	printk("\n");
+}
+#else
+static void of_dump_addr(const char *s, const u32 *addr, int na) { }
+#endif
+
+/* Callbacks for bus specific translators */
+struct of_bus {
+	const char	*name;
+	const char	*addresses;
+	int		(*match)(struct device_node *parent);
+	void		(*count_cells)(struct device_node *child,
+				       int *addrc, int *sizec);
+	u64		(*map)(u32 *addr, const u32 *range,
+				int na, int ns, int pna);
+	int		(*translate)(u32 *addr, u64 offset, int na);
+	unsigned int	(*get_flags)(const u32 *addr);
+};
+
+/*
+ * Default translator (generic bus)
+ */
+
+static void of_bus_default_count_cells(struct device_node *dev,
+				       int *addrc, int *sizec)
+{
+	if (addrc)
+		*addrc = of_n_addr_cells(dev);
+	if (sizec)
+		*sizec = of_n_size_cells(dev);
+}
+
+static u64 of_bus_default_map(u32 *addr, const u32 *range,
+		int na, int ns, int pna)
+{
+	u64 cp, s, da;
+
+	cp = of_read_number(range, na);
+	s  = of_read_number(range + na + pna, ns);
+	da = of_read_number(addr, na);
+
+	pr_debug("OF: default map, cp=%llx, s=%llx, da=%llx\n",
+		 (unsigned long long)cp, (unsigned long long)s,
+		 (unsigned long long)da);
+
+	if (da < cp || da >= (cp + s))
+		return OF_BAD_ADDR;
+	return da - cp;
+}
+
+static int of_bus_default_translate(u32 *addr, u64 offset, int na)
+{
+	u64 a = of_read_number(addr, na);
+	memset(addr, 0, na * 4);
+	a += offset;
+	if (na > 1)
+		addr[na - 2] = a >> 32;
+	addr[na - 1] = a & 0xffffffffu;
+
+	return 0;
+}
+
+static unsigned int of_bus_default_get_flags(const u32 *addr)
+{
+	return IORESOURCE_MEM;
+}
+
+#ifdef CONFIG_PCI
+/*
+ * PCI bus specific translator
+ */
+
+static int of_bus_pci_match(struct device_node *np)
+{
+	/* "vci" is for the /chaos bridge on 1st-gen PCI powermacs */
+	return !strcmp(np->type, "pci") || !strcmp(np->type, "vci");
+}
+
+static void of_bus_pci_count_cells(struct device_node *np,
+				   int *addrc, int *sizec)
+{
+	if (addrc)
+		*addrc = 3;
+	if (sizec)
+		*sizec = 2;
+}
+
+static unsigned int of_bus_pci_get_flags(const u32 *addr)
+{
+	unsigned int flags = 0;
+	u32 w = addr[0];
+
+	switch((w >> 24) & 0x03) {
+	case 0x01:
+		flags |= IORESOURCE_IO;
+		break;
+	case 0x02: /* 32 bits */
+	case 0x03: /* 64 bits */
+		flags |= IORESOURCE_MEM;
+		break;
+	}
+	if (w & 0x40000000)
+		flags |= IORESOURCE_PREFETCH;
+	return flags;
+}
+
+static u64 of_bus_pci_map(u32 *addr, const u32 *range, int na, int ns, int pna)
+{
+	u64 cp, s, da;
+	unsigned int af, rf;
+
+	af = of_bus_pci_get_flags(addr);
+	rf = of_bus_pci_get_flags(range);
+
+	/* Check address type match */
+	if ((af ^ rf) & (IORESOURCE_MEM | IORESOURCE_IO))
+		return OF_BAD_ADDR;
+
+	/* Read address values, skipping high cell */
+	cp = of_read_number(range + 1, na - 1);
+	s  = of_read_number(range + na + pna, ns);
+	da = of_read_number(addr + 1, na - 1);
+
+	pr_debug("OF: PCI map, cp=%llx, s=%llx, da=%llx\n",
+		 (unsigned long long)cp, (unsigned long long)s,
+		 (unsigned long long)da);
+
+	if (da < cp || da >= (cp + s))
+		return OF_BAD_ADDR;
+	return da - cp;
+}
+
+static int of_bus_pci_translate(u32 *addr, u64 offset, int na)
+{
+	return of_bus_default_translate(addr + 1, offset, na - 1);
+}
+
+const u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size,
+			unsigned int *flags)
+{
+	const u32 *prop;
+	unsigned int psize;
+	struct device_node *parent;
+	struct of_bus *bus;
+	int onesize, i, na, ns;
+
+	/* Get parent & match bus type */
+	parent = of_get_parent(dev);
+	if (parent == NULL)
+		return NULL;
+	bus = of_match_bus(parent);
+	if (strcmp(bus->name, "pci")) {
+		of_node_put(parent);
+		return NULL;
+	}
+	bus->count_cells(dev, &na, &ns);
+	of_node_put(parent);
+	if (!OF_CHECK_COUNTS(na, ns))
+		return NULL;
+
+	/* Get "reg" or "assigned-addresses" property */
+	prop = of_get_property(dev, bus->addresses, &psize);
+	if (prop == NULL)
+		return NULL;
+	psize /= 4;
+
+	onesize = na + ns;
+	for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++)
+		if ((prop[0] & 0xff) == ((bar_no * 4) + PCI_BASE_ADDRESS_0)) {
+			if (size)
+				*size = of_read_number(prop + na, ns);
+			if (flags)
+				*flags = bus->get_flags(prop);
+			return prop;
+		}
+	return NULL;
+}
+EXPORT_SYMBOL(of_get_pci_address);
+
+int of_pci_address_to_resource(struct device_node *dev, int bar,
+			       struct resource *r)
+{
+	const u32	*addrp;
+	u64		size;
+	unsigned int	flags;
+
+	addrp = of_get_pci_address(dev, bar, &size, &flags);
+	if (addrp == NULL)
+		return -EINVAL;
+	return __of_address_to_resource(dev, addrp, size, flags, r);
+}
+EXPORT_SYMBOL_GPL(of_pci_address_to_resource);
+#endif /* CONFIG_PCI */
+
+/*
+ * ISA bus specific translator
+ */
+
+static int of_bus_isa_match(struct device_node *np)
+{
+	return !strcmp(np->name, "isa");
+}
+
+static void of_bus_isa_count_cells(struct device_node *child,
+				   int *addrc, int *sizec)
+{
+	if (addrc)
+		*addrc = 2;
+	if (sizec)
+		*sizec = 1;
+}
+
+static u64 of_bus_isa_map(u32 *addr, const u32 *range, int na, int ns, int pna)
+{
+	u64 cp, s, da;
+
+	/* Check address type match */
+	if ((addr[0] ^ range[0]) & 0x00000001)
+		return OF_BAD_ADDR;
+
+	/* Read address values, skipping high cell */
+	cp = of_read_number(range + 1, na - 1);
+	s  = of_read_number(range + na + pna, ns);
+	da = of_read_number(addr + 1, na - 1);
+
+	pr_debug("OF: ISA map, cp=%llx, s=%llx, da=%llx\n",
+		 (unsigned long long)cp, (unsigned long long)s,
+		 (unsigned long long)da);
+
+	if (da < cp || da >= (cp + s))
+		return OF_BAD_ADDR;
+	return da - cp;
+}
+
+static int of_bus_isa_translate(u32 *addr, u64 offset, int na)
+{
+	return of_bus_default_translate(addr + 1, offset, na - 1);
+}
+
+static unsigned int of_bus_isa_get_flags(const u32 *addr)
+{
+	unsigned int flags = 0;
+	u32 w = addr[0];
+
+	if (w & 1)
+		flags |= IORESOURCE_IO;
+	else
+		flags |= IORESOURCE_MEM;
+	return flags;
+}
+
+/*
+ * Array of bus specific translators
+ */
+
+static struct of_bus of_busses[] = {
+#ifdef CONFIG_PCI
+	/* PCI */
+	{
+		.name = "pci",
+		.addresses = "assigned-addresses",
+		.match = of_bus_pci_match,
+		.count_cells = of_bus_pci_count_cells,
+		.map = of_bus_pci_map,
+		.translate = of_bus_pci_translate,
+		.get_flags = of_bus_pci_get_flags,
+	},
+#endif /* CONFIG_PCI */
+	/* ISA */
+	{
+		.name = "isa",
+		.addresses = "reg",
+		.match = of_bus_isa_match,
+		.count_cells = of_bus_isa_count_cells,
+		.map = of_bus_isa_map,
+		.translate = of_bus_isa_translate,
+		.get_flags = of_bus_isa_get_flags,
+	},
+	/* Default */
+	{
+		.name = "default",
+		.addresses = "reg",
+		.match = NULL,
+		.count_cells = of_bus_default_count_cells,
+		.map = of_bus_default_map,
+		.translate = of_bus_default_translate,
+		.get_flags = of_bus_default_get_flags,
+	},
+};
+
+static struct of_bus *of_match_bus(struct device_node *np)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(of_busses); i++)
+		if (!of_busses[i].match || of_busses[i].match(np))
+			return &of_busses[i];
+	BUG();
+	return NULL;
+}
+
+static int of_translate_one(struct device_node *parent, struct of_bus *bus,
+			    struct of_bus *pbus, u32 *addr,
+			    int na, int ns, int pna, const char *rprop)
+{
+	const u32 *ranges;
+	unsigned int rlen;
+	int rone;
+	u64 offset = OF_BAD_ADDR;
+
+	/* Normally, an absence of a "ranges" property means we are
+	 * crossing a non-translatable boundary, and thus the addresses
+	 * below the current node cannot be converted to CPU physical ones.
+	 * Unfortunately, while this is very clear in the spec, it's not
+	 * what Apple understood, and they do have things like /uni-n or
+	 * /ht nodes with no "ranges" property and a lot of perfectly
+	 * useable mapped devices below them. Thus we treat the absence of
+	 * "ranges" as equivalent to an empty "ranges" property which means
+	 * a 1:1 translation at that level. It's up to the caller not to try
+	 * to translate addresses that aren't supposed to be translated in
+	 * the first place. --BenH.
+	 */
+	ranges = of_get_property(parent, rprop, &rlen);
+	if (ranges == NULL || rlen == 0) {
+		offset = of_read_number(addr, na);
+		memset(addr, 0, pna * 4);
+		pr_debug("OF: no ranges, 1:1 translation\n");
+		goto finish;
+	}
+
+	pr_debug("OF: walking ranges...\n");
+
+	/* Now walk through the ranges */
+	rlen /= 4;
+	rone = na + pna + ns;
+	for (; rlen >= rone; rlen -= rone, ranges += rone) {
+		offset = bus->map(addr, ranges, na, ns, pna);
+		if (offset != OF_BAD_ADDR)
+			break;
+	}
+	if (offset == OF_BAD_ADDR) {
+		pr_debug("OF: not found !\n");
+		return 1;
+	}
+	memcpy(addr, ranges + na, 4 * pna);
+
+ finish:
+	of_dump_addr("OF: parent translation for:", addr, pna);
+	pr_debug("OF: with offset: %llx\n", (unsigned long long)offset);
+
+	/* Translate it into parent bus space */
+	return pbus->translate(addr, offset, pna);
+}
+
+/*
+ * Translate an address from the device-tree into a CPU physical address,
+ * this walks up the tree and applies the various bus mappings on the
+ * way.
+ *
+ * Note: We consider that crossing any level with #size-cells == 0 to mean
+ * that translation is impossible (that is we are not dealing with a value
+ * that can be mapped to a cpu physical address). This is not really specified
+ * that way, but this is traditionally the way IBM at least do things
+ */
+u64 __of_translate_address(struct device_node *dev, const u32 *in_addr,
+			   const char *rprop)
+{
+	struct device_node *parent = NULL;
+	struct of_bus *bus, *pbus;
+	u32 addr[OF_MAX_ADDR_CELLS];
+	int na, ns, pna, pns;
+	u64 result = OF_BAD_ADDR;
+
+	pr_debug("OF: ** translation for device %s **\n", dev->full_name);
+
+	/* Increase refcount at current level */
+	of_node_get(dev);
+
+	/* Get parent & match bus type */
+	parent = of_get_parent(dev);
+	if (parent == NULL)
+		goto bail;
+	bus = of_match_bus(parent);
+
+	/* Count address cells & copy address locally */
+	bus->count_cells(dev, &na, &ns);
+	if (!OF_CHECK_COUNTS(na, ns)) {
+		printk(KERN_ERR "prom_parse: Bad cell count for %s\n",
+		       dev->full_name);
+		goto bail;
+	}
+	memcpy(addr, in_addr, na * 4);
+
+	pr_debug("OF: bus is %s (na=%d, ns=%d) on %s\n",
+	    bus->name, na, ns, parent->full_name);
+	of_dump_addr("OF: translating address:", addr, na);
+
+	/* Translate */
+	for (;;) {
+		/* Switch to parent bus */
+		of_node_put(dev);
+		dev = parent;
+		parent = of_get_parent(dev);
+
+		/* If root, we have finished */
+		if (parent == NULL) {
+			pr_debug("OF: reached root node\n");
+			result = of_read_number(addr, na);
+			break;
+		}
+
+		/* Get new parent bus and counts */
+		pbus = of_match_bus(parent);
+		pbus->count_cells(dev, &pna, &pns);
+		if (!OF_CHECK_COUNTS(pna, pns)) {
+			printk(KERN_ERR "prom_parse: Bad cell count for %s\n",
+			       dev->full_name);
+			break;
+		}
+
+		pr_debug("OF: parent bus is %s (na=%d, ns=%d) on %s\n",
+		    pbus->name, pna, pns, parent->full_name);
+
+		/* Apply bus translation */
+		if (of_translate_one(dev, bus, pbus, addr, na, ns, pna, rprop))
+			break;
+
+		/* Complete the move up one level */
+		na = pna;
+		ns = pns;
+		bus = pbus;
+
+		of_dump_addr("OF: one level translation:", addr, na);
+	}
+ bail:
+	of_node_put(parent);
+	of_node_put(dev);
+
+	return result;
+}
+
+u64 of_translate_address(struct device_node *dev, const u32 *in_addr)
+{
+	return __of_translate_address(dev, in_addr, "ranges");
+}
+EXPORT_SYMBOL(of_translate_address);
+
+u64 of_translate_dma_address(struct device_node *dev, const u32 *in_addr)
+{
+	return __of_translate_address(dev, in_addr, "dma-ranges");
+}
+EXPORT_SYMBOL(of_translate_dma_address);
+
+const u32 *of_get_address(struct device_node *dev, int index, u64 *size,
+		    unsigned int *flags)
+{
+	const u32 *prop;
+	unsigned int psize;
+	struct device_node *parent;
+	struct of_bus *bus;
+	int onesize, i, na, ns;
+
+	/* Get parent & match bus type */
+	parent = of_get_parent(dev);
+	if (parent == NULL)
+		return NULL;
+	bus = of_match_bus(parent);
+	bus->count_cells(dev, &na, &ns);
+	of_node_put(parent);
+	if (!OF_CHECK_COUNTS(na, ns))
+		return NULL;
+
+	/* Get "reg" or "assigned-addresses" property */
+	prop = of_get_property(dev, bus->addresses, &psize);
+	if (prop == NULL)
+		return NULL;
+	psize /= 4;
+
+	onesize = na + ns;
+	for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++)
+		if (i == index) {
+			if (size)
+				*size = of_read_number(prop + na, ns);
+			if (flags)
+				*flags = bus->get_flags(prop);
+			return prop;
+		}
+	return NULL;
+}
+EXPORT_SYMBOL(of_get_address);
+
+static int __of_address_to_resource(struct device_node *dev, const u32 *addrp,
+				    u64 size, unsigned int flags,
+				    struct resource *r)
 {
 	u64 taddr;
 
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 474b794ed9d..cc567df9a00 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -3,9 +3,7 @@
 #include <linux/ioport.h>
 #include <linux/of.h>
 
-extern int __of_address_to_resource(struct device_node *dev, const u32 *addrp,
-				    u64 size, unsigned int flags,
-				    struct resource *r);
+extern u64 of_translate_address(struct device_node *np, const u32 *addr);
 extern int of_address_to_resource(struct device_node *dev, int index,
 				  struct resource *r);
 extern void __iomem *of_iomap(struct device_node *device, int index);
-- 
cgit v1.2.3-70-g09d2


From dd27dcda37f0b1a3b674760fb411abc5c8fe309c Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 8 Jun 2010 07:48:12 -0600
Subject: of/device: merge of_device_uevent

Merge the duplicated of_device_uevent() implementations from powerpc and microblaze into a single copy in drivers/of/device.c.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
CC: Michal Simek <monstr@monstr.eu>
CC: Wolfram Sang <w.sang@pengutronix.de>
CC: Stephen Rothwell <sfr@canb.auug.org.au>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: microblaze-uclinux@itee.uq.edu.au
CC: linuxppc-dev@ozlabs.org
---
 arch/microblaze/include/asm/of_device.h |  3 --
 arch/microblaze/kernel/of_device.c      | 48 --------------------------------
 arch/powerpc/include/asm/of_device.h    |  3 --
 arch/powerpc/kernel/of_device.c         | 49 ---------------------------------
 drivers/of/device.c                     | 48 ++++++++++++++++++++++++++++++++
 include/linux/of_device.h               |  4 +++
 6 files changed, 52 insertions(+), 103 deletions(-)

(limited to 'include')

diff --git a/arch/microblaze/include/asm/of_device.h b/arch/microblaze/include/asm/of_device.h
index 0a5f3f914b4..58e627dc141 100644
--- a/arch/microblaze/include/asm/of_device.h
+++ b/arch/microblaze/include/asm/of_device.h
@@ -22,9 +22,6 @@ extern struct of_device *of_device_alloc(struct device_node *np,
 					 const char *bus_id,
 					 struct device *parent);
 
-extern int of_device_uevent(struct device *dev,
-			    struct kobj_uevent_env *env);
-
 extern void of_device_make_bus_id(struct of_device *dev);
 
 /* This is just here during the transition */
diff --git a/arch/microblaze/kernel/of_device.c b/arch/microblaze/kernel/of_device.c
index b372787886e..3a367d78845 100644
--- a/arch/microblaze/kernel/of_device.c
+++ b/arch/microblaze/kernel/of_device.c
@@ -62,51 +62,3 @@ struct of_device *of_device_alloc(struct device_node *np,
 	return dev;
 }
 EXPORT_SYMBOL(of_device_alloc);
-
-int of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
-	struct of_device *ofdev;
-	const char *compat;
-	int seen = 0, cplen, sl;
-
-	if (!dev)
-		return -ENODEV;
-
-	ofdev = to_of_device(dev);
-
-	if (add_uevent_var(env, "OF_NAME=%s", ofdev->dev.of_node->name))
-		return -ENOMEM;
-
-	if (add_uevent_var(env, "OF_TYPE=%s", ofdev->dev.of_node->type))
-		return -ENOMEM;
-
-	/* Since the compatible field can contain pretty much anything
-	 * it's not really legal to split it out with commas. We split it
-	 * up using a number of environment variables instead. */
-
-	compat = of_get_property(ofdev->dev.of_node, "compatible", &cplen);
-	while (compat && *compat && cplen > 0) {
-		if (add_uevent_var(env, "OF_COMPATIBLE_%d=%s", seen, compat))
-			return -ENOMEM;
-
-		sl = strlen(compat) + 1;
-		compat += sl;
-		cplen -= sl;
-		seen++;
-	}
-
-	if (add_uevent_var(env, "OF_COMPATIBLE_N=%d", seen))
-		return -ENOMEM;
-
-	/* modalias is trickier, we add it in 2 steps */
-	if (add_uevent_var(env, "MODALIAS="))
-		return -ENOMEM;
-	sl = of_device_get_modalias(ofdev, &env->buf[env->buflen-1],
-				    sizeof(env->buf) - env->buflen);
-	if (sl >= (sizeof(env->buf) - env->buflen))
-		return -ENOMEM;
-	env->buflen += sl;
-
-	return 0;
-}
-EXPORT_SYMBOL(of_device_uevent);
diff --git a/arch/powerpc/include/asm/of_device.h b/arch/powerpc/include/asm/of_device.h
index cb36632f953..5d5103cac64 100644
--- a/arch/powerpc/include/asm/of_device.h
+++ b/arch/powerpc/include/asm/of_device.h
@@ -9,8 +9,5 @@ extern struct of_device *of_device_alloc(struct device_node *np,
 					 const char *bus_id,
 					 struct device *parent);
 
-extern int of_device_uevent(struct device *dev,
-			    struct kobj_uevent_env *env);
-
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_OF_DEVICE_H */
diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c
index df78e0236a0..db91a9dbafb 100644
--- a/arch/powerpc/kernel/of_device.c
+++ b/arch/powerpc/kernel/of_device.c
@@ -82,52 +82,3 @@ struct of_device *of_device_alloc(struct device_node *np,
 	return dev;
 }
 EXPORT_SYMBOL(of_device_alloc);
-
-int of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
-	struct of_device *ofdev;
-	const char *compat;
-	int seen = 0, cplen, sl;
-
-	if (!dev)
-		return -ENODEV;
-
-	ofdev = to_of_device(dev);
-
-	if (add_uevent_var(env, "OF_NAME=%s", ofdev->dev.of_node->name))
-		return -ENOMEM;
-
-	if (add_uevent_var(env, "OF_TYPE=%s", ofdev->dev.of_node->type))
-		return -ENOMEM;
-
-        /* Since the compatible field can contain pretty much anything
-         * it's not really legal to split it out with commas. We split it
-         * up using a number of environment variables instead. */
-
-	compat = of_get_property(ofdev->dev.of_node, "compatible", &cplen);
-	while (compat && *compat && cplen > 0) {
-		if (add_uevent_var(env, "OF_COMPATIBLE_%d=%s", seen, compat))
-			return -ENOMEM;
-
-		sl = strlen (compat) + 1;
-		compat += sl;
-		cplen -= sl;
-		seen++;
-	}
-
-	if (add_uevent_var(env, "OF_COMPATIBLE_N=%d", seen))
-		return -ENOMEM;
-
-	/* modalias is trickier, we add it in 2 steps */
-	if (add_uevent_var(env, "MODALIAS="))
-		return -ENOMEM;
-	sl = of_device_get_modalias(ofdev, &env->buf[env->buflen-1],
-				    sizeof(env->buf) - env->buflen);
-	if (sl >= (sizeof(env->buf) - env->buflen))
-		return -ENOMEM;
-	env->buflen += sl;
-
-	return 0;
-}
-EXPORT_SYMBOL(of_device_uevent);
-EXPORT_SYMBOL(of_device_get_modalias);
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 7d18f8e0b01..275cc9cee14 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -170,3 +170,51 @@ ssize_t of_device_get_modalias(struct of_device *ofdev,
 
 	return tsize;
 }
+
+/**
+ * of_device_uevent - Display OF related uevent information
+ */
+int of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	const char *compat;
+	int seen = 0, cplen, sl;
+
+	if ((!dev) || (!dev->of_node))
+		return -ENODEV;
+
+	if (add_uevent_var(env, "OF_NAME=%s", dev->of_node->name))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "OF_TYPE=%s", dev->of_node->type))
+		return -ENOMEM;
+
+	/* Since the compatible field can contain pretty much anything
+	 * it's not really legal to split it out with commas. We split it
+	 * up using a number of environment variables instead. */
+
+	compat = of_get_property(dev->of_node, "compatible", &cplen);
+	while (compat && *compat && cplen > 0) {
+		if (add_uevent_var(env, "OF_COMPATIBLE_%d=%s", seen, compat))
+			return -ENOMEM;
+
+		sl = strlen(compat) + 1;
+		compat += sl;
+		cplen -= sl;
+		seen++;
+	}
+
+	if (add_uevent_var(env, "OF_COMPATIBLE_N=%d", seen))
+		return -ENOMEM;
+
+	/* modalias is trickier, we add it in 2 steps */
+	if (add_uevent_var(env, "MODALIAS="))
+		return -ENOMEM;
+
+	sl = of_device_get_modalias(to_of_device(dev), &env->buf[env->buflen-1],
+				    sizeof(env->buf) - env->buflen);
+	if (sl >= (sizeof(env->buf) - env->buflen))
+		return -ENOMEM;
+	env->buflen += sl;
+
+	return 0;
+}
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index a3ae5900fc5..da83e734c02 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -44,6 +44,10 @@ static inline void of_device_free(struct of_device *dev)
 
 extern ssize_t of_device_get_modalias(struct of_device *ofdev,
 					char *str, ssize_t len);
+
+extern int of_device_uevent(struct device *dev, struct kobj_uevent_env *env);
+
+
 #endif /* CONFIG_OF_DEVICE */
 
 #endif /* _LINUX_OF_DEVICE_H */
-- 
cgit v1.2.3-70-g09d2


From 34a1c1e8c700f7cd849deb21193718a172722f8d Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 8 Jun 2010 07:48:13 -0600
Subject: of: Modify of_device_get_modalias to be passed struct device

Now that the of_node pointer is part of struct device,
of_device_get_modalias could be used on any struct device
that has the device node pointer set.  This patch changes
of_device_get_modalias to accept a struct device instead
of a struct of_device.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
CC: Michal Simek <monstr@monstr.eu>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Wolfram Sang <w.sang@pengutronix.de>
CC: Stephen Rothwell <sfr@canb.auug.org.au>
CC: microblaze-uclinux@itee.uq.edu.au
CC: linuxppc-dev@ozlabs.org
---
 arch/microblaze/include/asm/of_device.h |  3 ---
 drivers/macintosh/macio_sysfs.c         |  5 +----
 drivers/of/device.c                     | 16 ++++++----------
 include/linux/of_device.h               |  2 +-
 4 files changed, 8 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/arch/microblaze/include/asm/of_device.h b/arch/microblaze/include/asm/of_device.h
index 58e627dc141..c9be5348743 100644
--- a/arch/microblaze/include/asm/of_device.h
+++ b/arch/microblaze/include/asm/of_device.h
@@ -15,9 +15,6 @@
 #include <linux/device.h>
 #include <linux/of.h>
 
-extern ssize_t of_device_get_modalias(struct of_device *ofdev,
-					char *str, ssize_t len);
-
 extern struct of_device *of_device_alloc(struct device_node *np,
 					 const char *bus_id,
 					 struct device *parent);
diff --git a/drivers/macintosh/macio_sysfs.c b/drivers/macintosh/macio_sysfs.c
index 6999ce59fd1..6024038a5b9 100644
--- a/drivers/macintosh/macio_sysfs.c
+++ b/drivers/macintosh/macio_sysfs.c
@@ -41,10 +41,7 @@ compatible_show (struct device *dev, struct device_attribute *attr, char *buf)
 static ssize_t modalias_show (struct device *dev, struct device_attribute *attr,
 			      char *buf)
 {
-	struct of_device *ofdev = to_of_device(dev);
-	int len;
-
-	len = of_device_get_modalias(ofdev, buf, PAGE_SIZE - 2);
+	int len = of_device_get_modalias(dev, buf, PAGE_SIZE - 2);
 
 	buf[len] = '\n';
 	buf[len+1] = 0;
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 275cc9cee14..c2a98f5ca80 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -68,10 +68,7 @@ static ssize_t name_show(struct device *dev,
 static ssize_t modalias_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
-	struct of_device *ofdev = to_of_device(dev);
-	ssize_t len = 0;
-
-	len = of_device_get_modalias(ofdev, buf, PAGE_SIZE - 2);
+	ssize_t len = of_device_get_modalias(dev, buf, PAGE_SIZE - 2);
 	buf[len] = '\n';
 	buf[len+1] = 0;
 	return len+1;
@@ -123,19 +120,18 @@ void of_device_unregister(struct of_device *ofdev)
 }
 EXPORT_SYMBOL(of_device_unregister);
 
-ssize_t of_device_get_modalias(struct of_device *ofdev,
-				char *str, ssize_t len)
+ssize_t of_device_get_modalias(struct device *dev, char *str, ssize_t len)
 {
 	const char *compat;
 	int cplen, i;
 	ssize_t tsize, csize, repend;
 
 	/* Name & Type */
-	csize = snprintf(str, len, "of:N%sT%s", ofdev->dev.of_node->name,
-			 ofdev->dev.of_node->type);
+	csize = snprintf(str, len, "of:N%sT%s", dev->of_node->name,
+			 dev->of_node->type);
 
 	/* Get compatible property if any */
-	compat = of_get_property(ofdev->dev.of_node, "compatible", &cplen);
+	compat = of_get_property(dev->of_node, "compatible", &cplen);
 	if (!compat)
 		return csize;
 
@@ -210,7 +206,7 @@ int of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
 	if (add_uevent_var(env, "MODALIAS="))
 		return -ENOMEM;
 
-	sl = of_device_get_modalias(to_of_device(dev), &env->buf[env->buflen-1],
+	sl = of_device_get_modalias(dev, &env->buf[env->buflen-1],
 				    sizeof(env->buf) - env->buflen);
 	if (sl >= (sizeof(env->buf) - env->buflen))
 		return -ENOMEM;
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index da83e734c02..238e92e007e 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -42,7 +42,7 @@ static inline void of_device_free(struct of_device *dev)
 	of_release_dev(&dev->dev);
 }
 
-extern ssize_t of_device_get_modalias(struct of_device *ofdev,
+extern ssize_t of_device_get_modalias(struct device *dev,
 					char *str, ssize_t len);
 
 extern int of_device_uevent(struct device *dev, struct kobj_uevent_env *env);
-- 
cgit v1.2.3-70-g09d2


From 5fd200f3b351183b5489cef69961c60af9cead2f Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 8 Jun 2010 07:48:13 -0600
Subject: of/device: Merge of_platform_bus_probe()

Merge common code between PowerPC and microblaze.  This patch merges
the code that scans the tree and registers devices.  The functions
merged are of_platform_bus_probe(), of_platform_bus_create(), and
of_platform_device_create().

This patch also moves the of_default_bus_ids[] table out of a Microblaze
header file and makes it non-static.  The device ids table isn't merged
because powerpc and microblaze use different default data.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
CC: Michal Simek <monstr@monstr.eu>
CC: Grant Likely <grant.likely@secretlab.ca>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Stephen Rothwell <sfr@canb.auug.org.au>
CC: microblaze-uclinux@itee.uq.edu.au
CC: linuxppc-dev@ozlabs.org
---
 arch/microblaze/include/asm/of_platform.h |  33 -------
 arch/microblaze/kernel/of_platform.c      | 141 ++++-------------------------
 arch/powerpc/include/asm/of_platform.h    |  11 ---
 arch/powerpc/kernel/of_platform.c         | 131 +--------------------------
 drivers/of/platform.c                     | 143 ++++++++++++++++++++++++++++++
 include/linux/of_platform.h               |  17 ++++
 6 files changed, 179 insertions(+), 297 deletions(-)

(limited to 'include')

diff --git a/arch/microblaze/include/asm/of_platform.h b/arch/microblaze/include/asm/of_platform.h
index 37491276c6c..625003f7088 100644
--- a/arch/microblaze/include/asm/of_platform.h
+++ b/arch/microblaze/include/asm/of_platform.h
@@ -14,39 +14,6 @@
 /* This is just here during the transition */
 #include <linux/of_platform.h>
 
-/*
- * The list of OF IDs below is used for matching bus types in the
- * system whose devices are to be exposed as of_platform_devices.
- *
- * This is the default list valid for most platforms. This file provides
- * functions who can take an explicit list if necessary though
- *
- * The search is always performed recursively looking for children of
- * the provided device_node and recursively if such a children matches
- * a bus type in the list
- */
-
-static const struct of_device_id of_default_bus_ids[] = {
-	{ .type = "soc", },
-	{ .compatible = "soc", },
-	{ .type = "plb5", },
-	{ .type = "plb4", },
-	{ .type = "opb", },
-	{ .type = "simple", },
-	{},
-};
-
-/* Platform devices and busses creation */
-extern struct of_device *of_platform_device_create(struct device_node *np,
-						const char *bus_id,
-						struct device *parent);
-/* pseudo "matches" value to not do deep probe */
-#define OF_NO_DEEP_PROBE ((struct of_device_id *)-1)
-
-extern int of_platform_bus_probe(struct device_node *root,
-				const struct of_device_id *matches,
-				struct device *parent);
-
 extern struct of_device *of_find_device_by_phandle(phandle ph);
 
 extern void of_instantiate_rtc(void);
diff --git a/arch/microblaze/kernel/of_platform.c b/arch/microblaze/kernel/of_platform.c
index ccf6f4257f4..a07abdd6859 100644
--- a/arch/microblaze/kernel/of_platform.c
+++ b/arch/microblaze/kernel/of_platform.c
@@ -37,132 +37,27 @@ static int __init of_bus_driver_init(void)
 }
 postcore_initcall(of_bus_driver_init);
 
-struct of_device *of_platform_device_create(struct device_node *np,
-					    const char *bus_id,
-					    struct device *parent)
-{
-	struct of_device *dev;
-
-	dev = of_device_alloc(np, bus_id, parent);
-	if (!dev)
-		return NULL;
-
-	dev->archdata.dma_mask = 0xffffffffUL;
-	dev->dev.bus = &of_platform_bus_type;
-
-	/* We do not fill the DMA ops for platform devices by default.
-	 * This is currently the responsibility of the platform code
-	 * to do such, possibly using a device notifier
-	 */
-
-	if (of_device_register(dev) != 0) {
-		of_device_free(dev);
-		return NULL;
-	}
-
-	return dev;
-}
-EXPORT_SYMBOL(of_platform_device_create);
-
-/**
- * of_platform_bus_create - Create an OF device for a bus node and all its
- * children. Optionally recursively instanciate matching busses.
- * @bus: device node of the bus to instanciate
- * @matches: match table, NULL to use the default, OF_NO_DEEP_PROBE to
- * disallow recursive creation of child busses
- */
-static int of_platform_bus_create(const struct device_node *bus,
-				  const struct of_device_id *matches,
-				  struct device *parent)
-{
-	struct device_node *child;
-	struct of_device *dev;
-	int rc = 0;
-
-	for_each_child_of_node(bus, child) {
-		pr_debug("   create child: %s\n", child->full_name);
-		dev = of_platform_device_create(child, NULL, parent);
-		if (dev == NULL)
-			rc = -ENOMEM;
-		else if (!of_match_node(matches, child))
-			continue;
-		if (rc == 0) {
-			pr_debug("   and sub busses\n");
-			rc = of_platform_bus_create(child, matches, &dev->dev);
-		}
-		if (rc) {
-			of_node_put(child);
-			break;
-		}
-	}
-	return rc;
-}
-
-
-/**
- * of_platform_bus_probe - Probe the device-tree for platform busses
- * @root: parent of the first level to probe or NULL for the root of the tree
- * @matches: match table, NULL to use the default
- * @parent: parent to hook devices from, NULL for toplevel
+/*
+ * The list of OF IDs below is used for matching bus types in the
+ * system whose devices are to be exposed as of_platform_devices.
  *
- * Note that children of the provided root are not instanciated as devices
- * unless the specified root itself matches the bus list and is not NULL.
+ * This is the default list valid for most platforms. This file provides
+ * functions who can take an explicit list if necessary though
+ *
+ * The search is always performed recursively looking for children of
+ * the provided device_node and recursively if such a children matches
+ * a bus type in the list
  */
 
-int of_platform_bus_probe(struct device_node *root,
-			  const struct of_device_id *matches,
-			  struct device *parent)
-{
-	struct device_node *child;
-	struct of_device *dev;
-	int rc = 0;
-
-	if (matches == NULL)
-		matches = of_default_bus_ids;
-	if (matches == OF_NO_DEEP_PROBE)
-		return -EINVAL;
-	if (root == NULL)
-		root = of_find_node_by_path("/");
-	else
-		of_node_get(root);
-
-	pr_debug("of_platform_bus_probe()\n");
-	pr_debug(" starting at: %s\n", root->full_name);
-
-	/* Do a self check of bus type, if there's a match, create
-	 * children
-	 */
-	if (of_match_node(matches, root)) {
-		pr_debug(" root match, create all sub devices\n");
-		dev = of_platform_device_create(root, NULL, parent);
-		if (dev == NULL) {
-			rc = -ENOMEM;
-			goto bail;
-		}
-		pr_debug(" create all sub busses\n");
-		rc = of_platform_bus_create(root, matches, &dev->dev);
-		goto bail;
-	}
-	for_each_child_of_node(root, child) {
-		if (!of_match_node(matches, child))
-			continue;
-
-		pr_debug("  match: %s\n", child->full_name);
-		dev = of_platform_device_create(child, NULL, parent);
-		if (dev == NULL)
-			rc = -ENOMEM;
-		else
-			rc = of_platform_bus_create(child, matches, &dev->dev);
-		if (rc) {
-			of_node_put(child);
-			break;
-		}
-	}
- bail:
-	of_node_put(root);
-	return rc;
-}
-EXPORT_SYMBOL(of_platform_bus_probe);
+const struct of_device_id of_default_bus_ids[] = {
+	{ .type = "soc", },
+	{ .compatible = "soc", },
+	{ .type = "plb5", },
+	{ .type = "plb4", },
+	{ .type = "opb", },
+	{ .type = "simple", },
+	{},
+};
 
 static int of_dev_node_match(struct device *dev, void *data)
 {
diff --git a/arch/powerpc/include/asm/of_platform.h b/arch/powerpc/include/asm/of_platform.h
index d4aaa348944..b37d2dcddb9 100644
--- a/arch/powerpc/include/asm/of_platform.h
+++ b/arch/powerpc/include/asm/of_platform.h
@@ -11,17 +11,6 @@
  *
  */
 
-/* Platform devices and busses creation */
-extern struct of_device *of_platform_device_create(struct device_node *np,
-						   const char *bus_id,
-						   struct device *parent);
-/* pseudo "matches" value to not do deep probe */
-#define OF_NO_DEEP_PROBE ((struct of_device_id *)-1)
-
-extern int of_platform_bus_probe(struct device_node *root,
-				 const struct of_device_id *matches,
-				 struct device *parent);
-
 extern struct of_device *of_find_device_by_phandle(phandle ph);
 
 extern void of_instantiate_rtc(void);
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 487a98851ba..0b5cc6d892a 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -40,7 +40,7 @@
  * a bus type in the list
  */
 
-static const struct of_device_id of_default_bus_ids[] = {
+const struct of_device_id of_default_bus_ids[] = {
 	{ .type = "soc", },
 	{ .compatible = "soc", },
 	{ .type = "spider", },
@@ -64,135 +64,6 @@ static int __init of_bus_driver_init(void)
 
 postcore_initcall(of_bus_driver_init);
 
-struct of_device* of_platform_device_create(struct device_node *np,
-					    const char *bus_id,
-					    struct device *parent)
-{
-	struct of_device *dev;
-
-	dev = of_device_alloc(np, bus_id, parent);
-	if (!dev)
-		return NULL;
-
-	dev->archdata.dma_mask = 0xffffffffUL;
-	dev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
-
-	dev->dev.bus = &of_platform_bus_type;
-
-	/* We do not fill the DMA ops for platform devices by default.
-	 * This is currently the responsibility of the platform code
-	 * to do such, possibly using a device notifier
-	 */
-
-	if (of_device_register(dev) != 0) {
-		of_device_free(dev);
-		return NULL;
-	}
-
-	return dev;
-}
-EXPORT_SYMBOL(of_platform_device_create);
-
-
-
-/**
- * of_platform_bus_create - Create an OF device for a bus node and all its
- * children. Optionally recursively instanciate matching busses.
- * @bus: device node of the bus to instanciate
- * @matches: match table, NULL to use the default, OF_NO_DEEP_PROBE to
- * disallow recursive creation of child busses
- */
-static int of_platform_bus_create(const struct device_node *bus,
-				  const struct of_device_id *matches,
-				  struct device *parent)
-{
-	struct device_node *child;
-	struct of_device *dev;
-	int rc = 0;
-
-	for_each_child_of_node(bus, child) {
-		pr_debug("   create child: %s\n", child->full_name);
-		dev = of_platform_device_create(child, NULL, parent);
-		if (dev == NULL)
-			rc = -ENOMEM;
-		else if (!of_match_node(matches, child))
-			continue;
-		if (rc == 0) {
-			pr_debug("   and sub busses\n");
-			rc = of_platform_bus_create(child, matches, &dev->dev);
-		} if (rc) {
-			of_node_put(child);
-			break;
-		}
-	}
-	return rc;
-}
-
-/**
- * of_platform_bus_probe - Probe the device-tree for platform busses
- * @root: parent of the first level to probe or NULL for the root of the tree
- * @matches: match table, NULL to use the default
- * @parent: parent to hook devices from, NULL for toplevel
- *
- * Note that children of the provided root are not instanciated as devices
- * unless the specified root itself matches the bus list and is not NULL.
- */
-
-int of_platform_bus_probe(struct device_node *root,
-			  const struct of_device_id *matches,
-			  struct device *parent)
-{
-	struct device_node *child;
-	struct of_device *dev;
-	int rc = 0;
-
-	if (matches == NULL)
-		matches = of_default_bus_ids;
-	if (matches == OF_NO_DEEP_PROBE)
-		return -EINVAL;
-	if (root == NULL)
-		root = of_find_node_by_path("/");
-	else
-		of_node_get(root);
-
-	pr_debug("of_platform_bus_probe()\n");
-	pr_debug(" starting at: %s\n", root->full_name);
-
-	/* Do a self check of bus type, if there's a match, create
-	 * children
-	 */
-	if (of_match_node(matches, root)) {
-		pr_debug(" root match, create all sub devices\n");
-		dev = of_platform_device_create(root, NULL, parent);
-		if (dev == NULL) {
-			rc = -ENOMEM;
-			goto bail;
-		}
-		pr_debug(" create all sub busses\n");
-		rc = of_platform_bus_create(root, matches, &dev->dev);
-		goto bail;
-	}
-	for_each_child_of_node(root, child) {
-		if (!of_match_node(matches, child))
-			continue;
-
-		pr_debug("  match: %s\n", child->full_name);
-		dev = of_platform_device_create(child, NULL, parent);
-		if (dev == NULL)
-			rc = -ENOMEM;
-		else
-			rc = of_platform_bus_create(child, matches, &dev->dev);
-		if (rc) {
-			of_node_put(child);
-			break;
-		}
-	}
- bail:
-	of_node_put(root);
-	return rc;
-}
-EXPORT_SYMBOL(of_platform_bus_probe);
-
 static int of_dev_node_match(struct device *dev, void *data)
 {
 	return to_of_device(dev)->dev.of_node == data;
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 7dacc1ebe91..d9c81e93bdd 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -14,6 +14,7 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/device.h>
+#include <linux/dma-mapping.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
 
@@ -396,3 +397,145 @@ void of_unregister_driver(struct of_platform_driver *drv)
 	driver_unregister(&drv->driver);
 }
 EXPORT_SYMBOL(of_unregister_driver);
+
+#if !defined(CONFIG_SPARC)
+/*
+ * The following routines scan a subtree and registers a device for
+ * each applicable node.
+ *
+ * Note: sparc doesn't use these routines because it has a different
+ * mechanism for creating devices from device tree nodes.
+ */
+
+/**
+ * of_platform_device_create - Alloc, initialize and register an of_device
+ * @np: pointer to node to create device for
+ * @bus_id: name to assign device
+ * @parent: Linux device model parent device.
+ */
+struct of_device *of_platform_device_create(struct device_node *np,
+					    const char *bus_id,
+					    struct device *parent)
+{
+	struct of_device *dev;
+
+	dev = of_device_alloc(np, bus_id, parent);
+	if (!dev)
+		return NULL;
+
+	dev->archdata.dma_mask = 0xffffffffUL;
+	dev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+	dev->dev.bus = &of_platform_bus_type;
+
+	/* We do not fill the DMA ops for platform devices by default.
+	 * This is currently the responsibility of the platform code
+	 * to do such, possibly using a device notifier
+	 */
+
+	if (of_device_register(dev) != 0) {
+		of_device_free(dev);
+		return NULL;
+	}
+
+	return dev;
+}
+EXPORT_SYMBOL(of_platform_device_create);
+
+/**
+ * of_platform_bus_create - Create an OF device for a bus node and all its
+ * children. Optionally recursively instantiate matching busses.
+ * @bus: device node of the bus to instantiate
+ * @matches: match table, NULL to use the default, OF_NO_DEEP_PROBE to
+ * disallow recursive creation of child busses
+ */
+static int of_platform_bus_create(const struct device_node *bus,
+				  const struct of_device_id *matches,
+				  struct device *parent)
+{
+	struct device_node *child;
+	struct of_device *dev;
+	int rc = 0;
+
+	for_each_child_of_node(bus, child) {
+		pr_debug("   create child: %s\n", child->full_name);
+		dev = of_platform_device_create(child, NULL, parent);
+		if (dev == NULL)
+			rc = -ENOMEM;
+		else if (!of_match_node(matches, child))
+			continue;
+		if (rc == 0) {
+			pr_debug("   and sub busses\n");
+			rc = of_platform_bus_create(child, matches, &dev->dev);
+		}
+		if (rc) {
+			of_node_put(child);
+			break;
+		}
+	}
+	return rc;
+}
+
+/**
+ * of_platform_bus_probe - Probe the device-tree for platform busses
+ * @root: parent of the first level to probe or NULL for the root of the tree
+ * @matches: match table, NULL to use the default
+ * @parent: parent to hook devices from, NULL for toplevel
+ *
+ * Note that children of the provided root are not instantiated as devices
+ * unless the specified root itself matches the bus list and is not NULL.
+ */
+int of_platform_bus_probe(struct device_node *root,
+			  const struct of_device_id *matches,
+			  struct device *parent)
+{
+	struct device_node *child;
+	struct of_device *dev;
+	int rc = 0;
+
+	if (matches == NULL)
+		matches = of_default_bus_ids;
+	if (matches == OF_NO_DEEP_PROBE)
+		return -EINVAL;
+	if (root == NULL)
+		root = of_find_node_by_path("/");
+	else
+		of_node_get(root);
+
+	pr_debug("of_platform_bus_probe()\n");
+	pr_debug(" starting at: %s\n", root->full_name);
+
+	/* Do a self check of bus type, if there's a match, create
+	 * children
+	 */
+	if (of_match_node(matches, root)) {
+		pr_debug(" root match, create all sub devices\n");
+		dev = of_platform_device_create(root, NULL, parent);
+		if (dev == NULL) {
+			rc = -ENOMEM;
+			goto bail;
+		}
+		pr_debug(" create all sub busses\n");
+		rc = of_platform_bus_create(root, matches, &dev->dev);
+		goto bail;
+	}
+	for_each_child_of_node(root, child) {
+		if (!of_match_node(matches, child))
+			continue;
+
+		pr_debug("  match: %s\n", child->full_name);
+		dev = of_platform_device_create(child, NULL, parent);
+		if (dev == NULL)
+			rc = -ENOMEM;
+		else
+			rc = of_platform_bus_create(child, matches, &dev->dev);
+		if (rc) {
+			of_node_put(child);
+			break;
+		}
+	}
+ bail:
+	of_node_put(root);
+	return rc;
+}
+EXPORT_SYMBOL(of_platform_bus_probe);
+#endif /* !CONFIG_SPARC */
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index 1643d3761eb..4bbba41396e 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -25,6 +25,8 @@
  */
 extern struct bus_type of_platform_bus_type;
 
+extern const struct of_device_id of_default_bus_ids[];
+
 /*
  * An of_platform_driver driver is attached to a basic of_device on
  * the "platform bus" (of_platform_bus_type).
@@ -63,6 +65,21 @@ static inline void of_unregister_platform_driver(struct of_platform_driver *drv)
 extern struct of_device *of_find_device_by_node(struct device_node *np);
 
 extern int of_bus_type_init(struct bus_type *bus, const char *name);
+
+#if !defined(CONFIG_SPARC) /* SPARC has its own device registration method */
+/* Platform devices and busses creation */
+extern struct of_device *of_platform_device_create(struct device_node *np,
+						   const char *bus_id,
+						   struct device *parent);
+
+/* pseudo "matches" value to not do deep probe */
+#define OF_NO_DEEP_PROBE ((struct of_device_id *)-1)
+
+extern int of_platform_bus_probe(struct device_node *root,
+				 const struct of_device_id *matches,
+				 struct device *parent);
+#endif /* !CONFIG_SPARC */
+
 #endif /* CONFIG_OF_DEVICE */
 
 #endif	/* _LINUX_OF_PLATFORM_H */
-- 
cgit v1.2.3-70-g09d2


From 94c0931983ee9d1cd96c32d52ac64c17464f0bbd Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 8 Jun 2010 07:48:14 -0600
Subject: of: Merge of_device_alloc() and of_device_make_bus_id()

This patch merges the common routines of_device_alloc() and
of_device_make_bus_id() from powerpc and microblaze.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
CC: Michal Simek <monstr@monstr.eu>
CC: Grant Likely <grant.likely@secretlab.ca>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Stephen Rothwell <sfr@canb.auug.org.au>
CC: microblaze-uclinux@itee.uq.edu.au
CC: linuxppc-dev@ozlabs.org
CC: devicetree-discuss@lists.ozlabs.org
---
 arch/microblaze/include/asm/of_device.h | 15 ------
 arch/microblaze/kernel/Makefile         |  2 +-
 arch/microblaze/kernel/of_device.c      | 64 ------------------------
 arch/powerpc/include/asm/of_device.h    | 10 ----
 arch/powerpc/kernel/Makefile            |  2 +-
 arch/powerpc/kernel/of_device.c         | 84 -------------------------------
 drivers/of/platform.c                   | 87 +++++++++++++++++++++++++++++++++
 include/linux/of_platform.h             |  3 ++
 8 files changed, 92 insertions(+), 175 deletions(-)
 delete mode 100644 arch/microblaze/kernel/of_device.c
 delete mode 100644 arch/powerpc/kernel/of_device.c

(limited to 'include')

diff --git a/arch/microblaze/include/asm/of_device.h b/arch/microblaze/include/asm/of_device.h
index c9be5348743..47e8d42aee8 100644
--- a/arch/microblaze/include/asm/of_device.h
+++ b/arch/microblaze/include/asm/of_device.h
@@ -10,19 +10,4 @@
 
 #ifndef _ASM_MICROBLAZE_OF_DEVICE_H
 #define _ASM_MICROBLAZE_OF_DEVICE_H
-#ifdef __KERNEL__
-
-#include <linux/device.h>
-#include <linux/of.h>
-
-extern struct of_device *of_device_alloc(struct device_node *np,
-					 const char *bus_id,
-					 struct device *parent);
-
-extern void of_device_make_bus_id(struct of_device *dev);
-
-/* This is just here during the transition */
-#include <linux/of_device.h>
-
-#endif /* __KERNEL__ */
 #endif /* _ASM_MICROBLAZE_OF_DEVICE_H */
diff --git a/arch/microblaze/kernel/Makefile b/arch/microblaze/kernel/Makefile
index e51bc152082..727e2cbff9c 100644
--- a/arch/microblaze/kernel/Makefile
+++ b/arch/microblaze/kernel/Makefile
@@ -15,7 +15,7 @@ endif
 extra-y := head.o vmlinux.lds
 
 obj-y += dma.o exceptions.o \
-	hw_exception_handler.o init_task.o intc.o irq.o of_device.o \
+	hw_exception_handler.o init_task.o intc.o irq.o \
 	of_platform.o process.o prom.o prom_parse.o ptrace.o \
 	setup.o signal.o sys_microblaze.o timer.o traps.o reset.o
 
diff --git a/arch/microblaze/kernel/of_device.c b/arch/microblaze/kernel/of_device.c
deleted file mode 100644
index 3a367d78845..00000000000
--- a/arch/microblaze/kernel/of_device.c
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mod_devicetable.h>
-#include <linux/slab.h>
-#include <linux/of_device.h>
-
-#include <linux/errno.h>
-
-void of_device_make_bus_id(struct of_device *dev)
-{
-	static atomic_t bus_no_reg_magic;
-	struct device_node *node = dev->dev.of_node;
-	const u32 *reg;
-	u64 addr;
-	int magic;
-
-	/*
-	 * For MMIO, get the physical address
-	 */
-	reg = of_get_property(node, "reg", NULL);
-	if (reg) {
-		addr = of_translate_address(node, reg);
-		if (addr != OF_BAD_ADDR) {
-			dev_set_name(&dev->dev, "%llx.%s",
-				     (unsigned long long)addr, node->name);
-			return;
-		}
-	}
-
-	/*
-	 * No BusID, use the node name and add a globally incremented
-	 * counter (and pray...)
-	 */
-	magic = atomic_add_return(1, &bus_no_reg_magic);
-	dev_set_name(&dev->dev, "%s.%d", node->name, magic - 1);
-}
-EXPORT_SYMBOL(of_device_make_bus_id);
-
-struct of_device *of_device_alloc(struct device_node *np,
-				  const char *bus_id,
-				  struct device *parent)
-{
-	struct of_device *dev;
-
-	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-	if (!dev)
-		return NULL;
-
-	dev->dev.of_node = of_node_get(np);
-	dev->dev.dma_mask = &dev->archdata.dma_mask;
-	dev->dev.parent = parent;
-	dev->dev.release = of_release_dev;
-
-	if (bus_id)
-		dev_set_name(&dev->dev, bus_id);
-	else
-		of_device_make_bus_id(dev);
-
-	return dev;
-}
-EXPORT_SYMBOL(of_device_alloc);
diff --git a/arch/powerpc/include/asm/of_device.h b/arch/powerpc/include/asm/of_device.h
index 5d5103cac64..04f76717f82 100644
--- a/arch/powerpc/include/asm/of_device.h
+++ b/arch/powerpc/include/asm/of_device.h
@@ -1,13 +1,3 @@
 #ifndef _ASM_POWERPC_OF_DEVICE_H
 #define _ASM_POWERPC_OF_DEVICE_H
-#ifdef __KERNEL__
-
-#include <linux/device.h>
-#include <linux/of.h>
-
-extern struct of_device *of_device_alloc(struct device_node *np,
-					 const char *bus_id,
-					 struct device *parent);
-
-#endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_OF_DEVICE_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 58d0572de6f..83aa1fd0908 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o
 obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o
-obj-$(CONFIG_PPC_OF)		+= of_device.o of_platform.o prom_parse.o
+obj-$(CONFIG_PPC_OF)		+= of_platform.o prom_parse.o
 obj-$(CONFIG_PPC_CLOCK)		+= clock.o
 procfs-y			:= proc_powerpc.o
 obj-$(CONFIG_PROC_FS)		+= $(procfs-y)
diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c
deleted file mode 100644
index db91a9dbafb..00000000000
--- a/arch/powerpc/kernel/of_device.c
+++ /dev/null
@@ -1,84 +0,0 @@
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mod_devicetable.h>
-#include <linux/slab.h>
-#include <linux/of_device.h>
-
-#include <asm/errno.h>
-#include <asm/dcr.h>
-
-static void of_device_make_bus_id(struct of_device *dev)
-{
-	static atomic_t bus_no_reg_magic;
-	struct device_node *node = dev->dev.of_node;
-	const u32 *reg;
-	u64 addr;
-	int magic;
-
-	/*
-	 * If it's a DCR based device, use 'd' for native DCRs
-	 * and 'D' for MMIO DCRs.
-	 */
-#ifdef CONFIG_PPC_DCR
-	reg = of_get_property(node, "dcr-reg", NULL);
-	if (reg) {
-#ifdef CONFIG_PPC_DCR_NATIVE
-		dev_set_name(&dev->dev, "d%x.%s", *reg, node->name);
-#else /* CONFIG_PPC_DCR_NATIVE */
-		addr = of_translate_dcr_address(node, *reg, NULL);
-		if (addr != OF_BAD_ADDR) {
-			dev_set_name(&dev->dev, "D%llx.%s",
-				     (unsigned long long)addr, node->name);
-			return;
-		}
-#endif /* !CONFIG_PPC_DCR_NATIVE */
-	}
-#endif /* CONFIG_PPC_DCR */
-
-	/*
-	 * For MMIO, get the physical address
-	 */
-	reg = of_get_property(node, "reg", NULL);
-	if (reg) {
-		addr = of_translate_address(node, reg);
-		if (addr != OF_BAD_ADDR) {
-			dev_set_name(&dev->dev, "%llx.%s",
-				     (unsigned long long)addr, node->name);
-			return;
-		}
-	}
-
-	/*
-	 * No BusID, use the node name and add a globally incremented
-	 * counter (and pray...)
-	 */
-	magic = atomic_add_return(1, &bus_no_reg_magic);
-	dev_set_name(&dev->dev, "%s.%d", node->name, magic - 1);
-}
-
-struct of_device *of_device_alloc(struct device_node *np,
-				  const char *bus_id,
-				  struct device *parent)
-{
-	struct of_device *dev;
-
-	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-	if (!dev)
-		return NULL;
-
-	dev->dev.of_node = of_node_get(np);
-	dev->dev.dma_mask = &dev->archdata.dma_mask;
-	dev->dev.parent = parent;
-	dev->dev.release = of_release_dev;
-
-	if (bus_id)
-		dev_set_name(&dev->dev, "%s", bus_id);
-	else
-		of_device_make_bus_id(dev);
-
-	return dev;
-}
-EXPORT_SYMBOL(of_device_alloc);
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index d9c81e93bdd..ea87a3cf786 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -407,6 +407,93 @@ EXPORT_SYMBOL(of_unregister_driver);
  * mechanism for creating devices from device tree nodes.
  */
 
+/**
+ * of_device_make_bus_id - Use the device node data to assign a unique name
+ * @dev: pointer to device structure that is linked to a device tree node
+ *
+ * This routine will first try using either the dcr-reg or the reg property
+ * value to derive a unique name.  As a last resort it will use the node
+ * name followed by a unique number.
+ */
+static void of_device_make_bus_id(struct device *dev)
+{
+	static atomic_t bus_no_reg_magic;
+	struct device_node *node = dev->of_node;
+	const u32 *reg;
+	u64 addr;
+	int magic;
+
+#ifdef CONFIG_PPC_DCR
+	/*
+	 * If it's a DCR based device, use 'd' for native DCRs
+	 * and 'D' for MMIO DCRs.
+	 */
+	reg = of_get_property(node, "dcr-reg", NULL);
+	if (reg) {
+#ifdef CONFIG_PPC_DCR_NATIVE
+		dev_set_name(dev, "d%x.%s", *reg, node->name);
+#else /* CONFIG_PPC_DCR_NATIVE */
+		u64 addr = of_translate_dcr_address(node, *reg, NULL);
+		if (addr != OF_BAD_ADDR) {
+			dev_set_name(dev, "D%llx.%s",
+				     (unsigned long long)addr, node->name);
+			return;
+		}
+#endif /* !CONFIG_PPC_DCR_NATIVE */
+	}
+#endif /* CONFIG_PPC_DCR */
+
+	/*
+	 * For MMIO, get the physical address
+	 */
+	reg = of_get_property(node, "reg", NULL);
+	if (reg) {
+		addr = of_translate_address(node, reg);
+		if (addr != OF_BAD_ADDR) {
+			dev_set_name(dev, "%llx.%s",
+				     (unsigned long long)addr, node->name);
+			return;
+		}
+	}
+
+	/*
+	 * No BusID, use the node name and add a globally incremented
+	 * counter (and pray...)
+	 */
+	magic = atomic_add_return(1, &bus_no_reg_magic);
+	dev_set_name(dev, "%s.%d", node->name, magic - 1);
+}
+
+/**
+ * of_device_alloc - Allocate and initialize an of_device
+ * @np: device node to assign to device
+ * @bus_id: Name to assign to the device.  May be null to use default name.
+ * @parent: Parent device.
+ */
+struct of_device *of_device_alloc(struct device_node *np,
+				  const char *bus_id,
+				  struct device *parent)
+{
+	struct of_device *dev;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return NULL;
+
+	dev->dev.of_node = of_node_get(np);
+	dev->dev.dma_mask = &dev->archdata.dma_mask;
+	dev->dev.parent = parent;
+	dev->dev.release = of_release_dev;
+
+	if (bus_id)
+		dev_set_name(&dev->dev, "%s", bus_id);
+	else
+		of_device_make_bus_id(&dev->dev);
+
+	return dev;
+}
+EXPORT_SYMBOL(of_device_alloc);
+
 /**
  * of_platform_device_create - Alloc, initialize and register an of_device
  * @np: pointer to node to create device for
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index 4bbba41396e..a51fd30176a 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -60,6 +60,9 @@ static inline void of_unregister_platform_driver(struct of_platform_driver *drv)
 	of_unregister_driver(drv);
 }
 
+extern struct of_device *of_device_alloc(struct device_node *np,
+					 const char *bus_id,
+					 struct device *parent);
 #include <asm/of_platform.h>
 
 extern struct of_device *of_find_device_by_node(struct device_node *np);
-- 
cgit v1.2.3-70-g09d2


From a19e3da5bc5fc6c10ab73f310bea80f3845b4531 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 8 Jun 2010 07:48:16 -0600
Subject: of/gpio: Kill of_gpio_chip and add members directly to gpio_chip

The OF gpio infrastructure is great for describing GPIO connections within
the device tree.  However, using a GPIO binding still requires changes to
the gpio controller just to add an of_gpio structure.  In most cases, the
gpio controller doesn't actually need any special support and the simple
OF gpio mapping function is more than sufficient.  Additionally, the current
scheme of using of_gpio_chip requires a convoluted scheme to maintain
1:1 mappings between of_gpio_chip and gpio_chip instances.

If the struct of_gpio_chip data members were moved into struct gpio_chip,
then it would simplify the processing of OF gpio bindings, and it would
make it trivial to use device tree OF connections on existing gpiolib
controller drivers.

This patch eliminates the of_gpio_chip structure and moves the relevant
fields into struct gpio_chip (conditional on CONFIG_OF_GPIO).  This move
simplifies the existing code and prepares for adding automatic device tree
support to existing drivers.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Cc: Bill Gatliff <bgat@billgatliff.com>
Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Jean Delvare <khali@linux-fr.org>
---
 arch/microblaze/kernel/reset.c                 | 12 +++----
 arch/powerpc/platforms/52xx/mpc52xx_gpio.c     | 32 ++++++++---------
 arch/powerpc/platforms/52xx/mpc52xx_gpt.c      | 29 ++++++++-------
 arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | 15 ++++----
 arch/powerpc/platforms/86xx/gef_gpio.c         | 24 ++++++-------
 arch/powerpc/sysdev/cpm1.c                     | 12 +++----
 arch/powerpc/sysdev/cpm_common.c               |  6 ++--
 arch/powerpc/sysdev/mpc8xxx_gpio.c             |  6 ++--
 arch/powerpc/sysdev/ppc4xx_gpio.c              |  6 ++--
 arch/powerpc/sysdev/qe_lib/gpio.c              | 32 ++++++++---------
 arch/powerpc/sysdev/simple_gpio.c              |  6 ++--
 drivers/gpio/xilinx_gpio.c                     | 16 ++++-----
 drivers/of/gpio.c                              | 50 +++++++++++++-------------
 include/asm-generic/gpio.h                     | 12 +++++++
 include/linux/of_gpio.h                        | 29 +++------------
 15 files changed, 129 insertions(+), 158 deletions(-)

(limited to 'include')

diff --git a/arch/microblaze/kernel/reset.c b/arch/microblaze/kernel/reset.c
index a1721a33042..5476d3caf04 100644
--- a/arch/microblaze/kernel/reset.c
+++ b/arch/microblaze/kernel/reset.c
@@ -24,8 +24,8 @@ static int of_reset_gpio_handle(void)
 	int ret; /* variable which stored handle reset gpio pin */
 	struct device_node *root; /* root node */
 	struct device_node *gpio; /* gpio node */
-	struct of_gpio_chip *of_gc = NULL;
-	enum of_gpio_flags flags ;
+	struct gpio_chip *gc;
+	u32 flags;
 	const void *gpio_spec;
 
 	/* find out root node */
@@ -39,19 +39,19 @@ static int of_reset_gpio_handle(void)
 		goto err0;
 	}
 
-	of_gc = gpio->data;
-	if (!of_gc) {
+	gc = gpio->data;
+	if (!gc) {
 		pr_debug("%s: gpio controller %s isn't registered\n",
 			 root->full_name, gpio->full_name);
 		ret = -ENODEV;
 		goto err1;
 	}
 
-	ret = of_gc->xlate(of_gc, root, gpio_spec, &flags);
+	ret = gc->of_xlate(gc, root, gpio_spec, &flags);
 	if (ret < 0)
 		goto err1;
 
-	ret += of_gc->gc.base;
+	ret += gc->base;
 err1:
 	of_node_put(gpio);
 err0:
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpio.c b/arch/powerpc/platforms/52xx/mpc52xx_gpio.c
index ca5305a5bd6..fd0912eeffe 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpio.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpio.c
@@ -152,21 +152,21 @@ static int __devinit mpc52xx_wkup_gpiochip_probe(struct of_device *ofdev,
 {
 	struct mpc52xx_gpiochip *chip;
 	struct mpc52xx_gpio_wkup __iomem *regs;
-	struct of_gpio_chip *ofchip;
+	struct gpio_chip *gc;
 	int ret;
 
 	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
 	if (!chip)
 		return -ENOMEM;
 
-	ofchip = &chip->mmchip.of_gc;
+	gc = &chip->mmchip.gc;
 
-	ofchip->gpio_cells          = 2;
-	ofchip->gc.ngpio            = 8;
-	ofchip->gc.direction_input  = mpc52xx_wkup_gpio_dir_in;
-	ofchip->gc.direction_output = mpc52xx_wkup_gpio_dir_out;
-	ofchip->gc.get              = mpc52xx_wkup_gpio_get;
-	ofchip->gc.set              = mpc52xx_wkup_gpio_set;
+	gc->of_gpio_n_cells  = 2;
+	gc->ngpio            = 8;
+	gc->direction_input  = mpc52xx_wkup_gpio_dir_in;
+	gc->direction_output = mpc52xx_wkup_gpio_dir_out;
+	gc->get              = mpc52xx_wkup_gpio_get;
+	gc->set              = mpc52xx_wkup_gpio_set;
 
 	ret = of_mm_gpiochip_add(ofdev->dev.of_node, &chip->mmchip);
 	if (ret)
@@ -315,7 +315,7 @@ static int __devinit mpc52xx_simple_gpiochip_probe(struct of_device *ofdev,
 					const struct of_device_id *match)
 {
 	struct mpc52xx_gpiochip *chip;
-	struct of_gpio_chip *ofchip;
+	struct gpio_chip *gc;
 	struct mpc52xx_gpio __iomem *regs;
 	int ret;
 
@@ -323,14 +323,14 @@ static int __devinit mpc52xx_simple_gpiochip_probe(struct of_device *ofdev,
 	if (!chip)
 		return -ENOMEM;
 
-	ofchip = &chip->mmchip.of_gc;
+	gc = &chip->mmchip.gc;
 
-	ofchip->gpio_cells          = 2;
-	ofchip->gc.ngpio            = 32;
-	ofchip->gc.direction_input  = mpc52xx_simple_gpio_dir_in;
-	ofchip->gc.direction_output = mpc52xx_simple_gpio_dir_out;
-	ofchip->gc.get              = mpc52xx_simple_gpio_get;
-	ofchip->gc.set              = mpc52xx_simple_gpio_set;
+	gc->of_gpio_n_cells  = 2;
+	gc->ngpio            = 32;
+	gc->direction_input  = mpc52xx_simple_gpio_dir_in;
+	gc->direction_output = mpc52xx_simple_gpio_dir_out;
+	gc->get              = mpc52xx_simple_gpio_get;
+	gc->set              = mpc52xx_simple_gpio_set;
 
 	ret = of_mm_gpiochip_add(ofdev->dev.of_node, &chip->mmchip);
 	if (ret)
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 46c93578cbf..3f2ee47f1d0 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -78,7 +78,7 @@ MODULE_LICENSE("GPL");
  * @dev: pointer to device structure
  * @regs: virtual address of GPT registers
  * @lock: spinlock to coordinate between different functions.
- * @of_gc: of_gpio_chip instance structure; used when GPIO is enabled
+ * @gc: gpio_chip instance structure; used when GPIO is enabled
  * @irqhost: Pointer to irq_host instance; used when IRQ mode is supported
  * @wdt_mode: only relevant for gpt0: bit 0 (MPC52xx_GPT_CAN_WDT) indicates
  *   if the gpt may be used as wdt, bit 1 (MPC52xx_GPT_IS_WDT) indicates
@@ -94,7 +94,7 @@ struct mpc52xx_gpt_priv {
 	u8 wdt_mode;
 
 #if defined(CONFIG_GPIOLIB)
-	struct of_gpio_chip of_gc;
+	struct gpio_chip gc;
 #endif
 };
 
@@ -280,7 +280,7 @@ mpc52xx_gpt_irq_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
 #if defined(CONFIG_GPIOLIB)
 static inline struct mpc52xx_gpt_priv *gc_to_mpc52xx_gpt(struct gpio_chip *gc)
 {
-	return container_of(to_of_gpio_chip(gc), struct mpc52xx_gpt_priv,of_gc);
+	return container_of(gc, struct mpc52xx_gpt_priv, gc);
 }
 
 static int mpc52xx_gpt_gpio_get(struct gpio_chip *gc, unsigned int gpio)
@@ -336,28 +336,27 @@ mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
 	if (!of_find_property(node, "gpio-controller", NULL))
 		return;
 
-	gpt->of_gc.gc.label = kstrdup(node->full_name, GFP_KERNEL);
-	if (!gpt->of_gc.gc.label) {
+	gpt->gc.label = kstrdup(node->full_name, GFP_KERNEL);
+	if (!gpt->gc.label) {
 		dev_err(gpt->dev, "out of memory\n");
 		return;
 	}
 
-	gpt->of_gc.gpio_cells = 2;
-	gpt->of_gc.gc.ngpio = 1;
-	gpt->of_gc.gc.direction_input  = mpc52xx_gpt_gpio_dir_in;
-	gpt->of_gc.gc.direction_output = mpc52xx_gpt_gpio_dir_out;
-	gpt->of_gc.gc.get = mpc52xx_gpt_gpio_get;
-	gpt->of_gc.gc.set = mpc52xx_gpt_gpio_set;
-	gpt->of_gc.gc.base = -1;
-	gpt->of_gc.xlate = of_gpio_simple_xlate;
-	node->data = &gpt->of_gc;
+	gpt->gc.ngpio = 1;
+	gpt->gc.direction_input  = mpc52xx_gpt_gpio_dir_in;
+	gpt->gc.direction_output = mpc52xx_gpt_gpio_dir_out;
+	gpt->gc.get = mpc52xx_gpt_gpio_get;
+	gpt->gc.set = mpc52xx_gpt_gpio_set;
+	gpt->gc.base = -1;
+	gpt->gc.of_gpio_n_cells = 2;
+	gpt->gc.of_xlate = of_gpio_simple_xlate;
 	of_node_get(node);
 
 	/* Setup external pin in GPIO mode */
 	clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_MS_MASK,
 			MPC52xx_GPT_MODE_MS_GPIO);
 
-	rc = gpiochip_add(&gpt->of_gc.gc);
+	rc = gpiochip_add(&gpt->gc);
 	if (rc)
 		dev_err(gpt->dev, "gpiochip_add() failed; rc=%i\n", rc);
 
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index d119a7c1c17..e49f4bd2f99 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -37,7 +37,7 @@ struct mcu {
 	struct mutex lock;
 	struct device_node *np;
 	struct i2c_client *client;
-	struct of_gpio_chip of_gc;
+	struct gpio_chip gc;
 	u8 reg_ctrl;
 };
 
@@ -56,8 +56,7 @@ static void mcu_power_off(void)
 
 static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
 {
-	struct of_gpio_chip *of_gc = to_of_gpio_chip(gc);
-	struct mcu *mcu = container_of(of_gc, struct mcu, of_gc);
+	struct mcu *mcu = container_of(gc, struct mcu, gc);
 	u8 bit = 1 << (4 + gpio);
 
 	mutex_lock(&mcu->lock);
@@ -79,8 +78,7 @@ static int mcu_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
 static int mcu_gpiochip_add(struct mcu *mcu)
 {
 	struct device_node *np;
-	struct of_gpio_chip *of_gc = &mcu->of_gc;
-	struct gpio_chip *gc = &of_gc->gc;
+	struct gpio_chip *gc = &mcu->gc;
 	int ret;
 
 	np = of_find_compatible_node(NULL, NULL, "fsl,mcu-mpc8349emitx");
@@ -94,10 +92,9 @@ static int mcu_gpiochip_add(struct mcu *mcu)
 	gc->base = -1;
 	gc->set = mcu_gpio_set;
 	gc->direction_output = mcu_gpio_dir_out;
-	of_gc->gpio_cells = 2;
-	of_gc->xlate = of_gpio_simple_xlate;
+	gc->of_gpio_n_cells = 2;
+	gc->of_xlate = of_gpio_simple_xlate;
 
-	np->data = of_gc;
 	mcu->np = np;
 
 	/*
@@ -114,7 +111,7 @@ static int mcu_gpiochip_remove(struct mcu *mcu)
 {
 	int ret;
 
-	ret = gpiochip_remove(&mcu->of_gc.gc);
+	ret = gpiochip_remove(&mcu->gc);
 	if (ret)
 		return ret;
 	of_node_put(mcu->np);
diff --git a/arch/powerpc/platforms/86xx/gef_gpio.c b/arch/powerpc/platforms/86xx/gef_gpio.c
index b8cb08dbd89..4ff7b1e7bba 100644
--- a/arch/powerpc/platforms/86xx/gef_gpio.c
+++ b/arch/powerpc/platforms/86xx/gef_gpio.c
@@ -118,12 +118,12 @@ static int __init gef_gpio_init(void)
 		}
 
 		/* Setup pointers to chip functions */
-		gef_gpio_chip->of_gc.gpio_cells = 2;
-		gef_gpio_chip->of_gc.gc.ngpio = 19;
-		gef_gpio_chip->of_gc.gc.direction_input = gef_gpio_dir_in;
-		gef_gpio_chip->of_gc.gc.direction_output = gef_gpio_dir_out;
-		gef_gpio_chip->of_gc.gc.get = gef_gpio_get;
-		gef_gpio_chip->of_gc.gc.set = gef_gpio_set;
+		gef_gpio_chip->gc.of_gpio_n_cells = 2;
+		gef_gpio_chip->gc.ngpio = 19;
+		gef_gpio_chip->gc.direction_input = gef_gpio_dir_in;
+		gef_gpio_chip->gc.direction_output = gef_gpio_dir_out;
+		gef_gpio_chip->gc.get = gef_gpio_get;
+		gef_gpio_chip->gc.set = gef_gpio_set;
 
 		/* This function adds a memory mapped GPIO chip */
 		retval = of_mm_gpiochip_add(np, gef_gpio_chip);
@@ -146,12 +146,12 @@ static int __init gef_gpio_init(void)
 		}
 
 		/* Setup pointers to chip functions */
-		gef_gpio_chip->of_gc.gpio_cells = 2;
-		gef_gpio_chip->of_gc.gc.ngpio = 6;
-		gef_gpio_chip->of_gc.gc.direction_input = gef_gpio_dir_in;
-		gef_gpio_chip->of_gc.gc.direction_output = gef_gpio_dir_out;
-		gef_gpio_chip->of_gc.gc.get = gef_gpio_get;
-		gef_gpio_chip->of_gc.gc.set = gef_gpio_set;
+		gef_gpio_chip->gc.of_gpio_n_cells = 2;
+		gef_gpio_chip->gc.ngpio = 6;
+		gef_gpio_chip->gc.direction_input = gef_gpio_dir_in;
+		gef_gpio_chip->gc.direction_output = gef_gpio_dir_out;
+		gef_gpio_chip->gc.get = gef_gpio_get;
+		gef_gpio_chip->gc.set = gef_gpio_set;
 
 		/* This function adds a memory mapped GPIO chip */
 		retval = of_mm_gpiochip_add(np, gef_gpio_chip);
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index 8d103ca6d6a..d5cf7d4ccf8 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -621,7 +621,6 @@ int cpm1_gpiochip_add16(struct device_node *np)
 {
 	struct cpm1_gpio16_chip *cpm1_gc;
 	struct of_mm_gpio_chip *mm_gc;
-	struct of_gpio_chip *of_gc;
 	struct gpio_chip *gc;
 
 	cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL);
@@ -631,11 +630,10 @@ int cpm1_gpiochip_add16(struct device_node *np)
 	spin_lock_init(&cpm1_gc->lock);
 
 	mm_gc = &cpm1_gc->mm_gc;
-	of_gc = &mm_gc->of_gc;
-	gc = &of_gc->gc;
+	gc = &mm_gc->gc;
 
 	mm_gc->save_regs = cpm1_gpio16_save_regs;
-	of_gc->gpio_cells = 2;
+	gc->of_gpio_n_cells = 2;
 	gc->ngpio = 16;
 	gc->direction_input = cpm1_gpio16_dir_in;
 	gc->direction_output = cpm1_gpio16_dir_out;
@@ -745,7 +743,6 @@ int cpm1_gpiochip_add32(struct device_node *np)
 {
 	struct cpm1_gpio32_chip *cpm1_gc;
 	struct of_mm_gpio_chip *mm_gc;
-	struct of_gpio_chip *of_gc;
 	struct gpio_chip *gc;
 
 	cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL);
@@ -755,11 +752,10 @@ int cpm1_gpiochip_add32(struct device_node *np)
 	spin_lock_init(&cpm1_gc->lock);
 
 	mm_gc = &cpm1_gc->mm_gc;
-	of_gc = &mm_gc->of_gc;
-	gc = &of_gc->gc;
+	gc = &mm_gc->gc;
 
 	mm_gc->save_regs = cpm1_gpio32_save_regs;
-	of_gc->gpio_cells = 2;
+	gc->of_gpio_n_cells = 2;
 	gc->ngpio = 32;
 	gc->direction_input = cpm1_gpio32_dir_in;
 	gc->direction_output = cpm1_gpio32_dir_out;
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index 88b9812c854..67e9b47dcf8 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -325,7 +325,6 @@ int cpm2_gpiochip_add32(struct device_node *np)
 {
 	struct cpm2_gpio32_chip *cpm2_gc;
 	struct of_mm_gpio_chip *mm_gc;
-	struct of_gpio_chip *of_gc;
 	struct gpio_chip *gc;
 
 	cpm2_gc = kzalloc(sizeof(*cpm2_gc), GFP_KERNEL);
@@ -335,11 +334,10 @@ int cpm2_gpiochip_add32(struct device_node *np)
 	spin_lock_init(&cpm2_gc->lock);
 
 	mm_gc = &cpm2_gc->mm_gc;
-	of_gc = &mm_gc->of_gc;
-	gc = &of_gc->gc;
+	gc = &mm_gc->gc;
 
 	mm_gc->save_regs = cpm2_gpio32_save_regs;
-	of_gc->gpio_cells = 2;
+	gc->of_gpio_n_cells = 2;
 	gc->ngpio = 32;
 	gc->direction_input = cpm2_gpio32_dir_in;
 	gc->direction_output = cpm2_gpio32_dir_out;
diff --git a/arch/powerpc/sysdev/mpc8xxx_gpio.c b/arch/powerpc/sysdev/mpc8xxx_gpio.c
index 83f519655fa..ec8fcd42101 100644
--- a/arch/powerpc/sysdev/mpc8xxx_gpio.c
+++ b/arch/powerpc/sysdev/mpc8xxx_gpio.c
@@ -257,7 +257,6 @@ static void __init mpc8xxx_add_controller(struct device_node *np)
 {
 	struct mpc8xxx_gpio_chip *mpc8xxx_gc;
 	struct of_mm_gpio_chip *mm_gc;
-	struct of_gpio_chip *of_gc;
 	struct gpio_chip *gc;
 	unsigned hwirq;
 	int ret;
@@ -271,11 +270,10 @@ static void __init mpc8xxx_add_controller(struct device_node *np)
 	spin_lock_init(&mpc8xxx_gc->lock);
 
 	mm_gc = &mpc8xxx_gc->mm_gc;
-	of_gc = &mm_gc->of_gc;
-	gc = &of_gc->gc;
+	gc = &mm_gc->gc;
 
 	mm_gc->save_regs = mpc8xxx_gpio_save_regs;
-	of_gc->gpio_cells = 2;
+	gc->of_gpio_n_cells = 2;
 	gc->ngpio = MPC8XXX_GPIO_PINS;
 	gc->direction_input = mpc8xxx_gpio_dir_in;
 	gc->direction_output = mpc8xxx_gpio_dir_out;
diff --git a/arch/powerpc/sysdev/ppc4xx_gpio.c b/arch/powerpc/sysdev/ppc4xx_gpio.c
index 3812fc366be..42e7a5eea66 100644
--- a/arch/powerpc/sysdev/ppc4xx_gpio.c
+++ b/arch/powerpc/sysdev/ppc4xx_gpio.c
@@ -181,7 +181,6 @@ static int __init ppc4xx_add_gpiochips(void)
 		int ret;
 		struct ppc4xx_gpio_chip *ppc4xx_gc;
 		struct of_mm_gpio_chip *mm_gc;
-		struct of_gpio_chip *of_gc;
 		struct gpio_chip *gc;
 
 		ppc4xx_gc = kzalloc(sizeof(*ppc4xx_gc), GFP_KERNEL);
@@ -193,10 +192,9 @@ static int __init ppc4xx_add_gpiochips(void)
 		spin_lock_init(&ppc4xx_gc->lock);
 
 		mm_gc = &ppc4xx_gc->mm_gc;
-		of_gc = &mm_gc->of_gc;
-		gc = &of_gc->gc;
+		gc = &mm_gc->gc;
 
-		of_gc->gpio_cells = 2;
+		gc->of_gpio_n_cells = 2;
 		gc->ngpio = 32;
 		gc->direction_input = ppc4xx_gpio_dir_in;
 		gc->direction_output = ppc4xx_gpio_dir_out;
diff --git a/arch/powerpc/sysdev/qe_lib/gpio.c b/arch/powerpc/sysdev/qe_lib/gpio.c
index dc8f8d61807..194478c2f4b 100644
--- a/arch/powerpc/sysdev/qe_lib/gpio.c
+++ b/arch/powerpc/sysdev/qe_lib/gpio.c
@@ -138,8 +138,8 @@ struct qe_pin {
 struct qe_pin *qe_pin_request(struct device_node *np, int index)
 {
 	struct qe_pin *qe_pin;
-	struct device_node *gc;
-	struct of_gpio_chip *of_gc = NULL;
+	struct device_node *gpio_np;
+	struct gpio_chip *gc;
 	struct of_mm_gpio_chip *mm_gc;
 	struct qe_gpio_chip *qe_gc;
 	int err;
@@ -155,40 +155,40 @@ struct qe_pin *qe_pin_request(struct device_node *np, int index)
 	}
 
 	err = of_parse_phandles_with_args(np, "gpios", "#gpio-cells", index,
-					  &gc, &gpio_spec);
+					  &gpio_np, &gpio_spec);
 	if (err) {
 		pr_debug("%s: can't parse gpios property\n", __func__);
 		goto err0;
 	}
 
-	if (!of_device_is_compatible(gc, "fsl,mpc8323-qe-pario-bank")) {
+	if (!of_device_is_compatible(gpio_np, "fsl,mpc8323-qe-pario-bank")) {
 		pr_debug("%s: tried to get a non-qe pin\n", __func__);
 		err = -EINVAL;
 		goto err1;
 	}
 
-	of_gc = gc->data;
-	if (!of_gc) {
+	gc = gpio_np->data;
+	if (!gc) {
 		pr_debug("%s: gpio controller %s isn't registered\n",
-			 np->full_name, gc->full_name);
+			 np->full_name, gpio_np->full_name);
 		err = -ENODEV;
 		goto err1;
 	}
 
-	gpio_cells = of_get_property(gc, "#gpio-cells", &size);
+	gpio_cells = of_get_property(gpio_np, "#gpio-cells", &size);
 	if (!gpio_cells || size != sizeof(*gpio_cells) ||
-			*gpio_cells != of_gc->gpio_cells) {
+			*gpio_cells != gc->of_gpio_n_cells) {
 		pr_debug("%s: wrong #gpio-cells for %s\n",
-			 np->full_name, gc->full_name);
+			 np->full_name, gpio_np->full_name);
 		err = -EINVAL;
 		goto err1;
 	}
 
-	err = of_gc->xlate(of_gc, np, gpio_spec, NULL);
+	err = gc->of_xlate(gc, np, gpio_spec, NULL);
 	if (err < 0)
 		goto err1;
 
-	mm_gc = to_of_mm_gpio_chip(&of_gc->gc);
+	mm_gc = to_of_mm_gpio_chip(gc);
 	qe_gc = to_qe_gpio_chip(mm_gc);
 
 	spin_lock_irqsave(&qe_gc->lock, flags);
@@ -206,7 +206,7 @@ struct qe_pin *qe_pin_request(struct device_node *np, int index)
 	if (!err)
 		return qe_pin;
 err1:
-	of_node_put(gc);
+	of_node_put(gpio_np);
 err0:
 	kfree(qe_pin);
 	pr_debug("%s failed with status %d\n", __func__, err);
@@ -307,7 +307,6 @@ static int __init qe_add_gpiochips(void)
 		int ret;
 		struct qe_gpio_chip *qe_gc;
 		struct of_mm_gpio_chip *mm_gc;
-		struct of_gpio_chip *of_gc;
 		struct gpio_chip *gc;
 
 		qe_gc = kzalloc(sizeof(*qe_gc), GFP_KERNEL);
@@ -319,11 +318,10 @@ static int __init qe_add_gpiochips(void)
 		spin_lock_init(&qe_gc->lock);
 
 		mm_gc = &qe_gc->mm_gc;
-		of_gc = &mm_gc->of_gc;
-		gc = &of_gc->gc;
+		gc = &mm_gc->gc;
 
 		mm_gc->save_regs = qe_gpio_save_regs;
-		of_gc->gpio_cells = 2;
+		gc->of_gpio_n_cells = 2;
 		gc->ngpio = QE_PIO_PINS;
 		gc->direction_input = qe_gpio_dir_in;
 		gc->direction_output = qe_gpio_dir_out;
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
index d5fb173e588..b7559aa0c16 100644
--- a/arch/powerpc/sysdev/simple_gpio.c
+++ b/arch/powerpc/sysdev/simple_gpio.c
@@ -91,7 +91,6 @@ static int __init u8_simple_gpiochip_add(struct device_node *np)
 	int ret;
 	struct u8_gpio_chip *u8_gc;
 	struct of_mm_gpio_chip *mm_gc;
-	struct of_gpio_chip *of_gc;
 	struct gpio_chip *gc;
 
 	u8_gc = kzalloc(sizeof(*u8_gc), GFP_KERNEL);
@@ -101,11 +100,10 @@ static int __init u8_simple_gpiochip_add(struct device_node *np)
 	spin_lock_init(&u8_gc->lock);
 
 	mm_gc = &u8_gc->mm_gc;
-	of_gc = &mm_gc->of_gc;
-	gc = &of_gc->gc;
+	gc = &mm_gc->gc;
 
 	mm_gc->save_regs = u8_gpio_save_regs;
-	of_gc->gpio_cells = 2;
+	gc->of_gpio_n_cells = 2;
 	gc->ngpio = 8;
 	gc->direction_input = u8_gpio_dir_in;
 	gc->direction_output = u8_gpio_dir_out;
diff --git a/drivers/gpio/xilinx_gpio.c b/drivers/gpio/xilinx_gpio.c
index b8fa65b5bfc..2993c40b48e 100644
--- a/drivers/gpio/xilinx_gpio.c
+++ b/drivers/gpio/xilinx_gpio.c
@@ -161,14 +161,12 @@ static void xgpio_save_regs(struct of_mm_gpio_chip *mm_gc)
 static int __devinit xgpio_of_probe(struct device_node *np)
 {
 	struct xgpio_instance *chip;
-	struct of_gpio_chip *ofchip;
 	int status = 0;
 	const u32 *tree_info;
 
 	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
 	if (!chip)
 		return -ENOMEM;
-	ofchip = &chip->mmchip.of_gc;
 
 	/* Update GPIO state shadow register with default value */
 	tree_info = of_get_property(np, "xlnx,dout-default", NULL);
@@ -182,21 +180,21 @@ static int __devinit xgpio_of_probe(struct device_node *np)
 		chip->gpio_dir = *tree_info;
 
 	/* Check device node and parent device node for device width */
-	ofchip->gc.ngpio = 32; /* By default assume full GPIO controller */
+	chip->mmchip.gc.ngpio = 32; /* By default assume full GPIO controller */
 	tree_info = of_get_property(np, "xlnx,gpio-width", NULL);
 	if (!tree_info)
 		tree_info = of_get_property(np->parent,
 					    "xlnx,gpio-width", NULL);
 	if (tree_info)
-		ofchip->gc.ngpio = *tree_info;
+		chip->mmchip.gc.ngpio = *tree_info;
 
 	spin_lock_init(&chip->gpio_lock);
 
-	ofchip->gpio_cells = 2;
-	ofchip->gc.direction_input = xgpio_dir_in;
-	ofchip->gc.direction_output = xgpio_dir_out;
-	ofchip->gc.get = xgpio_get;
-	ofchip->gc.set = xgpio_set;
+	chip->mmchip.gc.of_gpio_n_cells = 2;
+	chip->mmchip.gc.direction_input = xgpio_dir_in;
+	chip->mmchip.gc.direction_output = xgpio_dir_out;
+	chip->mmchip.gc.get = xgpio_get;
+	chip->mmchip.gc.set = xgpio_set;
 
 	chip->mmchip.save_regs = xgpio_save_regs;
 
diff --git a/drivers/of/gpio.c b/drivers/of/gpio.c
index a1b31a4abae..fde53a3a45a 100644
--- a/drivers/of/gpio.c
+++ b/drivers/of/gpio.c
@@ -33,32 +33,32 @@ int of_get_gpio_flags(struct device_node *np, int index,
 		      enum of_gpio_flags *flags)
 {
 	int ret;
-	struct device_node *gc;
-	struct of_gpio_chip *of_gc = NULL;
+	struct device_node *gpio_np;
+	struct gpio_chip *gc;
 	int size;
 	const void *gpio_spec;
 	const __be32 *gpio_cells;
 
 	ret = of_parse_phandles_with_args(np, "gpios", "#gpio-cells", index,
-					  &gc, &gpio_spec);
+					  &gpio_np, &gpio_spec);
 	if (ret) {
 		pr_debug("%s: can't parse gpios property\n", __func__);
 		goto err0;
 	}
 
-	of_gc = gc->data;
-	if (!of_gc) {
+	gc = gpio_np->data;
+	if (!gc) {
 		pr_debug("%s: gpio controller %s isn't registered\n",
-			 np->full_name, gc->full_name);
+			 np->full_name, gpio_np->full_name);
 		ret = -ENODEV;
 		goto err1;
 	}
 
-	gpio_cells = of_get_property(gc, "#gpio-cells", &size);
+	gpio_cells = of_get_property(gpio_np, "#gpio-cells", &size);
 	if (!gpio_cells || size != sizeof(*gpio_cells) ||
-			be32_to_cpup(gpio_cells) != of_gc->gpio_cells) {
+			be32_to_cpup(gpio_cells) != gc->of_gpio_n_cells) {
 		pr_debug("%s: wrong #gpio-cells for %s\n",
-			 np->full_name, gc->full_name);
+			 np->full_name, gpio_np->full_name);
 		ret = -EINVAL;
 		goto err1;
 	}
@@ -67,13 +67,13 @@ int of_get_gpio_flags(struct device_node *np, int index,
 	if (flags)
 		*flags = 0;
 
-	ret = of_gc->xlate(of_gc, np, gpio_spec, flags);
+	ret = gc->of_xlate(gc, np, gpio_spec, flags);
 	if (ret < 0)
 		goto err1;
 
-	ret += of_gc->gc.base;
+	ret += gc->base;
 err1:
-	of_node_put(gc);
+	of_node_put(gpio_np);
 err0:
 	pr_debug("%s exited with status %d\n", __func__, ret);
 	return ret;
@@ -116,7 +116,7 @@ EXPORT_SYMBOL(of_gpio_count);
 
 /**
  * of_gpio_simple_xlate - translate gpio_spec to the GPIO number and flags
- * @of_gc:	pointer to the of_gpio_chip structure
+ * @gc:		pointer to the gpio_chip structure
  * @np:		device node of the GPIO chip
  * @gpio_spec:	gpio specifier as found in the device tree
  * @flags:	a flags pointer to fill in
@@ -125,8 +125,8 @@ EXPORT_SYMBOL(of_gpio_count);
  * gpio chips. This function performs only one sanity check: whether gpio
  * is less than ngpios (that is specified in the gpio_chip).
  */
-int of_gpio_simple_xlate(struct of_gpio_chip *of_gc, struct device_node *np,
-			 const void *gpio_spec, enum of_gpio_flags *flags)
+int of_gpio_simple_xlate(struct gpio_chip *gc, struct device_node *np,
+			 const void *gpio_spec, u32 *flags)
 {
 	const __be32 *gpio = gpio_spec;
 	const u32 n = be32_to_cpup(gpio);
@@ -137,12 +137,12 @@ int of_gpio_simple_xlate(struct of_gpio_chip *of_gc, struct device_node *np,
 	 * number and the flags from a single gpio cell -- this is possible,
 	 * but not recommended).
 	 */
-	if (of_gc->gpio_cells < 2) {
+	if (gc->of_gpio_n_cells < 2) {
 		WARN_ON(1);
 		return -EINVAL;
 	}
 
-	if (n > of_gc->gc.ngpio)
+	if (n > gc->ngpio)
 		return -EINVAL;
 
 	if (flags)
@@ -161,10 +161,8 @@ EXPORT_SYMBOL(of_gpio_simple_xlate);
  *
  * 1) In the gpio_chip structure:
  *    - all the callbacks
- *
- * 2) In the of_gpio_chip structure:
- *    - gpio_cells
- *    - xlate callback (optional)
+ *    - of_gpio_n_cells
+ *    - of_xlate callback (optional)
  *
  * 3) In the of_mm_gpio_chip structure:
  *    - save_regs callback (optional)
@@ -177,8 +175,7 @@ int of_mm_gpiochip_add(struct device_node *np,
 		       struct of_mm_gpio_chip *mm_gc)
 {
 	int ret = -ENOMEM;
-	struct of_gpio_chip *of_gc = &mm_gc->of_gc;
-	struct gpio_chip *gc = &of_gc->gc;
+	struct gpio_chip *gc = &mm_gc->gc;
 
 	gc->label = kstrdup(np->full_name, GFP_KERNEL);
 	if (!gc->label)
@@ -190,13 +187,14 @@ int of_mm_gpiochip_add(struct device_node *np,
 
 	gc->base = -1;
 
-	if (!of_gc->xlate)
-		of_gc->xlate = of_gpio_simple_xlate;
+	if (!gc->of_xlate)
+		gc->of_xlate = of_gpio_simple_xlate;
 
 	if (mm_gc->save_regs)
 		mm_gc->save_regs(mm_gc);
 
-	np->data = of_gc;
+	np->data = &mm_gc->gc;
+	mm_gc->gc.of_node = np;
 
 	ret = gpiochip_add(gc);
 	if (ret)
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 4f3d75e1ad3..af2544ef0b5 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -31,6 +31,7 @@ static inline int gpio_is_valid(int number)
 struct device;
 struct seq_file;
 struct module;
+struct device_node;
 
 /**
  * struct gpio_chip - abstract a GPIO controller
@@ -106,6 +107,17 @@ struct gpio_chip {
 	const char		*const *names;
 	unsigned		can_sleep:1;
 	unsigned		exported:1;
+
+#if defined(CONFIG_OF_GPIO)
+	/*
+	 * If CONFIG_OF is enabled, then all GPIO controllers described in the
+	 * device tree automatically may have an OF translation
+	 */
+	struct device_node *of_node;
+	int of_gpio_n_cells;
+	int (*of_xlate)(struct gpio_chip *gc, struct device_node *np,
+		        const void *gpio_spec, u32 *flags);
+#endif
 };
 
 extern const char *gpiochip_is_requested(struct gpio_chip *chip,
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index fc2472c3c25..460d6810c5e 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -32,35 +32,18 @@ enum of_gpio_flags {
 
 #ifdef CONFIG_OF_GPIO
 
-/*
- * Generic OF GPIO chip
- */
-struct of_gpio_chip {
-	struct gpio_chip gc;
-	int gpio_cells;
-	int (*xlate)(struct of_gpio_chip *of_gc, struct device_node *np,
-		     const void *gpio_spec, enum of_gpio_flags *flags);
-};
-
-static inline struct of_gpio_chip *to_of_gpio_chip(struct gpio_chip *gc)
-{
-	return container_of(gc, struct of_gpio_chip, gc);
-}
-
 /*
  * OF GPIO chip for memory mapped banks
  */
 struct of_mm_gpio_chip {
-	struct of_gpio_chip of_gc;
+	struct gpio_chip gc;
 	void (*save_regs)(struct of_mm_gpio_chip *mm_gc);
 	void __iomem *regs;
 };
 
 static inline struct of_mm_gpio_chip *to_of_mm_gpio_chip(struct gpio_chip *gc)
 {
-	struct of_gpio_chip *of_gc = to_of_gpio_chip(gc);
-
-	return container_of(of_gc, struct of_mm_gpio_chip, of_gc);
+	return container_of(gc, struct of_mm_gpio_chip, gc);
 }
 
 extern int of_get_gpio_flags(struct device_node *np, int index,
@@ -69,11 +52,9 @@ extern unsigned int of_gpio_count(struct device_node *np);
 
 extern int of_mm_gpiochip_add(struct device_node *np,
 			      struct of_mm_gpio_chip *mm_gc);
-extern int of_gpio_simple_xlate(struct of_gpio_chip *of_gc,
-				struct device_node *np,
-				const void *gpio_spec,
-				enum of_gpio_flags *flags);
-#else
+extern int of_gpio_simple_xlate(struct gpio_chip *gc, struct device_node *np,
+				const void *gpio_spec, u32 *flags);
+#else /* CONFIG_OF_GPIO */
 
 /* Drivers may not strictly depend on the GPIO support, so let them link. */
 static inline int of_get_gpio_flags(struct device_node *np, int index,
-- 
cgit v1.2.3-70-g09d2


From 594fa265e084073443390c5b93d5410fd28e9bcd Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 8 Jun 2010 07:48:16 -0600
Subject: of/gpio: stop using device_node data pointer to find gpio_chip

Currently the kernel uses the struct device_node.data pointer to resolve
a struct gpio_chip pointer from a device tree node.  However, the .data
member doesn't provide any type checking and there aren't any rules
enforced on what it should be used for.  There's no guarantee that the
data stored in it actually points to a gpio_chip.

Instead of relying on the .data pointer, this patch modifies the code
to add a lookup function which scans through the registered gpio_chips
and returns the gpio_chip that has a pointer to the specified
device_node.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Anton Vorontsov <avorontsov@ru.mvista.com>
CC: Grant Likely <grant.likely@secretlab.ca>
CC: David Brownell <dbrownell@users.sourceforge.net>
CC: Bill Gatliff <bgat@billgatliff.com>
CC: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Jean Delvare <khali@linux-fr.org>
CC: linux-kernel@vger.kernel.org
CC: devicetree-discuss@lists.ozlabs.org
---
 arch/microblaze/kernel/reset.c                 |  2 +-
 arch/powerpc/platforms/52xx/mpc52xx_gpt.c      |  1 +
 arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | 23 +++---------------
 arch/powerpc/sysdev/qe_lib/gpio.c              |  2 +-
 drivers/gpio/gpiolib.c                         | 32 ++++++++++++++++++++++++++
 drivers/of/gpio.c                              | 15 +++++++++---
 include/asm-generic/gpio.h                     |  3 +++
 include/linux/of_gpio.h                        |  3 +++
 8 files changed, 56 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/arch/microblaze/kernel/reset.c b/arch/microblaze/kernel/reset.c
index 5476d3caf04..bd8ccab5cef 100644
--- a/arch/microblaze/kernel/reset.c
+++ b/arch/microblaze/kernel/reset.c
@@ -39,7 +39,7 @@ static int of_reset_gpio_handle(void)
 		goto err0;
 	}
 
-	gc = gpio->data;
+	gc = of_node_to_gpiochip(gpio);
 	if (!gc) {
 		pr_debug("%s: gpio controller %s isn't registered\n",
 			 root->full_name, gpio->full_name);
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 3f2ee47f1d0..6e82bd27132 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -350,6 +350,7 @@ mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
 	gpt->gc.base = -1;
 	gpt->gc.of_gpio_n_cells = 2;
 	gpt->gc.of_xlate = of_gpio_simple_xlate;
+	gpt->gc.of_node = node;
 	of_node_get(node);
 
 	/* Setup external pin in GPIO mode */
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index e49f4bd2f99..f0dbace6185 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -35,7 +35,6 @@
 
 struct mcu {
 	struct mutex lock;
-	struct device_node *np;
 	struct i2c_client *client;
 	struct gpio_chip gc;
 	u8 reg_ctrl;
@@ -79,7 +78,6 @@ static int mcu_gpiochip_add(struct mcu *mcu)
 {
 	struct device_node *np;
 	struct gpio_chip *gc = &mcu->gc;
-	int ret;
 
 	np = of_find_compatible_node(NULL, NULL, "fsl,mcu-mpc8349emitx");
 	if (!np)
@@ -94,29 +92,14 @@ static int mcu_gpiochip_add(struct mcu *mcu)
 	gc->direction_output = mcu_gpio_dir_out;
 	gc->of_gpio_n_cells = 2;
 	gc->of_xlate = of_gpio_simple_xlate;
+	gc->of_node = np;
 
-	mcu->np = np;
-
-	/*
-	 * We don't want to lose the node, its ->data and ->full_name...
-	 * So, if succeeded, we don't put the node here.
-	 */
-	ret = gpiochip_add(gc);
-	if (ret)
-		of_node_put(np);
-	return ret;
+	return gpiochip_add(gc);
 }
 
 static int mcu_gpiochip_remove(struct mcu *mcu)
 {
-	int ret;
-
-	ret = gpiochip_remove(&mcu->gc);
-	if (ret)
-		return ret;
-	of_node_put(mcu->np);
-
-	return 0;
+	return gpiochip_remove(&mcu->gc);
 }
 
 static int __devinit mcu_probe(struct i2c_client *client,
diff --git a/arch/powerpc/sysdev/qe_lib/gpio.c b/arch/powerpc/sysdev/qe_lib/gpio.c
index 194478c2f4b..32e9440010a 100644
--- a/arch/powerpc/sysdev/qe_lib/gpio.c
+++ b/arch/powerpc/sysdev/qe_lib/gpio.c
@@ -167,7 +167,7 @@ struct qe_pin *qe_pin_request(struct device_node *np, int index)
 		goto err1;
 	}
 
-	gc = gpio_np->data;
+	gc = of_node_to_gpiochip(gpio_np);
 	if (!gc) {
 		pr_debug("%s: gpio controller %s isn't registered\n",
 			 np->full_name, gpio_np->full_name);
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 713ca0e37f2..73fd328f6fe 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1153,6 +1153,38 @@ int gpiochip_remove(struct gpio_chip *chip)
 }
 EXPORT_SYMBOL_GPL(gpiochip_remove);
 
+/**
+ * gpiochip_find() - iterator for locating a specific gpio_chip
+ * @data: data to pass to match function
+ * @match: Callback function to check gpio_chip
+ *
+ * Similar to bus_find_device.  It returns a reference to a gpio_chip as
+ * determined by a user supplied @match callback.  The callback should return
+ * 0 if the device doesn't match and non-zero if it does.  If the callback
+ * returns non-zero, this function will return to the caller and not iterate
+ * over any more gpio_chips.
+ */
+struct gpio_chip *gpiochip_find(void *data,
+				int (*match)(struct gpio_chip *chip, void *data))
+{
+	struct gpio_chip *chip = NULL;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+	for (i = 0; i < ARCH_NR_GPIOS; i++) {
+		if (!gpio_desc[i].chip)
+			continue;
+
+		if (match(gpio_desc[i].chip, data)) {
+			chip = gpio_desc[i].chip;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&gpio_lock, flags);
+
+	return chip;
+}
 
 /* These "optional" allocation calls help prevent drivers from stomping
  * on each other, and help provide better diagnostics in debugfs.
diff --git a/drivers/of/gpio.c b/drivers/of/gpio.c
index fde53a3a45a..c8618d3282c 100644
--- a/drivers/of/gpio.c
+++ b/drivers/of/gpio.c
@@ -46,7 +46,7 @@ int of_get_gpio_flags(struct device_node *np, int index,
 		goto err0;
 	}
 
-	gc = gpio_np->data;
+	gc = of_node_to_gpiochip(gpio_np);
 	if (!gc) {
 		pr_debug("%s: gpio controller %s isn't registered\n",
 			 np->full_name, gpio_np->full_name);
@@ -193,7 +193,6 @@ int of_mm_gpiochip_add(struct device_node *np,
 	if (mm_gc->save_regs)
 		mm_gc->save_regs(mm_gc);
 
-	np->data = &mm_gc->gc;
 	mm_gc->gc.of_node = np;
 
 	ret = gpiochip_add(gc);
@@ -207,7 +206,6 @@ int of_mm_gpiochip_add(struct device_node *np,
 		 np->full_name, gc->base);
 	return 0;
 err2:
-	np->data = NULL;
 	iounmap(mm_gc->regs);
 err1:
 	kfree(gc->label);
@@ -217,3 +215,14 @@ err0:
 	return ret;
 }
 EXPORT_SYMBOL(of_mm_gpiochip_add);
+
+/* Private function for resolving node pointer to gpio_chip */
+static int of_gpiochip_is_match(struct gpio_chip *chip, void *data)
+{
+	return chip->of_node == data;
+}
+
+struct gpio_chip *of_node_to_gpiochip(struct device_node *np)
+{
+	return gpiochip_find(np, of_gpiochip_is_match);
+}
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index af2544ef0b5..c7376bf80b0 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -127,6 +127,9 @@ extern int __must_check gpiochip_reserve(int start, int ngpio);
 /* add/remove chips */
 extern int gpiochip_add(struct gpio_chip *chip);
 extern int __must_check gpiochip_remove(struct gpio_chip *chip);
+extern struct gpio_chip *gpiochip_find(void *data,
+					int (*match)(struct gpio_chip *chip,
+						     void *data));
 
 
 /* Always use the library code for GPIO management calls,
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index 460d6810c5e..1020587efed 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -54,6 +54,9 @@ extern int of_mm_gpiochip_add(struct device_node *np,
 			      struct of_mm_gpio_chip *mm_gc);
 extern int of_gpio_simple_xlate(struct gpio_chip *gc, struct device_node *np,
 				const void *gpio_spec, u32 *flags);
+
+extern struct gpio_chip *of_node_to_gpiochip(struct device_node *np);
+
 #else /* CONFIG_OF_GPIO */
 
 /* Drivers may not strictly depend on the GPIO support, so let them link. */
-- 
cgit v1.2.3-70-g09d2


From 391c970c0dd1100e3b9e1681f7d0f20aac35455a Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 8 Jun 2010 07:48:17 -0600
Subject: of/gpio: add default of_xlate function if device has a node pointer

Implement generic OF gpio hooks and thus make device-enabled GPIO chips
(i.e.  the ones that have gpio_chip->dev specified) automatically attach
to the OpenFirmware subsystem.  This means that we can now handle I2C and
SPI GPIO chips almost* transparently.

* "Almost" because some chips still require platform data, and for these
  chips OF-glue is still needed, though with this change the glue will
  be much smaller.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Cc: Bill Gatliff <bgat@billgatliff.com>
Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
CC: linux-kernel@vger.kernel.org
CC: devicetree-discuss@lists.ozlabs.org
---
 arch/powerpc/platforms/52xx/mpc52xx_gpio.c     |  2 --
 arch/powerpc/platforms/52xx/mpc52xx_gpt.c      |  3 ---
 arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c |  2 --
 arch/powerpc/sysdev/cpm1.c                     |  2 --
 arch/powerpc/sysdev/cpm_common.c               |  1 -
 arch/powerpc/sysdev/mpc8xxx_gpio.c             |  1 -
 arch/powerpc/sysdev/ppc4xx_gpio.c              |  1 -
 arch/powerpc/sysdev/qe_lib/gpio.c              |  1 -
 arch/powerpc/sysdev/simple_gpio.c              |  1 -
 drivers/gpio/gpiolib.c                         |  5 ++++
 drivers/gpio/xilinx_gpio.c                     |  1 -
 drivers/of/gpio.c                              | 33 +++++++++++++++++++-------
 include/linux/of_gpio.h                        |  7 ++++--
 13 files changed, 34 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpio.c b/arch/powerpc/platforms/52xx/mpc52xx_gpio.c
index fd0912eeffe..0855e804fc0 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpio.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpio.c
@@ -161,7 +161,6 @@ static int __devinit mpc52xx_wkup_gpiochip_probe(struct of_device *ofdev,
 
 	gc = &chip->mmchip.gc;
 
-	gc->of_gpio_n_cells  = 2;
 	gc->ngpio            = 8;
 	gc->direction_input  = mpc52xx_wkup_gpio_dir_in;
 	gc->direction_output = mpc52xx_wkup_gpio_dir_out;
@@ -325,7 +324,6 @@ static int __devinit mpc52xx_simple_gpiochip_probe(struct of_device *ofdev,
 
 	gc = &chip->mmchip.gc;
 
-	gc->of_gpio_n_cells  = 2;
 	gc->ngpio            = 32;
 	gc->direction_input  = mpc52xx_simple_gpio_dir_in;
 	gc->direction_output = mpc52xx_simple_gpio_dir_out;
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 6e82bd27132..5d7d607617c 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -348,10 +348,7 @@ mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
 	gpt->gc.get = mpc52xx_gpt_gpio_get;
 	gpt->gc.set = mpc52xx_gpt_gpio_set;
 	gpt->gc.base = -1;
-	gpt->gc.of_gpio_n_cells = 2;
-	gpt->gc.of_xlate = of_gpio_simple_xlate;
 	gpt->gc.of_node = node;
-	of_node_get(node);
 
 	/* Setup external pin in GPIO mode */
 	clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_MS_MASK,
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index f0dbace6185..59b0ed1a56b 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -90,8 +90,6 @@ static int mcu_gpiochip_add(struct mcu *mcu)
 	gc->base = -1;
 	gc->set = mcu_gpio_set;
 	gc->direction_output = mcu_gpio_dir_out;
-	gc->of_gpio_n_cells = 2;
-	gc->of_xlate = of_gpio_simple_xlate;
 	gc->of_node = np;
 
 	return gpiochip_add(gc);
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index d5cf7d4ccf8..00852124ff4 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -633,7 +633,6 @@ int cpm1_gpiochip_add16(struct device_node *np)
 	gc = &mm_gc->gc;
 
 	mm_gc->save_regs = cpm1_gpio16_save_regs;
-	gc->of_gpio_n_cells = 2;
 	gc->ngpio = 16;
 	gc->direction_input = cpm1_gpio16_dir_in;
 	gc->direction_output = cpm1_gpio16_dir_out;
@@ -755,7 +754,6 @@ int cpm1_gpiochip_add32(struct device_node *np)
 	gc = &mm_gc->gc;
 
 	mm_gc->save_regs = cpm1_gpio32_save_regs;
-	gc->of_gpio_n_cells = 2;
 	gc->ngpio = 32;
 	gc->direction_input = cpm1_gpio32_dir_in;
 	gc->direction_output = cpm1_gpio32_dir_out;
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index 67e9b47dcf8..2b69aa0315b 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -337,7 +337,6 @@ int cpm2_gpiochip_add32(struct device_node *np)
 	gc = &mm_gc->gc;
 
 	mm_gc->save_regs = cpm2_gpio32_save_regs;
-	gc->of_gpio_n_cells = 2;
 	gc->ngpio = 32;
 	gc->direction_input = cpm2_gpio32_dir_in;
 	gc->direction_output = cpm2_gpio32_dir_out;
diff --git a/arch/powerpc/sysdev/mpc8xxx_gpio.c b/arch/powerpc/sysdev/mpc8xxx_gpio.c
index ec8fcd42101..2b69084d0f0 100644
--- a/arch/powerpc/sysdev/mpc8xxx_gpio.c
+++ b/arch/powerpc/sysdev/mpc8xxx_gpio.c
@@ -273,7 +273,6 @@ static void __init mpc8xxx_add_controller(struct device_node *np)
 	gc = &mm_gc->gc;
 
 	mm_gc->save_regs = mpc8xxx_gpio_save_regs;
-	gc->of_gpio_n_cells = 2;
 	gc->ngpio = MPC8XXX_GPIO_PINS;
 	gc->direction_input = mpc8xxx_gpio_dir_in;
 	gc->direction_output = mpc8xxx_gpio_dir_out;
diff --git a/arch/powerpc/sysdev/ppc4xx_gpio.c b/arch/powerpc/sysdev/ppc4xx_gpio.c
index 42e7a5eea66..fc65ad1b329 100644
--- a/arch/powerpc/sysdev/ppc4xx_gpio.c
+++ b/arch/powerpc/sysdev/ppc4xx_gpio.c
@@ -194,7 +194,6 @@ static int __init ppc4xx_add_gpiochips(void)
 		mm_gc = &ppc4xx_gc->mm_gc;
 		gc = &mm_gc->gc;
 
-		gc->of_gpio_n_cells = 2;
 		gc->ngpio = 32;
 		gc->direction_input = ppc4xx_gpio_dir_in;
 		gc->direction_output = ppc4xx_gpio_dir_out;
diff --git a/arch/powerpc/sysdev/qe_lib/gpio.c b/arch/powerpc/sysdev/qe_lib/gpio.c
index 32e9440010a..36bf845df12 100644
--- a/arch/powerpc/sysdev/qe_lib/gpio.c
+++ b/arch/powerpc/sysdev/qe_lib/gpio.c
@@ -321,7 +321,6 @@ static int __init qe_add_gpiochips(void)
 		gc = &mm_gc->gc;
 
 		mm_gc->save_regs = qe_gpio_save_regs;
-		gc->of_gpio_n_cells = 2;
 		gc->ngpio = QE_PIO_PINS;
 		gc->direction_input = qe_gpio_dir_in;
 		gc->direction_output = qe_gpio_dir_out;
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
index b7559aa0c16..b6defda5ccc 100644
--- a/arch/powerpc/sysdev/simple_gpio.c
+++ b/arch/powerpc/sysdev/simple_gpio.c
@@ -103,7 +103,6 @@ static int __init u8_simple_gpiochip_add(struct device_node *np)
 	gc = &mm_gc->gc;
 
 	mm_gc->save_regs = u8_gpio_save_regs;
-	gc->of_gpio_n_cells = 2;
 	gc->ngpio = 8;
 	gc->direction_input = u8_gpio_dir_in;
 	gc->direction_output = u8_gpio_dir_out;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 73fd328f6fe..83cbc34e3a7 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -8,6 +8,7 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/gpio.h>
+#include <linux/of_gpio.h>
 #include <linux/idr.h>
 #include <linux/slab.h>
 
@@ -1099,6 +1100,8 @@ int gpiochip_add(struct gpio_chip *chip)
 		}
 	}
 
+	of_gpiochip_add(chip);
+
 unlock:
 	spin_unlock_irqrestore(&gpio_lock, flags);
 
@@ -1133,6 +1136,8 @@ int gpiochip_remove(struct gpio_chip *chip)
 
 	spin_lock_irqsave(&gpio_lock, flags);
 
+	of_gpiochip_remove(chip);
+
 	for (id = chip->base; id < chip->base + chip->ngpio; id++) {
 		if (test_bit(FLAG_REQUESTED, &gpio_desc[id].flags)) {
 			status = -EBUSY;
diff --git a/drivers/gpio/xilinx_gpio.c b/drivers/gpio/xilinx_gpio.c
index 2993c40b48e..709690995d0 100644
--- a/drivers/gpio/xilinx_gpio.c
+++ b/drivers/gpio/xilinx_gpio.c
@@ -190,7 +190,6 @@ static int __devinit xgpio_of_probe(struct device_node *np)
 
 	spin_lock_init(&chip->gpio_lock);
 
-	chip->mmchip.gc.of_gpio_n_cells = 2;
 	chip->mmchip.gc.direction_input = xgpio_dir_in;
 	chip->mmchip.gc.direction_output = xgpio_dir_out;
 	chip->mmchip.gc.get = xgpio_get;
diff --git a/drivers/of/gpio.c b/drivers/of/gpio.c
index c8618d3282c..09f05a17866 100644
--- a/drivers/of/gpio.c
+++ b/drivers/of/gpio.c
@@ -125,8 +125,8 @@ EXPORT_SYMBOL(of_gpio_count);
  * gpio chips. This function performs only one sanity check: whether gpio
  * is less than ngpios (that is specified in the gpio_chip).
  */
-int of_gpio_simple_xlate(struct gpio_chip *gc, struct device_node *np,
-			 const void *gpio_spec, u32 *flags)
+static int of_gpio_simple_xlate(struct gpio_chip *gc, struct device_node *np,
+				const void *gpio_spec, u32 *flags)
 {
 	const __be32 *gpio = gpio_spec;
 	const u32 n = be32_to_cpup(gpio);
@@ -150,7 +150,6 @@ int of_gpio_simple_xlate(struct gpio_chip *gc, struct device_node *np,
 
 	return n;
 }
-EXPORT_SYMBOL(of_gpio_simple_xlate);
 
 /**
  * of_mm_gpiochip_add - Add memory mapped GPIO chip (bank)
@@ -187,9 +186,6 @@ int of_mm_gpiochip_add(struct device_node *np,
 
 	gc->base = -1;
 
-	if (!gc->of_xlate)
-		gc->of_xlate = of_gpio_simple_xlate;
-
 	if (mm_gc->save_regs)
 		mm_gc->save_regs(mm_gc);
 
@@ -199,9 +195,6 @@ int of_mm_gpiochip_add(struct device_node *np,
 	if (ret)
 		goto err2;
 
-	/* We don't want to lose the node and its ->data */
-	of_node_get(np);
-
 	pr_debug("%s: registered as generic GPIO chip, base is %d\n",
 		 np->full_name, gc->base);
 	return 0;
@@ -216,6 +209,28 @@ err0:
 }
 EXPORT_SYMBOL(of_mm_gpiochip_add);
 
+void of_gpiochip_add(struct gpio_chip *chip)
+{
+	if ((!chip->of_node) && (chip->dev))
+		chip->of_node = chip->dev->of_node;
+
+	if (!chip->of_node)
+		return;
+
+	if (!chip->of_xlate) {
+		chip->of_gpio_n_cells = 2;
+		chip->of_xlate = of_gpio_simple_xlate;
+	}
+
+	of_node_get(chip->of_node);
+}
+
+void of_gpiochip_remove(struct gpio_chip *chip)
+{
+	if (chip->of_node)
+		of_node_put(chip->of_node);
+}
+
 /* Private function for resolving node pointer to gpio_chip */
 static int of_gpiochip_is_match(struct gpio_chip *chip, void *data)
 {
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index 1020587efed..6598c04dab0 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -52,9 +52,9 @@ extern unsigned int of_gpio_count(struct device_node *np);
 
 extern int of_mm_gpiochip_add(struct device_node *np,
 			      struct of_mm_gpio_chip *mm_gc);
-extern int of_gpio_simple_xlate(struct gpio_chip *gc, struct device_node *np,
-				const void *gpio_spec, u32 *flags);
 
+extern void of_gpiochip_add(struct gpio_chip *gc);
+extern void of_gpiochip_remove(struct gpio_chip *gc);
 extern struct gpio_chip *of_node_to_gpiochip(struct device_node *np);
 
 #else /* CONFIG_OF_GPIO */
@@ -71,6 +71,9 @@ static inline unsigned int of_gpio_count(struct device_node *np)
 	return 0;
 }
 
+static inline void of_gpiochip_add(struct gpio_chip *gc) { }
+static inline void of_gpiochip_remove(struct gpio_chip *gc) { }
+
 #endif /* CONFIG_OF_GPIO */
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 8cec0e7b4c7c0b76f2b5285f250211ad81c3eafd Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 8 Jun 2010 07:48:17 -0600
Subject: of/device: Add OF style matching helper function

Add of_driver_match_device() helper function.  This function can be used
by bus types to determine if a driver works with a device when using OF
style matching.  If CONFIG_OF is unselected, then it is a nop.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
CC: Greg Kroah-Hartman <gregkh@suse.de>
CC: Michal Simek <monstr@monstr.eu>
CC: Grant Likely <grant.likely@secretlab.ca>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Stephen Rothwell <sfr@canb.auug.org.au>
CC: linux-kernel@vger.kernel.org
CC: microblaze-uclinux@itee.uq.edu.au
CC: linuxppc-dev@ozlabs.org
CC: devicetree-discuss@lists.ozlabs.org
---
 drivers/of/device.c       |  2 +-
 include/linux/of_device.h | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/of/device.c b/drivers/of/device.c
index c2a98f5ca80..5282a202f5a 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -20,7 +20,7 @@
 const struct of_device_id *of_match_device(const struct of_device_id *matches,
 					   const struct device *dev)
 {
-	if (!dev->of_node)
+	if ((!matches) || (!dev->of_node))
 		return NULL;
 	return of_match_node(matches, dev->of_node);
 }
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index 238e92e007e..91d75fb0c72 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -30,6 +30,17 @@
 extern const struct of_device_id *of_match_device(
 	const struct of_device_id *matches, const struct device *dev);
 
+/**
+ * of_driver_match_device - Tell if a driver's of_match_table matches a device.
+ * @drv: the device_driver structure to test
+ * @dev: the device structure to match against
+ */
+static inline int of_driver_match_device(const struct device *dev,
+					 const struct device_driver *drv)
+{
+	return of_match_device(drv->of_match_table, dev) != NULL;
+}
+
 extern struct of_device *of_dev_get(struct of_device *dev);
 extern void of_dev_put(struct of_device *dev);
 
@@ -48,6 +59,14 @@ extern ssize_t of_device_get_modalias(struct device *dev,
 extern int of_device_uevent(struct device *dev, struct kobj_uevent_env *env);
 
 
+#else /* CONFIG_OF_DEVICE */
+
+static inline int of_driver_match_device(struct device *dev,
+					 struct device_driver *drv)
+{
+	return 0;
+}
+
 #endif /* CONFIG_OF_DEVICE */
 
 #endif /* _LINUX_OF_DEVICE_H */
-- 
cgit v1.2.3-70-g09d2


From f9f5a4669f1334a558f102c311debfd008e7c2bc Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Wed, 9 Jun 2010 22:22:17 -0600
Subject: of/device: Move struct of_device define outside of CONFIG_OF_DEVICE
 test

Some code uses of_device even when CONFIG_OF_DEVICE is not set.  This
patch makes of_device valid all the time by moving it outside of the
ifdef CONFIG_OF_DEVICE test.

Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
---
 include/linux/of_device.h | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index 91d75fb0c72..7d27f5a878f 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -1,13 +1,6 @@
 #ifndef _LINUX_OF_DEVICE_H
 #define _LINUX_OF_DEVICE_H
 
-#ifdef CONFIG_OF_DEVICE
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/of.h>
-#include <linux/mod_devicetable.h>
-
-
 /*
  * The of_device *was* a kind of "base class" that was a superset of
  * struct device for use by devices attached to an OF node and probed
@@ -22,7 +15,12 @@
  * from the kernel.
  */
 #define of_device platform_device
+#include <linux/platform_device.h>
 
+#ifdef CONFIG_OF_DEVICE
+#include <linux/device.h>
+#include <linux/of.h>
+#include <linux/mod_devicetable.h>
 #include <asm/of_device.h>
 
 #define	to_of_device(d) container_of(d, struct of_device, dev)
-- 
cgit v1.2.3-70-g09d2


From 9fd049927ccba1c1d0343239b82f28c4e07fb95d Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 8 Jun 2010 07:48:18 -0600
Subject: of/i2c: Generalize OF support

This patch cleans up the i2c OF support code to make it selectable by
all architectures and allow for automatic registration of i2c devices.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/i2c/busses/i2c-cpm.c     |  3 ++-
 drivers/i2c/busses/i2c-ibm_iic.c |  3 ++-
 drivers/i2c/busses/i2c-mpc.c     |  3 ++-
 drivers/of/Kconfig               |  2 +-
 drivers/of/of_i2c.c              | 50 +++++++++++++++++++++++-----------------
 include/linux/of_i2c.h           | 13 ++++++++---
 6 files changed, 46 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c
index b02b4533651..03ae62e6959 100644
--- a/drivers/i2c/busses/i2c-cpm.c
+++ b/drivers/i2c/busses/i2c-cpm.c
@@ -652,6 +652,7 @@ static int __devinit cpm_i2c_probe(struct of_device *ofdev,
 	cpm->adap = cpm_ops;
 	i2c_set_adapdata(&cpm->adap, cpm);
 	cpm->adap.dev.parent = &ofdev->dev;
+	cpm->adap.dev.of_node = of_node_get(ofdev->dev.of_node);
 
 	result = cpm_i2c_setup(cpm);
 	if (result) {
@@ -679,7 +680,7 @@ static int __devinit cpm_i2c_probe(struct of_device *ofdev,
 	/*
 	 * register OF I2C devices
 	 */
-	of_register_i2c_devices(&cpm->adap, ofdev->dev.of_node);
+	of_i2c_register_devices(&cpm->adap);
 
 	return 0;
 out_shut:
diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c
index bf344135647..d9641210dd3 100644
--- a/drivers/i2c/busses/i2c-ibm_iic.c
+++ b/drivers/i2c/busses/i2c-ibm_iic.c
@@ -745,6 +745,7 @@ static int __devinit iic_probe(struct of_device *ofdev,
 	/* Register it with i2c layer */
 	adap = &dev->adap;
 	adap->dev.parent = &ofdev->dev;
+	adap->dev.of_node = of_node_get(np);
 	strlcpy(adap->name, "IBM IIC", sizeof(adap->name));
 	i2c_set_adapdata(adap, dev);
 	adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
@@ -761,7 +762,7 @@ static int __devinit iic_probe(struct of_device *ofdev,
 		 dev->fast_mode ? "fast (400 kHz)" : "standard (100 kHz)");
 
 	/* Now register all the child nodes */
-	of_register_i2c_devices(adap, np);
+	of_i2c_register_devices(adap);
 
 	return 0;
 
diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c
index df00eb1f11f..d2e26d290e7 100644
--- a/drivers/i2c/busses/i2c-mpc.c
+++ b/drivers/i2c/busses/i2c-mpc.c
@@ -600,13 +600,14 @@ static int __devinit fsl_i2c_probe(struct of_device *op,
 	i2c->adap = mpc_ops;
 	i2c_set_adapdata(&i2c->adap, i2c);
 	i2c->adap.dev.parent = &op->dev;
+	i2c->adap.dev.of_node = of_node_get(op->dev.of_node);
 
 	result = i2c_add_adapter(&i2c->adap);
 	if (result < 0) {
 		dev_err(i2c->dev, "failed to add adapter\n");
 		goto fail_add;
 	}
-	of_register_i2c_devices(&i2c->adap, op->dev.of_node);
+	of_i2c_register_devices(&i2c->adap);
 
 	return result;
 
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 097f42aebe9..80dd6318db6 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -26,7 +26,7 @@ config OF_GPIO
 
 config OF_I2C
 	def_tristate I2C
-	depends on (PPC_OF || MICROBLAZE) && I2C
+	depends on OF && !SPARC && I2C
 	help
 	  OpenFirmware I2C accessors
 
diff --git a/drivers/of/of_i2c.c b/drivers/of/of_i2c.c
index ab6522c8e4f..0a694debd22 100644
--- a/drivers/of/of_i2c.c
+++ b/drivers/of/of_i2c.c
@@ -14,57 +14,65 @@
 #include <linux/i2c.h>
 #include <linux/of.h>
 #include <linux/of_i2c.h>
+#include <linux/of_irq.h>
 #include <linux/module.h>
 
-void of_register_i2c_devices(struct i2c_adapter *adap,
-			     struct device_node *adap_node)
+void of_i2c_register_devices(struct i2c_adapter *adap)
 {
 	void *result;
 	struct device_node *node;
 
-	for_each_child_of_node(adap_node, node) {
+	/* Only register child devices if the adapter has a node pointer set */
+	if (!adap->dev.of_node)
+		return;
+
+	dev_dbg(&adap->dev, "of_i2c: walking child nodes\n");
+
+	for_each_child_of_node(adap->dev.of_node, node) {
 		struct i2c_board_info info = {};
 		struct dev_archdata dev_ad = {};
 		const __be32 *addr;
 		int len;
 
-		if (of_modalias_node(node, info.type, sizeof(info.type)) < 0)
+		dev_dbg(&adap->dev, "of_i2c: register %s\n", node->full_name);
+
+		if (of_modalias_node(node, info.type, sizeof(info.type)) < 0) {
+			dev_err(&adap->dev, "of_i2c: modalias failure on %s\n",
+				node->full_name);
 			continue;
+		}
 
 		addr = of_get_property(node, "reg", &len);
-		if (!addr || len < sizeof(int) || *addr > (1 << 10) - 1) {
-			printk(KERN_ERR
-			       "of-i2c: invalid i2c device entry\n");
+		if (!addr || (len < sizeof(int))) {
+			dev_err(&adap->dev, "of_i2c: invalid reg on %s\n",
+				node->full_name);
 			continue;
 		}
 
-		info.irq = irq_of_parse_and_map(node, 0);
-
 		info.addr = be32_to_cpup(addr);
+		if (info.addr > (1 << 10) - 1) {
+			dev_err(&adap->dev, "of_i2c: invalid addr=%x on %s\n",
+				info.addr, node->full_name);
+			continue;
+		}
 
-		info.of_node = node;
+		info.irq = irq_of_parse_and_map(node, 0);
+		info.of_node = of_node_get(node);
 		info.archdata = &dev_ad;
 
 		request_module("%s", info.type);
 
 		result = i2c_new_device(adap, &info);
 		if (result == NULL) {
-			printk(KERN_ERR
-			       "of-i2c: Failed to load driver for %s\n",
-			       info.type);
+			dev_err(&adap->dev, "of_i2c: Failure registering %s\n",
+			        node->full_name);
+			of_node_put(node);
 			irq_dispose_mapping(info.irq);
 			continue;
 		}
-
-		/*
-		 * Get the node to not lose the dev_archdata->of_node.
-		 * Currently there is no way to put it back, as well as no
-		 * of_unregister_i2c_devices() call.
-		 */
-		of_node_get(node);
 	}
 }
-EXPORT_SYMBOL(of_register_i2c_devices);
+EXPORT_SYMBOL(of_i2c_register_devices);
 
 static int of_dev_node_match(struct device *dev, void *data)
 {
diff --git a/include/linux/of_i2c.h b/include/linux/of_i2c.h
index 34974b5a76f..0efe8d465f5 100644
--- a/include/linux/of_i2c.h
+++ b/include/linux/of_i2c.h
@@ -12,12 +12,19 @@
 #ifndef __LINUX_OF_I2C_H
 #define __LINUX_OF_I2C_H
 
+#if defined(CONFIG_OF_I2C) || defined(CONFIG_OF_I2C_MODULE)
 #include <linux/i2c.h>
 
-void of_register_i2c_devices(struct i2c_adapter *adap,
-			     struct device_node *adap_node);
+extern void of_i2c_register_devices(struct i2c_adapter *adap);
 
 /* must call put_device() when done with returned i2c_client device */
-struct i2c_client *of_find_i2c_device_by_node(struct device_node *node);
+extern struct i2c_client *of_find_i2c_device_by_node(struct device_node *node);
+
+#else
+static inline void of_i2c_register_devices(struct i2c_adapter *adap)
+{
+	return;
+}
+#endif /* CONFIG_OF_I2C */
 
 #endif /* __LINUX_OF_I2C_H */
-- 
cgit v1.2.3-70-g09d2


From 8eab945c5616fc984e97b922d6a2559be93f39a1 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Thu, 1 Jul 2010 18:05:56 +0300
Subject: sunrpc: make the cache cleaner workqueue deferrable

This patch makes the cache_cleaner workqueue deferrable, to prevent
unnecessary system wake-ups, which is very important for embedded
battery-powered devices.

do_cache_clean() is called every 30 seconds at the moment, and often
makes the system wake up from its power-save sleep state. With this
change, when the workqueue uses a deferrable timer, the
do_cache_clean() invocation will be delayed and combined with the
closest "real" wake-up. This improves the power consumption situation.

Note, I tried to create a DECLARE_DELAYED_WORK_DEFERRABLE() helper
macro, similar to DECLARE_DELAYED_WORK(), but failed because of the
way the timer wheel core stores the deferrable flag (it is the
LSBit in the timer->base pointer). My attempt to define a static
variable with this bit set ended up with the "initializer element is
not constant" error.

Thus, I have to use run-time initialization, so I created a new
cache_initialize() function which is called once when sunrpc is
being initialized.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/sunrpc/cache.h | 1 +
 net/sunrpc/cache.c           | 7 ++++++-
 net/sunrpc/sunrpc_syms.c     | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 6f52b4d7c44..7bf3e84b92f 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -192,6 +192,7 @@ extern int cache_check(struct cache_detail *detail,
 extern void cache_flush(void);
 extern void cache_purge(struct cache_detail *detail);
 #define NEVER (0x7FFFFFFF)
+extern void __init cache_initialize(void);
 extern int cache_register(struct cache_detail *cd);
 extern void cache_unregister(struct cache_detail *cd);
 
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 58de76c8540..939d048ef92 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -320,7 +320,7 @@ static struct cache_detail *current_detail;
 static int current_index;
 
 static void do_cache_clean(struct work_struct *work);
-static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean);
+static struct delayed_work cache_cleaner;
 
 static void sunrpc_init_cache_detail(struct cache_detail *cd)
 {
@@ -1504,6 +1504,11 @@ static int create_cache_proc_entries(struct cache_detail *cd)
 }
 #endif
 
+void __init cache_initialize(void)
+{
+	INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean);
+}
+
 int cache_register(struct cache_detail *cd)
 {
 	int ret;
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index f438347d817..c52b1848914 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -43,6 +43,7 @@ init_sunrpc(void)
 #ifdef CONFIG_PROC_FS
 	rpc_proc_init();
 #endif
+	cache_initialize();
 	cache_register(&ip_map_cache);
 	cache_register(&unix_gid_cache);
 	svc_init_xprt_sock();	/* svc sock transport */
-- 
cgit v1.2.3-70-g09d2


From de5d9bf6541736dc7ad264d2b5cc99bc1b2ad958 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 2 Jul 2010 13:41:14 -0400
Subject: Move list types from <linux/list.h> to <linux/types.h>.

This allows a list_head (or hlist_head, etc.) to be used from places
that used to be impractical, in particular <asm/processor.h>, which
used to cause include file recursion: <linux/list.h> includes
<linux/prefetch.h>, which always includes <asm/processor.h> for the
prefetch macros, as well as <asm/system.h>, which often includes
<asm/processor.h> directly or indirectly.

This avoids a lot of painful workaround hackery on the tile
architecture, where we use a list_head in the thread_struct to chain
together tasks that are activated on a particular hardwall.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
---
 include/linux/list.h  | 13 +------------
 include/linux/types.h | 12 ++++++++++++
 2 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/list.h b/include/linux/list.h
index 8392884a297..bc43e8a0d7f 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_LIST_H
 #define _LINUX_LIST_H
 
+#include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/poison.h>
 #include <linux/prefetch.h>
@@ -16,10 +17,6 @@
  * using the generic single-entry routines.
  */
 
-struct list_head {
-	struct list_head *next, *prev;
-};
-
 #define LIST_HEAD_INIT(name) { &(name), &(name) }
 
 #define LIST_HEAD(name) \
@@ -551,14 +548,6 @@ static inline void list_splice_tail_init(struct list_head *list,
  * You lose the ability to access the tail in O(1).
  */
 
-struct hlist_head {
-	struct hlist_node *first;
-};
-
-struct hlist_node {
-	struct hlist_node *next, **pprev;
-};
-
 #define HLIST_HEAD_INIT { .first = NULL }
 #define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
 #define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
diff --git a/include/linux/types.h b/include/linux/types.h
index 23d237a075e..336cc39c46f 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -197,6 +197,18 @@ typedef struct {
 } atomic64_t;
 #endif
 
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+struct hlist_node {
+	struct hlist_node *next, **pprev;
+};
+
 struct ustat {
 	__kernel_daddr_t	f_tfree;
 	__kernel_ino_t		f_tinode;
-- 
cgit v1.2.3-70-g09d2


From db3307a9f7b8078c654021e3b35354a2b09a8e67 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 2 Jul 2010 15:02:12 +0100
Subject: drm: kill drm_mm_node->private

Only ever assigned, never used.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
[glisse: I will re-add if needed for range-restricted allocations]
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/i915/i915_gem.c   | 4 +---
 drivers/gpu/drm/ttm/ttm_bo.c      | 6 ------
 drivers/gpu/drm/ttm/ttm_bo_util.c | 2 --
 include/drm/drm_mm.h              | 1 -
 4 files changed, 1 insertion(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 074385882cc..75061b305b8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2633,10 +2633,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
 	if (free_space != NULL) {
 		obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
 						       alignment);
-		if (obj_priv->gtt_space != NULL) {
-			obj_priv->gtt_space->private = obj;
+		if (obj_priv->gtt_space != NULL)
 			obj_priv->gtt_offset = obj_priv->gtt_space->start;
-		}
 	}
 	if (obj_priv->gtt_space == NULL) {
 		/* If the gtt is empty and we're still having trouble
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 555ebb12ace..9763288c6b2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -476,7 +476,6 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all)
 			++put_count;
 		}
 		if (bo->mem.mm_node) {
-			bo->mem.mm_node->private = NULL;
 			drm_mm_put_block(bo->mem.mm_node);
 			bo->mem.mm_node = NULL;
 		}
@@ -670,7 +669,6 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 			printk(KERN_ERR TTM_PFX "Buffer eviction failed\n");
 		spin_lock(&glob->lru_lock);
 		if (evict_mem.mm_node) {
-			evict_mem.mm_node->private = NULL;
 			drm_mm_put_block(evict_mem.mm_node);
 			evict_mem.mm_node = NULL;
 		}
@@ -929,8 +927,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 		mem->mm_node = node;
 		mem->mem_type = mem_type;
 		mem->placement = cur_flags;
-		if (node)
-			node->private = bo;
 		return 0;
 	}
 
@@ -973,7 +969,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 						interruptible, no_wait_reserve, no_wait_gpu);
 		if (ret == 0 && mem->mm_node) {
 			mem->placement = cur_flags;
-			mem->mm_node->private = bo;
 			return 0;
 		}
 		if (ret == -ERESTARTSYS)
@@ -1029,7 +1024,6 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 out_unlock:
 	if (ret && mem.mm_node) {
 		spin_lock(&glob->lru_lock);
-		mem.mm_node->private = NULL;
 		drm_mm_put_block(mem.mm_node);
 		spin_unlock(&glob->lru_lock);
 	}
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 13012a1f148..7cffb3e0423 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -353,8 +353,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	fbo->vm_node = NULL;
 
 	fbo->sync_obj = driver->sync_obj_ref(bo->sync_obj);
-	if (fbo->mem.mm_node)
-		fbo->mem.mm_node->private = (void *)fbo;
 	kref_init(&fbo->list_kref);
 	kref_init(&fbo->kref);
 	fbo->destroy = &ttm_transfered_destroy;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 4c10be39a43..da94071b170 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -48,7 +48,6 @@ struct drm_mm_node {
 	unsigned long start;
 	unsigned long size;
 	struct drm_mm *mm;
-	void *private;
 };
 
 struct drm_mm {
-- 
cgit v1.2.3-70-g09d2


From d1024ce91ff4c2c4ccbf692d204c71cbf215157a Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 2 Jul 2010 15:02:14 +0100
Subject: drm: sane naming for drm_mm.c

Yeah, I've kinda noticed that fl_entry is the free stack. Still
give it (and the memory node list ml_entry) decent names.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Thomas Hellstrom <thellstrom@vmwgfx.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_mm.c | 72 +++++++++++++++++++++++-------------------------
 include/drm/drm_mm.h     | 11 +++++---
 2 files changed, 42 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index a5a7a16c430..d2267ffd2b7 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -64,8 +64,8 @@ static struct drm_mm_node *drm_mm_kmalloc(struct drm_mm *mm, int atomic)
 		else {
 			child =
 			    list_entry(mm->unused_nodes.next,
-				       struct drm_mm_node, fl_entry);
-			list_del(&child->fl_entry);
+				       struct drm_mm_node, free_stack);
+			list_del(&child->free_stack);
 			--mm->num_unused;
 		}
 		spin_unlock(&mm->unused_lock);
@@ -94,7 +94,7 @@ int drm_mm_pre_get(struct drm_mm *mm)
 			return ret;
 		}
 		++mm->num_unused;
-		list_add_tail(&node->fl_entry, &mm->unused_nodes);
+		list_add_tail(&node->free_stack, &mm->unused_nodes);
 	}
 	spin_unlock(&mm->unused_lock);
 	return 0;
@@ -116,8 +116,8 @@ static int drm_mm_create_tail_node(struct drm_mm *mm,
 	child->start = start;
 	child->mm = mm;
 
-	list_add_tail(&child->ml_entry, &mm->ml_entry);
-	list_add_tail(&child->fl_entry, &mm->fl_entry);
+	list_add_tail(&child->node_list, &mm->node_list);
+	list_add_tail(&child->free_stack, &mm->free_stack);
 
 	return 0;
 }
@@ -132,15 +132,15 @@ static struct drm_mm_node *drm_mm_split_at_start(struct drm_mm_node *parent,
 	if (unlikely(child == NULL))
 		return NULL;
 
-	INIT_LIST_HEAD(&child->fl_entry);
+	INIT_LIST_HEAD(&child->free_stack);
 
 	child->free = 0;
 	child->size = size;
 	child->start = parent->start;
 	child->mm = parent->mm;
 
-	list_add_tail(&child->ml_entry, &parent->ml_entry);
-	INIT_LIST_HEAD(&child->fl_entry);
+	list_add_tail(&child->node_list, &parent->node_list);
+	INIT_LIST_HEAD(&child->free_stack);
 
 	parent->size -= size;
 	parent->start += size;
@@ -168,7 +168,7 @@ struct drm_mm_node *drm_mm_get_block_generic(struct drm_mm_node *node,
 	}
 
 	if (node->size == size) {
-		list_del_init(&node->fl_entry);
+		list_del_init(&node->free_stack);
 		node->free = 0;
 	} else {
 		node = drm_mm_split_at_start(node, size, atomic);
@@ -206,7 +206,7 @@ struct drm_mm_node *drm_mm_get_block_range_generic(struct drm_mm_node *node,
 	}
 
 	if (node->size == size) {
-		list_del_init(&node->fl_entry);
+		list_del_init(&node->free_stack);
 		node->free = 0;
 	} else {
 		node = drm_mm_split_at_start(node, size, atomic);
@@ -228,8 +228,8 @@ void drm_mm_put_block(struct drm_mm_node *cur)
 {
 
 	struct drm_mm *mm = cur->mm;
-	struct list_head *cur_head = &cur->ml_entry;
-	struct list_head *root_head = &mm->ml_entry;
+	struct list_head *cur_head = &cur->node_list;
+	struct list_head *root_head = &mm->node_list;
 	struct drm_mm_node *prev_node = NULL;
 	struct drm_mm_node *next_node;
 
@@ -237,7 +237,7 @@ void drm_mm_put_block(struct drm_mm_node *cur)
 
 	if (cur_head->prev != root_head) {
 		prev_node =
-		    list_entry(cur_head->prev, struct drm_mm_node, ml_entry);
+		    list_entry(cur_head->prev, struct drm_mm_node, node_list);
 		if (prev_node->free) {
 			prev_node->size += cur->size;
 			merged = 1;
@@ -245,15 +245,15 @@ void drm_mm_put_block(struct drm_mm_node *cur)
 	}
 	if (cur_head->next != root_head) {
 		next_node =
-		    list_entry(cur_head->next, struct drm_mm_node, ml_entry);
+		    list_entry(cur_head->next, struct drm_mm_node, node_list);
 		if (next_node->free) {
 			if (merged) {
 				prev_node->size += next_node->size;
-				list_del(&next_node->ml_entry);
-				list_del(&next_node->fl_entry);
+				list_del(&next_node->node_list);
+				list_del(&next_node->free_stack);
 				spin_lock(&mm->unused_lock);
 				if (mm->num_unused < MM_UNUSED_TARGET) {
-					list_add(&next_node->fl_entry,
+					list_add(&next_node->free_stack,
 						 &mm->unused_nodes);
 					++mm->num_unused;
 				} else
@@ -268,12 +268,12 @@ void drm_mm_put_block(struct drm_mm_node *cur)
 	}
 	if (!merged) {
 		cur->free = 1;
-		list_add(&cur->fl_entry, &mm->fl_entry);
+		list_add(&cur->free_stack, &mm->free_stack);
 	} else {
-		list_del(&cur->ml_entry);
+		list_del(&cur->node_list);
 		spin_lock(&mm->unused_lock);
 		if (mm->num_unused < MM_UNUSED_TARGET) {
-			list_add(&cur->fl_entry, &mm->unused_nodes);
+			list_add(&cur->free_stack, &mm->unused_nodes);
 			++mm->num_unused;
 		} else
 			kfree(cur);
@@ -287,7 +287,6 @@ struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm,
 				       unsigned long size,
 				       unsigned alignment, int best_match)
 {
-	const struct list_head *free_stack = &mm->fl_entry;
 	struct drm_mm_node *entry;
 	struct drm_mm_node *best;
 	unsigned long best_size;
@@ -296,7 +295,7 @@ struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm,
 	best = NULL;
 	best_size = ~0UL;
 
-	list_for_each_entry(entry, free_stack, fl_entry) {
+	list_for_each_entry(entry, &mm->free_stack, free_stack) {
 		wasted = 0;
 
 		if (entry->size < size)
@@ -329,7 +328,6 @@ struct drm_mm_node *drm_mm_search_free_in_range(const struct drm_mm *mm,
 						unsigned long end,
 						int best_match)
 {
-	const struct list_head *free_stack = &mm->fl_entry;
 	struct drm_mm_node *entry;
 	struct drm_mm_node *best;
 	unsigned long best_size;
@@ -338,7 +336,7 @@ struct drm_mm_node *drm_mm_search_free_in_range(const struct drm_mm *mm,
 	best = NULL;
 	best_size = ~0UL;
 
-	list_for_each_entry(entry, free_stack, fl_entry) {
+	list_for_each_entry(entry, &mm->free_stack, free_stack) {
 		wasted = 0;
 
 		if (entry->size < size)
@@ -373,7 +371,7 @@ EXPORT_SYMBOL(drm_mm_search_free_in_range);
 
 int drm_mm_clean(struct drm_mm * mm)
 {
-	struct list_head *head = &mm->ml_entry;
+	struct list_head *head = &mm->node_list;
 
 	return (head->next->next == head);
 }
@@ -381,8 +379,8 @@ EXPORT_SYMBOL(drm_mm_clean);
 
 int drm_mm_init(struct drm_mm * mm, unsigned long start, unsigned long size)
 {
-	INIT_LIST_HEAD(&mm->ml_entry);
-	INIT_LIST_HEAD(&mm->fl_entry);
+	INIT_LIST_HEAD(&mm->node_list);
+	INIT_LIST_HEAD(&mm->free_stack);
 	INIT_LIST_HEAD(&mm->unused_nodes);
 	mm->num_unused = 0;
 	spin_lock_init(&mm->unused_lock);
@@ -393,25 +391,25 @@ EXPORT_SYMBOL(drm_mm_init);
 
 void drm_mm_takedown(struct drm_mm * mm)
 {
-	struct list_head *bnode = mm->fl_entry.next;
+	struct list_head *bnode = mm->free_stack.next;
 	struct drm_mm_node *entry;
 	struct drm_mm_node *next;
 
-	entry = list_entry(bnode, struct drm_mm_node, fl_entry);
+	entry = list_entry(bnode, struct drm_mm_node, free_stack);
 
-	if (entry->ml_entry.next != &mm->ml_entry ||
-	    entry->fl_entry.next != &mm->fl_entry) {
+	if (entry->node_list.next != &mm->node_list ||
+	    entry->free_stack.next != &mm->free_stack) {
 		DRM_ERROR("Memory manager not clean. Delaying takedown\n");
 		return;
 	}
 
-	list_del(&entry->fl_entry);
-	list_del(&entry->ml_entry);
+	list_del(&entry->free_stack);
+	list_del(&entry->node_list);
 	kfree(entry);
 
 	spin_lock(&mm->unused_lock);
-	list_for_each_entry_safe(entry, next, &mm->unused_nodes, fl_entry) {
-		list_del(&entry->fl_entry);
+	list_for_each_entry_safe(entry, next, &mm->unused_nodes, free_stack) {
+		list_del(&entry->free_stack);
 		kfree(entry);
 		--mm->num_unused;
 	}
@@ -426,7 +424,7 @@ void drm_mm_debug_table(struct drm_mm *mm, const char *prefix)
 	struct drm_mm_node *entry;
 	int total_used = 0, total_free = 0, total = 0;
 
-	list_for_each_entry(entry, &mm->ml_entry, ml_entry) {
+	list_for_each_entry(entry, &mm->node_list, node_list) {
 		printk(KERN_DEBUG "%s 0x%08lx-0x%08lx: %8ld: %s\n",
 			prefix, entry->start, entry->start + entry->size,
 			entry->size, entry->free ? "free" : "used");
@@ -447,7 +445,7 @@ int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm)
 	struct drm_mm_node *entry;
 	int total_used = 0, total_free = 0, total = 0;
 
-	list_for_each_entry(entry, &mm->ml_entry, ml_entry) {
+	list_for_each_entry(entry, &mm->node_list, node_list) {
 		seq_printf(m, "0x%08lx-0x%08lx: 0x%08lx: %s\n", entry->start, entry->start + entry->size, entry->size, entry->free ? "free" : "used");
 		total += entry->size;
 		if (entry->free)
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index da94071b170..e8740cc185c 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -42,8 +42,8 @@
 #endif
 
 struct drm_mm_node {
-	struct list_head fl_entry;
-	struct list_head ml_entry;
+	struct list_head free_stack;
+	struct list_head node_list;
 	int free;
 	unsigned long start;
 	unsigned long size;
@@ -51,8 +51,11 @@ struct drm_mm_node {
 };
 
 struct drm_mm {
-	struct list_head fl_entry;
-	struct list_head ml_entry;
+	/* List of free memory blocks, most recently freed ordered. */
+	struct list_head free_stack;
+	/* List of all memory nodes, ordered according to the (increasing) start
+	 * address of the memory node. */
+	struct list_head node_list;
 	struct list_head unused_nodes;
 	int num_unused;
 	spinlock_t unused_lock;
-- 
cgit v1.2.3-70-g09d2


From 709ea97145c125b3811ff70429e90ebdb0e832e5 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 2 Jul 2010 15:02:16 +0100
Subject: drm: implement helper functions for scanning lru list

These helper functions can be used to efficiently scan lru list
for eviction. Eviction becomes a three stage process:
1. Scanning through the lru list until a suitable hole has been found.
2. Scan backwards to restore drm_mm consistency and find out which
   objects fall into the hole.
3. Evict the objects that fall into the hole.

These helper functions don't allocate any memory (at the price of
not allowing any other concurrent operations). Hence this can also be
used for ttm (which does lru scanning under a spinlock).

Evicting objects in this fashion should be more fair than the current
approach by i915 (scan the lru for an object large enough to contain
the new object). It's also more efficient than the current approach used
by ttm (unconditionally evict objects from the lru until there's enough
free space).

Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Thomas Hellstrom <thellstrom@vmwgfx.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_mm.c | 167 +++++++++++++++++++++++++++++++++++++++++++++--
 include/drm/drm_mm.h     |  15 ++++-
 2 files changed, 177 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index fd86a6c13aa..da99edc5088 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -53,9 +53,9 @@ static struct drm_mm_node *drm_mm_kmalloc(struct drm_mm *mm, int atomic)
 	struct drm_mm_node *child;
 
 	if (atomic)
-		child = kmalloc(sizeof(*child), GFP_ATOMIC);
+		child = kzalloc(sizeof(*child), GFP_ATOMIC);
 	else
-		child = kmalloc(sizeof(*child), GFP_KERNEL);
+		child = kzalloc(sizeof(*child), GFP_KERNEL);
 
 	if (unlikely(child == NULL)) {
 		spin_lock(&mm->unused_lock);
@@ -85,7 +85,7 @@ int drm_mm_pre_get(struct drm_mm *mm)
 	spin_lock(&mm->unused_lock);
 	while (mm->num_unused < MM_UNUSED_TARGET) {
 		spin_unlock(&mm->unused_lock);
-		node = kmalloc(sizeof(*node), GFP_KERNEL);
+		node = kzalloc(sizeof(*node), GFP_KERNEL);
 		spin_lock(&mm->unused_lock);
 
 		if (unlikely(node == NULL)) {
@@ -134,7 +134,6 @@ static struct drm_mm_node *drm_mm_split_at_start(struct drm_mm_node *parent,
 
 	INIT_LIST_HEAD(&child->free_stack);
 
-	child->free = 0;
 	child->size = size;
 	child->start = parent->start;
 	child->mm = parent->mm;
@@ -235,6 +234,9 @@ void drm_mm_put_block(struct drm_mm_node *cur)
 
 	int merged = 0;
 
+	BUG_ON(cur->scanned_block || cur->scanned_prev_free
+				  || cur->scanned_next_free);
+
 	if (cur_head->prev != root_head) {
 		prev_node =
 		    list_entry(cur_head->prev, struct drm_mm_node, node_list);
@@ -312,6 +314,8 @@ struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm,
 	struct drm_mm_node *best;
 	unsigned long best_size;
 
+	BUG_ON(mm->scanned_blocks);
+
 	best = NULL;
 	best_size = ~0UL;
 
@@ -343,6 +347,8 @@ struct drm_mm_node *drm_mm_search_free_in_range(const struct drm_mm *mm,
 	struct drm_mm_node *best;
 	unsigned long best_size;
 
+	BUG_ON(mm->scanned_blocks);
+
 	best = NULL;
 	best_size = ~0UL;
 
@@ -366,6 +372,158 @@ struct drm_mm_node *drm_mm_search_free_in_range(const struct drm_mm *mm,
 }
 EXPORT_SYMBOL(drm_mm_search_free_in_range);
 
+/**
+ * Initializa lru scanning.
+ *
+ * This simply sets up the scanning routines with the parameters for the desired
+ * hole.
+ *
+ * Warning: As long as the scan list is non-empty, no other operations than
+ * adding/removing nodes to/from the scan list are allowed.
+ */
+void drm_mm_init_scan(struct drm_mm *mm, unsigned long size,
+		      unsigned alignment)
+{
+	mm->scan_alignment = alignment;
+	mm->scan_size = size;
+	mm->scanned_blocks = 0;
+	mm->scan_hit_start = 0;
+	mm->scan_hit_size = 0;
+}
+EXPORT_SYMBOL(drm_mm_init_scan);
+
+/**
+ * Add a node to the scan list that might be freed to make space for the desired
+ * hole.
+ *
+ * Returns non-zero, if a hole has been found, zero otherwise.
+ */
+int drm_mm_scan_add_block(struct drm_mm_node *node)
+{
+	struct drm_mm *mm = node->mm;
+	struct list_head *prev_free, *next_free;
+	struct drm_mm_node *prev_node, *next_node;
+
+	mm->scanned_blocks++;
+
+	prev_free = next_free = NULL;
+
+	BUG_ON(node->free);
+	node->scanned_block = 1;
+	node->free = 1;
+
+	if (node->node_list.prev != &mm->node_list) {
+		prev_node = list_entry(node->node_list.prev, struct drm_mm_node,
+				       node_list);
+
+		if (prev_node->free) {
+			list_del(&prev_node->node_list);
+
+			node->start = prev_node->start;
+			node->size += prev_node->size;
+
+			prev_node->scanned_prev_free = 1;
+
+			prev_free = &prev_node->free_stack;
+		}
+	}
+
+	if (node->node_list.next != &mm->node_list) {
+		next_node = list_entry(node->node_list.next, struct drm_mm_node,
+				       node_list);
+
+		if (next_node->free) {
+			list_del(&next_node->node_list);
+
+			node->size += next_node->size;
+
+			next_node->scanned_next_free = 1;
+
+			next_free = &next_node->free_stack;
+		}
+	}
+
+	/* The free_stack list is not used for allocated objects, so these two
+	 * pointers can be abused (as long as no allocations in this memory
+	 * manager happens). */
+	node->free_stack.prev = prev_free;
+	node->free_stack.next = next_free;
+
+	if (check_free_mm_node(node, mm->scan_size, mm->scan_alignment)) {
+		mm->scan_hit_start = node->start;
+		mm->scan_hit_size = node->size;
+
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_mm_scan_add_block);
+
+/**
+ * Remove a node from the scan list.
+ *
+ * Nodes _must_ be removed in the exact same order from the scan list as they
+ * have been added, otherwise the internal state of the memory manager will be
+ * corrupted.
+ *
+ * When the scan list is empty, the selected memory nodes can be freed. An
+ * immediatly following drm_mm_search_free with best_match = 0 will then return
+ * the just freed block (because its at the top of the free_stack list).
+ *
+ * Returns one if this block should be evicted, zero otherwise. Will always
+ * return zero when no hole has been found.
+ */
+int drm_mm_scan_remove_block(struct drm_mm_node *node)
+{
+	struct drm_mm *mm = node->mm;
+	struct drm_mm_node *prev_node, *next_node;
+
+	mm->scanned_blocks--;
+
+	BUG_ON(!node->scanned_block);
+	node->scanned_block = 0;
+	node->free = 0;
+
+	prev_node = list_entry(node->free_stack.prev, struct drm_mm_node,
+			       free_stack);
+	next_node = list_entry(node->free_stack.next, struct drm_mm_node,
+			       free_stack);
+
+	if (prev_node) {
+		BUG_ON(!prev_node->scanned_prev_free);
+		prev_node->scanned_prev_free = 0;
+
+		list_add_tail(&prev_node->node_list, &node->node_list);
+
+		node->start = prev_node->start + prev_node->size;
+		node->size -= prev_node->size;
+	}
+
+	if (next_node) {
+		BUG_ON(!next_node->scanned_next_free);
+		next_node->scanned_next_free = 0;
+
+		list_add(&next_node->node_list, &node->node_list);
+
+		node->size -= next_node->size;
+	}
+
+	INIT_LIST_HEAD(&node->free_stack);
+
+	/* Only need to check for containement because start&size for the
+	 * complete resulting free block (not just the desired part) is
+	 * stored. */
+	if (node->start >= mm->scan_hit_start &&
+	    node->start + node->size
+	    		<= mm->scan_hit_start + mm->scan_hit_size) {
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_mm_scan_remove_block);
+
 int drm_mm_clean(struct drm_mm * mm)
 {
 	struct list_head *head = &mm->node_list;
@@ -380,6 +538,7 @@ int drm_mm_init(struct drm_mm * mm, unsigned long start, unsigned long size)
 	INIT_LIST_HEAD(&mm->free_stack);
 	INIT_LIST_HEAD(&mm->unused_nodes);
 	mm->num_unused = 0;
+	mm->scanned_blocks = 0;
 	spin_lock_init(&mm->unused_lock);
 
 	return drm_mm_create_tail_node(mm, start, size, 0);
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index e8740cc185c..bf01531193d 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -44,7 +44,10 @@
 struct drm_mm_node {
 	struct list_head free_stack;
 	struct list_head node_list;
-	int free;
+	unsigned free : 1;
+	unsigned scanned_block : 1;
+	unsigned scanned_prev_free : 1;
+	unsigned scanned_next_free : 1;
 	unsigned long start;
 	unsigned long size;
 	struct drm_mm *mm;
@@ -59,6 +62,11 @@ struct drm_mm {
 	struct list_head unused_nodes;
 	int num_unused;
 	spinlock_t unused_lock;
+	unsigned scan_alignment;
+	unsigned long scan_size;
+	unsigned long scan_hit_start;
+	unsigned scan_hit_size;
+	unsigned scanned_blocks;
 };
 
 /*
@@ -135,6 +143,11 @@ static inline struct drm_mm *drm_get_mm(struct drm_mm_node *block)
 	return block->mm;
 }
 
+void drm_mm_init_scan(struct drm_mm *mm, unsigned long size,
+		      unsigned alignment);
+int drm_mm_scan_add_block(struct drm_mm_node *node);
+int drm_mm_scan_remove_block(struct drm_mm_node *node);
+
 extern void drm_mm_debug_table(struct drm_mm *mm, const char *prefix);
 #ifdef CONFIG_DEBUG_FS
 int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm);
-- 
cgit v1.2.3-70-g09d2


From 4461cf546ec8c97b6b997b8e533d6de1960499d3 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Mon, 31 May 2010 09:22:12 +0800
Subject: ACPICA: Add signatures for undefined tables: ATKG, GSCI, IEIT

These ACPI tables have been seen in the field, but the actual
table definitions are unknown at this time.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/actbl2.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 95f4d0ef481..0a600b8e53f 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -79,6 +79,15 @@
 #define ACPI_SIG_WDAT           "WDAT"	/* Watchdog Action Table */
 #define ACPI_SIG_WDRT           "WDRT"	/* Watchdog Resource Table */
 
+#ifdef ACPI_UNDEFINED_TABLES
+/*
+ * These tables have been seen in the field, but no definition has been found
+ */
+#define ACPI_SIG_ATKG           "ATKG"
+#define ACPI_SIG_GSCI           "GSCI"	/* GMCH SCI table */
+#define ACPI_SIG_IEIT           "IEIT"
+#endif
+
 /*
  * All tables must be byte-packed to match the ACPI specification, since
  * the tables are provided by the system BIOS.
-- 
cgit v1.2.3-70-g09d2


From ccba77eb45c36cf1d8b22f241eb8a4a292c1362e Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Mon, 31 May 2010 09:23:22 +0800
Subject: ACPICA: Update version to 20100528

Version 20100528.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpixf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 1371cc99739..8aaa596e120 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -47,7 +47,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20100428
+#define ACPI_CA_VERSION                 0x20100528
 
 #include "actypes.h"
 #include "actbl.h"
-- 
cgit v1.2.3-70-g09d2


From e8b6f970107cfc9c00cdcdb12ec6c7e135cf379f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 25 Jun 2010 01:18:39 +0200
Subject: ACPICA: Introduce acpi_gpe_wakeup()

ACPICA uses reference counters to avoid disabling GPEs too early in
case they have been enabled many times.  This is done separately
for runtime and for wakeup, but the wakeup GPE reference counter is
not really necessary, because GPEs are only enabled to wake up the
system at the hardware level by acpi_enter_sleep_state().  Thus it
only is necessary to set the corresponding bits in the wakeup enable
masks of these GPEs' registers right before the system enters a sleep
state.  Moreover, the GPE wakeup enable bits can only be set when the
target sleep state of the system is known and they need to be cleared
immediately after wakeup regardless of how many wakeup devices are
associated with a given GPE.

On the basis of the above observations, introduce function
acpi_gpe_wakeup() to be used for setting or clearing the enable bit
corresponding to a given GPE in its enable register's enable_for_wake
mask.  Modify the ACPI suspend and wakeup code to use
acpi_gpe_wakeup() instead of acpi_{enable|disable}_gpe() to set
and clear GPE enable bits in their registers' enable_for_wake masks
during system transitions to a sleep state and back to the working
state, respectively.  [This will allow us to drop the third
argument of acpi_{enable|disable}_gpe() and simplify the GPE
handling code.]

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/acpica/evxfevnt.c | 67 ++++++++++++++++++++++++++++++++++++++++++
 drivers/acpi/sleep.c           | 15 ++--------
 drivers/acpi/wakeup.c          | 18 +++++++-----
 include/acpi/acpixf.h          |  2 ++
 include/acpi/actypes.h         |  2 +-
 5 files changed, 84 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/evxfevnt.c b/drivers/acpi/acpica/evxfevnt.c
index d97b8dce166..d6a6d4a7659 100644
--- a/drivers/acpi/acpica/evxfevnt.c
+++ b/drivers/acpi/acpica/evxfevnt.c
@@ -306,6 +306,73 @@ acpi_status acpi_set_gpe(acpi_handle gpe_device, u32 gpe_number, u8 action)
 
 ACPI_EXPORT_SYMBOL(acpi_set_gpe)
 
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_gpe_wakeup
+ *
+ * PARAMETERS:  gpe_device      - Parent GPE Device. NULL for GPE0/GPE1
+ *              gpe_number      - GPE level within the GPE block
+ *              Action          - Enable or Disable
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Set or clear the GPE's wakeup enable mask bit.
+ *
+ ******************************************************************************/
+acpi_status acpi_gpe_wakeup(acpi_handle gpe_device, u32 gpe_number, u8 action)
+{
+	acpi_status status = AE_OK;
+	struct acpi_gpe_event_info *gpe_event_info;
+	struct acpi_gpe_register_info *gpe_register_info;
+	acpi_cpu_flags flags;
+	u32 register_bit;
+
+	ACPI_FUNCTION_TRACE(acpi_gpe_wakeup);
+
+	flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock);
+
+	/* Ensure that we have a valid GPE number */
+
+	gpe_event_info = acpi_ev_get_gpe_event_info(gpe_device, gpe_number);
+	if (!gpe_event_info) {
+		status = AE_BAD_PARAMETER;
+		goto unlock_and_exit;
+	}
+
+	gpe_register_info = gpe_event_info->register_info;
+	if (!gpe_register_info) {
+		status = AE_NOT_EXIST;
+		goto unlock_and_exit;
+	}
+
+	register_bit =
+	    acpi_hw_get_gpe_register_bit(gpe_event_info, gpe_register_info);
+
+	/* Perform the action */
+
+	switch (action) {
+	case ACPI_GPE_ENABLE:
+		ACPI_SET_BIT(gpe_register_info->enable_for_wake, register_bit);
+		break;
+
+	case ACPI_GPE_DISABLE:
+		ACPI_CLEAR_BIT(gpe_register_info->enable_for_wake,
+			       register_bit);
+		break;
+
+	default:
+		ACPI_ERROR((AE_INFO, "%u, Invalid action", action));
+		status = AE_BAD_PARAMETER;
+		break;
+	}
+
+unlock_and_exit:
+	acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
+	return_ACPI_STATUS(status);
+}
+
+ACPI_EXPORT_SYMBOL(acpi_gpe_wakeup)
+
 /*******************************************************************************
  *
  * FUNCTION:    acpi_enable_gpe
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 5b7c52e4a00..aaa1af55e28 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -664,18 +664,9 @@ int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
 		return -ENODEV;
 	}
 
-	if (enable) {
-		error = acpi_enable_wakeup_device_power(adev,
-						acpi_target_sleep_state);
-		if (!error)
-			acpi_enable_gpe(adev->wakeup.gpe_device,
-					adev->wakeup.gpe_number,
-					ACPI_GPE_TYPE_WAKE);
-	} else {
-		acpi_disable_gpe(adev->wakeup.gpe_device, adev->wakeup.gpe_number,
-				ACPI_GPE_TYPE_WAKE);
-		error = acpi_disable_wakeup_device_power(adev);
-	}
+	error = enable ?
+		acpi_enable_wakeup_device_power(adev, acpi_target_sleep_state) :
+		acpi_disable_wakeup_device_power(adev);
 	if (!error)
 		dev_info(dev, "wake-up capability %s by ACPI\n",
 				enable ? "enabled" : "disabled");
diff --git a/drivers/acpi/wakeup.c b/drivers/acpi/wakeup.c
index 388747a7ef4..c80537bc323 100644
--- a/drivers/acpi/wakeup.c
+++ b/drivers/acpi/wakeup.c
@@ -64,13 +64,14 @@ void acpi_enable_wakeup_device(u8 sleep_state)
 		struct acpi_device *dev =
 			container_of(node, struct acpi_device, wakeup_list);
 
-		if (!dev->wakeup.flags.valid || !dev->wakeup.state.enabled
+		if (!dev->wakeup.flags.valid
+		    || !(dev->wakeup.state.enabled || dev->wakeup.prepare_count)
 		    || sleep_state > (u32) dev->wakeup.sleep_state)
 			continue;
 
 		/* The wake-up power should have been enabled already. */
-		acpi_enable_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number,
-				ACPI_GPE_TYPE_WAKE);
+		acpi_gpe_wakeup(dev->wakeup.gpe_device, dev->wakeup.gpe_number,
+				ACPI_GPE_ENABLE);
 	}
 }
 
@@ -89,13 +90,16 @@ void acpi_disable_wakeup_device(u8 sleep_state)
 		struct acpi_device *dev =
 			container_of(node, struct acpi_device, wakeup_list);
 
-		if (!dev->wakeup.flags.valid || !dev->wakeup.state.enabled
+		if (!dev->wakeup.flags.valid
+		    || !(dev->wakeup.state.enabled || dev->wakeup.prepare_count)
 		    || (sleep_state > (u32) dev->wakeup.sleep_state))
 			continue;
 
-		acpi_disable_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number,
-				ACPI_GPE_TYPE_WAKE);
-		acpi_disable_wakeup_device_power(dev);
+		acpi_gpe_wakeup(dev->wakeup.gpe_device, dev->wakeup.gpe_number,
+				ACPI_GPE_DISABLE);
+
+		if (dev->wakeup.state.enabled)
+			acpi_disable_wakeup_device_power(dev);
 	}
 }
 
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 8aaa596e120..17396e83e1a 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -292,6 +292,8 @@ acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number, u8 gpe_type);
 
 acpi_status acpi_clear_gpe(acpi_handle gpe_device, u32 gpe_number);
 
+acpi_status acpi_gpe_wakeup(acpi_handle gpe_device, u32 gpe_number, u8 action);
+
 acpi_status
 acpi_get_gpe_status(acpi_handle gpe_device,
 		    u32 gpe_number, acpi_event_status *event_status);
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index d55f4a7b824..6a65a94897b 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -663,7 +663,7 @@ typedef u32 acpi_event_status;
 #define ACPI_GPE_MAX                    0xFF
 #define ACPI_NUM_GPE                    256
 
-/* Actions for acpi_set_gpe and acpi_hw_low_set_gpe */
+/* Actions for acpi_set_gpe, acpi_gpe_wakeup, acpi_hw_low_set_gpe */
 
 #define ACPI_GPE_ENABLE                 0
 #define ACPI_GPE_DISABLE                1
-- 
cgit v1.2.3-70-g09d2


From a44061aa8b5d58b2729faca4c155a94a5bea2a09 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 1 Jul 2010 10:11:45 +0800
Subject: ACPICA: Remove wakeup GPE reference counting which is not used

After the previous patch that introduced acpi_gpe_wakeup() and
modified the ACPI suspend and wakeup code to use it, the third
argument of acpi_{enable|disable}_gpe() and the GPE wakeup
reference counter are not necessary any more.  Remove them and
modify all of the users of acpi_{enable|disable}_gpe()
accordingly.  Also drop GPE type constants that aren't used
any more.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/acpica/acevents.h  |   2 +-
 drivers/acpi/acpica/aclocal.h   |   1 -
 drivers/acpi/acpica/evgpe.c     |  21 +++----
 drivers/acpi/acpica/evgpeblk.c  |   3 +-
 drivers/acpi/acpica/evgpeinit.c |   3 +-
 drivers/acpi/acpica/evxfevnt.c  | 119 ++++++++++------------------------------
 drivers/acpi/button.c           |   6 +-
 drivers/acpi/ec.c               |   6 +-
 drivers/acpi/system.c           |   6 +-
 drivers/pci/pci-acpi.c          |   6 +-
 include/acpi/acpixf.h           |   6 +-
 include/acpi/actypes.h          |   6 --
 12 files changed, 50 insertions(+), 135 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h
index c3f43daa8be..a561944fbaf 100644
--- a/drivers/acpi/acpica/acevents.h
+++ b/drivers/acpi/acpica/acevents.h
@@ -78,7 +78,7 @@ acpi_ev_queue_notify_request(struct acpi_namespace_node *node,
 u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info *gpe_xrupt_list);
 
 acpi_status
-acpi_ev_update_gpe_enable_masks(struct acpi_gpe_event_info *gpe_event_info);
+acpi_ev_update_gpe_enable_mask(struct acpi_gpe_event_info *gpe_event_info);
 
 struct acpi_gpe_event_info *acpi_ev_get_gpe_event_info(acpi_handle gpe_device,
 						       u32 gpe_number);
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index eb2d420b39e..1ee0bcf399a 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -429,7 +429,6 @@ struct acpi_gpe_event_info {
 	u8 flags;		/* Misc info about this GPE */
 	u8 gpe_number;		/* This GPE */
 	u8 runtime_count;	/* References to a run GPE */
-	u8 wakeup_count;	/* References to a wake GPE */
 };
 
 /* Information about a GPE register pair, one per each status/enable pair in an array */
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index d24d7d31f40..9413ac61e44 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -54,24 +54,24 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context);
 
 /*******************************************************************************
  *
- * FUNCTION:    acpi_ev_update_gpe_enable_masks
+ * FUNCTION:    acpi_ev_update_gpe_enable_mask
  *
  * PARAMETERS:  gpe_event_info          - GPE to update
  *
  * RETURN:      Status
  *
- * DESCRIPTION: Updates GPE register enable masks based upon whether there are
- *              references (either wake or run) to this GPE
+ * DESCRIPTION: Updates GPE register enable mask based upon whether there are
+ *              runtime references to this GPE
  *
  ******************************************************************************/
 
 acpi_status
-acpi_ev_update_gpe_enable_masks(struct acpi_gpe_event_info *gpe_event_info)
+acpi_ev_update_gpe_enable_mask(struct acpi_gpe_event_info *gpe_event_info)
 {
 	struct acpi_gpe_register_info *gpe_register_info;
 	u32 register_bit;
 
-	ACPI_FUNCTION_TRACE(ev_update_gpe_enable_masks);
+	ACPI_FUNCTION_TRACE(ev_update_gpe_enable_mask);
 
 	gpe_register_info = gpe_event_info->register_info;
 	if (!gpe_register_info) {
@@ -81,19 +81,14 @@ acpi_ev_update_gpe_enable_masks(struct acpi_gpe_event_info *gpe_event_info)
 	register_bit = acpi_hw_get_gpe_register_bit(gpe_event_info,
 						gpe_register_info);
 
-	/* Clear the wake/run bits up front */
+	/* Clear the run bit up front */
 
-	ACPI_CLEAR_BIT(gpe_register_info->enable_for_wake, register_bit);
 	ACPI_CLEAR_BIT(gpe_register_info->enable_for_run, register_bit);
 
-	/* Set the mask bits only if there are references to this GPE */
+	/* Set the mask bit only if there are references to this GPE */
 
 	if (gpe_event_info->runtime_count) {
-		ACPI_SET_BIT(gpe_register_info->enable_for_run, register_bit);
-	}
-
-	if (gpe_event_info->wakeup_count) {
-		ACPI_SET_BIT(gpe_register_info->enable_for_wake, register_bit);
+		ACPI_SET_BIT(gpe_register_info->enable_for_run, (u8)register_bit);
 	}
 
 	return_ACPI_STATUS(AE_OK);
diff --git a/drivers/acpi/acpica/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c
index 341a38ce8aa..77e8630043f 100644
--- a/drivers/acpi/acpica/evgpeblk.c
+++ b/drivers/acpi/acpica/evgpeblk.c
@@ -529,8 +529,7 @@ acpi_ev_initialize_gpe_block(struct acpi_namespace_node *gpe_device,
 
 			/* Enable this GPE */
 
-			status = acpi_enable_gpe(gpe_device, gpe_number,
-						 ACPI_GPE_TYPE_RUNTIME);
+			status = acpi_enable_gpe(gpe_device, gpe_number);
 			if (ACPI_FAILURE(status)) {
 				ACPI_EXCEPTION((AE_INFO, status,
 						"Could not enable GPE 0x%02X",
diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c
index 3f6c2d26410..8db9e076a53 100644
--- a/drivers/acpi/acpica/evgpeinit.c
+++ b/drivers/acpi/acpica/evgpeinit.c
@@ -482,8 +482,7 @@ acpi_ev_match_gpe_method(acpi_handle obj_handle,
 				gpe_device = NULL;
 			}
 
-			status = acpi_enable_gpe(gpe_device, gpe_number,
-						 ACPI_GPE_TYPE_RUNTIME);
+			status = acpi_enable_gpe(gpe_device, gpe_number);
 			if (ACPI_FAILURE(status)) {
 				ACPI_EXCEPTION((AE_INFO, status,
 						"Could not enable GPE 0x%02X",
diff --git a/drivers/acpi/acpica/evxfevnt.c b/drivers/acpi/acpica/evxfevnt.c
index d6a6d4a7659..467fde961ae 100644
--- a/drivers/acpi/acpica/evxfevnt.c
+++ b/drivers/acpi/acpica/evxfevnt.c
@@ -352,12 +352,13 @@ acpi_status acpi_gpe_wakeup(acpi_handle gpe_device, u32 gpe_number, u8 action)
 
 	switch (action) {
 	case ACPI_GPE_ENABLE:
-		ACPI_SET_BIT(gpe_register_info->enable_for_wake, register_bit);
+		ACPI_SET_BIT(gpe_register_info->enable_for_wake,
+			     (u8)register_bit);
 		break;
 
 	case ACPI_GPE_DISABLE:
 		ACPI_CLEAR_BIT(gpe_register_info->enable_for_wake,
-			       register_bit);
+			       (u8)register_bit);
 		break;
 
 	default:
@@ -379,17 +380,14 @@ ACPI_EXPORT_SYMBOL(acpi_gpe_wakeup)
  *
  * PARAMETERS:  gpe_device      - Parent GPE Device. NULL for GPE0/GPE1
  *              gpe_number      - GPE level within the GPE block
- *              gpe_type        - ACPI_GPE_TYPE_RUNTIME or ACPI_GPE_TYPE_WAKE
- *                                or both
  *
  * RETURN:      Status
  *
  * DESCRIPTION: Add a reference to a GPE. On the first reference, the GPE is
- *              hardware-enabled (for runtime GPEs), or the GPE register mask
- *              is updated (for wake GPEs).
+ *              hardware-enabled.
  *
  ******************************************************************************/
-acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number, u8 gpe_type)
+acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number)
 {
 	acpi_status status = AE_OK;
 	struct acpi_gpe_event_info *gpe_event_info;
@@ -397,12 +395,6 @@ acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number, u8 gpe_type)
 
 	ACPI_FUNCTION_TRACE(acpi_enable_gpe);
 
-	/* Parameter validation */
-
-	if (!gpe_type || (gpe_type & ~ACPI_GPE_TYPE_WAKE_RUN)) {
-		return_ACPI_STATUS(AE_BAD_PARAMETER);
-	}
-
 	flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock);
 
 	/* Ensure that we have a valid GPE number */
@@ -413,46 +405,19 @@ acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number, u8 gpe_type)
 		goto unlock_and_exit;
 	}
 
-	if (gpe_type & ACPI_GPE_TYPE_RUNTIME) {
-		if (gpe_event_info->runtime_count == ACPI_UINT8_MAX) {
-			status = AE_LIMIT;	/* Too many references */
-			goto unlock_and_exit;
-		}
-
-		gpe_event_info->runtime_count++;
-		if (gpe_event_info->runtime_count == 1) {
-			status = acpi_ev_update_gpe_enable_masks(gpe_event_info);
-			if (ACPI_SUCCESS(status)) {
-				status = acpi_clear_and_enable_gpe(gpe_event_info);
-			}
-
-			if (ACPI_FAILURE(status)) {
-				gpe_event_info->runtime_count--;
-				goto unlock_and_exit;
-			}
-		}
+	if (gpe_event_info->runtime_count == ACPI_UINT8_MAX) {
+		status = AE_LIMIT;	/* Too many references */
+		goto unlock_and_exit;
 	}
 
-	if (gpe_type & ACPI_GPE_TYPE_WAKE) {
-		/* The GPE must have the ability to wake the system */
-
-		if (!(gpe_event_info->flags & ACPI_GPE_CAN_WAKE)) {
-			status = AE_TYPE;
-			goto unlock_and_exit;
-		}
-
-		if (gpe_event_info->wakeup_count == ACPI_UINT8_MAX) {
-			status = AE_LIMIT;	/* Too many references */
-			goto unlock_and_exit;
+	gpe_event_info->runtime_count++;
+	if (gpe_event_info->runtime_count == 1) {
+		status = acpi_ev_update_gpe_enable_mask(gpe_event_info);
+		if (ACPI_SUCCESS(status)) {
+			status = acpi_clear_and_enable_gpe(gpe_event_info);
 		}
-
-		/*
-		 * Update the enable mask on the first wakeup reference. Wake GPEs
-		 * are only hardware-enabled just before sleeping.
-		 */
-		gpe_event_info->wakeup_count++;
-		if (gpe_event_info->wakeup_count == 1) {
-			status = acpi_ev_update_gpe_enable_masks(gpe_event_info);
+		if (ACPI_FAILURE(status)) {
+			gpe_event_info->runtime_count--;
 		}
 	}
 
@@ -468,8 +433,6 @@ ACPI_EXPORT_SYMBOL(acpi_enable_gpe)
  *
  * PARAMETERS:  gpe_device      - Parent GPE Device. NULL for GPE0/GPE1
  *              gpe_number      - GPE level within the GPE block
- *              gpe_type        - ACPI_GPE_TYPE_RUNTIME or ACPI_GPE_TYPE_WAKE
- *                                or both
  *
  * RETURN:      Status
  *
@@ -478,7 +441,7 @@ ACPI_EXPORT_SYMBOL(acpi_enable_gpe)
  *              the GPE mask bit disabled (for wake GPEs)
  *
  ******************************************************************************/
-acpi_status acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number, u8 gpe_type)
+acpi_status acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number)
 {
 	acpi_status status = AE_OK;
 	struct acpi_gpe_event_info *gpe_event_info;
@@ -486,12 +449,6 @@ acpi_status acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number, u8 gpe_type
 
 	ACPI_FUNCTION_TRACE(acpi_disable_gpe);
 
-	/* Parameter validation */
-
-	if (!gpe_type || (gpe_type & ~ACPI_GPE_TYPE_WAKE_RUN)) {
-		return_ACPI_STATUS(AE_BAD_PARAMETER);
-	}
-
 	flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock);
 
 	/* Ensure that we have a valid GPE number */
@@ -504,41 +461,21 @@ acpi_status acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number, u8 gpe_type
 
 	/* Hardware-disable a runtime GPE on removal of the last reference */
 
-	if (gpe_type & ACPI_GPE_TYPE_RUNTIME) {
-		if (!gpe_event_info->runtime_count) {
-			status = AE_LIMIT;	/* There are no references to remove */
-			goto unlock_and_exit;
-		}
-
-		gpe_event_info->runtime_count--;
-		if (!gpe_event_info->runtime_count) {
-			status = acpi_ev_update_gpe_enable_masks(gpe_event_info);
-			if (ACPI_SUCCESS(status)) {
-				status = acpi_hw_low_set_gpe(gpe_event_info,
-							     ACPI_GPE_DISABLE);
-			}
-
-			if (ACPI_FAILURE(status)) {
-				gpe_event_info->runtime_count++;
-				goto unlock_and_exit;
-			}
-		}
+	if (!gpe_event_info->runtime_count) {
+		status = AE_LIMIT;	/* There are no references to remove */
+		goto unlock_and_exit;
 	}
 
-	/*
-	 * Update masks for wake GPE on removal of the last reference.
-	 * No need to hardware-disable wake GPEs here, they are not currently
-	 * enabled.
-	 */
-	if (gpe_type & ACPI_GPE_TYPE_WAKE) {
-		if (!gpe_event_info->wakeup_count) {
-			status = AE_LIMIT;	/* There are no references to remove */
-			goto unlock_and_exit;
+	gpe_event_info->runtime_count--;
+	if (!gpe_event_info->runtime_count) {
+		status = acpi_ev_update_gpe_enable_mask(gpe_event_info);
+		if (ACPI_SUCCESS(status)) {
+			status =
+			    acpi_hw_low_set_gpe(gpe_event_info,
+						ACPI_GPE_DISABLE);
 		}
-
-		gpe_event_info->wakeup_count--;
-		if (!gpe_event_info->wakeup_count) {
-			status = acpi_ev_update_gpe_enable_masks(gpe_event_info);
+		if (ACPI_FAILURE(status)) {
+			gpe_event_info->runtime_count++;
 		}
 	}
 
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index 7d857dabdde..1575a9b51f1 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -424,8 +424,7 @@ static int acpi_button_add(struct acpi_device *device)
 	if (device->wakeup.flags.valid) {
 		/* Button's GPE is run-wake GPE */
 		acpi_enable_gpe(device->wakeup.gpe_device,
-				device->wakeup.gpe_number,
-				ACPI_GPE_TYPE_RUNTIME);
+				device->wakeup.gpe_number);
 		device->wakeup.run_wake_count++;
 		device->wakeup.state.enabled = 1;
 	}
@@ -448,8 +447,7 @@ static int acpi_button_remove(struct acpi_device *device, int type)
 
 	if (device->wakeup.flags.valid) {
 		acpi_disable_gpe(device->wakeup.gpe_device,
-				device->wakeup.gpe_number,
-				ACPI_GPE_TYPE_RUNTIME);
+				device->wakeup.gpe_number);
 		device->wakeup.run_wake_count--;
 		device->wakeup.state.enabled = 0;
 	}
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 5f2027d782e..bf504541657 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -822,7 +822,7 @@ static int ec_install_handlers(struct acpi_ec *ec)
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
-	acpi_enable_gpe(NULL, ec->gpe, ACPI_GPE_TYPE_RUNTIME);
+	acpi_enable_gpe(NULL, ec->gpe);
 	status = acpi_install_address_space_handler(ec->handle,
 						    ACPI_ADR_SPACE_EC,
 						    &acpi_ec_space_handler,
@@ -839,7 +839,7 @@ static int ec_install_handlers(struct acpi_ec *ec)
 		} else {
 			acpi_remove_gpe_handler(NULL, ec->gpe,
 				&acpi_ec_gpe_handler);
-			acpi_disable_gpe(NULL, ec->gpe, ACPI_GPE_TYPE_RUNTIME);
+			acpi_disable_gpe(NULL, ec->gpe);
 			return -ENODEV;
 		}
 	}
@@ -850,7 +850,7 @@ static int ec_install_handlers(struct acpi_ec *ec)
 
 static void ec_remove_handlers(struct acpi_ec *ec)
 {
-	acpi_disable_gpe(NULL, ec->gpe, ACPI_GPE_TYPE_RUNTIME);
+	acpi_disable_gpe(NULL, ec->gpe);
 	if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle,
 				ACPI_ADR_SPACE_EC, &acpi_ec_space_handler)))
 		pr_err(PREFIX "failed to remove space handler\n");
diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c
index f8db50a0941..5981bd07e20 100644
--- a/drivers/acpi/system.c
+++ b/drivers/acpi/system.c
@@ -388,12 +388,10 @@ static ssize_t counter_set(struct kobject *kobj,
 	if (index < num_gpes) {
 		if (!strcmp(buf, "disable\n") &&
 				(status & ACPI_EVENT_FLAG_ENABLED))
-			result = acpi_disable_gpe(handle, index,
-						ACPI_GPE_TYPE_RUNTIME);
+			result = acpi_disable_gpe(handle, index);
 		else if (!strcmp(buf, "enable\n") &&
 				!(status & ACPI_EVENT_FLAG_ENABLED))
-			result = acpi_enable_gpe(handle, index,
-						ACPI_GPE_TYPE_RUNTIME);
+			result = acpi_enable_gpe(handle, index);
 		else if (!strcmp(buf, "clear\n") &&
 				(status & ACPI_EVENT_FLAG_SET))
 			result = acpi_clear_gpe(handle, index);
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 2e7a3bf1382..5342e037e37 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -295,14 +295,12 @@ static int acpi_dev_run_wake(struct device *phys_dev, bool enable)
 		if (!dev->wakeup.run_wake_count++) {
 			acpi_enable_wakeup_device_power(dev, ACPI_STATE_S0);
 			acpi_enable_gpe(dev->wakeup.gpe_device,
-					dev->wakeup.gpe_number,
-					ACPI_GPE_TYPE_RUNTIME);
+					dev->wakeup.gpe_number);
 		}
 	} else if (dev->wakeup.run_wake_count > 0) {
 		if (!--dev->wakeup.run_wake_count) {
 			acpi_disable_gpe(dev->wakeup.gpe_device,
-					 dev->wakeup.gpe_number,
-					 ACPI_GPE_TYPE_RUNTIME);
+					 dev->wakeup.gpe_number);
 			acpi_disable_wakeup_device_power(dev);
 		}
 	} else {
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 17396e83e1a..354d785e80c 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -284,11 +284,9 @@ acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status);
  */
 acpi_status acpi_set_gpe(acpi_handle gpe_device, u32 gpe_number, u8 action);
 
-acpi_status
-acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number, u8 gpe_type);
+acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number);
 
-acpi_status
-acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number, u8 gpe_type);
+acpi_status acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number);
 
 acpi_status acpi_clear_gpe(acpi_handle gpe_device, u32 gpe_number);
 
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 6a65a94897b..a42513ded3a 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -669,12 +669,6 @@ typedef u32 acpi_event_status;
 #define ACPI_GPE_DISABLE                1
 #define ACPI_GPE_COND_ENABLE            2
 
-/* gpe_types for acpi_enable_gpe and acpi_disable_gpe */
-
-#define ACPI_GPE_TYPE_WAKE              (u8) 0x01
-#define ACPI_GPE_TYPE_RUNTIME           (u8) 0x02
-#define ACPI_GPE_TYPE_WAKE_RUN          (u8) 0x03
-
 /*
  * GPE info flags - Per GPE
  * +-------+---+-+-+
-- 
cgit v1.2.3-70-g09d2


From 546eb57695875712f676e5f729159b0779f1c0af Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 1 Jul 2010 11:07:20 +0800
Subject: ACPICA: Drop acpi_set_gpe

The acpi_set_gpe() function is a little awkward, because it doesn't
really work as advertised in the "disable" case.  Namely, if a GPE
has been enabled with acpi_enable_gpe() and triggered a notification
to occur, and if acpi_set_gpe() is used to disable it before
acpi_ev_asynch_enable_gpe() runs, the GPE will be immediately enabled
by the latter as though the acpi_set_gpe() had no effect.

Thus, since it's been possible to make all of its callers use
alternative operations to disable or enable GPEs, acpi_set_gpe() can
be dropped.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/acpica/evxfevnt.c | 60 ------------------------------------------
 include/acpi/acpixf.h          |  2 --
 include/acpi/actypes.h         |  2 +-
 3 files changed, 1 insertion(+), 63 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/evxfevnt.c b/drivers/acpi/acpica/evxfevnt.c
index b094cc0183d..fda5b44a556 100644
--- a/drivers/acpi/acpica/evxfevnt.c
+++ b/drivers/acpi/acpica/evxfevnt.c
@@ -208,66 +208,6 @@ acpi_status acpi_enable_event(u32 event, u32 flags)
 
 ACPI_EXPORT_SYMBOL(acpi_enable_event)
 
-/*******************************************************************************
- *
- * FUNCTION:    acpi_set_gpe
- *
- * PARAMETERS:  gpe_device      - Parent GPE Device. NULL for GPE0/GPE1
- *              gpe_number      - GPE level within the GPE block
- *              action          - ACPI_GPE_ENABLE or ACPI_GPE_DISABLE
- *
- * RETURN:      Status
- *
- * DESCRIPTION: Enable or disable an individual GPE. This function bypasses
- *              the reference count mechanism used in the acpi_enable_gpe and
- *              acpi_disable_gpe interfaces -- and should be used with care.
- *
- * Note: Typically used to disable a runtime GPE for short period of time,
- * then re-enable it, without disturbing the existing reference counts. This
- * is useful, for example, in the Embedded Controller (EC) driver.
- *
- ******************************************************************************/
-acpi_status acpi_set_gpe(acpi_handle gpe_device, u32 gpe_number, u8 action)
-{
-	struct acpi_gpe_event_info *gpe_event_info;
-	acpi_status status;
-	acpi_cpu_flags flags;
-
-	ACPI_FUNCTION_TRACE(acpi_set_gpe);
-
-	flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock);
-
-	/* Ensure that we have a valid GPE number */
-
-	gpe_event_info = acpi_ev_get_gpe_event_info(gpe_device, gpe_number);
-	if (!gpe_event_info) {
-		status = AE_BAD_PARAMETER;
-		goto unlock_and_exit;
-	}
-
-	/* Perform the action */
-
-	switch (action) {
-	case ACPI_GPE_ENABLE:
-		status = acpi_ev_enable_gpe(gpe_event_info);
-		break;
-
-	case ACPI_GPE_DISABLE:
-		status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_DISABLE);
-		break;
-
-	default:
-		status = AE_BAD_PARAMETER;
-		break;
-	}
-
-      unlock_and_exit:
-	acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
-	return_ACPI_STATUS(status);
-}
-
-ACPI_EXPORT_SYMBOL(acpi_set_gpe)
-
 /*******************************************************************************
  *
  * FUNCTION:    acpi_gpe_wakeup
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 354d785e80c..e0a53e4616d 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -282,8 +282,6 @@ acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status);
 /*
  * GPE Interfaces
  */
-acpi_status acpi_set_gpe(acpi_handle gpe_device, u32 gpe_number, u8 action);
-
 acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number);
 
 acpi_status acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number);
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index a42513ded3a..5db8f472fec 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -663,7 +663,7 @@ typedef u32 acpi_event_status;
 #define ACPI_GPE_MAX                    0xFF
 #define ACPI_NUM_GPE                    256
 
-/* Actions for acpi_set_gpe, acpi_gpe_wakeup, acpi_hw_low_set_gpe */
+/* Actions for acpi_gpe_wakeup, acpi_hw_low_set_gpe */
 
 #define ACPI_GPE_ENABLE                 0
 #define ACPI_GPE_DISABLE                1
-- 
cgit v1.2.3-70-g09d2


From 9d8b5e7b28179784e2c6250086a44021fbb9c5a0 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Tue, 6 Jul 2010 09:58:11 +0800
Subject: ACPICA: Add support for WDDT - Watchdog Descriptor Table

Header file support.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/actbl2.h | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'include')

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 0a600b8e53f..d4136b28011 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -77,6 +77,7 @@
 #define ACPI_SIG_UEFI           "UEFI"	/* Uefi Boot Optimization Table */
 #define ACPI_SIG_WAET           "WAET"	/* Windows ACPI Emulated devices Table */
 #define ACPI_SIG_WDAT           "WDAT"	/* Watchdog Action Table */
+#define ACPI_SIG_WDDT           "WDDT"	/* Watchdog Timer Description Table */
 #define ACPI_SIG_WDRT           "WDRT"	/* Watchdog Resource Table */
 
 #ifdef ACPI_UNDEFINED_TABLES
@@ -916,6 +917,44 @@ enum acpi_wdat_instructions {
 	ACPI_WDAT_PRESERVE_REGISTER = 0x80	/* Except for this value */
 };
 
+/*******************************************************************************
+ *
+ * WDDT - Watchdog Descriptor Table
+ *        Version 1
+ *
+ * Conforms to "Using the Intel ICH Family Watchdog Timer (WDT)",
+ * Version 001, September 2002
+ *
+ ******************************************************************************/
+
+struct acpi_table_wddt {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u16 spec_version;
+	u16 table_version;
+	u16 pci_vendor_id;
+	struct acpi_generic_address address;
+	u16 max_count;		/* Maximum counter value supported */
+	u16 min_count;		/* Minimum counter value supported */
+	u16 period;
+	u16 status;
+	u16 capability;
+};
+
+/* Flags for Status field above */
+
+#define ACPI_WDDT_AVAILABLE     (1)
+#define ACPI_WDDT_ACTIVE        (1<<1)
+#define ACPI_WDDT_TCO_OS_OWNED  (1<<2)
+#define ACPI_WDDT_USER_RESET    (1<<11)
+#define ACPI_WDDT_WDT_RESET     (1<<12)
+#define ACPI_WDDT_POWER_FAIL    (1<<13)
+#define ACPI_WDDT_UNKNOWN_RESET (1<<14)
+
+/* Flags for Capability field above */
+
+#define ACPI_WDDT_AUTO_RESET    (1)
+#define ACPI_WDDT_ALERT_SUPPORT (1<<1)
+
 /*******************************************************************************
  *
  * WDRT - Watchdog Resource Table
-- 
cgit v1.2.3-70-g09d2


From ddcc6a037c0f9378f29658636a2c2b54c4238ec4 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Tue, 6 Jul 2010 10:02:16 +0800
Subject: ACPICA: Update debug output components

Add data table compiler output component

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acoutput.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h
index 5e952262d6e..bc4a6deb73b 100644
--- a/include/acpi/acoutput.h
+++ b/include/acpi/acoutput.h
@@ -71,8 +71,9 @@
 #define ACPI_TOOLS                  0x00002000
 #define ACPI_EXAMPLE                0x00004000
 #define ACPI_DRIVER                 0x00008000
+#define DT_COMPILER                 0x00010000
 
-#define ACPI_ALL_COMPONENTS         0x0000FFFF
+#define ACPI_ALL_COMPONENTS         0x0001FFFF
 #define ACPI_COMPONENT_DEFAULT      (ACPI_ALL_COMPONENTS)
 
 /* Component IDs reserved for ACPI drivers */
-- 
cgit v1.2.3-70-g09d2


From 9e6c3e996e3c80d00cf931538e17126efe45f45c Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Tue, 6 Jul 2010 10:39:01 +0800
Subject: ACPICA: Update version to 20100702

Version 20100702.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpixf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index e0a53e4616d..ba94a889afd 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -47,7 +47,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20100528
+#define ACPI_CA_VERSION                 0x20100702
 
 #include "actypes.h"
 #include "actbl.h"
-- 
cgit v1.2.3-70-g09d2


From 5c8d7171cc4984351af802a525675d50ae555a7b Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexdeucher@gmail.com>
Date: Fri, 11 Jun 2010 17:04:35 -0400
Subject: drm/kms: add crtc disable function

More explicit than dpms. Same as the encoder disable function.

Need this to explicitly disconnect plls from crtcs for reuse when your
plls:crtcs ratio isn't 1:1.

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_crtc_helper.c | 5 ++++-
 include/drm/drm_crtc_helper.h     | 3 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 9b2a54117c9..fa1323ff56b 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -241,7 +241,10 @@ void drm_helper_disable_unused_functions(struct drm_device *dev)
 		struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
 		crtc->enabled = drm_helper_crtc_in_use(crtc);
 		if (!crtc->enabled) {
-			crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
+			if (crtc_funcs->disable)
+				(*crtc_funcs->disable)(crtc);
+			else
+				(*crtc_funcs->dpms)(crtc, DRM_MODE_DPMS_OFF);
 			crtc->fb = NULL;
 		}
 	}
diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
index 1121f7799c6..7e3c9766acb 100644
--- a/include/drm/drm_crtc_helper.h
+++ b/include/drm/drm_crtc_helper.h
@@ -63,6 +63,9 @@ struct drm_crtc_helper_funcs {
 
 	/* reload the current crtc LUT */
 	void (*load_lut)(struct drm_crtc *crtc);
+
+	/* disable crtc when not in use - more explicit than dpms off */
+	void (*disable)(struct drm_crtc *crtc);
 };
 
 struct drm_encoder_helper_funcs {
-- 
cgit v1.2.3-70-g09d2


From 28172739f0a276eb8d6ca917b3974c2edb036da3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 7 Jul 2010 14:58:56 -0700
Subject: net: fix 64 bit counters on 32 bit arches

There is a small possibility that a reader gets incorrect values on 32
bit arches. SNMP applications could catch incorrect counters when a
32bit high part is changed by another stats consumer/provider.

One way to solve this is to add a rtnl_link_stats64 param to all
ndo_get_stats64() methods, and also add such a parameter to
dev_get_stats().

Rule is that we are not allowed to use dev->stats64 as temporary
storage for 64bit stats; a caller-provided area (usually on the stack)
must be used instead.

Old drivers (only providing get_stats() method) need no changes.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/s390/appldata/appldata_net_sum.c       |  3 +-
 drivers/net/bonding/bond_main.c             | 64 ++++++++++++++---------------
 drivers/net/ixgbe/ixgbe_ethtool.c           |  8 ++--
 drivers/net/loopback.c                      |  4 +-
 drivers/net/macvlan.c                       |  6 +--
 drivers/net/sfc/efx.c                       |  3 +-
 drivers/net/sfc/ethtool.c                   |  3 +-
 drivers/parisc/led.c                        |  3 +-
 drivers/scsi/fcoe/fcoe.c                    |  3 +-
 drivers/staging/batman-adv/hard-interface.c |  3 +-
 drivers/usb/gadget/rndis.c                  |  3 +-
 include/linux/netdevice.h                   | 12 ++++--
 net/8021q/vlan_dev.c                        |  6 +--
 net/8021q/vlanproc.c                        |  3 +-
 net/bridge/br_device.c                      |  4 +-
 net/core/dev.c                              | 25 +++++++----
 net/core/net-sysfs.c                        |  4 +-
 net/core/rtnetlink.c                        |  3 +-
 18 files changed, 89 insertions(+), 71 deletions(-)

(limited to 'include')

diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
index 9a9586f4103..f02e89ce4df 100644
--- a/arch/s390/appldata/appldata_net_sum.c
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -85,7 +85,8 @@ static void appldata_get_net_sum_data(void *data)
 
 	rcu_read_lock();
 	for_each_netdev_rcu(&init_net, dev) {
-		const struct net_device_stats *stats = dev_get_stats(dev);
+		struct rtnl_link_stats64 temp;
+		const struct net_device_stats *stats = dev_get_stats(dev, &temp);
 
 		rx_packets += stats->rx_packets;
 		tx_packets += stats->tx_packets;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index a95a41b74b4..9bb9bfa225b 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3804,51 +3804,49 @@ static int bond_close(struct net_device *bond_dev)
 	return 0;
 }
 
-static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev)
+static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
+						struct rtnl_link_stats64 *stats)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
-	struct rtnl_link_stats64 *stats = &bond_dev->stats64;
-	struct rtnl_link_stats64 local_stats;
+	struct rtnl_link_stats64 temp;
 	struct slave *slave;
 	int i;
 
-	memset(&local_stats, 0, sizeof(local_stats));
+	memset(stats, 0, sizeof(*stats));
 
 	read_lock_bh(&bond->lock);
 
 	bond_for_each_slave(bond, slave, i) {
 		const struct rtnl_link_stats64 *sstats =
-			dev_get_stats(slave->dev);
-
-		local_stats.rx_packets += sstats->rx_packets;
-		local_stats.rx_bytes += sstats->rx_bytes;
-		local_stats.rx_errors += sstats->rx_errors;
-		local_stats.rx_dropped += sstats->rx_dropped;
-
-		local_stats.tx_packets += sstats->tx_packets;
-		local_stats.tx_bytes += sstats->tx_bytes;
-		local_stats.tx_errors += sstats->tx_errors;
-		local_stats.tx_dropped += sstats->tx_dropped;
-
-		local_stats.multicast += sstats->multicast;
-		local_stats.collisions += sstats->collisions;
-
-		local_stats.rx_length_errors += sstats->rx_length_errors;
-		local_stats.rx_over_errors += sstats->rx_over_errors;
-		local_stats.rx_crc_errors += sstats->rx_crc_errors;
-		local_stats.rx_frame_errors += sstats->rx_frame_errors;
-		local_stats.rx_fifo_errors += sstats->rx_fifo_errors;
-		local_stats.rx_missed_errors += sstats->rx_missed_errors;
-
-		local_stats.tx_aborted_errors += sstats->tx_aborted_errors;
-		local_stats.tx_carrier_errors += sstats->tx_carrier_errors;
-		local_stats.tx_fifo_errors += sstats->tx_fifo_errors;
-		local_stats.tx_heartbeat_errors += sstats->tx_heartbeat_errors;
-		local_stats.tx_window_errors += sstats->tx_window_errors;
+			dev_get_stats(slave->dev, &temp);
+
+		stats->rx_packets += sstats->rx_packets;
+		stats->rx_bytes += sstats->rx_bytes;
+		stats->rx_errors += sstats->rx_errors;
+		stats->rx_dropped += sstats->rx_dropped;
+
+		stats->tx_packets += sstats->tx_packets;
+		stats->tx_bytes += sstats->tx_bytes;
+		stats->tx_errors += sstats->tx_errors;
+		stats->tx_dropped += sstats->tx_dropped;
+
+		stats->multicast += sstats->multicast;
+		stats->collisions += sstats->collisions;
+
+		stats->rx_length_errors += sstats->rx_length_errors;
+		stats->rx_over_errors += sstats->rx_over_errors;
+		stats->rx_crc_errors += sstats->rx_crc_errors;
+		stats->rx_frame_errors += sstats->rx_frame_errors;
+		stats->rx_fifo_errors += sstats->rx_fifo_errors;
+		stats->rx_missed_errors += sstats->rx_missed_errors;
+
+		stats->tx_aborted_errors += sstats->tx_aborted_errors;
+		stats->tx_carrier_errors += sstats->tx_carrier_errors;
+		stats->tx_fifo_errors += sstats->tx_fifo_errors;
+		stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors;
+		stats->tx_window_errors += sstats->tx_window_errors;
 	}
 
-	memcpy(stats, &local_stats, sizeof(struct net_device_stats));
-
 	read_unlock_bh(&bond->lock);
 
 	return stats;
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index b35ef36741e..da54b38bb48 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -55,7 +55,7 @@ struct ixgbe_stats {
 				offsetof(struct ixgbe_adapter, m)
 #define IXGBE_NETDEV_STAT(m)	NETDEV_STATS, \
 				sizeof(((struct net_device *)0)->m), \
-				offsetof(struct net_device, m)
+				offsetof(struct net_device, m) - offsetof(struct net_device, stats)
 
 static struct ixgbe_stats ixgbe_gstrings_stats[] = {
 	{"rx_packets", IXGBE_NETDEV_STAT(stats.rx_packets)},
@@ -998,16 +998,18 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev,
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	u64 *queue_stat;
 	int stat_count = sizeof(struct ixgbe_queue_stats) / sizeof(u64);
+	struct rtnl_link_stats64 temp;
+	const struct rtnl_link_stats64 *net_stats;
 	int j, k;
 	int i;
 	char *p = NULL;
 
 	ixgbe_update_stats(adapter);
-	dev_get_stats(netdev);
+	net_stats = dev_get_stats(netdev, &temp);
 	for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++) {
 		switch (ixgbe_gstrings_stats[i].type) {
 		case NETDEV_STATS:
-			p = (char *) netdev +
+			p = (char *) net_stats +
 					ixgbe_gstrings_stats[i].stat_offset;
 			break;
 		case IXGBE_STATS:
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 4dd0510d7a9..9a099679532 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -98,10 +98,10 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 }
 
-static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev)
+static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev,
+						      struct rtnl_link_stats64 *stats)
 {
 	const struct pcpu_lstats __percpu *pcpu_lstats;
-	struct rtnl_link_stats64 *stats = &dev->stats64;
 	u64 bytes = 0;
 	u64 packets = 0;
 	u64 drops = 0;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index e6d626e7851..6112f149894 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -431,12 +431,12 @@ static void macvlan_uninit(struct net_device *dev)
 	free_percpu(vlan->rx_stats);
 }
 
-static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev)
+static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
+							 struct rtnl_link_stats64 *stats)
 {
-	struct rtnl_link_stats64 *stats = &dev->stats64;
 	struct macvlan_dev *vlan = netdev_priv(dev);
 
-	dev_txq_stats_fold(dev, &dev->stats);
+	dev_txq_stats_fold(dev, (struct net_device_stats *)stats);
 
 	if (vlan->rx_stats) {
 		struct macvlan_rx_stats *p, accum = {0};
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index 35b3f2922e5..ba674c5ca29 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -1533,11 +1533,10 @@ static int efx_net_stop(struct net_device *net_dev)
 }
 
 /* Context: process, dev_base_lock or RTNL held, non-blocking. */
-static struct rtnl_link_stats64 *efx_net_stats(struct net_device *net_dev)
+static struct rtnl_link_stats64 *efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 	struct efx_mac_stats *mac_stats = &efx->mac_stats;
-	struct rtnl_link_stats64 *stats = &net_dev->stats64;
 
 	spin_lock_bh(&efx->stats_lock);
 	efx->type->update_stats(efx);
diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c
index 3b8b0a06274..fd19d6ab97a 100644
--- a/drivers/net/sfc/ethtool.c
+++ b/drivers/net/sfc/ethtool.c
@@ -469,12 +469,13 @@ static void efx_ethtool_get_stats(struct net_device *net_dev,
 	struct efx_mac_stats *mac_stats = &efx->mac_stats;
 	struct efx_ethtool_stat *stat;
 	struct efx_channel *channel;
+	struct rtnl_link_stats64 temp;
 	int i;
 
 	EFX_BUG_ON_PARANOID(stats->n_stats != EFX_ETHTOOL_NUM_STATS);
 
 	/* Update MAC and NIC statistics */
-	dev_get_stats(net_dev);
+	dev_get_stats(net_dev, &temp);
 
 	/* Fill detailed statistics buffer */
 	for (i = 0; i < EFX_ETHTOOL_NUM_STATS; i++) {
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index 188bc8496a2..18dff43b8bd 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -355,12 +355,13 @@ static __inline__ int led_get_net_activity(void)
 	rcu_read_lock();
 	for_each_netdev_rcu(&init_net, dev) {
 	    const struct net_device_stats *stats;
+	    struct rtnl_link_stats64 temp;
 	    struct in_device *in_dev = __in_dev_get_rcu(dev);
 	    if (!in_dev || !in_dev->ifa_list)
 		continue;
 	    if (ipv4_is_loopback(in_dev->ifa_list->ifa_local))
 		continue;
-	    stats = dev_get_stats(dev);
+	    stats = dev_get_stats(dev, &temp);
 	    rx_total += stats->rx_packets;
 	    tx_total += stats->tx_packets;
 	}
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 44a07593de5..1a429ed6da9 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -2653,6 +2653,7 @@ static void fcoe_get_lesb(struct fc_lport *lport,
 	u32 lfc, vlfc, mdac;
 	struct fcoe_dev_stats *devst;
 	struct fcoe_fc_els_lesb *lesb;
+	struct rtnl_link_stats64 temp;
 	struct net_device *netdev = fcoe_netdev(lport);
 
 	lfc = 0;
@@ -2669,7 +2670,7 @@ static void fcoe_get_lesb(struct fc_lport *lport,
 	lesb->lesb_link_fail = htonl(lfc);
 	lesb->lesb_vlink_fail = htonl(vlfc);
 	lesb->lesb_miss_fka = htonl(mdac);
-	lesb->lesb_fcs_error = htonl(dev_get_stats(netdev)->rx_crc_errors);
+	lesb->lesb_fcs_error = htonl(dev_get_stats(netdev, &temp)->rx_crc_errors);
 }
 
 /**
diff --git a/drivers/staging/batman-adv/hard-interface.c b/drivers/staging/batman-adv/hard-interface.c
index 5ede9c25509..96c86c87301 100644
--- a/drivers/staging/batman-adv/hard-interface.c
+++ b/drivers/staging/batman-adv/hard-interface.c
@@ -440,6 +440,7 @@ int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	struct batman_packet *batman_packet;
 	struct batman_if *batman_if;
 	struct net_device_stats *stats;
+	struct rtnl_link_stats64 temp;
 	int ret;
 
 	skb = skb_share_check(skb, GFP_ATOMIC);
@@ -468,7 +469,7 @@ int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	if (batman_if->if_status != IF_ACTIVE)
 		goto err_free;
 
-	stats = (struct net_device_stats *)dev_get_stats(skb->dev);
+	stats = (struct net_device_stats *)dev_get_stats(skb->dev, &temp);
 	if (stats) {
 		stats->rx_packets++;
 		stats->rx_bytes += skb->len;
diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c
index fb69b01c8f3..020fa5a25fd 100644
--- a/drivers/usb/gadget/rndis.c
+++ b/drivers/usb/gadget/rndis.c
@@ -171,6 +171,7 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len,
 	int			i, count;
 	rndis_query_cmplt_type	*resp;
 	struct net_device	*net;
+	struct rtnl_link_stats64 temp;
 	const struct rtnl_link_stats64 *stats;
 
 	if (!r) return -ENOMEM;
@@ -194,7 +195,7 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len,
 	resp->InformationBufferOffset = cpu_to_le32 (16);
 
 	net = rndis_per_dev_params[configNr].dev;
-	stats = dev_get_stats(net);
+	stats = dev_get_stats(net, &temp);
 
 	switch (OID) {
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4d27368674d..60de65316fd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -666,7 +666,8 @@ struct netdev_rx_queue {
  *	Callback uses when the transmitter has not made any progress
  *	for dev->watchdog ticks.
  *
- * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev);
+ * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev
+ *                      struct rtnl_link_stats64 *storage);
  * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
  *	Called when a user wants to get the network device usage
  *	statistics. Drivers must do one of the following:
@@ -733,7 +734,8 @@ struct net_device_ops {
 						   struct neigh_parms *);
 	void			(*ndo_tx_timeout) (struct net_device *dev);
 
-	struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev);
+	struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
+						     struct rtnl_link_stats64 *storage);
 	struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
 
 	void			(*ndo_vlan_rx_register)(struct net_device *dev,
@@ -2139,8 +2141,10 @@ extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
 extern void		dev_load(struct net *net, const char *name);
 extern void		dev_mcast_init(void);
-extern const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev);
-extern void		dev_txq_stats_fold(const struct net_device *dev, struct net_device_stats *stats);
+extern const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
+						     struct rtnl_link_stats64 *storage);
+extern void		dev_txq_stats_fold(const struct net_device *dev,
+					   struct net_device_stats *stats);
 
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index c6456cb842f..7865a4ce525 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -803,11 +803,9 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev)
 	return dev_ethtool_get_flags(vlan->real_dev);
 }
 
-static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev)
+static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
-	struct rtnl_link_stats64 *stats = &dev->stats64;
-
-	dev_txq_stats_fold(dev, &dev->stats);
+	dev_txq_stats_fold(dev, (struct net_device_stats *)stats);
 
 	if (vlan_dev_info(dev)->vlan_rx_stats) {
 		struct vlan_rx_stats *p, accum = {0};
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index df56f5ce887..80e280f5668 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -278,6 +278,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 {
 	struct net_device *vlandev = (struct net_device *) seq->private;
 	const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
+	struct rtnl_link_stats64 temp;
 	const struct rtnl_link_stats64 *stats;
 	static const char fmt[] = "%30s %12lu\n";
 	static const char fmt64[] = "%30s %12llu\n";
@@ -286,7 +287,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	if (!is_vlan_dev(vlandev))
 		return 0;
 
-	stats = dev_get_stats(vlandev);
+	stats = dev_get_stats(vlandev, &temp);
 	seq_printf(seq,
 		   "%s  VID: %d	 REORDER_HDR: %i  dev->priv_flags: %hx\n",
 		   vlandev->name, dev_info->vlan_id,
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index edf639e9628..075c435ad22 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -98,10 +98,10 @@ static int br_dev_stop(struct net_device *dev)
 	return 0;
 }
 
-static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev)
+static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev,
+						struct rtnl_link_stats64 *stats)
 {
 	struct net_bridge *br = netdev_priv(dev);
-	struct rtnl_link_stats64 *stats = &dev->stats64;
 	struct br_cpu_netstats tmp, sum = { 0 };
 	unsigned int cpu;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 93b8929fa21..92482d7a87a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3703,7 +3703,8 @@ void dev_seq_stop(struct seq_file *seq, void *v)
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
-	const struct rtnl_link_stats64 *stats = dev_get_stats(dev);
+	struct rtnl_link_stats64 temp;
+	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
 
 	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
 		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
@@ -5281,23 +5282,29 @@ EXPORT_SYMBOL(dev_txq_stats_fold);
 /**
  *	dev_get_stats	- get network device statistics
  *	@dev: device to get statistics from
+ *	@storage: place to store stats
  *
  *	Get network statistics from device. The device driver may provide
  *	its own method by setting dev->netdev_ops->get_stats64 or
  *	dev->netdev_ops->get_stats; otherwise the internal statistics
  *	structure is used.
  */
-const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev)
+const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
+					      struct rtnl_link_stats64 *storage)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 
-	if (ops->ndo_get_stats64)
-		return ops->ndo_get_stats64(dev);
-	if (ops->ndo_get_stats)
-		return (struct rtnl_link_stats64 *)ops->ndo_get_stats(dev);
-
-	dev_txq_stats_fold(dev, &dev->stats);
-	return &dev->stats64;
+	if (ops->ndo_get_stats64) {
+		memset(storage, 0, sizeof(*storage));
+		return ops->ndo_get_stats64(dev, storage);
+	}
+	if (ops->ndo_get_stats) {
+		memcpy(storage, ops->ndo_get_stats(dev), sizeof(*storage));
+		return storage;
+	}
+	memcpy(storage, &dev->stats, sizeof(*storage));
+	dev_txq_stats_fold(dev, (struct net_device_stats *)storage);
+	return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index ea3bb4c3b87..914f42b0f03 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -330,7 +330,9 @@ static ssize_t netstat_show(const struct device *d,
 
 	read_lock(&dev_base_lock);
 	if (dev_isalive(dev)) {
-		const struct rtnl_link_stats64 *stats = dev_get_stats(dev);
+		struct rtnl_link_stats64 temp;
+		const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
+
 		ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
 	}
 	read_unlock(&dev_base_lock);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e645778e9b7..5e773ea2201 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -791,6 +791,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
+	struct rtnl_link_stats64 temp;
 	const struct rtnl_link_stats64 *stats;
 	struct nlattr *attr;
 
@@ -847,7 +848,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (attr == NULL)
 		goto nla_put_failure;
 
-	stats = dev_get_stats(dev);
+	stats = dev_get_stats(dev, &temp);
 	copy_rtnl_link_stats(nla_data(attr), stats);
 
 	attr = nla_reserve(skb, IFLA_STATS64,
-- 
cgit v1.2.3-70-g09d2


From 250b2b6dd421c9f8844a867d2ac06e0661e0ad93 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Mon, 21 Jun 2010 23:24:35 +0200
Subject: firewire: cdev: fix fw_cdev_event_bus_reset.bm_node_id

Fix an obscure ABI feature that is a bit of a hassle to implement.
However, somebody put it into the ABI, so let's fill in a sensible
value there.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-card.c     | 12 +++++++++---
 drivers/firewire/core-cdev.c     |  2 +-
 drivers/firewire/core-topology.c |  1 +
 include/linux/firewire-cdev.h    |  5 +++++
 include/linux/firewire.h         |  1 +
 5 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index 11fc81500f8..6c316cfe70c 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -239,7 +239,7 @@ static void fw_card_bm_work(struct work_struct *work)
 	struct fw_card *card = container_of(work, struct fw_card, work.work);
 	struct fw_device *root_device;
 	struct fw_node *root_node;
-	int root_id, new_root_id, irm_id, local_id;
+	int root_id, new_root_id, irm_id, bm_id, local_id;
 	int gap_count, generation, grace, rcode;
 	bool do_reset = false;
 	bool root_device_is_running;
@@ -301,9 +301,15 @@ static void fw_card_bm_work(struct work_struct *work)
 			/* Another bus reset, BM work has been rescheduled. */
 			goto out;
 
-		if (rcode == RCODE_COMPLETE &&
-		    card->bm_transaction_data[0] != cpu_to_be32(0x3f)) {
+		bm_id = be32_to_cpu(card->bm_transaction_data[0]);
 
+		spin_lock_irq(&card->lock);
+		if (rcode == RCODE_COMPLETE && generation == card->generation)
+			card->bm_node_id =
+			    bm_id == 0x3f ? local_id : 0xffc0 | bm_id;
+		spin_unlock_irq(&card->lock);
+
+		if (rcode == RCODE_COMPLETE && bm_id != 0x3f) {
 			/* Somebody else is BM.  Only act as IRM. */
 			if (local_id == irm_id)
 				allocate_broadcast_channel(card, generation);
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 9b8df203915..d8ac0ce2d6b 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -318,7 +318,7 @@ static void fill_bus_reset_event(struct fw_cdev_event_bus_reset *event,
 	event->generation    = client->device->generation;
 	event->node_id       = client->device->node_id;
 	event->local_node_id = card->local_node->node_id;
-	event->bm_node_id    = 0; /* FIXME: We don't track the BM. */
+	event->bm_node_id    = card->bm_node_id;
 	event->irm_node_id   = card->irm_node->node_id;
 	event->root_node_id  = card->root_node->node_id;
 
diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c
index 56e908ba43f..88d5133ae70 100644
--- a/drivers/firewire/core-topology.c
+++ b/drivers/firewire/core-topology.c
@@ -552,6 +552,7 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation,
 	smp_wmb();
 	card->generation = generation;
 	card->reset_jiffies = jiffies;
+	card->bm_node_id  = 0xffff;
 	card->bm_abdicate = bm_abdicate;
 	fw_schedule_bm_work(card, 0);
 
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 8b9b2737321..d31022b05bd 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -66,6 +66,10 @@ struct fw_cdev_event_common {
  * This event is sent when the bus the device belongs to goes through a bus
  * reset.  It provides information about the new bus configuration, such as
  * new node ID for this device, new root ID, and others.
+ *
+ * If @bm_node_id is 0xffff right after bus reset it can be reread by an
+ * %FW_CDEV_IOC_GET_INFO ioctl after bus manager selection was finished.
+ * Kernels with ABI version < 4 do not set @bm_node_id.
  */
 struct fw_cdev_event_bus_reset {
 	__u64 closure;
@@ -348,6 +352,7 @@ union fw_cdev_event {
  *  3  (2.6.34)  - made &fw_cdev_get_cycle_timer reliable
  *               - added %FW_CDEV_IOC_GET_CYCLE_TIMER2
  *  4  (2.6.36)  - added %FW_CDEV_EVENT_REQUEST2
+ *               - implemented &fw_cdev_event_bus_reset.bm_node_id
  */
 #define FW_CDEV_VERSION 3 /* Meaningless; don't use this macro. */
 
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index e44b502c834..db30a752a87 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -119,6 +119,7 @@ struct fw_card {
 	int bm_retries;
 	int bm_generation;
 	__be32 bm_transaction_data[2];
+	int bm_node_id;
 	bool bm_abdicate;
 
 	bool priority_budget_implemented;	/* controller feature */
-- 
cgit v1.2.3-70-g09d2


From 7313bb8f3dd6e28bcf9c42adfd54a5cf9a4949e0 Mon Sep 17 00:00:00 2001
From: Karl Hiramoto <karl@hiramoto.org>
Date: Thu, 8 Jul 2010 20:55:30 +0000
Subject: atm: propagate signal changes via notifier

Add notifier chain for changes in atm_dev.

Clients like br2684 will call register_atmdevice_notifier() to be notified of
changes. Drivers will call atm_dev_signal_change() to notify clients like
br2684 of the change.

On DSL and ATM devices it's useful to know whether you have a carrier
signal. netdevice LOWER_UP changes can be propagated to userspace via netlink
monitor.

Signed-off-by: Karl Hiramoto <karl@hiramoto.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atmdev.h | 17 +++++++++++++++++
 net/atm/common.c       | 30 ++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

(limited to 'include')

diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 817b23705c9..f6481daf6e5 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -431,6 +431,14 @@ struct atm_dev *atm_dev_register(const char *type,const struct atmdev_ops *ops,
     int number,unsigned long *flags); /* number == -1: pick first available */
 struct atm_dev *atm_dev_lookup(int number);
 void atm_dev_deregister(struct atm_dev *dev);
+
+/* atm_dev_signal_change
+ *
+ * Propagate lower layer signal change in atm_dev->signal to netdevice.
+ * The event will be sent via a notifier call chain.
+ */
+void atm_dev_signal_change(struct atm_dev *dev, char signal);
+
 void vcc_insert_socket(struct sock *sk);
 
 
@@ -510,6 +518,15 @@ void register_atm_ioctl(struct atm_ioctl *);
  */
 void deregister_atm_ioctl(struct atm_ioctl *);
 
+
+/* register_atmdevice_notifier - register atm_dev notify events
+ *
+ * Clients like br2684 will register notify events
+ * Currently we notify of signal found/lost
+ */
+int register_atmdevice_notifier(struct notifier_block *nb);
+void unregister_atmdevice_notifier(struct notifier_block *nb);
+
 #endif /* __KERNEL__ */
 
 #endif
diff --git a/net/atm/common.c b/net/atm/common.c
index b43feb1a399..940404a73b3 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -37,6 +37,8 @@ EXPORT_SYMBOL(vcc_hash);
 DEFINE_RWLOCK(vcc_sklist_lock);
 EXPORT_SYMBOL(vcc_sklist_lock);
 
+static ATOMIC_NOTIFIER_HEAD(atm_dev_notify_chain);
+
 static void __vcc_insert_socket(struct sock *sk)
 {
 	struct atm_vcc *vcc = atm_sk(sk);
@@ -212,6 +214,22 @@ void vcc_release_async(struct atm_vcc *vcc, int reply)
 }
 EXPORT_SYMBOL(vcc_release_async);
 
+void atm_dev_signal_change(struct atm_dev *dev, char signal)
+{
+	pr_debug("%s signal=%d dev=%p number=%d dev->signal=%d\n",
+		__func__, signal, dev, dev->number, dev->signal);
+
+	/* atm driver sending invalid signal */
+	WARN_ON(signal < ATM_PHY_SIG_LOST || signal > ATM_PHY_SIG_FOUND);
+
+	if (dev->signal == signal)
+		return; /* no change */
+
+	dev->signal = signal;
+
+	atomic_notifier_call_chain(&atm_dev_notify_chain, signal, dev);
+}
+EXPORT_SYMBOL(atm_dev_signal_change);
 
 void atm_dev_release_vccs(struct atm_dev *dev)
 {
@@ -781,6 +799,18 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
 	return vcc->dev->ops->getsockopt(vcc, level, optname, optval, len);
 }
 
+int register_atmdevice_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&atm_dev_notify_chain, nb);
+}
+EXPORT_SYMBOL_GPL(register_atmdevice_notifier);
+
+void unregister_atmdevice_notifier(struct notifier_block *nb)
+{
+	atomic_notifier_chain_unregister(&atm_dev_notify_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_atmdevice_notifier);
+
 static int __init atm_init(void)
 {
 	int error;
-- 
cgit v1.2.3-70-g09d2


From ffa71f33a820d1ab3f2fc5723819ac60fb76080b Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Fri, 18 Jun 2010 12:22:40 +0900
Subject: x86, ioremap: Fix incorrect physical address handling in PAE mode

The current x86 ioremap() doesn't properly handle physical addresses above
32 bits in X86_32 PAE mode. When a physical address above 32 bits is
passed to ioremap(), the upper 32 bits of the physical address are
wrongly cleared. Due to this bug, ioremap() can map the wrong address into
the linear address space.

In my case, a 64-bit MMIO region was assigned to a PCI device (ioat
device) on my system. Because of this ioremap() bug, the wrong physical
address (instead of the MMIO region) was mapped into the linear address space.
As a result, loading the ioatdma driver caused unexpected behavior
(kernel panic, kernel hang, ...).

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
LKML-Reference: <4C1AE680.7090408@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/mm/ioremap.c   | 12 +++++-------
 include/linux/io.h      |  4 ++--
 include/linux/vmalloc.h |  2 +-
 lib/ioremap.c           | 10 +++++-----
 mm/vmalloc.c            |  2 +-
 5 files changed, 14 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 12e4d2d3c11..754cb4cbce6 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -62,8 +62,8 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
 static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 		unsigned long size, unsigned long prot_val, void *caller)
 {
-	unsigned long pfn, offset, vaddr;
-	resource_size_t last_addr;
+	unsigned long offset, vaddr;
+	resource_size_t pfn, last_pfn, last_addr;
 	const resource_size_t unaligned_phys_addr = phys_addr;
 	const unsigned long unaligned_size = size;
 	struct vm_struct *area;
@@ -100,10 +100,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
-	for (pfn = phys_addr >> PAGE_SHIFT;
-				(pfn << PAGE_SHIFT) < (last_addr & PAGE_MASK);
-				pfn++) {
-
+	last_pfn = last_addr >> PAGE_SHIFT;
+	for (pfn = phys_addr >> PAGE_SHIFT; pfn < last_pfn; pfn++) {
 		int is_ram = page_is_ram(pfn);
 
 		if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
@@ -115,7 +113,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	 * Mappings have to be page-aligned
 	 */
 	offset = phys_addr & ~PAGE_MASK;
-	phys_addr &= PAGE_MASK;
+	phys_addr &= PHYSICAL_PAGE_MASK;
 	size = PAGE_ALIGN(last_addr+1) - phys_addr;
 
 	retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
diff --git a/include/linux/io.h b/include/linux/io.h
index 6c7f0ba0d5f..7fd2d2138bf 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -29,10 +29,10 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count);
 
 #ifdef CONFIG_MMU
 int ioremap_page_range(unsigned long addr, unsigned long end,
-		       unsigned long phys_addr, pgprot_t prot);
+		       phys_addr_t phys_addr, pgprot_t prot);
 #else
 static inline int ioremap_page_range(unsigned long addr, unsigned long end,
-				     unsigned long phys_addr, pgprot_t prot)
+				     phys_addr_t phys_addr, pgprot_t prot)
 {
 	return 0;
 }
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 227c2a585e4..de05e96e0a7 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -30,7 +30,7 @@ struct vm_struct {
 	unsigned long		flags;
 	struct page		**pages;
 	unsigned int		nr_pages;
-	unsigned long		phys_addr;
+	phys_addr_t		phys_addr;
 	void			*caller;
 };
 
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 14c6078f17a..5730ecd3eb6 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -13,10 +13,10 @@
 #include <asm/pgtable.h>
 
 static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
-		unsigned long end, unsigned long phys_addr, pgprot_t prot)
+		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
 	pte_t *pte;
-	unsigned long pfn;
+	u64 pfn;
 
 	pfn = phys_addr >> PAGE_SHIFT;
 	pte = pte_alloc_kernel(pmd, addr);
@@ -31,7 +31,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
 }
 
 static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
-		unsigned long end, unsigned long phys_addr, pgprot_t prot)
+		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -49,7 +49,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 }
 
 static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
-		unsigned long end, unsigned long phys_addr, pgprot_t prot)
+		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -67,7 +67,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
 }
 
 int ioremap_page_range(unsigned long addr,
-		       unsigned long end, unsigned long phys_addr, pgprot_t prot)
+		       unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
 	pgd_t *pgd;
 	unsigned long start;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ae007462b7f..b7e314b1009 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2403,7 +2403,7 @@ static int s_show(struct seq_file *m, void *p)
 		seq_printf(m, " pages=%d", v->nr_pages);
 
 	if (v->phys_addr)
-		seq_printf(m, " phys=%lx", v->phys_addr);
+		seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
 
 	if (v->flags & VM_IOREMAP)
 		seq_printf(m, " ioremap");
-- 
cgit v1.2.3-70-g09d2


From 3cfde79c6c7c8002375c4a8e5be7f602fbb9675d Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 9 Jul 2010 09:11:52 +0000
Subject: net: Get rid of rtnl_link_stats64 / net_device_stats union

In commit be1f3c2c027cc5ad735df6a45a542ed1db7ec48b "net: Enable 64-bit
net device statistics on 32-bit architectures" I redefined struct
net_device_stats so that it could be used in a union with struct
rtnl_link_stats64, avoiding the need for explicit copying or
conversion between the two.  However, this is unsafe because there is
no locking required and no lock consistently held around calls to
dev_get_stats() and use of the statistics structure it returns.

In commit 28172739f0a276eb8d6ca917b3974c2edb036da3 "net: fix 64 bit
counters on 32 bit arches" Eric Dumazet dealt with that problem by
requiring callers of dev_get_stats() to provide storage for the
result.  This means that the net_device::stats64 field and the padding
in struct net_device_stats are now redundant, so remove them.

Update the comment on net_device_ops::ndo_get_stats64 to reflect its
new usage.

Change dev_txq_stats_fold() to use struct rtnl_link_stats64, since
that is what all its callers are really using and it is no longer
going to be compatible with struct net_device_stats.

Eric Dumazet suggested the separate function for the structure
conversion.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c     |  2 +-
 include/linux/netdevice.h | 70 +++++++++++++++++++----------------------------
 net/8021q/vlan_dev.c      |  2 +-
 net/core/dev.c            | 31 +++++++++++++++++----
 4 files changed, 56 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 6112f149894..1b28aaec0a5 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -436,7 +436,7 @@ static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 
-	dev_txq_stats_fold(dev, (struct net_device_stats *)stats);
+	dev_txq_stats_fold(dev, stats);
 
 	if (vlan->rx_stats) {
 		struct macvlan_rx_stats *p, accum = {0};
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8018f6bf305..17e95e37aed 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -162,42 +162,32 @@ static inline bool dev_xmit_complete(int rc)
 /*
  *	Old network device statistics. Fields are native words
  *	(unsigned long) so they can be read and written atomically.
- *	Each field is padded to 64 bits for compatibility with
- *	rtnl_link_stats64.
  */
 
-#if BITS_PER_LONG == 64
-#define NET_DEVICE_STATS_DEFINE(name)	unsigned long name
-#elif defined(__LITTLE_ENDIAN)
-#define NET_DEVICE_STATS_DEFINE(name)	unsigned long name, pad_ ## name
-#else
-#define NET_DEVICE_STATS_DEFINE(name)	unsigned long pad_ ## name, name
-#endif
-
 struct net_device_stats {
-	NET_DEVICE_STATS_DEFINE(rx_packets);
-	NET_DEVICE_STATS_DEFINE(tx_packets);
-	NET_DEVICE_STATS_DEFINE(rx_bytes);
-	NET_DEVICE_STATS_DEFINE(tx_bytes);
-	NET_DEVICE_STATS_DEFINE(rx_errors);
-	NET_DEVICE_STATS_DEFINE(tx_errors);
-	NET_DEVICE_STATS_DEFINE(rx_dropped);
-	NET_DEVICE_STATS_DEFINE(tx_dropped);
-	NET_DEVICE_STATS_DEFINE(multicast);
-	NET_DEVICE_STATS_DEFINE(collisions);
-	NET_DEVICE_STATS_DEFINE(rx_length_errors);
-	NET_DEVICE_STATS_DEFINE(rx_over_errors);
-	NET_DEVICE_STATS_DEFINE(rx_crc_errors);
-	NET_DEVICE_STATS_DEFINE(rx_frame_errors);
-	NET_DEVICE_STATS_DEFINE(rx_fifo_errors);
-	NET_DEVICE_STATS_DEFINE(rx_missed_errors);
-	NET_DEVICE_STATS_DEFINE(tx_aborted_errors);
-	NET_DEVICE_STATS_DEFINE(tx_carrier_errors);
-	NET_DEVICE_STATS_DEFINE(tx_fifo_errors);
-	NET_DEVICE_STATS_DEFINE(tx_heartbeat_errors);
-	NET_DEVICE_STATS_DEFINE(tx_window_errors);
-	NET_DEVICE_STATS_DEFINE(rx_compressed);
-	NET_DEVICE_STATS_DEFINE(tx_compressed);
+	unsigned long	rx_packets;
+	unsigned long	tx_packets;
+	unsigned long	rx_bytes;
+	unsigned long	tx_bytes;
+	unsigned long	rx_errors;
+	unsigned long	tx_errors;
+	unsigned long	rx_dropped;
+	unsigned long	tx_dropped;
+	unsigned long	multicast;
+	unsigned long	collisions;
+	unsigned long	rx_length_errors;
+	unsigned long	rx_over_errors;
+	unsigned long	rx_crc_errors;
+	unsigned long	rx_frame_errors;
+	unsigned long	rx_fifo_errors;
+	unsigned long	rx_missed_errors;
+	unsigned long	tx_aborted_errors;
+	unsigned long	tx_carrier_errors;
+	unsigned long	tx_fifo_errors;
+	unsigned long	tx_heartbeat_errors;
+	unsigned long	tx_window_errors;
+	unsigned long	rx_compressed;
+	unsigned long	tx_compressed;
 };
 
 #endif  /*  __KERNEL__  */
@@ -666,14 +656,13 @@ struct netdev_rx_queue {
  *	Callback uses when the transmitter has not made any progress
  *	for dev->watchdog ticks.
  *
- * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev
+ * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
  *                      struct rtnl_link_stats64 *storage);
  * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
  *	Called when a user wants to get the network device usage
  *	statistics. Drivers must do one of the following:
- *	1. Define @ndo_get_stats64 to update a rtnl_link_stats64 structure
- *	   (which should normally be dev->stats64) and return a ponter to
- *	   it. The structure must not be changed asynchronously.
+ *	1. Define @ndo_get_stats64 to fill in a zero-initialised
+ *	   rtnl_link_stats64 structure passed by the caller.
  *	2. Define @ndo_get_stats to update a net_device_stats structure
  *	   (which should normally be dev->stats) and return a pointer to
  *	   it. The structure may be changed asynchronously only if each
@@ -888,10 +877,7 @@ struct net_device {
 	int			ifindex;
 	int			iflink;
 
-	union {
-		struct rtnl_link_stats64 stats64;
-		struct net_device_stats stats;
-	};
+	struct net_device_stats	stats;
 
 #ifdef CONFIG_WIRELESS_EXT
 	/* List of functions to handle Wireless Extensions (instead of ioctl).
@@ -2147,7 +2133,7 @@ extern void		dev_mcast_init(void);
 extern const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 						     struct rtnl_link_stats64 *storage);
 extern void		dev_txq_stats_fold(const struct net_device *dev,
-					   struct net_device_stats *stats);
+					   struct rtnl_link_stats64 *stats);
 
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index a1b8171cfa7..7cb285f96b9 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -805,7 +805,7 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev)
 
 static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
-	dev_txq_stats_fold(dev, (struct net_device_stats *)stats);
+	dev_txq_stats_fold(dev, stats);
 
 	if (vlan_dev_info(dev)->vlan_rx_stats) {
 		struct vlan_rx_stats *p, accum = {0};
diff --git a/net/core/dev.c b/net/core/dev.c
index eb4201cf9c8..79ee26ef509 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5274,10 +5274,10 @@ void netdev_run_todo(void)
 /**
  *	dev_txq_stats_fold - fold tx_queues stats
  *	@dev: device to get statistics from
- *	@stats: struct net_device_stats to hold results
+ *	@stats: struct rtnl_link_stats64 to hold results
  */
 void dev_txq_stats_fold(const struct net_device *dev,
-			struct net_device_stats *stats)
+			struct rtnl_link_stats64 *stats)
 {
 	unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
 	unsigned int i;
@@ -5297,6 +5297,27 @@ void dev_txq_stats_fold(const struct net_device *dev,
 }
 EXPORT_SYMBOL(dev_txq_stats_fold);
 
+/* Convert net_device_stats to rtnl_link_stats64.  They have the same
+ * fields in the same order, with only the type differing.
+ */
+static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
+				    const struct net_device_stats *netdev_stats)
+{
+#if BITS_PER_LONG == 64
+        BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
+        memcpy(stats64, netdev_stats, sizeof(*stats64));
+#else
+	size_t i, n = sizeof(*stats64) / sizeof(u64);
+	const unsigned long *src = (const unsigned long *)netdev_stats;
+	u64 *dst = (u64 *)stats64;
+
+	BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
+		     sizeof(*stats64) / sizeof(u64));
+	for (i = 0; i < n; i++)
+		dst[i] = src[i];
+#endif
+}
+
 /**
  *	dev_get_stats	- get network device statistics
  *	@dev: device to get statistics from
@@ -5317,11 +5338,11 @@ const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 		return ops->ndo_get_stats64(dev, storage);
 	}
 	if (ops->ndo_get_stats) {
-		memcpy(storage, ops->ndo_get_stats(dev), sizeof(*storage));
+		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
 		return storage;
 	}
-	memcpy(storage, &dev->stats, sizeof(*storage));
-	dev_txq_stats_fold(dev, (struct net_device_stats *)storage);
+	netdev_stats_to_stats64(storage, &dev->stats);
+	dev_txq_stats_fold(dev, storage);
 	return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
-- 
cgit v1.2.3-70-g09d2


From d77535162e736c47978d5c01469c56e1781dc91b Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 9 Jul 2010 09:12:41 +0000
Subject: net: Document that dev_get_stats() returns the given pointer

Document that dev_get_stats() returns the same stats pointer it was
given.  Remove const qualification from the returned pointer since the
caller may do what it likes with that structure.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 ++--
 net/core/dev.c            | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 17e95e37aed..c4fedf00054 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2130,8 +2130,8 @@ extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
 extern void		dev_load(struct net *net, const char *name);
 extern void		dev_mcast_init(void);
-extern const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
-						     struct rtnl_link_stats64 *storage);
+extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
+					       struct rtnl_link_stats64 *storage);
 extern void		dev_txq_stats_fold(const struct net_device *dev,
 					   struct rtnl_link_stats64 *stats);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 79ee26ef509..e2b9fa2c917 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5323,13 +5323,13 @@ static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
  *	@dev: device to get statistics from
  *	@storage: place to store stats
  *
- *	Get network statistics from device. The device driver may provide
- *	its own method by setting dev->netdev_ops->get_stats64 or
- *	dev->netdev_ops->get_stats; otherwise the internal statistics
- *	structure is used.
+ *	Get network statistics from device. Return @storage.
+ *	The device driver may provide its own method by setting
+ *	dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
+ *	otherwise the internal statistics structure is used.
  */
-const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
-					      struct rtnl_link_stats64 *storage)
+struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
+					struct rtnl_link_stats64 *storage)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 
-- 
cgit v1.2.3-70-g09d2


From 69c8f52b3897f2faf8510ea7ede8fffabe26c531 Mon Sep 17 00:00:00 2001
From: "Justin P. Mattock" <justinmattock@gmail.com>
Date: Thu, 1 Jul 2010 14:28:27 -0700
Subject: fix #warning about using kernel headers in userspace

Move the preprocessor #warning message:
warning: #warning Attempt to use kernel headers from user space,
see http://kernelnewbies.org/KernelHeaders
from kernel.h to types.h.

Also fix the #warning message itself: the preprocessor was not able to read
the web address because it treated the "//" as the start of a comment.  Also
remove the extra #ifndef __KERNEL__ since it's already there.

Signed-off-by: Justin P. Mattock <justinmattock@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/kernel.h | 6 ------
 include/linux/types.h  | 5 ++++-
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 8317ec4b9f3..bd8501a8ca1 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -728,12 +728,6 @@ extern int do_sysinfo(struct sysinfo *info);
 
 #endif /* __KERNEL__ */
 
-#ifndef __EXPORTED_HEADERS__
-#ifndef __KERNEL__
-#warning Attempt to use kernel headers from user space, see http://kernelnewbies.org/KernelHeaders
-#endif /* __KERNEL__ */
-#endif /* __EXPORTED_HEADERS__ */
-
 #define SI_LOAD_SHIFT	16
 struct sysinfo {
 	long uptime;			/* Seconds since boot */
diff --git a/include/linux/types.h b/include/linux/types.h
index 23d237a075e..331d8baabcf 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -8,7 +8,10 @@
 
 #define DECLARE_BITMAP(name,bits) \
 	unsigned long name[BITS_TO_LONGS(bits)]
-
+#else
+#ifndef __EXPORTED_HEADERS__
+#warning "Attempt to use kernel headers from user space, see http://kernelnewbies.org/KernelHeaders"
+#endif /* __EXPORTED_HEADERS__ */
 #endif
 
 #include <linux/posix_types.h>
-- 
cgit v1.2.3-70-g09d2


From b27d63d8f8d34af57805f56005e217c150187531 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Thu, 1 Jul 2010 20:48:44 +0200
Subject: fix comment typos concerning "sequential"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/media/video/gspca/sunplus.c | 4 ++--
 include/linux/jffs2.h               | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/gspca/sunplus.c b/drivers/media/video/gspca/sunplus.c
index 0c786e00ebc..d0a133abe76 100644
--- a/drivers/media/video/gspca/sunplus.c
+++ b/drivers/media/video/gspca/sunplus.c
@@ -805,7 +805,7 @@ static int sd_init(struct gspca_dev *gspca_dev)
 			/* Set AE AWB Banding Type 3-> 50Hz 2-> 60Hz */
 			spca504A_acknowledged_command(gspca_dev, 0x24,
 							8, 3, 0x9e, 1);
-			/* Twice sequencial need status 0xff->0x9e->0x9d */
+			/* Twice sequential need status 0xff->0x9e->0x9d */
 			spca504A_acknowledged_command(gspca_dev, 0x24,
 							8, 3, 0x9e, 0);
 
@@ -880,7 +880,7 @@ static int sd_start(struct gspca_dev *gspca_dev)
 			/* Set AE AWB Banding Type 3-> 50Hz 2-> 60Hz */
 			spca504A_acknowledged_command(gspca_dev, 0x24,
 							8, 3, 0x9e, 1);
-			/* Twice sequencial need status 0xff->0x9e->0x9d */
+			/* Twice sequential need status 0xff->0x9e->0x9d */
 			spca504A_acknowledged_command(gspca_dev, 0x24,
 							8, 3, 0x9e, 0);
 			spca504A_acknowledged_command(gspca_dev, 0x24,
diff --git a/include/linux/jffs2.h b/include/linux/jffs2.h
index 0874ab59ffe..edb9231f189 100644
--- a/include/linux/jffs2.h
+++ b/include/linux/jffs2.h
@@ -185,7 +185,7 @@ struct jffs2_raw_xref
 	jint32_t hdr_crc;
 	jint32_t ino;		/* inode number */
 	jint32_t xid;		/* XATTR identifier number */
-	jint32_t xseqno;	/* xref sequencial number */
+	jint32_t xseqno;	/* xref sequential number */
 	jint32_t node_crc;
 } __attribute__((packed));
 
-- 
cgit v1.2.3-70-g09d2


From ab0cfb928a3839e21942a28a86ad88e56ea3b136 Mon Sep 17 00:00:00 2001
From: Suresh Jayaraman <sjayaraman@suse.de>
Date: Tue, 6 Jul 2010 18:12:39 +0530
Subject: fscache: fix a trivial typo in the comment

Signed-off-by: Suresh Jayaraman <sjayaraman@suse.de>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/fscache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 595ce49288b..1c0fc3e62f4 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -85,7 +85,7 @@ struct fscache_cookie_def {
 
 	/* get an index key
 	 * - should store the key data in the buffer
-	 * - should return the amount of amount stored
+	 * - should return the amount of data stored
 	 * - not permitted to return an error
 	 * - the netfs data from the cookie being used as the source is
 	 *   presented
-- 
cgit v1.2.3-70-g09d2


From 49a3df804bec09b8ee8196f79b81757e95cc6de4 Mon Sep 17 00:00:00 2001
From: Suresh Jayaraman <sjayaraman@suse.de>
Date: Tue, 6 Jul 2010 18:29:45 +0530
Subject: fscache: fix missing kerneldoc annotation

.. and make kerneldoc scripts happy.

Signed-off-by: Suresh Jayaraman <sjayaraman@suse.de>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/fscache.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 1c0fc3e62f4..ec0dad5ab90 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -454,6 +454,7 @@ int fscache_read_or_alloc_page(struct fscache_cookie *cookie,
  * @cookie: The cookie representing the cache object
  * @mapping: The netfs inode mapping to which the pages will be attached
  * @pages: A list of potential netfs pages to be filled
+ * @nr_pages: Number of pages to be read and/or allocated
  * @end_io_func: The callback to invoke when and if each page is filled
  * @context: An arbitrary piece of data to pass on to end_io_func()
  * @gfp: The conditions under which memory allocation should be made
-- 
cgit v1.2.3-70-g09d2


From a1d75f258230b75d46aecdf28b2e732413028863 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 12 Jul 2010 14:41:40 +0200
Subject: fuse: add store request

Userspace filesystem can request data to be stored in the inode's
mapping.  This request is synchronous and has no reply.  If the write
to the fuse device returns an error then the store request was not
fully completed (but may have updated some pages).

If the stored data overflows the current file size, then the size is
extended, similarly to a write(2) on the filesystem.

Pages which have been completely stored are marked uptodate.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dev.c        | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/file.c       |  2 +-
 fs/fuse/fuse_i.h     |  2 ++
 include/linux/fuse.h | 13 +++++++-
 4 files changed, 103 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 7eb80d33c4f..8e01c865586 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1231,6 +1231,91 @@ err:
 	return err;
 }
 
+static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
+			     struct fuse_copy_state *cs)
+{
+	struct fuse_notify_store_out outarg;
+	struct inode *inode;
+	struct address_space *mapping;
+	u64 nodeid;
+	int err;
+	pgoff_t index;
+	unsigned int offset;
+	unsigned int num;
+	loff_t file_size;
+	loff_t end;
+
+	err = -EINVAL;
+	if (size < sizeof(outarg))
+		goto out_finish;
+
+	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+	if (err)
+		goto out_finish;
+
+	err = -EINVAL;
+	if (size - sizeof(outarg) != outarg.size)
+		goto out_finish;
+
+	nodeid = outarg.nodeid;
+
+	down_read(&fc->killsb);
+
+	err = -ENOENT;
+	if (!fc->sb)
+		goto out_up_killsb;
+
+	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+	if (!inode)
+		goto out_up_killsb;
+
+	mapping = inode->i_mapping;
+	index = outarg.offset >> PAGE_CACHE_SHIFT;
+	offset = outarg.offset & ~PAGE_CACHE_MASK;
+	file_size = i_size_read(inode);
+	end = outarg.offset + outarg.size;
+	if (end > file_size) {
+		file_size = end;
+		fuse_write_update_size(inode, file_size);
+	}
+
+	num = outarg.size;
+	while (num) {
+		struct page *page;
+		unsigned int this_num;
+
+		err = -ENOMEM;
+		page = find_or_create_page(mapping, index,
+					   mapping_gfp_mask(mapping));
+		if (!page)
+			goto out_iput;
+
+		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
+		err = fuse_copy_page(cs, &page, offset, this_num, 0);
+		if (!err && offset == 0 && (num != 0 || file_size == end))
+			SetPageUptodate(page);
+		unlock_page(page);
+		page_cache_release(page);
+
+		if (err)
+			goto out_iput;
+
+		num -= this_num;
+		offset = 0;
+		index++;
+	}
+
+	err = 0;
+
+out_iput:
+	iput(inode);
+out_up_killsb:
+	up_read(&fc->killsb);
+out_finish:
+	fuse_copy_finish(cs);
+	return err;
+}
+
 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
 		       unsigned int size, struct fuse_copy_state *cs)
 {
@@ -1244,6 +1329,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
 	case FUSE_NOTIFY_INVAL_ENTRY:
 		return fuse_notify_inval_entry(fc, size, cs);
 
+	case FUSE_NOTIFY_STORE:
+		return fuse_notify_store(fc, size, cs);
+
 	default:
 		fuse_copy_finish(cs);
 		return -EINVAL;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ada0adeb3bb..147c1f71bdb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -706,7 +706,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
 	return 0;
 }
 
-static void fuse_write_update_size(struct inode *inode, loff_t pos)
+void fuse_write_update_size(struct inode *inode, loff_t pos)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 8f309f04064..61267d8d527 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -748,4 +748,6 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 unsigned fuse_file_poll(struct file *file, poll_table *wait);
 int fuse_dev_release(struct inode *inode, struct file *file);
 
+void fuse_write_update_size(struct inode *inode, loff_t pos);
+
 #endif /* _FS_FUSE_I_H */
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 88e0eb59691..a90bd49834a 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -37,6 +37,9 @@
  *
  * 7.14
  *  - add splice support to fuse device
+ *
+ * 7.15
+ *  - add store notify
  */
 
 #ifndef _LINUX_FUSE_H
@@ -68,7 +71,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 14
+#define FUSE_KERNEL_MINOR_VERSION 15
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -260,6 +263,7 @@ enum fuse_notify_code {
 	FUSE_NOTIFY_POLL   = 1,
 	FUSE_NOTIFY_INVAL_INODE = 2,
 	FUSE_NOTIFY_INVAL_ENTRY = 3,
+	FUSE_NOTIFY_STORE = 4,
 	FUSE_NOTIFY_CODE_MAX,
 };
 
@@ -568,4 +572,11 @@ struct fuse_notify_inval_entry_out {
 	__u32	padding;
 };
 
+struct fuse_notify_store_out {
+	__u64	nodeid;
+	__u64	offset;
+	__u32	size;
+	__u32	padding;
+};
+
 #endif /* _LINUX_FUSE_H */
-- 
cgit v1.2.3-70-g09d2


From 2d45ba381a74a743eeaa2b06c7c5c0d2bf73ba1a Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 12 Jul 2010 14:41:40 +0200
Subject: fuse: add retrieve request

Userspace filesystem can request data to be retrieved from the inode's
mapping.  This request is synchronous and the retrieved data is queued
as a new request.  If the write to the fuse device returns an error
then the retrieve request was not completed and a reply will not be
sent.

Only present pages are returned in the retrieve reply.  Retrieving
stops when it finds a non-present page and only data prior to that is
returned.

This request doesn't change the dirty state of pages.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dev.c        | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/fuse/fuse_i.h     |   1 +
 include/linux/fuse.h |  21 +++++++++
 3 files changed, 152 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8e01c865586..69ad053ffd7 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -239,7 +239,6 @@ static u64 fuse_get_unique(struct fuse_conn *fc)
 
 static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
 {
-	req->in.h.unique = fuse_get_unique(fc);
 	req->in.h.len = sizeof(struct fuse_in_header) +
 		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
 	list_add_tail(&req->list, &fc->pending);
@@ -261,6 +260,7 @@ static void flush_bg_queue(struct fuse_conn *fc)
 		req = list_entry(fc->bg_queue.next, struct fuse_req, list);
 		list_del(&req->list);
 		fc->active_background++;
+		req->in.h.unique = fuse_get_unique(fc);
 		queue_request(fc, req);
 	}
 }
@@ -398,6 +398,7 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 	else if (fc->conn_error)
 		req->out.h.error = -ECONNREFUSED;
 	else {
+		req->in.h.unique = fuse_get_unique(fc);
 		queue_request(fc, req);
 		/* acquire extra reference, since request is still needed
 		   after request_end() */
@@ -450,6 +451,23 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
 }
 EXPORT_SYMBOL_GPL(fuse_request_send_background);
 
+static int fuse_request_send_notify_reply(struct fuse_conn *fc,
+					  struct fuse_req *req, u64 unique)
+{
+	int err = -ENODEV;
+
+	req->isreply = 0;
+	req->in.h.unique = unique;
+	spin_lock(&fc->lock);
+	if (fc->connected) {
+		queue_request(fc, req);
+		err = 0;
+	}
+	spin_unlock(&fc->lock);
+
+	return err;
+}
+
 /*
  * Called under fc->lock
  *
@@ -1316,6 +1334,114 @@ out_finish:
 	return err;
 }
 
+static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+	int i;
+
+	for (i = 0; i < req->num_pages; i++) {
+		struct page *page = req->pages[i];
+		page_cache_release(page);
+	}
+}
+
+static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
+			 struct fuse_notify_retrieve_out *outarg)
+{
+	int err;
+	struct address_space *mapping = inode->i_mapping;
+	struct fuse_req *req;
+	pgoff_t index;
+	loff_t file_size;
+	unsigned int num;
+	unsigned int offset;
+	size_t total_len;
+
+	req = fuse_get_req(fc);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	offset = outarg->offset & ~PAGE_CACHE_MASK;
+
+	req->in.h.opcode = FUSE_NOTIFY_REPLY;
+	req->in.h.nodeid = outarg->nodeid;
+	req->in.numargs = 2;
+	req->in.argpages = 1;
+	req->page_offset = offset;
+	req->end = fuse_retrieve_end;
+
+	index = outarg->offset >> PAGE_CACHE_SHIFT;
+	file_size = i_size_read(inode);
+	num = outarg->size;
+	if (outarg->offset > file_size)
+		num = 0;
+	else if (outarg->offset + num > file_size)
+		num = file_size - outarg->offset;
+
+	while (num) {
+		struct page *page;
+		unsigned int this_num;
+
+		page = find_get_page(mapping, index);
+		if (!page)
+			break;
+
+		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
+		req->pages[req->num_pages] = page;
+		req->num_pages++;
+
+		num -= this_num;
+		total_len += this_num;
+	}
+	req->misc.retrieve_in.offset = outarg->offset;
+	req->misc.retrieve_in.size = total_len;
+	req->in.args[0].size = sizeof(req->misc.retrieve_in);
+	req->in.args[0].value = &req->misc.retrieve_in;
+	req->in.args[1].size = total_len;
+
+	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
+	if (err)
+		fuse_retrieve_end(fc, req);
+
+	return err;
+}
+
+static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
+				struct fuse_copy_state *cs)
+{
+	struct fuse_notify_retrieve_out outarg;
+	struct inode *inode;
+	int err;
+
+	err = -EINVAL;
+	if (size != sizeof(outarg))
+		goto copy_finish;
+
+	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+	if (err)
+		goto copy_finish;
+
+	fuse_copy_finish(cs);
+
+	down_read(&fc->killsb);
+	err = -ENOENT;
+	if (fc->sb) {
+		u64 nodeid = outarg.nodeid;
+
+		inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+		if (inode) {
+			err = fuse_retrieve(fc, inode, &outarg);
+			iput(inode);
+		}
+	}
+	up_read(&fc->killsb);
+
+	return err;
+
+copy_finish:
+	fuse_copy_finish(cs);
+	return err;
+}
+
 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
 		       unsigned int size, struct fuse_copy_state *cs)
 {
@@ -1332,6 +1458,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
 	case FUSE_NOTIFY_STORE:
 		return fuse_notify_store(fc, size, cs);
 
+	case FUSE_NOTIFY_RETRIEVE:
+		return fuse_notify_retrieve(fc, size, cs);
+
 	default:
 		fuse_copy_finish(cs);
 		return -EINVAL;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 61267d8d527..57d4a3a0f10 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -272,6 +272,7 @@ struct fuse_req {
 			struct fuse_write_in in;
 			struct fuse_write_out out;
 		} write;
+		struct fuse_notify_retrieve_in retrieve_in;
 		struct fuse_lk_in lk_in;
 	} misc;
 
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index a90bd49834a..c3c578e0983 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -40,6 +40,7 @@
  *
  * 7.15
  *  - add store notify
+ *  - add retrieve notify
  */
 
 #ifndef _LINUX_FUSE_H
@@ -254,6 +255,7 @@ enum fuse_opcode {
 	FUSE_DESTROY       = 38,
 	FUSE_IOCTL         = 39,
 	FUSE_POLL          = 40,
+	FUSE_NOTIFY_REPLY  = 41,
 
 	/* CUSE specific operations */
 	CUSE_INIT          = 4096,
@@ -264,6 +266,7 @@ enum fuse_notify_code {
 	FUSE_NOTIFY_INVAL_INODE = 2,
 	FUSE_NOTIFY_INVAL_ENTRY = 3,
 	FUSE_NOTIFY_STORE = 4,
+	FUSE_NOTIFY_RETRIEVE = 5,
 	FUSE_NOTIFY_CODE_MAX,
 };
 
@@ -579,4 +582,22 @@ struct fuse_notify_store_out {
 	__u32	padding;
 };
 
+struct fuse_notify_retrieve_out {
+	__u64	notify_unique;
+	__u64	nodeid;
+	__u64	offset;
+	__u32	size;
+	__u32	padding;
+};
+
+/* Matches the size of fuse_write_in */
+struct fuse_notify_retrieve_in {
+	__u64	dummy1;
+	__u64	offset;
+	__u32	size;
+	__u32	dummy2;
+	__u64	dummy3;
+	__u64	dummy4;
+};
+
 #endif /* _LINUX_FUSE_H */
-- 
cgit v1.2.3-70-g09d2


From 9874647ba1bdf3e1af25e079070a00676f60f2f0 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 8 Jul 2010 00:43:36 +0200
Subject: ACPI / ACPICA: Do not execute _PRW methods during initialization

Currently, during initialization ACPICA walks the entire ACPI
namespace in search of any device objects with associated _PRW
methods.  All of the _PRW methods found are executed in the process
to extract the GPE information returned by them, so that the GPEs in
question can be marked as "able to wakeup" (more precisely, the
ACPI_GPE_CAN_WAKE flag is set for them).  The only purpose of this
exercise is to avoid enabling the CAN_WAKE GPEs automatically, even
if there are _Lxx/_Exx methods associated with them.  However, it is
both costly and unnecessary, because the host OS has to execute the
_PRW methods anyway to check which devices can wake up the system
from sleep states.  Moreover, it then uses full information
returned by _PRW, including the GPE information, so it can take care
of disabling the GPEs if necessary.

Remove the code that walks the namespace and executes _PRW from
ACPICA and modify comments to reflect that change.  Make
acpi_bus_set_run_wake_flags() disable GPEs for wakeup devices
so that they don't cause spurious wakeup events to be signaled.
This not only reduces the complexity of the ACPICA initialization
code, but in some cases it should reduce the kernel boot time as
well.

Unfortunately, for this purpose we need a new ACPICA function,
acpi_gpe_can_wake(), to be called by the host OS in order to disable
the GPEs that can wake up the system and were previously enabled by
acpi_ev_initialize_gpe_block() or acpi_ev_update_gpes() (such a GPE
should be disabled only once, because the initialization code enables
it only once, but it may be pointed to by _PRW for multiple devices
and that's why the additional function is necessary).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/acpica/acglobal.h  |   7 --
 drivers/acpi/acpica/evevent.c   |   5 +-
 drivers/acpi/acpica/evgpeblk.c  |  47 +-------
 drivers/acpi/acpica/evgpeinit.c | 235 +++-------------------------------------
 drivers/acpi/acpica/evxfevnt.c  |  55 +++++++++-
 drivers/acpi/acpica/exconfig.c  |   2 +-
 drivers/acpi/acpica/utxface.c   |   8 +-
 drivers/acpi/osl.c              |  20 ----
 drivers/acpi/scan.c             |   2 +
 include/acpi/acpixf.h           |   3 +-
 10 files changed, 85 insertions(+), 299 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 899d68afc3c..18e796fe429 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -99,13 +99,6 @@ u8 ACPI_INIT_GLOBAL(acpi_gbl_all_methods_serialized, FALSE);
  */
 u8 ACPI_INIT_GLOBAL(acpi_gbl_create_osi_method, TRUE);
 
-/*
- * Disable wakeup GPEs during runtime? Default is TRUE because WAKE and
- * RUNTIME GPEs should never be shared, and WAKE GPEs should typically only
- * be enabled just before going to sleep.
- */
-u8 ACPI_INIT_GLOBAL(acpi_gbl_leave_wake_gpes_disabled, TRUE);
-
 /*
  * Optionally use default values for the ACPI register widths. Set this to
  * TRUE to use the defaults, if an FADT contains incorrect widths/lengths.
diff --git a/drivers/acpi/acpica/evevent.c b/drivers/acpi/acpica/evevent.c
index f5795915a2e..303618889da 100644
--- a/drivers/acpi/acpica/evevent.c
+++ b/drivers/acpi/acpica/evevent.c
@@ -102,9 +102,8 @@ acpi_status acpi_ev_initialize_events(void)
  * RETURN:      Status
  *
  * DESCRIPTION: Completes initialization of the FADT-defined GPE blocks
- *              (0 and 1). This causes the _PRW methods to be run, so the HW
- *              must be fully initialized at this point, including global lock
- *              support.
+ *              (0 and 1). The HW must be fully initialized at this point,
+ *              including global lock support.
  *
  ******************************************************************************/
 
diff --git a/drivers/acpi/acpica/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c
index 0c6f3f878eb..12ca1bc5f1f 100644
--- a/drivers/acpi/acpica/evgpeblk.c
+++ b/drivers/acpi/acpica/evgpeblk.c
@@ -439,8 +439,6 @@ acpi_ev_initialize_gpe_block(struct acpi_namespace_node *gpe_device,
 {
 	acpi_status status;
 	struct acpi_gpe_event_info *gpe_event_info;
-	struct acpi_gpe_walk_info walk_info;
-	u32 wake_gpe_count;
 	u32 gpe_enabled_count;
 	u32 gpe_index;
 	u32 gpe_number;
@@ -456,37 +454,9 @@ acpi_ev_initialize_gpe_block(struct acpi_namespace_node *gpe_device,
 	}
 
 	/*
-	 * Runtime option: Should wake GPEs be enabled at runtime?  The default
-	 * is no, they should only be enabled just as the machine goes to sleep.
+	 * Enable all GPEs that have a corresponding method.  Any other GPEs
+	 * within this block must be enabled via the acpi_enable_gpe interface.
 	 */
-	if (acpi_gbl_leave_wake_gpes_disabled) {
-		/*
-		 * Differentiate runtime vs wake GPEs, via the _PRW control methods.
-		 * Each GPE that has one or more _PRWs that reference it is by
-		 * definition a wake GPE and will not be enabled while the machine
-		 * is running.
-		 */
-		walk_info.gpe_block = gpe_block;
-		walk_info.gpe_device = gpe_device;
-		walk_info.execute_by_owner_id = FALSE;
-
-		status =
-		    acpi_ns_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
-					   ACPI_UINT32_MAX, ACPI_NS_WALK_UNLOCK,
-					   acpi_ev_match_prw_and_gpe, NULL,
-					   &walk_info, NULL);
-		if (ACPI_FAILURE(status)) {
-			ACPI_EXCEPTION((AE_INFO, status,
-					"While executing _PRW methods"));
-		}
-	}
-
-	/*
-	 * Enable all GPEs that have a corresponding method and are not
-	 * capable of generating wakeups. Any other GPEs within this block
-	 * must be enabled via the acpi_enable_gpe interface.
-	 */
-	wake_gpe_count = 0;
 	gpe_enabled_count = 0;
 
 	if (gpe_device == acpi_gbl_fadt_gpe_device) {
@@ -512,13 +482,6 @@ acpi_ev_initialize_gpe_block(struct acpi_namespace_node *gpe_device,
 				goto enabled;
 			}
 
-			if (gpe_event_info->flags & ACPI_GPE_CAN_WAKE) {
-				wake_gpe_count++;
-				if (acpi_gbl_leave_wake_gpes_disabled) {
-					continue;
-				}
-			}
-
 			/* Ignore GPEs that have no corresponding _Lxx/_Exx method */
 
 			if (!(gpe_event_info->flags & ACPI_GPE_DISPATCH_METHOD)) {
@@ -540,10 +503,10 @@ acpi_ev_initialize_gpe_block(struct acpi_namespace_node *gpe_device,
 		}
 	}
 
-	if (gpe_enabled_count || wake_gpe_count) {
+	if (gpe_enabled_count) {
 		ACPI_DEBUG_PRINT((ACPI_DB_INIT,
-				  "Enabled %u Runtime GPEs, added %u Wake GPEs in this block\n",
-				  gpe_enabled_count, wake_gpe_count));
+				  "Enabled %u GPEs in this block\n",
+				  gpe_enabled_count));
 	}
 
 	return_ACPI_STATUS(AE_OK);
diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c
index 8db9e076a53..3084c5de1bb 100644
--- a/drivers/acpi/acpica/evgpeinit.c
+++ b/drivers/acpi/acpica/evgpeinit.c
@@ -211,9 +211,7 @@ acpi_status acpi_ev_gpe_initialize(void)
  * DESCRIPTION: Check for new GPE methods (_Lxx/_Exx) made available as a
  *              result of a Load() or load_table() operation. If new GPE
  *              methods have been installed, register the new methods and
- *              enable and runtime GPEs that are associated with them. Also,
- *              run any newly loaded _PRW methods in order to discover any
- *              new CAN_WAKE GPEs.
+ *              enable any runtime GPEs that are associated with them.
  *
  ******************************************************************************/
 
@@ -223,49 +221,12 @@ void acpi_ev_update_gpes(acpi_owner_id table_owner_id)
 	struct acpi_gpe_block_info *gpe_block;
 	struct acpi_gpe_walk_info walk_info;
 	acpi_status status = AE_OK;
-	u32 new_wake_gpe_count = 0;
-
-	/* We will examine only _PRW/_Lxx/_Exx methods owned by this table */
-
-	walk_info.owner_id = table_owner_id;
-	walk_info.execute_by_owner_id = TRUE;
-	walk_info.count = 0;
-
-	if (acpi_gbl_leave_wake_gpes_disabled) {
-		/*
-		 * 1) Run any newly-loaded _PRW methods to find any GPEs that
-		 * can now be marked as CAN_WAKE GPEs. Note: We must run the
-		 * _PRW methods before we process the _Lxx/_Exx methods because
-		 * we will enable all runtime GPEs associated with the new
-		 * _Lxx/_Exx methods at the time we process those methods.
-		 *
-		 * Unlock interpreter so that we can run the _PRW methods.
-		 */
-		walk_info.gpe_block = NULL;
-		walk_info.gpe_device = NULL;
-
-		acpi_ex_exit_interpreter();
-
-		status =
-		    acpi_ns_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
-					   ACPI_UINT32_MAX,
-					   ACPI_NS_WALK_NO_UNLOCK,
-					   acpi_ev_match_prw_and_gpe, NULL,
-					   &walk_info, NULL);
-		if (ACPI_FAILURE(status)) {
-			ACPI_EXCEPTION((AE_INFO, status,
-					"While executing _PRW methods"));
-		}
-
-		acpi_ex_enter_interpreter();
-		new_wake_gpe_count = walk_info.count;
-	}
 
 	/*
 	 * 2) Find any _Lxx/_Exx GPE methods that have just been loaded.
 	 *
-	 * Any GPEs that correspond to new _Lxx/_Exx methods and are not
-	 * marked as CAN_WAKE are immediately enabled.
+	 * Any GPEs that correspond to new _Lxx/_Exx methods are immediately
+	 * enabled.
 	 *
 	 * Examine the namespace underneath each gpe_device within the
 	 * gpe_block lists.
@@ -275,6 +236,8 @@ void acpi_ev_update_gpes(acpi_owner_id table_owner_id)
 		return;
 	}
 
+	walk_info.owner_id = table_owner_id;
+	walk_info.execute_by_owner_id = TRUE;
 	walk_info.count = 0;
 	walk_info.enable_this_gpe = TRUE;
 
@@ -307,10 +270,8 @@ void acpi_ev_update_gpes(acpi_owner_id table_owner_id)
 		gpe_xrupt_info = gpe_xrupt_info->next;
 	}
 
-	if (walk_info.count || new_wake_gpe_count) {
-		ACPI_INFO((AE_INFO,
-			   "Enabled %u new runtime GPEs, added %u new wakeup GPEs",
-			   walk_info.count, new_wake_gpe_count));
+	if (walk_info.count) {
+		ACPI_INFO((AE_INFO, "Enabled %u new GPEs", walk_info.count));
 	}
 
 	(void)acpi_ut_release_mutex(ACPI_MTX_EVENTS);
@@ -386,9 +347,6 @@ acpi_ev_match_gpe_method(acpi_handle obj_handle,
 	/*
 	 * 3) Edge/Level determination is based on the 2nd character
 	 *    of the method name
-	 *
-	 * NOTE: Default GPE type is RUNTIME only. Later, if a _PRW object is
-	 * found that points to this GPE, the ACPI_GPE_CAN_WAKE flag is set.
 	 */
 	switch (name[1]) {
 	case 'L':
@@ -471,23 +429,18 @@ acpi_ev_match_gpe_method(acpi_handle obj_handle,
 	 */
 	if (walk_info->enable_this_gpe) {
 
-		/* Ignore GPEs that can wake the system */
+		walk_info->count++;
+		gpe_device = walk_info->gpe_device;
 
-		if (!(gpe_event_info->flags & ACPI_GPE_CAN_WAKE) ||
-		    !acpi_gbl_leave_wake_gpes_disabled) {
-			walk_info->count++;
-			gpe_device = walk_info->gpe_device;
-
-			if (gpe_device == acpi_gbl_fadt_gpe_device) {
-				gpe_device = NULL;
-			}
+		if (gpe_device == acpi_gbl_fadt_gpe_device) {
+			gpe_device = NULL;
+		}
 
-			status = acpi_enable_gpe(gpe_device, gpe_number);
-			if (ACPI_FAILURE(status)) {
-				ACPI_EXCEPTION((AE_INFO, status,
-						"Could not enable GPE 0x%02X",
-						gpe_number));
-			}
+		status = acpi_enable_gpe(gpe_device, gpe_number);
+		if (ACPI_FAILURE(status)) {
+			ACPI_EXCEPTION((AE_INFO, status,
+					"Could not enable GPE 0x%02X",
+					gpe_number));
 		}
 	}
 
@@ -496,157 +449,3 @@ acpi_ev_match_gpe_method(acpi_handle obj_handle,
 			  name, gpe_number));
 	return_ACPI_STATUS(AE_OK);
 }
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_ev_match_prw_and_gpe
- *
- * PARAMETERS:  Callback from walk_namespace
- *
- * RETURN:      Status. NOTE: We ignore errors so that the _PRW walk is
- *              not aborted on a single _PRW failure.
- *
- * DESCRIPTION: Called from acpi_walk_namespace. Expects each object to be a
- *              Device. Run the _PRW method. If present, extract the GPE
- *              number and mark the GPE as a CAN_WAKE GPE. Allows a
- *              per-owner_id execution if execute_by_owner_id is TRUE in the
- *              walk_info parameter block.
- *
- * If walk_info->execute_by_owner_id is TRUE, we only execute _PRWs with that
- *    owner.
- * If walk_info->gpe_device is NULL, we execute every _PRW found. Otherwise,
- *    we only execute _PRWs that refer to the input gpe_device.
- *
- ******************************************************************************/
-
-acpi_status
-acpi_ev_match_prw_and_gpe(acpi_handle obj_handle,
-			  u32 level, void *context, void **return_value)
-{
-	struct acpi_gpe_walk_info *walk_info =
-	    ACPI_CAST_PTR(struct acpi_gpe_walk_info, context);
-	struct acpi_namespace_node *gpe_device;
-	struct acpi_gpe_block_info *gpe_block;
-	struct acpi_namespace_node *target_gpe_device;
-	struct acpi_namespace_node *prw_node;
-	struct acpi_gpe_event_info *gpe_event_info;
-	union acpi_operand_object *pkg_desc;
-	union acpi_operand_object *obj_desc;
-	u32 gpe_number;
-	acpi_status status;
-
-	ACPI_FUNCTION_TRACE(ev_match_prw_and_gpe);
-
-	/* Check for a _PRW method under this device */
-
-	status = acpi_ns_get_node(obj_handle, METHOD_NAME__PRW,
-				  ACPI_NS_NO_UPSEARCH, &prw_node);
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(AE_OK);
-	}
-
-	/* Check if requested owner_id matches this owner_id */
-
-	if ((walk_info->execute_by_owner_id) &&
-	    (prw_node->owner_id != walk_info->owner_id)) {
-		return_ACPI_STATUS(AE_OK);
-	}
-
-	/* Execute the _PRW */
-
-	status = acpi_ut_evaluate_object(prw_node, NULL,
-					 ACPI_BTYPE_PACKAGE, &pkg_desc);
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(AE_OK);
-	}
-
-	/* The returned _PRW package must have at least two elements */
-
-	if (pkg_desc->package.count < 2) {
-		goto cleanup;
-	}
-
-	/* Extract pointers from the input context */
-
-	gpe_device = walk_info->gpe_device;
-	gpe_block = walk_info->gpe_block;
-
-	/*
-	 * The _PRW object must return a package, we are only interested
-	 * in the first element
-	 */
-	obj_desc = pkg_desc->package.elements[0];
-
-	if (obj_desc->common.type == ACPI_TYPE_INTEGER) {
-
-		/* Use FADT-defined GPE device (from definition of _PRW) */
-
-		target_gpe_device = NULL;
-		if (gpe_device) {
-			target_gpe_device = acpi_gbl_fadt_gpe_device;
-		}
-
-		/* Integer is the GPE number in the FADT described GPE blocks */
-
-		gpe_number = (u32)obj_desc->integer.value;
-	} else if (obj_desc->common.type == ACPI_TYPE_PACKAGE) {
-
-		/* Package contains a GPE reference and GPE number within a GPE block */
-
-		if ((obj_desc->package.count < 2) ||
-		    ((obj_desc->package.elements[0])->common.type !=
-		     ACPI_TYPE_LOCAL_REFERENCE) ||
-		    ((obj_desc->package.elements[1])->common.type !=
-		     ACPI_TYPE_INTEGER)) {
-			goto cleanup;
-		}
-
-		/* Get GPE block reference and decode */
-
-		target_gpe_device =
-		    obj_desc->package.elements[0]->reference.node;
-		gpe_number = (u32)obj_desc->package.elements[1]->integer.value;
-	} else {
-		/* Unknown type, just ignore it */
-
-		goto cleanup;
-	}
-
-	/* Get the gpe_event_info for this GPE */
-
-	if (gpe_device) {
-		/*
-		 * Is this GPE within this block?
-		 *
-		 * TRUE if and only if these conditions are true:
-		 *     1) The GPE devices match.
-		 *     2) The GPE index(number) is within the range of the Gpe Block
-		 *          associated with the GPE device.
-		 */
-		if (gpe_device != target_gpe_device) {
-			goto cleanup;
-		}
-
-		gpe_event_info =
-		    acpi_ev_low_get_gpe_info(gpe_number, gpe_block);
-	} else {
-		/* gpe_device is NULL, just match the target_device and gpe_number */
-
-		gpe_event_info =
-		    acpi_ev_get_gpe_event_info(target_gpe_device, gpe_number);
-	}
-
-	if (gpe_event_info) {
-		if (!(gpe_event_info->flags & ACPI_GPE_CAN_WAKE)) {
-
-			/* This GPE can wake the system */
-
-			gpe_event_info->flags |= ACPI_GPE_CAN_WAKE;
-			walk_info->count++;
-		}
-	}
-
-      cleanup:
-	acpi_ut_remove_reference(pkg_desc);
-	return_ACPI_STATUS(AE_OK);
-}
diff --git a/drivers/acpi/acpica/evxfevnt.c b/drivers/acpi/acpica/evxfevnt.c
index fda5b44a556..bd06fad83e7 100644
--- a/drivers/acpi/acpica/evxfevnt.c
+++ b/drivers/acpi/acpica/evxfevnt.c
@@ -387,6 +387,59 @@ unlock_and_exit:
 }
 ACPI_EXPORT_SYMBOL(acpi_disable_gpe)
 
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_gpe_can_wake
+ *
+ * PARAMETERS:  gpe_device      - Parent GPE Device. NULL for GPE0/GPE1
+ *              gpe_number      - GPE level within the GPE block
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Set the ACPI_GPE_CAN_WAKE flag for the given GPE.  If the GPE
+ *              has a corresponding method and is currently enabled, disable it
+ *              (GPEs with corresponding methods are enabled unconditionally
+ *              during initialization, but GPEs that can wake up are expected
+ *              to be initially disabled).
+ *
+ ******************************************************************************/
+acpi_status acpi_gpe_can_wake(acpi_handle gpe_device, u32 gpe_number)
+{
+	acpi_status status = AE_OK;
+	struct acpi_gpe_event_info *gpe_event_info;
+	acpi_cpu_flags flags;
+	u8 disable = 0;
+
+	ACPI_FUNCTION_TRACE(acpi_gpe_can_wake);
+
+	flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock);
+
+	/* Ensure that we have a valid GPE number */
+
+	gpe_event_info = acpi_ev_get_gpe_event_info(gpe_device, gpe_number);
+	if (!gpe_event_info) {
+		status = AE_BAD_PARAMETER;
+		goto unlock_and_exit;
+	}
+
+	if (gpe_event_info->flags & ACPI_GPE_CAN_WAKE) {
+		goto unlock_and_exit;
+	}
+
+	gpe_event_info->flags |= ACPI_GPE_CAN_WAKE;
+	disable = (gpe_event_info->flags & ACPI_GPE_DISPATCH_METHOD)
+		&& gpe_event_info->runtime_count;
+
+unlock_and_exit:
+	acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
+
+	if (disable)
+		status = acpi_disable_gpe(gpe_device, gpe_number);
+
+	return_ACPI_STATUS(status);
+}
+ACPI_EXPORT_SYMBOL(acpi_gpe_can_wake)
+
 /*******************************************************************************
  *
  * FUNCTION:    acpi_disable_event
@@ -703,7 +756,7 @@ acpi_install_gpe_block(acpi_handle gpe_device,
 
 	obj_desc->device.gpe_block = gpe_block;
 
-	/* Run the _PRW methods and enable the runtime GPEs in the new block */
+	/* Enable the runtime GPEs in the new block */
 
 	status = acpi_ev_initialize_gpe_block(node, gpe_block);
 
diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c
index 008621c5ad8..18832205b63 100644
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -120,7 +120,7 @@ acpi_ex_add_table(u32 table_index,
 	acpi_ns_exec_module_code_list();
 	acpi_ex_enter_interpreter();
 
-	/* Update GPEs for any new _PRW or _Lxx/_Exx methods. Ignore errors */
+	/* Update GPEs for any new _Lxx/_Exx methods. Ignore errors */
 
 	status = acpi_tb_get_owner_id(table_index, &owner_id);
 	if (ACPI_SUCCESS(status)) {
diff --git a/drivers/acpi/acpica/utxface.c b/drivers/acpi/acpica/utxface.c
index db9d8ca5798..7f8cefcb2b3 100644
--- a/drivers/acpi/acpica/utxface.c
+++ b/drivers/acpi/acpica/utxface.c
@@ -293,12 +293,8 @@ acpi_status acpi_initialize_objects(u32 flags)
 	 * Complete the GPE initialization for the GPE blocks defined in the FADT
 	 * (GPE block 0 and 1).
 	 *
-	 * Note1: This is where the _PRW methods are executed for the GPEs. These
-	 * methods can only be executed after the SCI and Global Lock handlers are
-	 * installed and initialized.
-	 *
-	 * Note2: Currently, there seems to be no need to run the _REG methods
-	 * before execution of the _PRW methods and enabling of the GPEs.
+	 * NOTE: Currently, there seems to be no need to run the _REG methods
+	 * before enabling the GPEs.
 	 */
 	if (!(flags & ACPI_NO_EVENT_INIT)) {
 		status = acpi_ev_install_fadt_gpes();
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 78418ce4fc7..44bddc5bc6a 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1064,26 +1064,6 @@ static int __init acpi_serialize_setup(char *str)
 
 __setup("acpi_serialize", acpi_serialize_setup);
 
-/*
- * Wake and Run-Time GPES are expected to be separate.
- * We disable wake-GPEs at run-time to prevent spurious
- * interrupts.
- *
- * However, if a system exists that shares Wake and
- * Run-time events on the same GPE this flag is available
- * to tell Linux to keep the wake-time GPEs enabled at run-time.
- */
-static int __init acpi_wake_gpes_always_on_setup(char *str)
-{
-	printk(KERN_INFO PREFIX "wake GPEs not disabled\n");
-
-	acpi_gbl_leave_wake_gpes_disabled = FALSE;
-
-	return 1;
-}
-
-__setup("acpi_wake_gpes_always_on", acpi_wake_gpes_always_on_setup);
-
 /* Check of resource interference between native drivers and ACPI
  * OperationRegions (SystemIO and System Memory only).
  * IO ports and memory declared in ACPI might be used by the ACPI subsystem
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 449ada016d8..b23825ecfa3 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -740,6 +740,8 @@ acpi_bus_extract_wakeup_device_power_package(struct acpi_device *device,
 		device->wakeup.resources.handles[i] = element->reference.handle;
 	}
 
+	acpi_gpe_can_wake(device->wakeup.gpe_device, device->wakeup.gpe_number);
+
 	return AE_OK;
 }
 
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index ba94a889afd..81d4f3d4b9f 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -63,7 +63,6 @@ extern u32 acpi_dbg_layer;
 extern u8 acpi_gbl_enable_interpreter_slack;
 extern u8 acpi_gbl_all_methods_serialized;
 extern u8 acpi_gbl_create_osi_method;
-extern u8 acpi_gbl_leave_wake_gpes_disabled;
 extern u8 acpi_gbl_use_default_register_widths;
 extern acpi_name acpi_gbl_trace_method_name;
 extern u32 acpi_gbl_trace_flags;
@@ -286,6 +285,8 @@ acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number);
 
 acpi_status acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number);
 
+acpi_status acpi_gpe_can_wake(acpi_handle gpe_device, u32 gpe_number);
+
 acpi_status acpi_clear_gpe(acpi_handle gpe_device, u32 gpe_number);
 
 acpi_status acpi_gpe_wakeup(acpi_handle gpe_device, u32 gpe_number, u8 action);
-- 
cgit v1.2.3-70-g09d2


From 53d3176b282cc105493babb0fef36c8b873f6201 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sat, 10 Jul 2010 20:41:06 +0000
Subject: net: cleanups

Remove superfluous whitespace and blank lines from the function declarations.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/inet_common.h |   55 ++++-------
 include/net/tcp.h         |  222 +++++++++++++++++-----------------------------
 include/net/udp.h         |   38 +++----
 3 files changed, 123 insertions(+), 192 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_common.h |  55 +++++-------
 include/net/tcp.h         | 222 ++++++++++++++++++----------------------------
 include/net/udp.h         |  38 ++++----
 3 files changed, 123 insertions(+), 192 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 18c773286b9..140c1ec9f8a 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -1,8 +1,8 @@
 #ifndef _INET_COMMON_H
 #define _INET_COMMON_H
 
-extern const struct proto_ops		inet_stream_ops;
-extern const struct proto_ops		inet_dgram_ops;
+extern const struct proto_ops inet_stream_ops;
+extern const struct proto_ops inet_dgram_ops;
 
 /*
  *	INET4 prototypes used by INET6
@@ -13,37 +13,24 @@ struct sock;
 struct sockaddr;
 struct socket;
 
-extern int			inet_release(struct socket *sock);
-extern int			inet_stream_connect(struct socket *sock,
-						    struct sockaddr * uaddr,
-						    int addr_len, int flags);
-extern int			inet_dgram_connect(struct socket *sock, 
-						   struct sockaddr * uaddr,
-						   int addr_len, int flags);
-extern int			inet_accept(struct socket *sock, 
-					    struct socket *newsock, int flags);
-extern int			inet_sendmsg(struct kiocb *iocb,
-					     struct socket *sock, 
-					     struct msghdr *msg, 
-					     size_t size);
-extern int			inet_shutdown(struct socket *sock, int how);
-extern int			inet_listen(struct socket *sock, int backlog);
-
-extern void			inet_sock_destruct(struct sock *sk);
-
-extern int			inet_bind(struct socket *sock, 
-					  struct sockaddr *uaddr, int addr_len);
-extern int			inet_getname(struct socket *sock, 
-					     struct sockaddr *uaddr, 
-					     int *uaddr_len, int peer);
-extern int			inet_ioctl(struct socket *sock, 
-					   unsigned int cmd, unsigned long arg);
-
-extern int			inet_ctl_sock_create(struct sock **sk,
-						     unsigned short family,
-						     unsigned short type,
-						     unsigned char protocol,
-						     struct net *net);
+extern int inet_release(struct socket *sock);
+extern int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
+			       int addr_len, int flags);
+extern int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
+			      int addr_len, int flags);
+extern int inet_accept(struct socket *sock, struct socket *newsock, int flags);
+extern int inet_sendmsg(struct kiocb *iocb, struct socket *sock,
+			struct msghdr *msg, size_t size);
+extern int inet_shutdown(struct socket *sock, int how);
+extern int inet_listen(struct socket *sock, int backlog);
+extern void inet_sock_destruct(struct sock *sk);
+extern int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
+extern int inet_getname(struct socket *sock, struct sockaddr *uaddr,
+			int *uaddr_len, int peer);
+extern int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
+extern int inet_ctl_sock_create(struct sock **sk, unsigned short family,
+				unsigned short type, unsigned char protocol,
+				struct net *net);
 
 static inline void inet_ctl_sock_destroy(struct sock *sk)
 {
@@ -51,5 +38,3 @@ static inline void inet_ctl_sock_destroy(struct sock *sk)
 }
 
 #endif
-
-
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c2f96c2cc89..33ce5242a17 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -296,45 +296,30 @@ extern struct proto tcp_prot;
 #define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val)
 #define TCP_ADD_STATS(net, field, val)	SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
 
-extern void			tcp_v4_err(struct sk_buff *skb, u32);
-
-extern void			tcp_shutdown (struct sock *sk, int how);
-
-extern int			tcp_v4_rcv(struct sk_buff *skb);
-
-extern int			tcp_v4_remember_stamp(struct sock *sk);
-
-extern int		    	tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
-
-extern int			tcp_sendmsg(struct kiocb *iocb, struct socket *sock,
-					    struct msghdr *msg, size_t size);
-extern ssize_t			tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
-
-extern int			tcp_ioctl(struct sock *sk, 
-					  int cmd, 
-					  unsigned long arg);
-
-extern int			tcp_rcv_state_process(struct sock *sk, 
-						      struct sk_buff *skb,
-						      struct tcphdr *th,
-						      unsigned len);
-
-extern int			tcp_rcv_established(struct sock *sk, 
-						    struct sk_buff *skb,
-						    struct tcphdr *th, 
-						    unsigned len);
-
-extern void			tcp_rcv_space_adjust(struct sock *sk);
-
-extern void			tcp_cleanup_rbuf(struct sock *sk, int copied);
-
-extern int			tcp_twsk_unique(struct sock *sk,
-						struct sock *sktw, void *twp);
-
-extern void			tcp_twsk_destructor(struct sock *sk);
-
-extern ssize_t			tcp_splice_read(struct socket *sk, loff_t *ppos,
-					        struct pipe_inode_info *pipe, size_t len, unsigned int flags);
+extern void tcp_v4_err(struct sk_buff *skb, u32);
+
+extern void tcp_shutdown (struct sock *sk, int how);
+
+extern int tcp_v4_rcv(struct sk_buff *skb);
+
+extern int tcp_v4_remember_stamp(struct sock *sk);
+extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
+extern int tcp_sendmsg(struct kiocb *iocb, struct socket *sock,
+		       struct msghdr *msg, size_t size);
+extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
+			    size_t size, int flags);
+extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
+				 struct tcphdr *th, unsigned len);
+extern int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
+			       struct tcphdr *th, unsigned len);
+extern void tcp_rcv_space_adjust(struct sock *sk);
+extern void tcp_cleanup_rbuf(struct sock *sk, int copied);
+extern int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
+extern void tcp_twsk_destructor(struct sock *sk);
+extern ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
+			       struct pipe_inode_info *pipe, size_t len,
+			       unsigned int flags);
 
 static inline void tcp_dec_quickack_mode(struct sock *sk,
 					 const unsigned int pkts)
@@ -372,88 +357,59 @@ enum tcp_tw_status {
 };
 
 
-extern enum tcp_tw_status	tcp_timewait_state_process(struct inet_timewait_sock *tw,
-							   struct sk_buff *skb,
-							   const struct tcphdr *th);
-
-extern struct sock *		tcp_check_req(struct sock *sk,struct sk_buff *skb,
-					      struct request_sock *req,
-					      struct request_sock **prev);
-extern int			tcp_child_process(struct sock *parent,
-						  struct sock *child,
-						  struct sk_buff *skb);
-extern int			tcp_use_frto(struct sock *sk);
-extern void			tcp_enter_frto(struct sock *sk);
-extern void			tcp_enter_loss(struct sock *sk, int how);
-extern void			tcp_clear_retrans(struct tcp_sock *tp);
-extern void			tcp_update_metrics(struct sock *sk);
-
-extern void			tcp_close(struct sock *sk, 
-					  long timeout);
-extern unsigned int		tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
-
-extern int			tcp_getsockopt(struct sock *sk, int level, 
-					       int optname,
-					       char __user *optval, 
-					       int __user *optlen);
-extern int			tcp_setsockopt(struct sock *sk, int level, 
-					       int optname, char __user *optval, 
-					       unsigned int optlen);
-extern int			compat_tcp_getsockopt(struct sock *sk,
-					int level, int optname,
-					char __user *optval, int __user *optlen);
-extern int			compat_tcp_setsockopt(struct sock *sk,
-					int level, int optname,
-					char __user *optval, unsigned int optlen);
-extern void			tcp_set_keepalive(struct sock *sk, int val);
-extern void			tcp_syn_ack_timeout(struct sock *sk,
-						    struct request_sock *req);
-extern int			tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
-					    struct msghdr *msg,
-					    size_t len, int nonblock, 
-					    int flags, int *addr_len);
-
-extern void			tcp_parse_options(struct sk_buff *skb,
-						  struct tcp_options_received *opt_rx,
-						  u8 **hvpp,
-						  int estab);
-
-extern u8			*tcp_parse_md5sig_option(struct tcphdr *th);
+extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
+						     struct sk_buff *skb,
+						     const struct tcphdr *th);
+extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
+				   struct request_sock *req,
+				   struct request_sock **prev);
+extern int tcp_child_process(struct sock *parent, struct sock *child,
+			     struct sk_buff *skb);
+extern int tcp_use_frto(struct sock *sk);
+extern void tcp_enter_frto(struct sock *sk);
+extern void tcp_enter_loss(struct sock *sk, int how);
+extern void tcp_clear_retrans(struct tcp_sock *tp);
+extern void tcp_update_metrics(struct sock *sk);
+extern void tcp_close(struct sock *sk, long timeout);
+extern unsigned int tcp_poll(struct file * file, struct socket *sock,
+			     struct poll_table_struct *wait);
+extern int tcp_getsockopt(struct sock *sk, int level, int optname,
+			  char __user *optval, int __user *optlen);
+extern int tcp_setsockopt(struct sock *sk, int level, int optname,
+			  char __user *optval, unsigned int optlen);
+extern int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
+				 char __user *optval, int __user *optlen);
+extern int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
+				 char __user *optval, unsigned int optlen);
+extern void tcp_set_keepalive(struct sock *sk, int val);
+extern void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req);
+extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		       size_t len, int nonblock, int flags, int *addr_len);
+extern void tcp_parse_options(struct sk_buff *skb,
+			      struct tcp_options_received *opt_rx, u8 **hvpp,
+			      int estab);
+extern u8 *tcp_parse_md5sig_option(struct tcphdr *th);
 
 /*
  *	TCP v4 functions exported for the inet6 API
  */
 
-extern void		       	tcp_v4_send_check(struct sock *sk,
-						  struct sk_buff *skb);
-
-extern int			tcp_v4_conn_request(struct sock *sk,
-						    struct sk_buff *skb);
-
-extern struct sock *		tcp_create_openreq_child(struct sock *sk,
-							 struct request_sock *req,
-							 struct sk_buff *skb);
-
-extern struct sock *		tcp_v4_syn_recv_sock(struct sock *sk,
-						     struct sk_buff *skb,
-						     struct request_sock *req,
-							struct dst_entry *dst);
-
-extern int			tcp_v4_do_rcv(struct sock *sk,
+extern void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
+extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
+extern struct sock * tcp_create_openreq_child(struct sock *sk,
+					      struct request_sock *req,
 					      struct sk_buff *skb);
-
-extern int			tcp_v4_connect(struct sock *sk,
-					       struct sockaddr *uaddr,
-					       int addr_len);
-
-extern int			tcp_connect(struct sock *sk);
-
-extern struct sk_buff *		tcp_make_synack(struct sock *sk,
-						struct dst_entry *dst,
-						struct request_sock *req,
-						struct request_values *rvp);
-
-extern int			tcp_disconnect(struct sock *sk, int flags);
+extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+					  struct request_sock *req,
+					  struct dst_entry *dst);
+extern int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
+extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
+			  int addr_len);
+extern int tcp_connect(struct sock *sk);
+extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
+					struct request_sock *req,
+					struct request_values *rvp);
+extern int tcp_disconnect(struct sock *sk, int flags);
 
 
 /* From syncookies.c */
@@ -485,10 +441,10 @@ extern int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int);
 
 extern void tcp_send_probe0(struct sock *);
 extern void tcp_send_partial(struct sock *);
-extern int  tcp_write_wakeup(struct sock *);
+extern int tcp_write_wakeup(struct sock *);
 extern void tcp_send_fin(struct sock *sk);
 extern void tcp_send_active_reset(struct sock *sk, gfp_t priority);
-extern int  tcp_send_synack(struct sock *);
+extern int tcp_send_synack(struct sock *);
 extern void tcp_push_one(struct sock *, unsigned int mss_now);
 extern void tcp_send_ack(struct sock *sk);
 extern void tcp_send_delayed_ack(struct sock *sk);
@@ -592,7 +548,7 @@ static inline u32 tcp_receive_window(const struct tcp_sock *tp)
  * scaling applied to the result.  The caller does these things
  * if necessary.  This is a "raw" window selection.
  */
-extern u32	__tcp_select_window(struct sock *sk);
+extern u32 __tcp_select_window(struct sock *sk);
 
 /* TCP timestamps are only 32-bits, this causes a slight
  * complication on 64-bit systems since we store a snapshot
@@ -1174,22 +1130,14 @@ struct tcp_md5sig_pool {
 #define TCP_MD5SIG_MAXKEYS	(~(u32)0)	/* really?! */
 
 /* - functions */
-extern int			tcp_v4_md5_hash_skb(char *md5_hash,
-						    struct tcp_md5sig_key *key,
-						    struct sock *sk,
-						    struct request_sock *req,
-						    struct sk_buff *skb);
-
-extern struct tcp_md5sig_key	*tcp_v4_md5_lookup(struct sock *sk,
-						   struct sock *addr_sk);
-
-extern int			tcp_v4_md5_do_add(struct sock *sk,
-						  __be32 addr,
-						  u8 *newkey,
-						  u8 newkeylen);
-
-extern int			tcp_v4_md5_do_del(struct sock *sk,
-						  __be32 addr);
+extern int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
+			       struct sock *sk, struct request_sock *req,
+			       struct sk_buff *skb);
+extern struct tcp_md5sig_key * tcp_v4_md5_lookup(struct sock *sk,
+						 struct sock *addr_sk);
+extern int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, u8 *newkey,
+			     u8 newkeylen);
+extern int tcp_v4_md5_do_del(struct sock *sk, __be32 addr);
 
 #ifdef CONFIG_TCP_MD5SIG
 #define tcp_twsk_md5_key(twsk)	((twsk)->tw_md5_keylen ? 		 \
@@ -1202,10 +1150,10 @@ extern int			tcp_v4_md5_do_del(struct sock *sk,
 #endif
 
 extern struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *);
-extern void			tcp_free_md5sig_pool(void);
+extern void tcp_free_md5sig_pool(void);
 
 extern struct tcp_md5sig_pool	*tcp_get_md5sig_pool(void);
-extern void			tcp_put_md5sig_pool(void);
+extern void tcp_put_md5sig_pool(void);
 
 extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *);
 extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *,
@@ -1433,7 +1381,7 @@ extern int tcp_gro_complete(struct sk_buff *skb);
 extern int tcp4_gro_complete(struct sk_buff *skb);
 
 #ifdef CONFIG_PROC_FS
-extern int  tcp4_proc_init(void);
+extern int tcp4_proc_init(void);
 extern void tcp4_proc_exit(void);
 #endif
 
diff --git a/include/net/udp.h b/include/net/udp.h
index 5348d80b25b..7abdf305da5 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -157,30 +157,28 @@ static inline void udp_lib_close(struct sock *sk, long timeout)
 	sk_common_release(sk);
 }
 
-extern int	udp_lib_get_port(struct sock *sk, unsigned short snum,
-		int (*)(const struct sock *,const struct sock *),
-		unsigned int hash2_nulladdr);
+extern int udp_lib_get_port(struct sock *sk, unsigned short snum,
+			    int (*)(const struct sock *,const struct sock *),
+			    unsigned int hash2_nulladdr);
 
 /* net/ipv4/udp.c */
-extern int	udp_get_port(struct sock *sk, unsigned short snum,
-			     int (*saddr_cmp)(const struct sock *, const struct sock *));
-extern void	udp_err(struct sk_buff *, u32);
-
-extern int	udp_sendmsg(struct kiocb *iocb, struct sock *sk,
+extern int udp_get_port(struct sock *sk, unsigned short snum,
+			int (*saddr_cmp)(const struct sock *,
+					 const struct sock *));
+extern void udp_err(struct sk_buff *, u32);
+extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk,
 			    struct msghdr *msg, size_t len);
-extern void	udp_flush_pending_frames(struct sock *sk);
-
-extern int	udp_rcv(struct sk_buff *skb);
-extern int	udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
-extern int	udp_disconnect(struct sock *sk, int flags);
+extern void udp_flush_pending_frames(struct sock *sk);
+extern int udp_rcv(struct sk_buff *skb);
+extern int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+extern int udp_disconnect(struct sock *sk, int flags);
 extern unsigned int udp_poll(struct file *file, struct socket *sock,
 			     poll_table *wait);
-extern int 	udp_lib_getsockopt(struct sock *sk, int level, int optname,
-			           char __user *optval, int __user *optlen);
-extern int 	udp_lib_setsockopt(struct sock *sk, int level, int optname,
-				   char __user *optval, unsigned int optlen,
-				   int (*push_pending_frames)(struct sock *));
-
+extern int udp_lib_getsockopt(struct sock *sk, int level, int optname,
+			      char __user *optval, int __user *optlen);
+extern int udp_lib_setsockopt(struct sock *sk, int level, int optname,
+			      char __user *optval, unsigned int optlen,
+			      int (*push_pending_frames)(struct sock *));
 extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
 				    __be32 daddr, __be16 dport,
 				    int dif);
@@ -236,7 +234,7 @@ struct udp_iter_state {
 extern int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo);
 extern void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo);
 
-extern int  udp4_proc_init(void);
+extern int udp4_proc_init(void);
 extern void udp4_proc_exit(void);
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 7ba42910073f8432934d61a6c08b1023c408fb62 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sat, 10 Jul 2010 20:41:55 +0000
Subject: inet, inet6: make tcp_sendmsg() and tcp_sendpage() through
 inet_sendmsg() and inet_sendpage()

A new boolean flag no_autobind is added to structure proto to avoid the autobind
calls when the protocol is TCP. Then sock_rps_record_flow() is called in the
TCP sendmsg() and sendpage() paths.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/inet_common.h |    4 ++++
 include/net/sock.h        |    1 +
 include/net/tcp.h         |    8 ++++----
 net/ipv4/af_inet.c        |   15 +++++++++------
 net/ipv4/tcp.c            |   11 +++++------
 net/ipv4/tcp_ipv4.c       |    3 +++
 net/ipv6/af_inet6.c       |    8 ++++----
 net/ipv6/tcp_ipv6.c       |    3 +++
 8 files changed, 33 insertions(+), 20 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_common.h |  4 ++++
 include/net/sock.h        |  1 +
 include/net/tcp.h         |  8 ++++----
 net/ipv4/af_inet.c        | 15 +++++++++------
 net/ipv4/tcp.c            | 11 +++++------
 net/ipv4/tcp_ipv4.c       |  3 +++
 net/ipv6/af_inet6.c       |  8 ++++----
 net/ipv6/tcp_ipv6.c       |  3 +++
 8 files changed, 33 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 140c1ec9f8a..22fac9892b1 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -21,6 +21,10 @@ extern int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
 extern int inet_accept(struct socket *sock, struct socket *newsock, int flags);
 extern int inet_sendmsg(struct kiocb *iocb, struct socket *sock,
 			struct msghdr *msg, size_t size);
+extern ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
+			     size_t size, int flags);
+extern int inet_recvmsg(struct kiocb *iocb, struct socket *sock,
+			struct msghdr *msg, size_t size, int flags);
 extern int inet_shutdown(struct socket *sock, int how);
 extern int inet_listen(struct socket *sock, int backlog);
 extern void inet_sock_destruct(struct sock *sk);
diff --git a/include/net/sock.h b/include/net/sock.h
index 4f26f2f83be..3100e71f0c3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -772,6 +772,7 @@ struct proto {
 	int			*sysctl_wmem;
 	int			*sysctl_rmem;
 	int			max_header;
+	bool			no_autobind;
 
 	struct kmem_cache	*slab;
 	unsigned int		obj_size;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 33ce5242a17..468b01f01c1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -304,10 +304,10 @@ extern int tcp_v4_rcv(struct sk_buff *skb);
 
 extern int tcp_v4_remember_stamp(struct sock *sk);
 extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
-extern int tcp_sendmsg(struct kiocb *iocb, struct socket *sock,
-		       struct msghdr *msg, size_t size);
-extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
-			    size_t size, int flags);
+extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		       size_t size);
+extern int tcp_sendpage(struct sock *sk, struct page *page, int offset,
+			size_t size, int flags);
 extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				 struct tcphdr *th, unsigned len);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 3ceb025b16f..6a1100c25a9 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -727,28 +727,31 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	sock_rps_record_flow(sk);
 
 	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
+	    inet_autobind(sk))
 		return -EAGAIN;
 
 	return sk->sk_prot->sendmsg(iocb, sk, msg, size);
 }
 EXPORT_SYMBOL(inet_sendmsg);
 
-static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
-			     size_t size, int flags)
+ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
+		      size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
 
 	sock_rps_record_flow(sk);
 
 	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
+	    inet_autobind(sk))
 		return -EAGAIN;
 
 	if (sk->sk_prot->sendpage)
 		return sk->sk_prot->sendpage(sk, page, offset, size, flags);
 	return sock_no_sendpage(sock, page, offset, size, flags);
 }
+EXPORT_SYMBOL(inet_sendpage);
 
 int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 		 size_t size, int flags)
@@ -894,10 +897,10 @@ const struct proto_ops inet_stream_ops = {
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
-	.sendmsg	   = tcp_sendmsg,
+	.sendmsg	   = inet_sendmsg,
 	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
-	.sendpage	   = tcp_sendpage,
+	.sendpage	   = inet_sendpage,
 	.splice_read	   = tcp_splice_read,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b8601b7683a..9fce8a8a13a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -857,15 +857,15 @@ out_err:
 	return sk_stream_error(sk, flags, err);
 }
 
-ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
-		     size_t size, int flags)
+int tcp_sendpage(struct sock *sk, struct page *page, int offset,
+		 size_t size, int flags)
 {
 	ssize_t res;
-	struct sock *sk = sock->sk;
 
 	if (!(sk->sk_route_caps & NETIF_F_SG) ||
 	    !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
-		return sock_no_sendpage(sock, page, offset, size, flags);
+		return sock_no_sendpage(sk->sk_socket, page, offset, size,
+					flags);
 
 	lock_sock(sk);
 	TCP_CHECK_TIMER(sk);
@@ -899,10 +899,9 @@ static inline int select_size(struct sock *sk, int sg)
 	return tmp;
 }
 
-int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		size_t size)
 {
-	struct sock *sk = sock->sk;
 	struct iovec *iov;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 44545e8e8c9..020766292bb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2600,6 +2600,8 @@ struct proto tcp_prot = {
 	.setsockopt		= tcp_setsockopt,
 	.getsockopt		= tcp_getsockopt,
 	.recvmsg		= tcp_recvmsg,
+	.sendmsg		= tcp_sendmsg,
+	.sendpage		= tcp_sendpage,
 	.backlog_rcv		= tcp_v4_do_rcv,
 	.hash			= inet_hash,
 	.unhash			= inet_unhash,
@@ -2618,6 +2620,7 @@ struct proto tcp_prot = {
 	.twsk_prot		= &tcp_timewait_sock_ops,
 	.rsk_prot		= &tcp_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
+	.no_autobind		= true,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e830cd4f9d0..56b9bf2516f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -522,10 +522,10 @@ const struct proto_ops inet6_stream_ops = {
 	.shutdown	   = inet_shutdown,		/* ok		*/
 	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
 	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
-	.sendmsg	   = tcp_sendmsg,		/* ok		*/
-	.recvmsg	   = sock_common_recvmsg,	/* ok		*/
+	.sendmsg	   = inet_sendmsg,		/* ok		*/
+	.recvmsg	   = inet_recvmsg,		/* ok		*/
 	.mmap		   = sock_no_mmap,
-	.sendpage	   = tcp_sendpage,
+	.sendpage	   = inet_sendpage,
 	.splice_read	   = tcp_splice_read,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
@@ -549,7 +549,7 @@ const struct proto_ops inet6_dgram_ops = {
 	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
 	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
 	.sendmsg	   = inet_sendmsg,		/* ok		*/
-	.recvmsg	   = sock_common_recvmsg,	/* ok		*/
+	.recvmsg	   = inet_recvmsg,		/* ok		*/
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = sock_no_sendpage,
 #ifdef CONFIG_COMPAT
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5ebc27ecebd..fe6d40418c0 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2142,6 +2142,8 @@ struct proto tcpv6_prot = {
 	.setsockopt		= tcp_setsockopt,
 	.getsockopt		= tcp_getsockopt,
 	.recvmsg		= tcp_recvmsg,
+	.sendmsg		= tcp_sendmsg,
+	.sendpage		= tcp_sendpage,
 	.backlog_rcv		= tcp_v6_do_rcv,
 	.hash			= tcp_v6_hash,
 	.unhash			= inet_unhash,
@@ -2160,6 +2162,7 @@ struct proto tcpv6_prot = {
 	.twsk_prot		= &tcp6_timewait_sock_ops,
 	.rsk_prot		= &tcp6_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
+	.no_autobind		= true,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
-- 
cgit v1.2.3-70-g09d2


From 02d37bed188c500ee7afb0a2dc6b65a80704c58e Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Thu, 8 Jul 2010 16:09:06 +0200
Subject: firewire: core: integrate software-forced bus resets with bus
 management

Bus resets which are triggered
  - by the kernel drivers after updates of the local nodes' config ROM,
  - by userspace software via ioctl
shall be deferred until after >=2 seconds after the last bus reset.

If multiple modifications of the local nodes' config ROM happen in a row,
only a single bus reset should happen after them.

When the local node's link goes from inactive to active or vice versa,
and at the two occasions of bus resets mentioned above --- and if the
current gap count differs from 63 --- the bus reset should be preceded
by a PHY configuration packet that reaffirms the gap count.  Otherwise a
bus manager would have to reset the bus again right after that.

This is necessary to promote bus stability, e.g. leave grace periods for
allocations and reallocations of isochronous channels and bandwidth,
SBP-2 reconnections etc.; see IEEE 1394 clause 8.2.1.

This change implements all of the above by moving bus reset initiation
into a delayed work (except for bus resets which are triggered by the
bus manager workqueue job and are performed there immediately).  It
comes with a necessary addition to the card driver methods that allows
the current gap count to be read from PHY registers.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-card.c        | 68 ++++++++++++++++++++++++++++---------
 drivers/firewire/core-cdev.c        |  3 +-
 drivers/firewire/core-transaction.c | 18 ++++++++--
 drivers/firewire/core.h             |  6 +++-
 drivers/firewire/ohci.c             | 53 +++++++++++++++++++++--------
 include/linux/firewire.h            |  6 ++--
 6 files changed, 116 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index 6c316cfe70c..2bb5c036e80 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -204,6 +204,45 @@ void fw_core_remove_descriptor(struct fw_descriptor *desc)
 }
 EXPORT_SYMBOL(fw_core_remove_descriptor);
 
+static int reset_bus(struct fw_card *card, bool short_reset)
+{
+	int reg = short_reset ? 5 : 1;
+	int bit = short_reset ? PHY_BUS_SHORT_RESET : PHY_BUS_RESET;
+
+	return card->driver->update_phy_reg(card, reg, 0, bit);
+}
+
+void fw_schedule_bus_reset(struct fw_card *card, bool delayed, bool short_reset)
+{
+	/* We don't try hard to sort out requests of long vs. short resets. */
+	card->br_short = short_reset;
+
+	/* Use an arbitrary short delay to combine multiple reset requests. */
+	fw_card_get(card);
+	if (!schedule_delayed_work(&card->br_work,
+				   delayed ? DIV_ROUND_UP(HZ, 100) : 0))
+		fw_card_put(card);
+}
+EXPORT_SYMBOL(fw_schedule_bus_reset);
+
+static void br_work(struct work_struct *work)
+{
+	struct fw_card *card = container_of(work, struct fw_card, br_work.work);
+
+	/* Delay for 2s after last reset per IEEE 1394 clause 8.2.1. */
+	if (card->reset_jiffies != 0 &&
+	    time_is_after_jiffies(card->reset_jiffies + 2 * HZ)) {
+		if (!schedule_delayed_work(&card->br_work, 2 * HZ))
+			fw_card_put(card);
+		return;
+	}
+
+	fw_send_phy_config(card, FW_PHY_CONFIG_NO_NODE_ID, card->generation,
+			   FW_PHY_CONFIG_CURRENT_GAP_COUNT);
+	reset_bus(card, card->br_short);
+	fw_card_put(card);
+}
+
 static void allocate_broadcast_channel(struct fw_card *card, int generation)
 {
 	int channel, bandwidth = 0;
@@ -230,13 +269,13 @@ static const char gap_count_table[] = {
 void fw_schedule_bm_work(struct fw_card *card, unsigned long delay)
 {
 	fw_card_get(card);
-	if (!schedule_delayed_work(&card->work, delay))
+	if (!schedule_delayed_work(&card->bm_work, delay))
 		fw_card_put(card);
 }
 
-static void fw_card_bm_work(struct work_struct *work)
+static void bm_work(struct work_struct *work)
 {
-	struct fw_card *card = container_of(work, struct fw_card, work.work);
+	struct fw_card *card = container_of(work, struct fw_card, bm_work.work);
 	struct fw_device *root_device;
 	struct fw_node *root_node;
 	int root_id, new_root_id, irm_id, bm_id, local_id;
@@ -413,7 +452,7 @@ static void fw_card_bm_work(struct work_struct *work)
 		fw_notify("phy config: card %d, new root=%x, gap_count=%d\n",
 			  card->index, new_root_id, gap_count);
 		fw_send_phy_config(card, new_root_id, generation, gap_count);
-		fw_core_initiate_bus_reset(card, 1);
+		reset_bus(card, true);
 		/* Will allocate broadcast channel after the reset. */
 		goto out;
 	}
@@ -465,7 +504,8 @@ void fw_card_initialize(struct fw_card *card,
 
 	card->local_node = NULL;
 
-	INIT_DELAYED_WORK(&card->work, fw_card_bm_work);
+	INIT_DELAYED_WORK(&card->br_work, br_work);
+	INIT_DELAYED_WORK(&card->bm_work, bm_work);
 }
 EXPORT_SYMBOL(fw_card_initialize);
 
@@ -491,7 +531,6 @@ int fw_card_add(struct fw_card *card,
 }
 EXPORT_SYMBOL(fw_card_add);
 
-
 /*
  * The next few functions implement a dummy driver that is used once a card
  * driver shuts down an fw_card.  This allows the driver to cleanly unload,
@@ -507,6 +546,11 @@ static int dummy_enable(struct fw_card *card,
 	return -1;
 }
 
+static int dummy_read_phy_reg(struct fw_card *card, int address)
+{
+	return -ENODEV;
+}
+
 static int dummy_update_phy_reg(struct fw_card *card, int address,
 				int clear_bits, int set_bits)
 {
@@ -547,6 +591,7 @@ static int dummy_enable_phys_dma(struct fw_card *card,
 
 static const struct fw_card_driver dummy_driver_template = {
 	.enable          = dummy_enable,
+	.read_phy_reg    = dummy_read_phy_reg,
 	.update_phy_reg  = dummy_update_phy_reg,
 	.set_config_rom  = dummy_set_config_rom,
 	.send_request    = dummy_send_request,
@@ -568,7 +613,7 @@ void fw_core_remove_card(struct fw_card *card)
 
 	card->driver->update_phy_reg(card, 4,
 				     PHY_LINK_ACTIVE | PHY_CONTENDER, 0);
-	fw_core_initiate_bus_reset(card, 1);
+	fw_schedule_bus_reset(card, false, true);
 
 	mutex_lock(&card_mutex);
 	list_del_init(&card->link);
@@ -588,12 +633,3 @@ void fw_core_remove_card(struct fw_card *card)
 	WARN_ON(!list_empty(&card->transaction_list));
 }
 EXPORT_SYMBOL(fw_core_remove_card);
-
-int fw_core_initiate_bus_reset(struct fw_card *card, int short_reset)
-{
-	int reg = short_reset ? 5 : 1;
-	int bit = short_reset ? PHY_BUS_SHORT_RESET : PHY_BUS_RESET;
-
-	return card->driver->update_phy_reg(card, reg, 0, bit);
-}
-EXPORT_SYMBOL(fw_core_initiate_bus_reset);
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 7a690c466ce..ee2e8735310 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -820,8 +820,9 @@ static int ioctl_send_response(struct client *client, union ioctl_arg *arg)
 
 static int ioctl_initiate_bus_reset(struct client *client, union ioctl_arg *arg)
 {
-	return fw_core_initiate_bus_reset(client->device->card,
+	fw_schedule_bus_reset(client->device->card, true,
 			arg->initiate_bus_reset.type == FW_CDEV_SHORT_RESET);
+	return 0;
 }
 
 static void release_descriptor(struct client *client,
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index 7813da8a129..5f5a7852f7a 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -426,9 +426,21 @@ void fw_send_phy_config(struct fw_card *card,
 			int node_id, int generation, int gap_count)
 {
 	long timeout = DIV_ROUND_UP(HZ, 10);
-	u32 data = PHY_IDENTIFIER(PHY_PACKET_CONFIG) |
-		   PHY_CONFIG_ROOT_ID(node_id) |
-		   PHY_CONFIG_GAP_COUNT(gap_count);
+	u32 data = PHY_IDENTIFIER(PHY_PACKET_CONFIG);
+
+	if (node_id != FW_PHY_CONFIG_NO_NODE_ID)
+		data |= PHY_CONFIG_ROOT_ID(node_id);
+
+	if (gap_count == FW_PHY_CONFIG_CURRENT_GAP_COUNT) {
+		gap_count = card->driver->read_phy_reg(card, 1);
+		if (gap_count < 0)
+			return;
+
+		gap_count &= 63;
+		if (gap_count == 63)
+			return;
+	}
+	data |= PHY_CONFIG_GAP_COUNT(gap_count);
 
 	mutex_lock(&phy_config_mutex);
 
diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
index 3000dd74acf..ff6c9092200 100644
--- a/drivers/firewire/core.h
+++ b/drivers/firewire/core.h
@@ -51,6 +51,7 @@ struct fw_card_driver {
 	int (*enable)(struct fw_card *card,
 		      const __be32 *config_rom, size_t length);
 
+	int (*read_phy_reg)(struct fw_card *card, int address);
 	int (*update_phy_reg)(struct fw_card *card, int address,
 			      int clear_bits, int set_bits);
 
@@ -102,8 +103,8 @@ void fw_card_initialize(struct fw_card *card,
 int fw_card_add(struct fw_card *card,
 		u32 max_receive, u32 link_speed, u64 guid);
 void fw_core_remove_card(struct fw_card *card);
-int fw_core_initiate_bus_reset(struct fw_card *card, int short_reset);
 int fw_compute_block_crc(__be32 *block);
+void fw_schedule_bus_reset(struct fw_card *card, bool delayed, bool short_reset);
 void fw_schedule_bm_work(struct fw_card *card, unsigned long delay);
 
 static inline struct fw_card *fw_card_get(struct fw_card *card)
@@ -225,6 +226,9 @@ void fw_core_handle_response(struct fw_card *card, struct fw_packet *packet);
 int fw_get_response_length(struct fw_request *request);
 void fw_fill_response(struct fw_packet *response, u32 *request_header,
 		      int rcode, void *payload, size_t length);
+
+#define FW_PHY_CONFIG_NO_NODE_ID	-1
+#define FW_PHY_CONFIG_CURRENT_GAP_COUNT	-1
 void fw_send_phy_config(struct fw_card *card,
 			int node_id, int generation, int gap_count);
 
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index a4bbf3dadf5..bb6a92bc9e6 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -34,6 +34,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/mutex.h>
 #include <linux/pci.h>
 #include <linux/pci_ids.h>
 #include <linux/spinlock.h>
@@ -182,6 +183,8 @@ struct fw_ohci {
 	 */
 	spinlock_t lock;
 
+	struct mutex phy_reg_mutex;
+
 	struct ar_context ar_request_ctx;
 	struct ar_context ar_response_ctx;
 	struct context at_request_ctx;
@@ -517,13 +520,10 @@ static int write_phy_reg(const struct fw_ohci *ohci, int addr, u32 val)
 	return -EBUSY;
 }
 
-static int ohci_update_phy_reg(struct fw_card *card, int addr,
-			       int clear_bits, int set_bits)
+static int update_phy_reg(struct fw_ohci *ohci, int addr,
+			  int clear_bits, int set_bits)
 {
-	struct fw_ohci *ohci = fw_ohci(card);
-	int ret;
-
-	ret = read_phy_reg(ohci, addr);
+	int ret = read_phy_reg(ohci, addr);
 	if (ret < 0)
 		return ret;
 
@@ -541,13 +541,38 @@ static int read_paged_phy_reg(struct fw_ohci *ohci, int page, int addr)
 {
 	int ret;
 
-	ret = ohci_update_phy_reg(&ohci->card, 7, PHY_PAGE_SELECT, page << 5);
+	ret = update_phy_reg(ohci, 7, PHY_PAGE_SELECT, page << 5);
 	if (ret < 0)
 		return ret;
 
 	return read_phy_reg(ohci, addr);
 }
 
+static int ohci_read_phy_reg(struct fw_card *card, int addr)
+{
+	struct fw_ohci *ohci = fw_ohci(card);
+	int ret;
+
+	mutex_lock(&ohci->phy_reg_mutex);
+	ret = read_phy_reg(ohci, addr);
+	mutex_unlock(&ohci->phy_reg_mutex);
+
+	return ret;
+}
+
+static int ohci_update_phy_reg(struct fw_card *card, int addr,
+			       int clear_bits, int set_bits)
+{
+	struct fw_ohci *ohci = fw_ohci(card);
+	int ret;
+
+	mutex_lock(&ohci->phy_reg_mutex);
+	ret = update_phy_reg(ohci, addr, clear_bits, set_bits);
+	mutex_unlock(&ohci->phy_reg_mutex);
+
+	return ret;
+}
+
 static int ar_context_add_page(struct ar_context *ctx)
 {
 	struct device *dev = ctx->ohci->card.device;
@@ -1676,7 +1701,7 @@ static int configure_1394a_enhancements(struct fw_ohci *ohci)
 		clear = PHY_ENABLE_ACCEL | PHY_ENABLE_MULTI;
 		set = 0;
 	}
-	ret = ohci_update_phy_reg(&ohci->card, 5, clear, set);
+	ret = update_phy_reg(ohci, 5, clear, set);
 	if (ret < 0)
 		return ret;
 
@@ -1856,12 +1881,8 @@ static int ohci_enable(struct fw_card *card,
 		  OHCI1394_HCControl_BIBimageValid);
 	flush_writes(ohci);
 
-	/*
-	 * We are ready to go, initiate bus reset to finish the
-	 * initialization.
-	 */
-
-	fw_core_initiate_bus_reset(&ohci->card, 1);
+	/* We are ready to go, reset bus to finish initialization. */
+	fw_schedule_bus_reset(&ohci->card, false, true);
 
 	return 0;
 }
@@ -1936,7 +1957,7 @@ static int ohci_set_config_rom(struct fw_card *card,
 	 * takes effect.
 	 */
 	if (ret == 0)
-		fw_core_initiate_bus_reset(&ohci->card, 1);
+		fw_schedule_bus_reset(&ohci->card, true, true);
 	else
 		dma_free_coherent(ohci->card.device, CONFIG_ROM_SIZE,
 				  next_config_rom, next_config_rom_bus);
@@ -2570,6 +2591,7 @@ static int ohci_queue_iso(struct fw_iso_context *base,
 
 static const struct fw_card_driver ohci_driver = {
 	.enable			= ohci_enable,
+	.read_phy_reg		= ohci_read_phy_reg,
 	.update_phy_reg		= ohci_update_phy_reg,
 	.set_config_rom		= ohci_set_config_rom,
 	.send_request		= ohci_send_request,
@@ -2645,6 +2667,7 @@ static int __devinit pci_probe(struct pci_dev *dev,
 	pci_set_drvdata(dev, ohci);
 
 	spin_lock_init(&ohci->lock);
+	mutex_init(&ohci->phy_reg_mutex);
 
 	tasklet_init(&ohci->bus_reset_tasklet,
 		     bus_reset_tasklet, (unsigned long)ohci);
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index db30a752a87..adc5b55e6e5 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -114,8 +114,10 @@ struct fw_card {
 
 	struct list_head link;
 
-	/* Work struct for BM duties. */
-	struct delayed_work work;
+	struct delayed_work br_work; /* bus reset job */
+	bool br_short;
+
+	struct delayed_work bm_work; /* bus manager job */
 	int bm_retries;
 	int bm_generation;
 	__be32 bm_transaction_data[2];
-- 
cgit v1.2.3-70-g09d2


From 095687c48b2e3b9f849554ab7a65e74988d50269 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 13 Jul 2010 18:13:19 +0900
Subject: ASoC: fsi: modify format area definition on flags

There is no need for each bit in this area to have its own meaning.
This patch changes the format values to sequence numbers.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Acked-by: Liam Girdwood <lrg@slimlogic.co.uk>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/sh_fsi.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/sound/sh_fsi.h b/include/sound/sh_fsi.h
index c0227361a87..3ecdb965b89 100644
--- a/include/sound/sh_fsi.h
+++ b/include/sound/sh_fsi.h
@@ -55,12 +55,12 @@
 #define SH_FSI_GET_IFMT(x)	((x >> 8) & SH_FSI_FMT_MASK)
 #define SH_FSI_GET_OFMT(x)	((x >> 0) & SH_FSI_FMT_MASK)
 
-#define SH_FSI_FMT_MONO		(1 << 0)
-#define SH_FSI_FMT_MONO_DELAY	(1 << 1)
-#define SH_FSI_FMT_PCM		(1 << 2)
-#define SH_FSI_FMT_I2S		(1 << 3)
-#define SH_FSI_FMT_TDM		(1 << 4)
-#define SH_FSI_FMT_TDM_DELAY	(1 << 5)
+#define SH_FSI_FMT_MONO		0
+#define SH_FSI_FMT_MONO_DELAY	1
+#define SH_FSI_FMT_PCM		2
+#define SH_FSI_FMT_I2S		3
+#define SH_FSI_FMT_TDM		4
+#define SH_FSI_FMT_TDM_DELAY	5
 
 #define SH_FSI_IFMT_TDM_CH(x) \
 	(SH_FSI_IFMT(TDM)	| SH_FSI_SET_CH_I(x))
-- 
cgit v1.2.3-70-g09d2


From ccad7b44ccdc8341c1449bc5b864b42b197f8c2e Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 13 Jul 2010 12:13:14 +0900
Subject: ASoC: fsi: Fixup for master mode

This patch adds hw_params to snd_soc_dai_ops,
because a board-specific set_rate is needed
when FSI is used in master mode.

This patch removes fsi_clk_ctrl from fsi_dai_startup,
because the clock should be disabled before set_rate.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Acked-by: Liam Girdwood <lrg@slimlogic.co.uk>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/sh_fsi.h | 32 +++++++++++++++++
 sound/soc/sh/fsi.c     | 98 ++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 124 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/sound/sh_fsi.h b/include/sound/sh_fsi.h
index 3ecdb965b89..2c6237e6c04 100644
--- a/include/sound/sh_fsi.h
+++ b/include/sound/sh_fsi.h
@@ -72,9 +72,41 @@
 #define SH_FSI_OFMT_TDM_DELAY_CH(x) \
 	(SH_FSI_OFMT(TDM_DELAY)	| SH_FSI_SET_CH_O(x))
 
+
+/*
+ * set_rate return value
+ *
+ * see ACKMD/BPFMD on
+ *     ACK_MD (FSI2)
+ *     CKG1   (FSI)
+ *
+ * err:  return value < 0
+ *
+ * 0x-00000AB
+ *
+ * A:  ACKMD value
+ * B:  BPFMD value
+ */
+
+#define SH_FSI_ACKMD_MASK	(0xF << 0)
+#define SH_FSI_ACKMD_512	(1 << 0)
+#define SH_FSI_ACKMD_256	(2 << 0)
+#define SH_FSI_ACKMD_128	(3 << 0)
+#define SH_FSI_ACKMD_64		(4 << 0)
+#define SH_FSI_ACKMD_32		(5 << 0)
+
+#define SH_FSI_BPFMD_MASK	(0xF << 4)
+#define SH_FSI_BPFMD_512	(1 << 4)
+#define SH_FSI_BPFMD_256	(2 << 4)
+#define SH_FSI_BPFMD_128	(3 << 4)
+#define SH_FSI_BPFMD_64		(4 << 4)
+#define SH_FSI_BPFMD_32		(5 << 4)
+#define SH_FSI_BPFMD_16		(6 << 4)
+
 struct sh_fsi_platform_info {
 	unsigned long porta_flags;
 	unsigned long portb_flags;
+	int (*set_rate)(int is_porta, int rate); /* for master mode */
 };
 
 extern struct snd_soc_dai fsi_soc_dai[2];
diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c
index e551ca45f03..a1ce6089177 100644
--- a/sound/soc/sh/fsi.c
+++ b/sound/soc/sh/fsi.c
@@ -65,6 +65,10 @@
 #define ERR_UNDER	0x00000001
 #define ST_ERR		(ERR_OVER | ERR_UNDER)
 
+/* CKG1 */
+#define ACKMD_MASK	0x00007000
+#define BPFMD_MASK	0x00000700
+
 /* CLK_RST */
 #define B_CLK		0x00000010
 #define A_CLK		0x00000001
@@ -734,12 +738,6 @@ static int fsi_dai_startup(struct snd_pcm_substream *substream,
 	}
 	fsi_reg_write(fsi, reg, data);
 
-	/*
-	 * clear clk reset if master mode
-	 */
-	if (is_master)
-		fsi_clk_ctrl(fsi, 1);
-
 	/* irq clear */
 	fsi_irq_disable(fsi, is_play);
 	fsi_irq_clear_status(fsi);
@@ -786,10 +784,98 @@ static int fsi_dai_trigger(struct snd_pcm_substream *substream, int cmd,
 	return ret;
 }
 
+static int fsi_dai_hw_params(struct snd_pcm_substream *substream,
+			     struct snd_pcm_hw_params *params,
+			     struct snd_soc_dai *dai)
+{
+	struct fsi_priv *fsi = fsi_get_priv(substream);
+	struct fsi_master *master = fsi_get_master(fsi);
+	int (*set_rate)(int is_porta, int rate) = master->info->set_rate;
+	int fsi_ver = master->core->ver;
+	int is_play = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK);
+	int ret;
+
+	/* if slave mode, set_rate is not needed */
+	if (!fsi_is_master_mode(fsi, is_play))
+		return 0;
+
+	/* it is error if no set_rate */
+	if (!set_rate)
+		return -EIO;
+
+	/* clock stop */
+	pm_runtime_put_sync(dai->dev);
+	fsi_clk_ctrl(fsi, 0);
+
+	ret = set_rate(fsi_is_port_a(fsi), params_rate(params));
+	if (ret > 0) {
+		u32 data = 0;
+
+		switch (ret & SH_FSI_ACKMD_MASK) {
+		default:
+			/* FALL THROUGH */
+		case SH_FSI_ACKMD_512:
+			data |= (0x0 << 12);
+			break;
+		case SH_FSI_ACKMD_256:
+			data |= (0x1 << 12);
+			break;
+		case SH_FSI_ACKMD_128:
+			data |= (0x2 << 12);
+			break;
+		case SH_FSI_ACKMD_64:
+			data |= (0x3 << 12);
+			break;
+		case SH_FSI_ACKMD_32:
+			if (fsi_ver < 2)
+				dev_err(dai->dev, "unsupported ACKMD\n");
+			else
+				data |= (0x4 << 12);
+			break;
+		}
+
+		switch (ret & SH_FSI_BPFMD_MASK) {
+		default:
+			/* FALL THROUGH */
+		case SH_FSI_BPFMD_32:
+			data |= (0x0 << 8);
+			break;
+		case SH_FSI_BPFMD_64:
+			data |= (0x1 << 8);
+			break;
+		case SH_FSI_BPFMD_128:
+			data |= (0x2 << 8);
+			break;
+		case SH_FSI_BPFMD_256:
+			data |= (0x3 << 8);
+			break;
+		case SH_FSI_BPFMD_512:
+			data |= (0x4 << 8);
+			break;
+		case SH_FSI_BPFMD_16:
+			if (fsi_ver < 2)
+				dev_err(dai->dev, "unsupported ACKMD\n");
+			else
+				data |= (0x7 << 8);
+			break;
+		}
+
+		fsi_reg_mask_set(fsi, CKG1, (ACKMD_MASK | BPFMD_MASK) , data);
+		udelay(10);
+		fsi_clk_ctrl(fsi, 1);
+		ret = 0;
+	}
+	pm_runtime_get_sync(dai->dev);
+
+	return ret;
+
+}
+
 static struct snd_soc_dai_ops fsi_dai_ops = {
 	.startup	= fsi_dai_startup,
 	.shutdown	= fsi_dai_shutdown,
 	.trigger	= fsi_dai_trigger,
+	.hw_params	= fsi_dai_hw_params,
 };
 
 /************************************************************************
-- 
cgit v1.2.3-70-g09d2


From 4c879170296174bde05cd1c643dac16594edee77 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 7 Jul 2010 15:30:10 +0200
Subject: padata: Check for valid padata instance on start

This patch introduces the PADATA_INVALID flag which is
checked on padata start. This will be used to mark a padata
instance as invalid, if the padata cpumask does not intersect
with the active cpumask. We change padata_start to return an
error if PADATA_INVALID is set. Also, we adapt the only
padata user, pcrypt, to this change.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/pcrypt.c        | 19 ++++++++++++++-----
 include/linux/padata.h |  3 ++-
 kernel/padata.c        | 18 ++++++++++++++++--
 3 files changed, 32 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 247178cb98e..71ae2b2ae33 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -385,6 +385,7 @@ static struct crypto_template pcrypt_tmpl = {
 
 static int __init pcrypt_init(void)
 {
+	int err = -ENOMEM;
 	encwq = create_workqueue("pencrypt");
 	if (!encwq)
 		goto err;
@@ -400,14 +401,22 @@ static int __init pcrypt_init(void)
 
 	pcrypt_dec_padata = padata_alloc(cpu_possible_mask, decwq);
 	if (!pcrypt_dec_padata)
-		goto err_free_padata;
+		goto err_free_enc_padata;
 
-	padata_start(pcrypt_enc_padata);
-	padata_start(pcrypt_dec_padata);
+	err = padata_start(pcrypt_enc_padata);
+	if (err)
+		goto err_free_dec_padata;
+
+	err = padata_start(pcrypt_dec_padata);
+	if (err)
+		goto err_free_dec_padata;
 
 	return crypto_register_template(&pcrypt_tmpl);
 
-err_free_padata:
+err_free_dec_padata:
+	padata_free(pcrypt_dec_padata);
+
+err_free_enc_padata:
 	padata_free(pcrypt_enc_padata);
 
 err_destroy_decwq:
@@ -417,7 +426,7 @@ err_destroy_encwq:
 	destroy_workqueue(encwq);
 
 err:
-	return -ENOMEM;
+	return err;
 }
 
 static void __exit pcrypt_exit(void)
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 8d8406246ee..e4c17f9b7c9 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -126,6 +126,7 @@ struct padata_instance {
 	u8			flags;
 #define	PADATA_INIT		1
 #define	PADATA_RESET		2
+#define	PADATA_INVALID		4
 };
 
 extern struct padata_instance *padata_alloc(const struct cpumask *cpumask,
@@ -138,6 +139,6 @@ extern int padata_set_cpumask(struct padata_instance *pinst,
 			      cpumask_var_t cpumask);
 extern int padata_add_cpu(struct padata_instance *pinst, int cpu);
 extern int padata_remove_cpu(struct padata_instance *pinst, int cpu);
-extern void padata_start(struct padata_instance *pinst);
+extern int padata_start(struct padata_instance *pinst);
 extern void padata_stop(struct padata_instance *pinst);
 #endif
diff --git a/kernel/padata.c b/kernel/padata.c
index ff8de1b71e4..e7d723a3e31 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -485,6 +485,11 @@ static void padata_flush_queues(struct parallel_data *pd)
 	BUG_ON(atomic_read(&pd->refcnt) != 0);
 }
 
+static void __padata_start(struct padata_instance *pinst)
+{
+	pinst->flags |= PADATA_INIT;
+}
+
 /* Replace the internal control stucture with a new one. */
 static void padata_replace(struct padata_instance *pinst,
 			   struct parallel_data *pd_new)
@@ -619,11 +624,20 @@ EXPORT_SYMBOL(padata_remove_cpu);
  *
  * @pinst: padata instance to start
  */
-void padata_start(struct padata_instance *pinst)
+int padata_start(struct padata_instance *pinst)
 {
+	int err = 0;
+
 	mutex_lock(&pinst->lock);
-	pinst->flags |= PADATA_INIT;
+
+	if (pinst->flags & PADATA_INVALID)
+		err =-EINVAL;
+
+	 __padata_start(pinst);
+
 	mutex_unlock(&pinst->lock);
+
+	return err;
 }
 EXPORT_SYMBOL(padata_start);
 
-- 
cgit v1.2.3-70-g09d2


From 5f1a8c1bc724498ff32acbd59ed5263275676b9d Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 7 Jul 2010 15:32:39 +0200
Subject: padata: simplify serialization mechanism

We count the number of processed objects on a percpu basis,
so we need to go through all the percpu reorder queues to calculate
the sequence number of the next object that needs serialization.
This patch changes this to count the number of processed objects
globally, so we can calculate the sequence number and the percpu
reorder queue of the next object that needs serialization without
searching through the percpu reorder queues. This avoids some
accesses to memory of foreign cpus.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/padata.h |  6 ++---
 kernel/padata.c        | 71 ++++++++++++++------------------------------------
 2 files changed, 22 insertions(+), 55 deletions(-)

(limited to 'include')

diff --git a/include/linux/padata.h b/include/linux/padata.h
index e4c17f9b7c9..8844b851191 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -67,7 +67,6 @@ struct padata_list {
  * @pwork: work struct for parallelization.
  * @swork: work struct for serialization.
  * @pd: Backpointer to the internal control structure.
- * @num_obj: Number of objects that are processed by this cpu.
  * @cpu_index: Index of the cpu.
  */
 struct padata_queue {
@@ -77,7 +76,6 @@ struct padata_queue {
 	struct work_struct	pwork;
 	struct work_struct	swork;
 	struct parallel_data    *pd;
-	atomic_t		num_obj;
 	int			cpu_index;
 };
 
@@ -93,6 +91,7 @@ struct padata_queue {
  * @max_seq_nr:  Maximal used sequence number.
  * @cpumask: cpumask in use.
  * @lock: Reorder lock.
+ * @processed: Number of already processed objects.
  * @timer: Reorder timer.
  */
 struct parallel_data {
@@ -103,7 +102,8 @@ struct parallel_data {
 	atomic_t                refcnt;
 	unsigned int		max_seq_nr;
 	cpumask_var_t		cpumask;
-	spinlock_t              lock;
+	spinlock_t              lock ____cacheline_aligned;
+	unsigned int            processed;
 	struct timer_list       timer;
 };
 
diff --git a/kernel/padata.c b/kernel/padata.c
index ae8defcf062..450d67d394b 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -170,79 +170,47 @@ EXPORT_SYMBOL(padata_do_parallel);
  */
 static struct padata_priv *padata_get_next(struct parallel_data *pd)
 {
-	int cpu, num_cpus, empty, calc_seq_nr;
-	int seq_nr, next_nr, overrun, next_overrun;
+	int cpu, num_cpus;
+	int next_nr, next_index;
 	struct padata_queue *queue, *next_queue;
 	struct padata_priv *padata;
 	struct padata_list *reorder;
 
-	empty = 0;
-	next_nr = -1;
-	next_overrun = 0;
-	next_queue = NULL;
-
 	num_cpus = cpumask_weight(pd->cpumask);
 
-	for_each_cpu(cpu, pd->cpumask) {
-		queue = per_cpu_ptr(pd->queue, cpu);
-		reorder = &queue->reorder;
-
-		/*
-		 * Calculate the seq_nr of the object that should be
-		 * next in this reorder queue.
-		 */
-		overrun = 0;
-		calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
-			       + queue->cpu_index;
-
-		if (unlikely(calc_seq_nr > pd->max_seq_nr)) {
-			calc_seq_nr = calc_seq_nr - pd->max_seq_nr - 1;
-			overrun = 1;
-		}
-
-		if (!list_empty(&reorder->list)) {
-			padata = list_entry(reorder->list.next,
-					    struct padata_priv, list);
-
-			seq_nr  = padata->seq_nr;
-			BUG_ON(calc_seq_nr != seq_nr);
-		} else {
-			seq_nr = calc_seq_nr;
-			empty++;
-		}
-
-		if (next_nr < 0 || seq_nr < next_nr
-		    || (next_overrun && !overrun)) {
-			next_nr = seq_nr;
-			next_overrun = overrun;
-			next_queue = queue;
-		}
+	/*
+	 * Calculate the percpu reorder queue and the sequence
+	 * number of the next object.
+	 */
+	next_nr = pd->processed;
+	next_index = next_nr % num_cpus;
+	cpu = padata_index_to_cpu(pd, next_index);
+	next_queue = per_cpu_ptr(pd->queue, cpu);
+
+	if (unlikely(next_nr > pd->max_seq_nr)) {
+		next_nr = next_nr - pd->max_seq_nr - 1;
+		next_index = next_nr % num_cpus;
+		cpu = padata_index_to_cpu(pd, next_index);
+		next_queue = per_cpu_ptr(pd->queue, cpu);
+		pd->processed = 0;
 	}
 
 	padata = NULL;
 
-	if (empty == num_cpus)
-		goto out;
-
 	reorder = &next_queue->reorder;
 
 	if (!list_empty(&reorder->list)) {
 		padata = list_entry(reorder->list.next,
 				    struct padata_priv, list);
 
-		if (unlikely(next_overrun)) {
-			for_each_cpu(cpu, pd->cpumask) {
-				queue = per_cpu_ptr(pd->queue, cpu);
-				atomic_set(&queue->num_obj, 0);
-			}
-		}
+		BUG_ON(next_nr != padata->seq_nr);
 
 		spin_lock(&reorder->lock);
 		list_del_init(&padata->list);
 		atomic_dec(&pd->reorder_objects);
 		spin_unlock(&reorder->lock);
 
-		atomic_inc(&next_queue->num_obj);
+		pd->processed++;
 
 		goto out;
 	}
@@ -430,7 +398,6 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 
 		INIT_WORK(&queue->pwork, padata_parallel_worker);
 		INIT_WORK(&queue->swork, padata_serial_worker);
-		atomic_set(&queue->num_obj, 0);
 	}
 
 	num_cpus = cpumask_weight(pd->cpumask);
-- 
cgit v1.2.3-70-g09d2


From 035ebefc737cce56d3938e9b7eaa5ac0e9c28715 Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@queued.net>
Date: Tue, 13 Jul 2010 09:42:26 +0000
Subject: of/sparc: move is_root_node() to of.h

Rename is_root_node() to of_node_is_root() and make it available for
all archs to use, as it's not PROM-specific.

Signed-off-by: Andres Salomon <dilinger@queued.net>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 arch/sparc/kernel/prom.h        | 8 --------
 arch/sparc/kernel/prom_64.c     | 6 +++---
 arch/sparc/kernel/prom_common.c | 2 +-
 include/linux/of.h              | 5 +++++
 4 files changed, 9 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/arch/sparc/kernel/prom.h b/arch/sparc/kernel/prom.h
index a8591ef2636..eeb04a782ec 100644
--- a/arch/sparc/kernel/prom.h
+++ b/arch/sparc/kernel/prom.h
@@ -9,14 +9,6 @@ extern void irq_trans_init(struct device_node *dp);
 
 extern unsigned int prom_unique_id;
 
-static inline int is_root_node(const struct device_node *dp)
-{
-	if (!dp)
-		return 0;
-
-	return (dp->parent == NULL);
-}
-
 extern char *build_path_component(struct device_node *dp);
 extern void of_console_init(void);
 
diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c
index fb06ac2bd38..0bffafdee35 100644
--- a/arch/sparc/kernel/prom_64.c
+++ b/arch/sparc/kernel/prom_64.c
@@ -21,7 +21,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/lmb.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 #include <asm/prom.h>
 #include <asm/oplib.h>
@@ -81,7 +81,7 @@ static void __init sun4v_path_component(struct device_node *dp, char *tmp_buf)
 		return;
 
 	regs = rprop->value;
-	if (!is_root_node(dp->parent)) {
+	if (!of_node_is_root(dp->parent)) {
 		sprintf(tmp_buf, "%s@%x,%x",
 			dp->name,
 			(unsigned int) (regs->phys_addr >> 32UL),
@@ -121,7 +121,7 @@ static void __init sun4u_path_component(struct device_node *dp, char *tmp_buf)
 		return;
 
 	regs = prop->value;
-	if (!is_root_node(dp->parent)) {
+	if (!of_node_is_root(dp->parent)) {
 		sprintf(tmp_buf, "%s@%x,%x",
 			dp->name,
 			(unsigned int) (regs->phys_addr >> 32UL),
diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c
index 57ac9e28be0..1f830da2ddf 100644
--- a/arch/sparc/kernel/prom_common.c
+++ b/arch/sparc/kernel/prom_common.c
@@ -244,7 +244,7 @@ char * __init build_full_name(struct device_node *dp)
 
 	n = prom_early_alloc(len);
 	strcpy(n, dp->parent->full_name);
-	if (!is_root_node(dp->parent)) {
+	if (!of_node_is_root(dp->parent)) {
 		strcpy(n + plen, "/");
 		plen++;
 	}
diff --git a/include/linux/of.h b/include/linux/of.h
index a367e19bb3a..b0756f33249 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -70,6 +70,11 @@ extern struct device_node *allnodes;
 extern struct device_node *of_chosen;
 extern rwlock_t devtree_lock;
 
+static inline bool of_node_is_root(const struct device_node *node)
+{
+	return node && (node->parent == NULL);
+}
+
 static inline int of_node_check_flag(struct device_node *n, unsigned long flag)
 {
 	return test_bit(flag, &n->_flags);
-- 
cgit v1.2.3-70-g09d2


From b0f77d0eae0c58a5a9691a067ada112ceeae2d00 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 14 Jul 2010 20:50:29 -0700
Subject: net: fix problem in reading sock TX queue

Fix problem in reading the tx_queue recorded in a socket.  In
dev_pick_tx, the TX queue is read by doing a check with
sk_tx_queue_recorded on the socket, followed by a sk_tx_queue_get.
The problem is that there is no mutual exclusion across these
calls in the socket, so it is possible that the queue in the
sock can be invalidated after sk_tx_queue_recorded is called, so
that sk_tx_queue_get returns -1, which sets 65535 in queue_index
and thus dev_pick_tx returns 65536 which is a bogus queue and
can cause a crash in dev_queue_xmit.

We fix this by only calling sk_tx_queue_get which does the proper
checks.  The interface is that sk_tx_queue_get returns the TX queue
if the sock argument is non-NULL and TX queue is recorded, else it
returns -1.  sk_tx_queue_recorded is no longer used so it can be
completely removed.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 7 +------
 net/core/dev.c     | 7 +++----
 2 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 731150d5279..0a691ea7654 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1224,12 +1224,7 @@ static inline void sk_tx_queue_clear(struct sock *sk)
 
 static inline int sk_tx_queue_get(const struct sock *sk)
 {
-	return sk->sk_tx_queue_mapping;
-}
-
-static inline bool sk_tx_queue_recorded(const struct sock *sk)
-{
-	return (sk && sk->sk_tx_queue_mapping >= 0);
+	return sk ? sk->sk_tx_queue_mapping : -1;
 }
 
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
diff --git a/net/core/dev.c b/net/core/dev.c
index 4b05fdf762a..0ea10f849be 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2029,12 +2029,11 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 					struct sk_buff *skb)
 {
-	u16 queue_index;
+	int queue_index;
 	struct sock *sk = skb->sk;
 
-	if (sk_tx_queue_recorded(sk)) {
-		queue_index = sk_tx_queue_get(sk);
-	} else {
+	queue_index = sk_tx_queue_get(sk);
+	if (queue_index < 0) {
 		const struct net_device_ops *ops = dev->netdev_ops;
 
 		if (ops->ndo_select_queue) {
-- 
cgit v1.2.3-70-g09d2


From 4cf51c383d7a8d472a6090a0d19c371d40e823c9 Mon Sep 17 00:00:00 2001
From: Joonyoung Shim <jy0922.shim@samsung.com>
Date: Wed, 14 Jul 2010 21:55:30 -0700
Subject: Input: Add ATMEL QT602240 touchscreen driver

The chip's full name is AT42QT602240 or ATMXT224. This is a capacitive
touchscreen supporting 10-contact multitouch and using an I2C interface.

Signed-off-by: Joonyoung Shim <jy0922.shim@samsung.com>
Acked-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/Kconfig       |   12 +
 drivers/input/touchscreen/Makefile      |    1 +
 drivers/input/touchscreen/qt602240_ts.c | 1401 +++++++++++++++++++++++++++++++
 include/linux/i2c/qt602240_ts.h         |   38 +
 4 files changed, 1452 insertions(+)
 create mode 100644 drivers/input/touchscreen/qt602240_ts.c
 create mode 100644 include/linux/i2c/qt602240_ts.h

(limited to 'include')

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index ff18d896ea6..7bfcfdff6cf 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -291,6 +291,18 @@ config TOUCHSCREEN_PENMOUNT
 	  To compile this driver as a module, choose M here: the
 	  module will be called penmount.
 
+config TOUCHSCREEN_QT602240
+	tristate "QT602240 I2C Touchscreen"
+	depends on I2C
+	help
+	  Say Y here if you have the AT42QT602240/ATMXT224 I2C touchscreen
+	  connected to your system.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called qt602240_ts.
+
 config TOUCHSCREEN_MIGOR
 	tristate "Renesas MIGO-R touchscreen"
 	depends on SH_MIGOR && I2C
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 9efdd442475..779de0d9d41 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_TOUCHSCREEN_HTCPEN)	+= htcpen.o
 obj-$(CONFIG_TOUCHSCREEN_USB_COMPOSITE)	+= usbtouchscreen.o
 obj-$(CONFIG_TOUCHSCREEN_PCAP)		+= pcap_ts.o
 obj-$(CONFIG_TOUCHSCREEN_PENMOUNT)	+= penmount.o
+obj-$(CONFIG_TOUCHSCREEN_QT602240)	+= qt602240_ts.o
 obj-$(CONFIG_TOUCHSCREEN_S3C2410)	+= s3c2410_ts.o
 obj-$(CONFIG_TOUCHSCREEN_TOUCHIT213)	+= touchit213.o
 obj-$(CONFIG_TOUCHSCREEN_TOUCHRIGHT)	+= touchright.o
diff --git a/drivers/input/touchscreen/qt602240_ts.c b/drivers/input/touchscreen/qt602240_ts.c
new file mode 100644
index 00000000000..66b26ad3032
--- /dev/null
+++ b/drivers/input/touchscreen/qt602240_ts.c
@@ -0,0 +1,1401 @@
+/*
+ * AT42QT602240/ATMXT224 Touchscreen driver
+ *
+ * Copyright (C) 2010 Samsung Electronics Co.Ltd
+ * Author: Joonyoung Shim <jy0922.shim@samsung.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/i2c.h>
+#include <linux/i2c/qt602240_ts.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+
+/* Version */
+#define QT602240_VER_20			20
+#define QT602240_VER_21			21
+#define QT602240_VER_22			22
+
+/* Slave addresses */
+#define QT602240_APP_LOW		0x4a
+#define QT602240_APP_HIGH		0x4b
+#define QT602240_BOOT_LOW		0x24
+#define QT602240_BOOT_HIGH		0x25
+
+/* Firmware */
+#define QT602240_FW_NAME		"qt602240.fw"
+
+/* Registers */
+#define QT602240_FAMILY_ID		0x00
+#define QT602240_VARIANT_ID		0x01
+#define QT602240_VERSION		0x02
+#define QT602240_BUILD			0x03
+#define QT602240_MATRIX_X_SIZE		0x04
+#define QT602240_MATRIX_Y_SIZE		0x05
+#define QT602240_OBJECT_NUM		0x06
+#define QT602240_OBJECT_START		0x07
+
+#define QT602240_OBJECT_SIZE		6
+
+/* Object types */
+#define QT602240_DEBUG_DIAGNOSTIC	37
+#define QT602240_GEN_MESSAGE		5
+#define QT602240_GEN_COMMAND		6
+#define QT602240_GEN_POWER		7
+#define QT602240_GEN_ACQUIRE		8
+#define QT602240_TOUCH_MULTI		9
+#define QT602240_TOUCH_KEYARRAY		15
+#define QT602240_TOUCH_PROXIMITY	23
+#define QT602240_PROCI_GRIPFACE		20
+#define QT602240_PROCG_NOISE		22
+#define QT602240_PROCI_ONETOUCH		24
+#define QT602240_PROCI_TWOTOUCH		27
+#define QT602240_SPT_COMMSCONFIG	18	/* firmware ver 21 over */
+#define QT602240_SPT_GPIOPWM		19
+#define QT602240_SPT_SELFTEST		25
+#define QT602240_SPT_CTECONFIG		28
+#define QT602240_SPT_USERDATA		38	/* firmware ver 21 over */
+
+/* QT602240_GEN_COMMAND field */
+#define QT602240_COMMAND_RESET		0
+#define QT602240_COMMAND_BACKUPNV	1
+#define QT602240_COMMAND_CALIBRATE	2
+#define QT602240_COMMAND_REPORTALL	3
+#define QT602240_COMMAND_DIAGNOSTIC	5
+
+/* QT602240_GEN_POWER field */
+#define QT602240_POWER_IDLEACQINT	0
+#define QT602240_POWER_ACTVACQINT	1
+#define QT602240_POWER_ACTV2IDLETO	2
+
+/* QT602240_GEN_ACQUIRE field */
+#define QT602240_ACQUIRE_CHRGTIME	0
+#define QT602240_ACQUIRE_TCHDRIFT	2
+#define QT602240_ACQUIRE_DRIFTST	3
+#define QT602240_ACQUIRE_TCHAUTOCAL	4
+#define QT602240_ACQUIRE_SYNC		5
+#define QT602240_ACQUIRE_ATCHCALST	6
+#define QT602240_ACQUIRE_ATCHCALSTHR	7
+
+/* QT602240_TOUCH_MULTI field */
+#define QT602240_TOUCH_CTRL		0
+#define QT602240_TOUCH_XORIGIN		1
+#define QT602240_TOUCH_YORIGIN		2
+#define QT602240_TOUCH_XSIZE		3
+#define QT602240_TOUCH_YSIZE		4
+#define QT602240_TOUCH_BLEN		6
+#define QT602240_TOUCH_TCHTHR		7
+#define QT602240_TOUCH_TCHDI		8
+#define QT602240_TOUCH_ORIENT		9
+#define QT602240_TOUCH_MOVHYSTI		11
+#define QT602240_TOUCH_MOVHYSTN		12
+#define QT602240_TOUCH_NUMTOUCH		14
+#define QT602240_TOUCH_MRGHYST		15
+#define QT602240_TOUCH_MRGTHR		16
+#define QT602240_TOUCH_AMPHYST		17
+#define QT602240_TOUCH_XRANGE_LSB	18
+#define QT602240_TOUCH_XRANGE_MSB	19
+#define QT602240_TOUCH_YRANGE_LSB	20
+#define QT602240_TOUCH_YRANGE_MSB	21
+#define QT602240_TOUCH_XLOCLIP		22
+#define QT602240_TOUCH_XHICLIP		23
+#define QT602240_TOUCH_YLOCLIP		24
+#define QT602240_TOUCH_YHICLIP		25
+#define QT602240_TOUCH_XEDGECTRL	26
+#define QT602240_TOUCH_XEDGEDIST	27
+#define QT602240_TOUCH_YEDGECTRL	28
+#define QT602240_TOUCH_YEDGEDIST	29
+#define QT602240_TOUCH_JUMPLIMIT	30	/* firmware ver 22 over */
+
+/* QT602240_PROCI_GRIPFACE field */
+#define QT602240_GRIPFACE_CTRL		0
+#define QT602240_GRIPFACE_XLOGRIP	1
+#define QT602240_GRIPFACE_XHIGRIP	2
+#define QT602240_GRIPFACE_YLOGRIP	3
+#define QT602240_GRIPFACE_YHIGRIP	4
+#define QT602240_GRIPFACE_MAXTCHS	5
+#define QT602240_GRIPFACE_SZTHR1	7
+#define QT602240_GRIPFACE_SZTHR2	8
+#define QT602240_GRIPFACE_SHPTHR1	9
+#define QT602240_GRIPFACE_SHPTHR2	10
+#define QT602240_GRIPFACE_SUPEXTTO	11
+
+/* QT602240_PROCI_NOISE field */
+#define QT602240_NOISE_CTRL		0
+#define QT602240_NOISE_OUTFLEN		1
+#define QT602240_NOISE_GCAFUL_LSB	3
+#define QT602240_NOISE_GCAFUL_MSB	4
+#define QT602240_NOISE_GCAFLL_LSB	5
+#define QT602240_NOISE_GCAFLL_MSB	6
+#define QT602240_NOISE_ACTVGCAFVALID	7
+#define QT602240_NOISE_NOISETHR		8
+#define QT602240_NOISE_FREQHOPSCALE	10
+#define QT602240_NOISE_FREQ0		11
+#define QT602240_NOISE_FREQ1		12
+#define QT602240_NOISE_FREQ2		13
+#define QT602240_NOISE_FREQ3		14
+#define QT602240_NOISE_FREQ4		15
+#define QT602240_NOISE_IDLEGCAFVALID	16
+
+/* QT602240_SPT_COMMSCONFIG */
+#define QT602240_COMMS_CTRL		0
+#define QT602240_COMMS_CMD		1
+
+/* QT602240_SPT_CTECONFIG field */
+#define QT602240_CTE_CTRL		0
+#define QT602240_CTE_CMD		1
+#define QT602240_CTE_MODE		2
+#define QT602240_CTE_IDLEGCAFDEPTH	3
+#define QT602240_CTE_ACTVGCAFDEPTH	4
+#define QT602240_CTE_VOLTAGE		5	/* firmware ver 21 over */
+
+#define QT602240_VOLTAGE_DEFAULT	2700000
+#define QT602240_VOLTAGE_STEP		10000
+
+/* Define for QT602240_GEN_COMMAND */
+#define QT602240_BOOT_VALUE		0xa5
+#define QT602240_BACKUP_VALUE		0x55
+#define QT602240_BACKUP_TIME		25	/* msec */
+#define QT602240_RESET_TIME		65	/* msec */
+
+#define QT602240_FWRESET_TIME		175	/* msec */
+
+/* Command to unlock bootloader */
+#define QT602240_UNLOCK_CMD_MSB		0xaa
+#define QT602240_UNLOCK_CMD_LSB		0xdc
+
+/* Bootloader mode status */
+#define QT602240_WAITING_BOOTLOAD_CMD	0xc0	/* valid 7 6 bit only */
+#define QT602240_WAITING_FRAME_DATA	0x80	/* valid 7 6 bit only */
+#define QT602240_FRAME_CRC_CHECK	0x02
+#define QT602240_FRAME_CRC_FAIL		0x03
+#define QT602240_FRAME_CRC_PASS		0x04
+#define QT602240_APP_CRC_FAIL		0x40	/* valid 7 6 bit only */
+#define QT602240_BOOT_STATUS_MASK	0x3f
+
+/* Touch status */
+#define QT602240_SUPPRESS		(1 << 1)
+#define QT602240_AMP			(1 << 2)
+#define QT602240_VECTOR			(1 << 3)
+#define QT602240_MOVE			(1 << 4)
+#define QT602240_RELEASE		(1 << 5)
+#define QT602240_PRESS			(1 << 6)
+#define QT602240_DETECT			(1 << 7)
+
+/* Touchscreen absolute values */
+#define QT602240_MAX_XC			0x3ff
+#define QT602240_MAX_YC			0x3ff
+#define QT602240_MAX_AREA		0xff
+
+#define QT602240_MAX_FINGER		10
+
+/* Initial register values recommended from chip vendor */
+static const u8 init_vals_ver_20[] = {
+	/* QT602240_GEN_COMMAND(6) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	/* QT602240_GEN_POWER(7) */
+	0x20, 0xff, 0x32,
+	/* QT602240_GEN_ACQUIRE(8) */
+	0x08, 0x05, 0x05, 0x00, 0x00, 0x00, 0x05, 0x14,
+	/* QT602240_TOUCH_MULTI(9) */
+	0x00, 0x00, 0x00, 0x11, 0x0a, 0x00, 0x00, 0x00, 0x02, 0x00,
+	0x00, 0x01, 0x01, 0x0e, 0x0a, 0x0a, 0x0a, 0x0a, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x64,
+	/* QT602240_TOUCH_KEYARRAY(15) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00,
+	/* QT602240_SPT_GPIOPWM(19) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+	/* QT602240_PROCI_GRIPFACE(20) */
+	0x00, 0x64, 0x64, 0x64, 0x64, 0x00, 0x00, 0x1e, 0x14, 0x04,
+	0x1e, 0x00,
+	/* QT602240_PROCG_NOISE(22) */
+	0x05, 0x00, 0x00, 0x19, 0x00, 0xe7, 0xff, 0x04, 0x32, 0x00,
+	0x01, 0x0a, 0x0f, 0x14, 0x00, 0x00, 0xe8,
+	/* QT602240_TOUCH_PROXIMITY(23) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00,
+	/* QT602240_PROCI_ONETOUCH(24) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	/* QT602240_SPT_SELFTEST(25) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	/* QT602240_PROCI_TWOTOUCH(27) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	/* QT602240_SPT_CTECONFIG(28) */
+	0x00, 0x00, 0x00, 0x04, 0x08,
+};
+
+static const u8 init_vals_ver_21[] = {
+	/* QT602240_GEN_COMMAND(6) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	/* QT602240_GEN_POWER(7) */
+	0x20, 0xff, 0x32,
+	/* QT602240_GEN_ACQUIRE(8) */
+	0x0a, 0x00, 0x05, 0x00, 0x00, 0x00, 0x09, 0x23,
+	/* QT602240_TOUCH_MULTI(9) */
+	0x00, 0x00, 0x00, 0x13, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00,
+	0x00, 0x01, 0x01, 0x0e, 0x0a, 0x0a, 0x0a, 0x0a, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	/* QT602240_TOUCH_KEYARRAY(15) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00,
+	/* QT602240_SPT_GPIOPWM(19) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	/* QT602240_PROCI_GRIPFACE(20) */
+	0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x28, 0x04,
+	0x0f, 0x0a,
+	/* QT602240_PROCG_NOISE(22) */
+	0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x23, 0x00,
+	0x00, 0x05, 0x0f, 0x19, 0x23, 0x2d, 0x03,
+	/* QT602240_TOUCH_PROXIMITY(23) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00,
+	/* QT602240_PROCI_ONETOUCH(24) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	/* QT602240_SPT_SELFTEST(25) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	/* QT602240_PROCI_TWOTOUCH(27) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	/* QT602240_SPT_CTECONFIG(28) */
+	0x00, 0x00, 0x00, 0x08, 0x10, 0x00,
+};
+
+static const u8 init_vals_ver_22[] = {
+	/* QT602240_GEN_COMMAND(6) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	/* QT602240_GEN_POWER(7) */
+	0x20, 0xff, 0x32,
+	/* QT602240_GEN_ACQUIRE(8) */
+	0x0a, 0x00, 0x05, 0x00, 0x00, 0x00, 0x09, 0x23,
+	/* QT602240_TOUCH_MULTI(9) */
+	0x00, 0x00, 0x00, 0x13, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00,
+	0x00, 0x01, 0x01, 0x0e, 0x0a, 0x0a, 0x0a, 0x0a, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00,
+	/* QT602240_TOUCH_KEYARRAY(15) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00,
+	/* QT602240_SPT_GPIOPWM(19) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	/* QT602240_PROCI_GRIPFACE(20) */
+	0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x28, 0x04,
+	0x0f, 0x0a,
+	/* QT602240_PROCG_NOISE(22) */
+	0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x23, 0x00,
+	0x00, 0x05, 0x0f, 0x19, 0x23, 0x2d, 0x03,
+	/* QT602240_TOUCH_PROXIMITY(23) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00,
+	/* QT602240_PROCI_ONETOUCH(24) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	/* QT602240_SPT_SELFTEST(25) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	/* QT602240_PROCI_TWOTOUCH(27) */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	/* QT602240_SPT_CTECONFIG(28) */
+	0x00, 0x00, 0x00, 0x08, 0x10, 0x00,
+};
+
+struct qt602240_info {
+	u8 family_id;
+	u8 variant_id;
+	u8 version;
+	u8 build;
+	u8 matrix_xsize;
+	u8 matrix_ysize;
+	u8 object_num;
+};
+
+struct qt602240_object {
+	u8 type;
+	u16 start_address;
+	u8 size;
+	u8 instances;
+	u8 num_report_ids;
+
+	/* to map object and message */
+	u8 max_reportid;
+};
+
+struct qt602240_message {
+	u8 reportid;
+	u8 message[7];
+	u8 checksum;
+};
+
+struct qt602240_finger {
+	int status;
+	int x;
+	int y;
+	int area;
+};
+
+/* Each client has this additional data */
+struct qt602240_data {
+	struct i2c_client *client;
+	struct input_dev *input_dev;
+	const struct qt602240_platform_data *pdata;
+	struct qt602240_object *object_table;
+	struct qt602240_info info;
+	struct qt602240_finger finger[QT602240_MAX_FINGER];
+	unsigned int irq;
+};
+
+static bool qt602240_object_readable(unsigned int type)
+{
+	switch (type) {
+	case QT602240_GEN_MESSAGE:
+	case QT602240_GEN_COMMAND:
+	case QT602240_GEN_POWER:
+	case QT602240_GEN_ACQUIRE:
+	case QT602240_TOUCH_MULTI:
+	case QT602240_TOUCH_KEYARRAY:
+	case QT602240_TOUCH_PROXIMITY:
+	case QT602240_PROCI_GRIPFACE:
+	case QT602240_PROCG_NOISE:
+	case QT602240_PROCI_ONETOUCH:
+	case QT602240_PROCI_TWOTOUCH:
+	case QT602240_SPT_COMMSCONFIG:
+	case QT602240_SPT_GPIOPWM:
+	case QT602240_SPT_SELFTEST:
+	case QT602240_SPT_CTECONFIG:
+	case QT602240_SPT_USERDATA:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool qt602240_object_writable(unsigned int type)
+{
+	switch (type) {
+	case QT602240_GEN_COMMAND:
+	case QT602240_GEN_POWER:
+	case QT602240_GEN_ACQUIRE:
+	case QT602240_TOUCH_MULTI:
+	case QT602240_TOUCH_KEYARRAY:
+	case QT602240_TOUCH_PROXIMITY:
+	case QT602240_PROCI_GRIPFACE:
+	case QT602240_PROCG_NOISE:
+	case QT602240_PROCI_ONETOUCH:
+	case QT602240_PROCI_TWOTOUCH:
+	case QT602240_SPT_GPIOPWM:
+	case QT602240_SPT_SELFTEST:
+	case QT602240_SPT_CTECONFIG:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static void qt602240_dump_message(struct device *dev,
+				  struct qt602240_message *message)
+{
+	dev_dbg(dev, "reportid:\t0x%x\n", message->reportid);
+	dev_dbg(dev, "message1:\t0x%x\n", message->message[0]);
+	dev_dbg(dev, "message2:\t0x%x\n", message->message[1]);
+	dev_dbg(dev, "message3:\t0x%x\n", message->message[2]);
+	dev_dbg(dev, "message4:\t0x%x\n", message->message[3]);
+	dev_dbg(dev, "message5:\t0x%x\n", message->message[4]);
+	dev_dbg(dev, "message6:\t0x%x\n", message->message[5]);
+	dev_dbg(dev, "message7:\t0x%x\n", message->message[6]);
+	dev_dbg(dev, "checksum:\t0x%x\n", message->checksum);
+}
+
+static int qt602240_check_bootloader(struct i2c_client *client,
+				     unsigned int state)
+{
+	u8 val;
+
+recheck:
+	if (i2c_master_recv(client, &val, 1) != 1) {
+		dev_err(&client->dev, "%s: i2c recv failed\n", __func__);
+		return -EIO;
+	}
+
+	switch (state) {
+	case QT602240_WAITING_BOOTLOAD_CMD:
+	case QT602240_WAITING_FRAME_DATA:
+		val &= ~QT602240_BOOT_STATUS_MASK;
+		break;
+	case QT602240_FRAME_CRC_PASS:
+		if (val == QT602240_FRAME_CRC_CHECK)
+			goto recheck;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (val != state) {
+		dev_err(&client->dev, "Unvalid bootloader mode state\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int qt602240_unlock_bootloader(struct i2c_client *client)
+{
+	u8 buf[2];
+
+	buf[0] = QT602240_UNLOCK_CMD_LSB;
+	buf[1] = QT602240_UNLOCK_CMD_MSB;
+
+	if (i2c_master_send(client, buf, 2) != 2) {
+		dev_err(&client->dev, "%s: i2c send failed\n", __func__);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int qt602240_fw_write(struct i2c_client *client,
+			     const u8 *data, unsigned int frame_size)
+{
+	if (i2c_master_send(client, data, frame_size) != frame_size) {
+		dev_err(&client->dev, "%s: i2c send failed\n", __func__);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int __qt602240_read_reg(struct i2c_client *client,
+			       u16 reg, u16 len, void *val)
+{
+	struct i2c_msg xfer[2];
+	u8 buf[2];
+
+	buf[0] = reg & 0xff;
+	buf[1] = (reg >> 8) & 0xff;
+
+	/* Write register */
+	xfer[0].addr = client->addr;
+	xfer[0].flags = 0;
+	xfer[0].len = 2;
+	xfer[0].buf = buf;
+
+	/* Read data */
+	xfer[1].addr = client->addr;
+	xfer[1].flags = I2C_M_RD;
+	xfer[1].len = len;
+	xfer[1].buf = val;
+
+	if (i2c_transfer(client->adapter, xfer, 2) != 2) {
+		dev_err(&client->dev, "%s: i2c transfer failed\n", __func__);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int qt602240_read_reg(struct i2c_client *client, u16 reg, u8 *val)
+{
+	return __qt602240_read_reg(client, reg, 1, val);
+}
+
+static int qt602240_write_reg(struct i2c_client *client, u16 reg, u8 val)
+{
+	u8 buf[3];
+
+	buf[0] = reg & 0xff;
+	buf[1] = (reg >> 8) & 0xff;
+	buf[2] = val;
+
+	if (i2c_master_send(client, buf, 3) != 3) {
+		dev_err(&client->dev, "%s: i2c send failed\n", __func__);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int qt602240_read_object_table(struct i2c_client *client,
+				      u16 reg, u8 *object_buf)
+{
+	return __qt602240_read_reg(client, reg, QT602240_OBJECT_SIZE,
+				   object_buf);
+}
+
+static struct qt602240_object *
+qt602240_get_object(struct qt602240_data *data, u8 type)
+{
+	struct qt602240_object *object;
+	int i;
+
+	for (i = 0; i < data->info.object_num; i++) {
+		object = data->object_table + i;
+		if (object->type == type)
+			return object;
+	}
+
+	dev_err(&data->client->dev, "Invalid object type\n");
+	return NULL;
+}
+
+static int qt602240_read_message(struct qt602240_data *data,
+				 struct qt602240_message *message)
+{
+	struct qt602240_object *object;
+	u16 reg;
+
+	object = qt602240_get_object(data, QT602240_GEN_MESSAGE);
+	if (!object)
+		return -EINVAL;
+
+	reg = object->start_address;
+	return __qt602240_read_reg(data->client, reg,
+			sizeof(struct qt602240_message), message);
+}
+
+static int qt602240_read_object(struct qt602240_data *data,
+				u8 type, u8 offset, u8 *val)
+{
+	struct qt602240_object *object;
+	u16 reg;
+
+	object = qt602240_get_object(data, type);
+	if (!object)
+		return -EINVAL;
+
+	reg = object->start_address;
+	return __qt602240_read_reg(data->client, reg + offset, 1, val);
+}
+
+static int qt602240_write_object(struct qt602240_data *data,
+				 u8 type, u8 offset, u8 val)
+{
+	struct qt602240_object *object;
+	u16 reg;
+
+	object = qt602240_get_object(data, type);
+	if (!object)
+		return -EINVAL;
+
+	reg = object->start_address;
+	return qt602240_write_reg(data->client, reg + offset, val);
+}
+
+static void qt602240_input_report(struct qt602240_data *data, int single_id)
+{
+	struct qt602240_finger *finger = data->finger;
+	struct input_dev *input_dev = data->input_dev;
+	int status = finger[single_id].status;
+	int finger_num = 0;
+	int id;
+
+	for (id = 0; id < QT602240_MAX_FINGER; id++) {
+		if (!finger[id].status)
+			continue;
+
+		input_report_abs(input_dev, ABS_MT_TOUCH_MAJOR,
+				finger[id].status != QT602240_RELEASE ?
+				finger[id].area : 0);
+		input_report_abs(input_dev, ABS_MT_POSITION_X,
+				finger[id].x);
+		input_report_abs(input_dev, ABS_MT_POSITION_Y,
+				finger[id].y);
+		input_mt_sync(input_dev);
+
+		if (finger[id].status == QT602240_RELEASE)
+			finger[id].status = 0;
+		else
+			finger_num++;
+	}
+
+	input_report_key(input_dev, BTN_TOUCH, finger_num > 0);
+
+	if (status != QT602240_RELEASE) {
+		input_report_abs(input_dev, ABS_X, finger[single_id].x);
+		input_report_abs(input_dev, ABS_Y, finger[single_id].y);
+	}
+
+	input_sync(input_dev);
+}
+
+static void qt602240_input_touchevent(struct qt602240_data *data,
+				      struct qt602240_message *message, int id)
+{
+	struct qt602240_finger *finger = data->finger;
+	struct device *dev = &data->client->dev;
+	u8 status = message->message[0];
+	int x;
+	int y;
+	int area;
+
+	/* Check the touch is present on the screen */
+	if (!(status & QT602240_DETECT)) {
+		if (status & QT602240_RELEASE) {
+			dev_dbg(dev, "[%d] released\n", id);
+
+			finger[id].status = QT602240_RELEASE;
+			qt602240_input_report(data, id);
+		}
+		return;
+	}
+
+	/* Check only AMP detection */
+	if (!(status & (QT602240_PRESS | QT602240_MOVE)))
+		return;
+
+	x = (message->message[1] << 2) | ((message->message[3] & ~0x3f) >> 6);
+	y = (message->message[2] << 2) | ((message->message[3] & ~0xf3) >> 2);
+	area = message->message[4];
+
+	dev_dbg(dev, "[%d] %s x: %d, y: %d, area: %d\n", id,
+		status & QT602240_MOVE ? "moved" : "pressed",
+		x, y, area);
+
+	finger[id].status = status & QT602240_MOVE ?
+				QT602240_MOVE : QT602240_PRESS;
+	finger[id].x = x;
+	finger[id].y = y;
+	finger[id].area = area;
+
+	qt602240_input_report(data, id);
+}
+
+static irqreturn_t qt602240_interrupt(int irq, void *dev_id)
+{
+	struct qt602240_data *data = dev_id;
+	struct qt602240_message message;
+	struct qt602240_object *object;
+	struct device *dev = &data->client->dev;
+	int id;
+	u8 reportid;
+	u8 max_reportid;
+	u8 min_reportid;
+
+	do {
+		if (qt602240_read_message(data, &message)) {
+			dev_err(dev, "Failed to read message\n");
+			goto end;
+		}
+
+		reportid = message.reportid;
+
+		/* Is reportid one of QT602240_TOUCH_MULTI's report ids? */
+		object = qt602240_get_object(data, QT602240_TOUCH_MULTI);
+		if (!object)
+			goto end;
+
+		max_reportid = object->max_reportid;
+		min_reportid = max_reportid - object->num_report_ids + 1;
+		id = reportid - min_reportid;
+
+		if (reportid >= min_reportid && reportid <= max_reportid)
+			qt602240_input_touchevent(data, &message, id);
+		else
+			qt602240_dump_message(dev, &message);
+	} while (reportid != 0xff);
+
+end:
+	return IRQ_HANDLED;
+}
+
+static int qt602240_check_reg_init(struct qt602240_data *data)
+{
+	struct qt602240_object *object;
+	struct device *dev = &data->client->dev;
+	int index = 0;
+	int i, j;
+	u8 version = data->info.version;
+	u8 *init_vals;
+
+	switch (version) {
+	case QT602240_VER_20:
+		init_vals = (u8 *)init_vals_ver_20;
+		break;
+	case QT602240_VER_21:
+		init_vals = (u8 *)init_vals_ver_21;
+		break;
+	case QT602240_VER_22:
+		init_vals = (u8 *)init_vals_ver_22;
+		break;
+	default:
+		dev_err(dev, "Firmware version %d doesn't support\n", version);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < data->info.object_num; i++) {
+		object = data->object_table + i;
+
+		if (!qt602240_object_writable(object->type))
+			continue;
+
+		for (j = 0; j < object->size + 1; j++)
+			qt602240_write_object(data, object->type, j,
+					init_vals[index + j]);
+
+		index += object->size + 1;
+	}
+
+	return 0;
+}
+
+static int qt602240_check_matrix_size(struct qt602240_data *data)
+{
+	const struct qt602240_platform_data *pdata = data->pdata;
+	struct device *dev = &data->client->dev;
+	int mode = -1;
+	int error;
+	u8 val;
+
+	dev_dbg(dev, "Number of X lines: %d\n", pdata->x_line);
+	dev_dbg(dev, "Number of Y lines: %d\n", pdata->y_line);
+
+	switch (pdata->x_line) {
+	case 0 ... 15:
+		if (pdata->y_line <= 14)
+			mode = 0;
+		break;
+	case 16:
+		if (pdata->y_line <= 12)
+			mode = 1;
+		if (pdata->y_line == 13 || pdata->y_line == 14)
+			mode = 0;
+		break;
+	case 17:
+		if (pdata->y_line <= 11)
+			mode = 2;
+		if (pdata->y_line == 12 || pdata->y_line == 13)
+			mode = 1;
+		break;
+	case 18:
+		if (pdata->y_line <= 10)
+			mode = 3;
+		if (pdata->y_line == 11 || pdata->y_line == 12)
+			mode = 2;
+		break;
+	case 19:
+		if (pdata->y_line <= 9)
+			mode = 4;
+		if (pdata->y_line == 10 || pdata->y_line == 11)
+			mode = 3;
+		break;
+	case 20:
+		mode = 4;
+	}
+
+	if (mode < 0) {
+		dev_err(dev, "Invalid X/Y lines\n");
+		return -EINVAL;
+	}
+
+	error = qt602240_read_object(data, QT602240_SPT_CTECONFIG,
+				QT602240_CTE_MODE, &val);
+	if (error)
+		return error;
+
+	if (mode == val)
+		return 0;
+
+	/* Change the CTE configuration */
+	qt602240_write_object(data, QT602240_SPT_CTECONFIG,
+			QT602240_CTE_CTRL, 1);
+	qt602240_write_object(data, QT602240_SPT_CTECONFIG,
+			QT602240_CTE_MODE, mode);
+	qt602240_write_object(data, QT602240_SPT_CTECONFIG,
+			QT602240_CTE_CTRL, 0);
+
+	return 0;
+}
+
+static int qt602240_make_highchg(struct qt602240_data *data)
+{
+	struct device *dev = &data->client->dev;
+	int count = 10;
+	int error;
+	u8 val;
+
+	/* Read dummy message to make high CHG pin */
+	do {
+		error = qt602240_read_object(data, QT602240_GEN_MESSAGE, 0, &val);
+		if (error)
+			return error;
+	} while ((val != 0xff) && --count);
+
+	if (!count) {
+		dev_err(dev, "CHG pin isn't cleared\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static void qt602240_handle_pdata(struct qt602240_data *data)
+{
+	const struct qt602240_platform_data *pdata = data->pdata;
+	u8 voltage;
+
+	/* Set touchscreen lines */
+	qt602240_write_object(data, QT602240_TOUCH_MULTI, QT602240_TOUCH_XSIZE,
+			pdata->x_line);
+	qt602240_write_object(data, QT602240_TOUCH_MULTI, QT602240_TOUCH_YSIZE,
+			pdata->y_line);
+
+	/* Set touchscreen orient */
+	qt602240_write_object(data, QT602240_TOUCH_MULTI, QT602240_TOUCH_ORIENT,
+			pdata->orient);
+
+	/* Set touchscreen burst length */
+	qt602240_write_object(data, QT602240_TOUCH_MULTI,
+			QT602240_TOUCH_BLEN, pdata->blen);
+
+	/* Set touchscreen threshold */
+	qt602240_write_object(data, QT602240_TOUCH_MULTI,
+			QT602240_TOUCH_TCHTHR, pdata->threshold);
+
+	/* Set touchscreen resolution */
+	qt602240_write_object(data, QT602240_TOUCH_MULTI,
+			QT602240_TOUCH_XRANGE_LSB, (pdata->x_size - 1) & 0xff);
+	qt602240_write_object(data, QT602240_TOUCH_MULTI,
+			QT602240_TOUCH_XRANGE_MSB, (pdata->x_size - 1) >> 8);
+	qt602240_write_object(data, QT602240_TOUCH_MULTI,
+			QT602240_TOUCH_YRANGE_LSB, (pdata->y_size - 1) & 0xff);
+	qt602240_write_object(data, QT602240_TOUCH_MULTI,
+			QT602240_TOUCH_YRANGE_MSB, (pdata->y_size - 1) >> 8);
+
+	/* Set touchscreen voltage */
+	if (data->info.version >= QT602240_VER_21 && pdata->voltage) {
+		if (pdata->voltage < QT602240_VOLTAGE_DEFAULT) {
+			voltage = (QT602240_VOLTAGE_DEFAULT - pdata->voltage) /
+				QT602240_VOLTAGE_STEP;
+			voltage = 0xff - voltage + 1;
+		} else
+			voltage = (pdata->voltage - QT602240_VOLTAGE_DEFAULT) /
+				QT602240_VOLTAGE_STEP;
+
+		qt602240_write_object(data, QT602240_SPT_CTECONFIG,
+				QT602240_CTE_VOLTAGE, voltage);
+	}
+}
+
+static int qt602240_get_info(struct qt602240_data *data)
+{
+	struct i2c_client *client = data->client;
+	struct qt602240_info *info = &data->info;
+	int error;
+	u8 val;
+
+	error = qt602240_read_reg(client, QT602240_FAMILY_ID, &val);
+	if (error)
+		return error;
+	info->family_id = val;
+
+	error = qt602240_read_reg(client, QT602240_VARIANT_ID, &val);
+	if (error)
+		return error;
+	info->variant_id = val;
+
+	error = qt602240_read_reg(client, QT602240_VERSION, &val);
+	if (error)
+		return error;
+	info->version = val;
+
+	error = qt602240_read_reg(client, QT602240_BUILD, &val);
+	if (error)
+		return error;
+	info->build = val;
+
+	error = qt602240_read_reg(client, QT602240_OBJECT_NUM, &val);
+	if (error)
+		return error;
+	info->object_num = val;
+
+	return 0;
+}
+
+static int qt602240_get_object_table(struct qt602240_data *data)
+{
+	int error;
+	int i;
+	u16 reg;
+	u8 reportid = 0;
+	u8 buf[QT602240_OBJECT_SIZE];
+
+	for (i = 0; i < data->info.object_num; i++) {
+		struct qt602240_object *object = data->object_table + i;
+
+		reg = QT602240_OBJECT_START + QT602240_OBJECT_SIZE * i;
+		error = qt602240_read_object_table(data->client, reg, buf);
+		if (error)
+			return error;
+
+		object->type = buf[0];
+		object->start_address = (buf[2] << 8) | buf[1];
+		object->size = buf[3];
+		object->instances = buf[4];
+		object->num_report_ids = buf[5];
+
+		if (object->num_report_ids) {
+			reportid += object->num_report_ids *
+					(object->instances + 1);
+			object->max_reportid = reportid;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Read chip info and the object table, write the initial and platform
+ * configuration, back it up and soft-reset.  Returns 0 or a negative errno.
+ */
+static int qt602240_initialize(struct qt602240_data *data)
+{
+	struct i2c_client *client = data->client;
+	struct qt602240_info *info = &data->info;
+	int error;
+	u8 val;
+
+	error = qt602240_get_info(data);
+	if (error)
+		return error;
+
+	data->object_table = kcalloc(info->object_num,
+				     sizeof(*data->object_table), GFP_KERNEL);
+	if (!data->object_table) {
+		dev_err(&client->dev, "Failed to allocate memory\n");
+		return -ENOMEM;
+	}
+
+	/* Get object table information */
+	error = qt602240_get_object_table(data);
+	if (error)
+		return error;
+
+	/* Check register init values */
+	error = qt602240_check_reg_init(data);
+	if (error)
+		return error;
+
+	/* Check X/Y matrix size */
+	error = qt602240_check_matrix_size(data);
+	if (error)
+		return error;
+
+	error = qt602240_make_highchg(data);
+	if (error)
+		return error;
+
+	qt602240_handle_pdata(data);
+
+	/* Backup to memory */
+	qt602240_write_object(data, QT602240_GEN_COMMAND,
+			QT602240_COMMAND_BACKUPNV, QT602240_BACKUP_VALUE);
+	msleep(QT602240_BACKUP_TIME);
+
+	/* Soft reset */
+	qt602240_write_object(data, QT602240_GEN_COMMAND,
+			QT602240_COMMAND_RESET, 1);
+	msleep(QT602240_RESET_TIME);
+
+	/* Update matrix size at info struct */
+	error = qt602240_read_reg(client, QT602240_MATRIX_X_SIZE, &val);
+	if (error)
+		return error;
+	info->matrix_xsize = val;
+
+	error = qt602240_read_reg(client, QT602240_MATRIX_Y_SIZE, &val);
+	if (error)
+		return error;
+	info->matrix_ysize = val;
+
+	dev_info(&client->dev,
+		"Family ID: %d Variant ID: %d Version: %d Build: %d\n",
+		info->family_id, info->variant_id, info->version, info->build);
+
+	dev_info(&client->dev,
+		"Matrix X Size: %d Matrix Y Size: %d Object Num: %d\n",
+		info->matrix_xsize, info->matrix_ysize, info->object_num);
+
+	return 0;
+}
+
+static ssize_t qt602240_object_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct qt602240_data *data = dev_get_drvdata(dev);
+	struct qt602240_object *object;
+	int count = 0;
+	int i, j;
+	int error;
+	u8 val;
+
+	for (i = 0; i < data->info.object_num; i++) {
+		object = data->object_table + i;
+
+		count += sprintf(buf + count,
+				"Object Table Element %d(Type %d)\n",
+				i + 1, object->type);
+
+		if (!qt602240_object_readable(object->type)) {
+			count += sprintf(buf + count, "\n");
+			continue;
+		}
+
+		for (j = 0; j < object->size + 1; j++) {
+			error = qt602240_read_object(data,
+						object->type, j, &val);
+			if (error)
+				return error;
+
+			count += sprintf(buf + count,
+					"  Byte %d: 0x%x (%d)\n", j, val, val);
+		}
+
+		count += sprintf(buf + count, "\n");
+	}
+
+	return count;
+}
+
+/* Flash firmware @fn via the bootloader; returns 0 or a negative errno. */
+static int qt602240_load_fw(struct device *dev, const char *fn)
+{
+	struct qt602240_data *data = dev_get_drvdata(dev);
+	struct i2c_client *client = data->client;
+	const struct firmware *fw = NULL;
+	unsigned int frame_size;
+	unsigned int pos = 0;
+	int ret;
+
+	ret = request_firmware(&fw, fn, dev);
+	if (ret) {
+		dev_err(dev, "Unable to open firmware %s\n", fn);
+		return ret;
+	}
+
+	/* Change to the bootloader mode */
+	qt602240_write_object(data, QT602240_GEN_COMMAND,
+			QT602240_COMMAND_RESET, QT602240_BOOT_VALUE);
+	msleep(QT602240_RESET_TIME);
+
+	/* Change to slave address of bootloader */
+	client->addr = client->addr == QT602240_APP_LOW ?
+			QT602240_BOOT_LOW : QT602240_BOOT_HIGH;
+
+	ret = qt602240_check_bootloader(client, QT602240_WAITING_BOOTLOAD_CMD);
+	if (ret)
+		goto out;
+
+	/* Unlock bootloader */
+	ret = qt602240_unlock_bootloader(client);
+	if (ret)
+		goto out;
+
+	while (pos < fw->size) {
+		ret = qt602240_check_bootloader(client,
+						QT602240_WAITING_FRAME_DATA);
+		if (ret)
+			goto out;
+
+		/* Big-endian length at frame start excludes the 2 CRC bytes */
+		ret = -EINVAL;
+		if (fw->size - pos < 2)
+			goto out;
+		frame_size = ((fw->data[pos] << 8) | fw->data[pos + 1]) + 2;
+		if (frame_size > fw->size - pos)
+			goto out;
+
+		/* Write one frame to device and wait for its CRC to pass */
+		ret = qt602240_fw_write(client, fw->data + pos, frame_size);
+		if (!ret)
+			ret = qt602240_check_bootloader(client,
+						QT602240_FRAME_CRC_PASS);
+		if (ret)
+			goto out;
+
+		pos += frame_size;
+
+		dev_dbg(dev, "Updated %u bytes / %zu bytes\n", pos, fw->size);
+	}
+
+out:
+	release_firmware(fw);
+
+	/* Change to slave address of application */
+	client->addr = client->addr == QT602240_BOOT_LOW ?
+			QT602240_APP_LOW : QT602240_APP_HIGH;
+
+	return ret;
+}
+
+static ssize_t qt602240_update_fw_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct qt602240_data *data = dev_get_drvdata(dev);
+	unsigned int version;
+	int error;
+
+	if (sscanf(buf, "%u", &version) != 1) {
+		dev_err(dev, "Invalid values\n");
+		return -EINVAL;
+	}
+
+	if (data->info.version < QT602240_VER_21 || version < QT602240_VER_21) {
+		dev_err(dev, "FW update supported starting with version 21\n");
+		return -EINVAL;
+	}
+
+	disable_irq(data->irq);
+
+	error = qt602240_load_fw(dev, QT602240_FW_NAME);
+	if (error) {
+		dev_err(dev, "The firmware update failed(%d)\n", error);
+		count = error;
+	} else {
+		dev_dbg(dev, "The firmware update succeeded\n");
+
+		/* Wait for reset */
+		msleep(QT602240_FWRESET_TIME);
+
+		kfree(data->object_table);
+		data->object_table = NULL;
+
+		qt602240_initialize(data);
+	}
+
+	enable_irq(data->irq);
+
+	return count;
+}
+
+static DEVICE_ATTR(object, 0444, qt602240_object_show, NULL);
+static DEVICE_ATTR(update_fw, 0664, NULL, qt602240_update_fw_store);
+
+static struct attribute *qt602240_attrs[] = {
+	&dev_attr_object.attr,
+	&dev_attr_update_fw.attr,
+	NULL
+};
+
+static const struct attribute_group qt602240_attr_group = {
+	.attrs = qt602240_attrs,
+};
+
+static void qt602240_start(struct qt602240_data *data)
+{
+	/* Touch enable */
+	qt602240_write_object(data,
+			QT602240_TOUCH_MULTI, QT602240_TOUCH_CTRL, 0x83);
+}
+
+static void qt602240_stop(struct qt602240_data *data)
+{
+	/* Touch disable */
+	qt602240_write_object(data,
+			QT602240_TOUCH_MULTI, QT602240_TOUCH_CTRL, 0);
+}
+
+static int qt602240_input_open(struct input_dev *dev)
+{
+	struct qt602240_data *data = input_get_drvdata(dev);
+
+	qt602240_start(data);
+
+	return 0;
+}
+
+static void qt602240_input_close(struct input_dev *dev)
+{
+	struct qt602240_data *data = input_get_drvdata(dev);
+
+	qt602240_stop(data);
+}
+
+static int __devinit qt602240_probe(struct i2c_client *client,
+		const struct i2c_device_id *id)
+{
+	struct qt602240_data *data;
+	struct input_dev *input_dev;
+	int error;
+
+	if (!client->dev.platform_data)
+		return -EINVAL;
+
+	data = kzalloc(sizeof(struct qt602240_data), GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!data || !input_dev) {
+		dev_err(&client->dev, "Failed to allocate memory\n");
+		error = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	input_dev->name = "AT42QT602240/ATMXT224 Touchscreen";
+	input_dev->id.bustype = BUS_I2C;
+	input_dev->dev.parent = &client->dev;
+	input_dev->open = qt602240_input_open;
+	input_dev->close = qt602240_input_close;
+
+	__set_bit(EV_ABS, input_dev->evbit);
+	__set_bit(EV_KEY, input_dev->evbit);
+	__set_bit(BTN_TOUCH, input_dev->keybit);
+
+	/* For single touch */
+	input_set_abs_params(input_dev, ABS_X,
+			     0, QT602240_MAX_XC, 0, 0);
+	input_set_abs_params(input_dev, ABS_Y,
+			     0, QT602240_MAX_YC, 0, 0);
+
+	/* For multi touch */
+	input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR,
+			     0, QT602240_MAX_AREA, 0, 0);
+	input_set_abs_params(input_dev, ABS_MT_POSITION_X,
+			     0, QT602240_MAX_XC, 0, 0);
+	input_set_abs_params(input_dev, ABS_MT_POSITION_Y,
+			     0, QT602240_MAX_YC, 0, 0);
+
+	input_set_drvdata(input_dev, data);
+
+	data->client = client;
+	data->input_dev = input_dev;
+	data->pdata = client->dev.platform_data;
+	data->irq = client->irq;
+
+	i2c_set_clientdata(client, data);
+
+	error = qt602240_initialize(data);
+	if (error)
+		goto err_free_object;
+
+	error = request_threaded_irq(client->irq, NULL, qt602240_interrupt,
+			IRQF_TRIGGER_FALLING, client->dev.driver->name, data);
+	if (error) {
+		dev_err(&client->dev, "Failed to register interrupt\n");
+		goto err_free_object;
+	}
+
+	error = input_register_device(input_dev);
+	if (error)
+		goto err_free_irq;
+
+	error = sysfs_create_group(&client->dev.kobj, &qt602240_attr_group);
+	if (error)
+		goto err_unregister_device;
+
+	return 0;
+
+err_unregister_device:
+	input_unregister_device(input_dev);
+	input_dev = NULL;
+err_free_irq:
+	free_irq(client->irq, data);
+err_free_object:
+	kfree(data->object_table);
+err_free_mem:
+	input_free_device(input_dev);
+	kfree(data);
+	return error;
+}
+
+static int __devexit qt602240_remove(struct i2c_client *client)
+{
+	struct qt602240_data *data = i2c_get_clientdata(client);
+
+	sysfs_remove_group(&client->dev.kobj, &qt602240_attr_group);
+	free_irq(data->irq, data);
+	input_unregister_device(data->input_dev);
+	kfree(data->object_table);
+	kfree(data);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int qt602240_suspend(struct i2c_client *client, pm_message_t mesg)
+{
+	struct qt602240_data *data = i2c_get_clientdata(client);
+	struct input_dev *input_dev = data->input_dev;
+
+	mutex_lock(&input_dev->mutex);
+
+	if (input_dev->users)
+		qt602240_stop(data);
+
+	mutex_unlock(&input_dev->mutex);
+
+	return 0;
+}
+
+static int qt602240_resume(struct i2c_client *client)
+{
+	struct qt602240_data *data = i2c_get_clientdata(client);
+	struct input_dev *input_dev = data->input_dev;
+
+	/* Soft reset */
+	qt602240_write_object(data, QT602240_GEN_COMMAND,
+			QT602240_COMMAND_RESET, 1);
+
+	msleep(QT602240_RESET_TIME);
+
+	mutex_lock(&input_dev->mutex);
+
+	if (input_dev->users)
+		qt602240_start(data);
+
+	mutex_unlock(&input_dev->mutex);
+
+	return 0;
+}
+#else
+#define qt602240_suspend	NULL
+#define qt602240_resume		NULL
+#endif
+
+static const struct i2c_device_id qt602240_id[] = {
+	{ "qt602240_ts", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, qt602240_id);
+
+static struct i2c_driver qt602240_driver = {
+	.driver = {
+		.name	= "qt602240_ts",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= qt602240_probe,
+	.remove		= __devexit_p(qt602240_remove),
+	.suspend	= qt602240_suspend,
+	.resume		= qt602240_resume,
+	.id_table	= qt602240_id,
+};
+
+static int __init qt602240_init(void)
+{
+	return i2c_add_driver(&qt602240_driver);
+}
+
+static void __exit qt602240_exit(void)
+{
+	i2c_del_driver(&qt602240_driver);
+}
+
+module_init(qt602240_init);
+module_exit(qt602240_exit);
+
+/* Module information */
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_DESCRIPTION("AT42QT602240/ATMXT224 Touchscreen driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/i2c/qt602240_ts.h b/include/linux/i2c/qt602240_ts.h
new file mode 100644
index 00000000000..c5033e10109
--- /dev/null
+++ b/include/linux/i2c/qt602240_ts.h
@@ -0,0 +1,38 @@
+/*
+ * AT42QT602240/ATMXT224 Touchscreen driver
+ *
+ * Copyright (C) 2010 Samsung Electronics Co.Ltd
+ * Author: Joonyoung Shim <jy0922.shim@samsung.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __LINUX_QT602240_TS_H
+#define __LINUX_QT602240_TS_H
+
+/* Orient */
+#define QT602240_NORMAL			0x0
+#define QT602240_DIAGONAL		0x1
+#define QT602240_HORIZONTAL_FLIP	0x2
+#define QT602240_ROTATED_90_COUNTER	0x3
+#define QT602240_VERTICAL_FLIP		0x4
+#define QT602240_ROTATED_90		0x5
+#define QT602240_ROTATED_180		0x6
+#define QT602240_DIAGONAL_COUNTER	0x7
+
+/* The platform data for the AT42QT602240/ATMXT224 touchscreen driver */
+struct qt602240_platform_data {
+	unsigned int x_line;
+	unsigned int y_line;
+	unsigned int x_size;
+	unsigned int y_size;
+	unsigned int blen;
+	unsigned int threshold;
+	unsigned int voltage;
+	unsigned char orient;
+};
+
+#endif /* __LINUX_QT602240_TS_H */
-- 
cgit v1.2.3-70-g09d2


From cca5cf91c789f3301cc2541a79c323c53be5a8e1 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 15 Jul 2010 11:27:41 +0200
Subject: nfnetlink_log: do not expose NFULNL_COPY_DISABLED to user-space

This patch moves the NFULNL_COPY_DISABLED definition from
linux/netfilter/nfnetlink_log.h to net/netfilter/nfnetlink_log.h
since this copy mode is only for internal use.

I have also changed the value from 0x03 to 0xff. Thus, we avoid
a gap from user-space that may confuse users if we add new
copy modes in the future.

This change was introduced in:
http://www.spinics.net/lists/netfilter-devel/msg13535.html

Since this change is not included in any stable Linux kernel,
I think it's safe to make this change now. Anyway, this copy
mode does not make any sense from user-space, so this patch
should not break any existing setup.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nfnetlink_log.h | 2 +-
 include/net/netfilter/nfnetlink_log.h   | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h
index 1d0b84aa1d4..ea9b8d38052 100644
--- a/include/linux/netfilter/nfnetlink_log.h
+++ b/include/linux/netfilter/nfnetlink_log.h
@@ -89,7 +89,7 @@ enum nfulnl_attr_config {
 #define NFULNL_COPY_NONE	0x00
 #define NFULNL_COPY_META	0x01
 #define NFULNL_COPY_PACKET	0x02
-#define NFULNL_COPY_DISABLED	0x03
+/* 0xff is reserved, don't use it for new copy modes. */
 
 #define NFULNL_CFG_F_SEQ	0x0001
 #define NFULNL_CFG_F_SEQ_GLOBAL	0x0002
diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h
index b0569ff0775..e2dec42c2db 100644
--- a/include/net/netfilter/nfnetlink_log.h
+++ b/include/net/netfilter/nfnetlink_log.h
@@ -10,5 +10,7 @@ nfulnl_log_packet(u_int8_t pf,
 		  const struct nf_loginfo *li_user,
 		  const char *prefix);
 
+#define NFULNL_COPY_DISABLED    0xff
+
 #endif /* _KER_NFNETLINK_LOG_H */
 
-- 
cgit v1.2.3-70-g09d2


From edf0e1fb0d0910880881523cfaaabcec06a2c0d5 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 15 Jul 2010 17:20:46 +0200
Subject: netfilter: add CHECKSUM target

This adds a `CHECKSUM' target, which can be used in the iptables mangle
table.

You can use this target to compute and fill in the checksum in
a packet that lacks a checksum.  This is particularly useful,
if you need to work around old applications such as dhcp clients,
that do not work well with checksum offloads, but don't want to
disable checksum offload in your device.

The problem happens in the field with virtualized applications.
For reference, see Red Hat bz 605555, as well as
http://www.spinics.net/lists/kvm/msg37660.html

Typical expected use (helps old dhclient binary running in a VM):
iptables -A POSTROUTING -t mangle -p udp --dport bootpc \
	-j CHECKSUM --checksum-fill

Includes fixes by Jan Engelhardt <jengelh@medozas.de>

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/xt_CHECKSUM.h | 18 +++++++++
 net/netfilter/Kconfig                 | 16 ++++++++
 net/netfilter/Makefile                |  1 +
 net/netfilter/xt_CHECKSUM.c           | 70 +++++++++++++++++++++++++++++++++++
 4 files changed, 105 insertions(+)
 create mode 100644 include/linux/netfilter/xt_CHECKSUM.h
 create mode 100644 net/netfilter/xt_CHECKSUM.c

(limited to 'include')

diff --git a/include/linux/netfilter/xt_CHECKSUM.h b/include/linux/netfilter/xt_CHECKSUM.h
new file mode 100644
index 00000000000..3b4fb77acef
--- /dev/null
+++ b/include/linux/netfilter/xt_CHECKSUM.h
@@ -0,0 +1,18 @@
+/* Header file for iptables ipt_CHECKSUM target
+ *
+ * (C) 2002 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2010 Red Hat Inc
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This software is distributed under GNU GPL v2, 1991
+*/
+#ifndef _IPT_CHECKSUM_TARGET_H
+#define _IPT_CHECKSUM_TARGET_H
+
+#define XT_CHECKSUM_OP_FILL	0x01	/* fill in checksum in IP header */
+
+struct xt_CHECKSUM_info {
+	__u8 operation;	/* bitset of operations */
+};
+
+#endif /* _IPT_CHECKSUM_TARGET_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index aa2f106347e..5fb8efa84df 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -326,6 +326,22 @@ config NETFILTER_XT_CONNMARK
 
 comment "Xtables targets"
 
+config NETFILTER_XT_TARGET_CHECKSUM
+	tristate "CHECKSUM target support"
+	depends on IP_NF_MANGLE || IP6_NF_MANGLE
+	depends on NETFILTER_ADVANCED
+	---help---
+	  This option adds a `CHECKSUM' target, which can be used in the iptables mangle
+	  table.
+
+	  You can use this target to compute and fill in the checksum in
+	  a packet that lacks a checksum.  This is particularly useful,
+	  if you need to work around old applications such as dhcp clients,
+	  that do not work well with checksum offloads, but don't want to disable
+	  checksum offload in your device.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_CLASSIFY
 	tristate '"CLASSIFY" target support'
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index e28420aac5e..36ef8e63be1 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
 obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
 
 # targets
+obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c
new file mode 100644
index 00000000000..0f642ef8cd2
--- /dev/null
+++ b/net/netfilter/xt_CHECKSUM.c
@@ -0,0 +1,70 @@
+/* iptables module for the packet checksum mangling
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ * (C) 2010 Red Hat, Inc.
+ *
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_CHECKSUM.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michael S. Tsirkin <mst@redhat.com>");
+MODULE_DESCRIPTION("Xtables: checksum modification");
+MODULE_ALIAS("ipt_CHECKSUM");
+MODULE_ALIAS("ip6t_CHECKSUM");
+
+static unsigned int
+checksum_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		skb_checksum_help(skb);
+
+	return XT_CONTINUE;
+}
+
+static int checksum_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_CHECKSUM_info *einfo = par->targinfo;
+
+	if (einfo->operation & ~XT_CHECKSUM_OP_FILL) {
+		pr_info("unsupported CHECKSUM operation %x\n", einfo->operation);
+		return -EINVAL;
+	}
+	if (!einfo->operation) {
+		pr_info("no CHECKSUM operation enabled\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static struct xt_target checksum_tg_reg __read_mostly = {
+	.name		= "CHECKSUM",
+	.family		= NFPROTO_UNSPEC,
+	.target		= checksum_tg,
+	.targetsize	= sizeof(struct xt_CHECKSUM_info),
+	.table		= "mangle",
+	.checkentry	= checksum_tg_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init checksum_tg_init(void)
+{
+	return xt_register_target(&checksum_tg_reg);
+}
+
+static void __exit checksum_tg_exit(void)
+{
+	xt_unregister_target(&checksum_tg_reg);
+}
+
+module_init(checksum_tg_init);
+module_exit(checksum_tg_exit);
-- 
cgit v1.2.3-70-g09d2


From eb878b3bc0349344dbf70c51bf01fc734d5cf2d3 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 15 Jul 2010 23:52:45 +0200
Subject: tracing: Remove letfover markers section

Markers have been removed, but we forgot to remove their
section.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 include/asm-generic/vmlinux.lds.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 48c5299cbf2..415b1a9118e 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -150,10 +150,6 @@
 	CPU_KEEP(exit.data)						\
 	MEM_KEEP(init.data)						\
 	MEM_KEEP(exit.data)						\
-	. = ALIGN(8);							\
-	VMLINUX_SYMBOL(__start___markers) = .;				\
-	*(__markers)							\
-	VMLINUX_SYMBOL(__stop___markers) = .;				\
 	. = ALIGN(32);							\
 	VMLINUX_SYMBOL(__start___tracepoints) = .;			\
 	*(__tracepoints)						\
-- 
cgit v1.2.3-70-g09d2


From f86586fa48a94c7a9c7f36650a2e0e4a947881c9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 15 Jul 2010 21:41:00 -0700
Subject: tcp: sizeof struct tcp_skb_cb is 44

Correct the comment stating that sizeof(struct tcp_skb_cb) is 36 or 40
bytes: it has been 44 bytes since commit 951dbc8ac714b04 ([IPV6]: Move
nextheader offset to the IP6CB).

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 468b01f01c1..df6a2eb2019 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -570,11 +570,10 @@ extern u32 __tcp_select_window(struct sock *sk);
 #define TCPHDR_CWR 0x80
 
 /* This is what the send packet queuing engine uses to pass
- * TCP per-packet control information to the transmission
- * code.  We also store the host-order sequence numbers in
- * here too.  This is 36 bytes on 32-bit architectures,
- * 40 bytes on 64-bit machines, if this grows please adjust
- * skbuff.h:skbuff->cb[xxx] size appropriately.
+ * TCP per-packet control information to the transmission code.
+ * We also store the host-order sequence numbers in here too.
+ * This is 44 bytes if IPV6 is enabled.
+ * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
  */
 struct tcp_skb_cb {
 	union {
-- 
cgit v1.2.3-70-g09d2


From 40d007e7df1dab17bf1ecf91e718218354d963d7 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Thu, 15 Jul 2010 23:10:10 -0700
Subject: Input: introduce MT event slots

With the rapidly increasing number of intelligent multi-contact and
multi-user devices, the need to send digested, filtered information
from a set of different sources within the same device is imminent.
This patch adds the concept of slots to the MT protocol. The slots
enumerate a set of identified sources, such that all MT events
can be passed independently and selectively per identified source.

The protocol works like this: Instead of sending a SYN_MT_REPORT
event immediately after the contact data, one sends an ABS_MT_SLOT
event immediately before the contact data. The input core will only
emit events for slots with modified MT events. It is assumed that
the same slot is used for the duration of an initiated contact.

Acked-by: Ping Cheng <pingc@wacom.com>
Acked-by: Chase Douglas <chase.douglas@canonical.com>
Acked-by: Rafi Rubin <rafi@seas.upenn.edu>
Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/evdev.c |   4 ++
 drivers/input/input.c | 135 ++++++++++++++++++++++++++++++++++----------------
 include/linux/input.h |  33 ++++++++++++
 3 files changed, 129 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index cd323254ca6..fc5afbd7862 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -686,6 +686,10 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd,
 								  sizeof(struct input_absinfo))))
 					return -EFAULT;
 
+				/* We can't change number of reserved MT slots */
+				if (t == ABS_MT_SLOT)
+					return -EINVAL;
+
 				/*
 				 * Take event lock to ensure that we are not
 				 * changing device parameters in the middle
diff --git a/drivers/input/input.c b/drivers/input/input.c
index a3d5485154e..54109c33e36 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -33,25 +33,6 @@ MODULE_LICENSE("GPL");
 
 #define INPUT_DEVICES	256
 
-/*
- * EV_ABS events which should not be cached are listed here.
- */
-static unsigned int input_abs_bypass_init_data[] __initdata = {
-	ABS_MT_TOUCH_MAJOR,
-	ABS_MT_TOUCH_MINOR,
-	ABS_MT_WIDTH_MAJOR,
-	ABS_MT_WIDTH_MINOR,
-	ABS_MT_ORIENTATION,
-	ABS_MT_POSITION_X,
-	ABS_MT_POSITION_Y,
-	ABS_MT_TOOL_TYPE,
-	ABS_MT_BLOB_ID,
-	ABS_MT_TRACKING_ID,
-	ABS_MT_PRESSURE,
-	0
-};
-static unsigned long input_abs_bypass[BITS_TO_LONGS(ABS_CNT)];
-
 static LIST_HEAD(input_dev_list);
 static LIST_HEAD(input_handler_list);
 
@@ -181,6 +162,56 @@ static void input_stop_autorepeat(struct input_dev *dev)
 #define INPUT_PASS_TO_DEVICE	2
 #define INPUT_PASS_TO_ALL	(INPUT_PASS_TO_HANDLERS | INPUT_PASS_TO_DEVICE)
 
+static int input_handle_abs_event(struct input_dev *dev,
+				  unsigned int code, int *pval)
+{
+	bool is_mt_event;
+	int *pold;
+
+	if (code == ABS_MT_SLOT) {
+		/*
+		 * "Stage" the event; we'll flush it later, when we
+		 * get actual touch data.
+		 */
+		if (*pval >= 0 && *pval < dev->mtsize)
+			dev->slot = *pval;
+
+		return INPUT_IGNORE_EVENT;
+	}
+
+	is_mt_event = code >= ABS_MT_FIRST && code <= ABS_MT_LAST;
+
+	if (!is_mt_event) {
+		pold = &dev->abs[code];
+	} else if (dev->mt) {
+		struct input_mt_slot *mtslot = &dev->mt[dev->slot];
+		pold = &mtslot->abs[code - ABS_MT_FIRST];
+	} else {
+		/*
+		 * Bypass filtering for multitouch events when
+		 * not employing slots.
+		 */
+		pold = NULL;
+	}
+
+	if (pold) {
+		*pval = input_defuzz_abs_event(*pval, *pold,
+						dev->absfuzz[code]);
+		if (*pold == *pval)
+			return INPUT_IGNORE_EVENT;
+
+		*pold = *pval;
+	}
+
+	/* Flush pending "slot" event */
+	if (is_mt_event && dev->slot != dev->abs[ABS_MT_SLOT]) {
+		dev->abs[ABS_MT_SLOT] = dev->slot;
+		input_pass_event(dev, EV_ABS, ABS_MT_SLOT, dev->slot);
+	}
+
+	return INPUT_PASS_TO_HANDLERS;
+}
+
 static void input_handle_event(struct input_dev *dev,
 			       unsigned int type, unsigned int code, int value)
 {
@@ -233,21 +264,9 @@ static void input_handle_event(struct input_dev *dev,
 		break;
 
 	case EV_ABS:
-		if (is_event_supported(code, dev->absbit, ABS_MAX)) {
-
-			if (test_bit(code, input_abs_bypass)) {
-				disposition = INPUT_PASS_TO_HANDLERS;
-				break;
-			}
+		if (is_event_supported(code, dev->absbit, ABS_MAX))
+			disposition = input_handle_abs_event(dev, code, &value);
 
-			value = input_defuzz_abs_event(value,
-					dev->abs[code], dev->absfuzz[code]);
-
-			if (dev->abs[code] != value) {
-				dev->abs[code] = value;
-				disposition = INPUT_PASS_TO_HANDLERS;
-			}
-		}
 		break;
 
 	case EV_REL:
@@ -1288,6 +1307,7 @@ static void input_dev_release(struct device *device)
 	struct input_dev *dev = to_input_dev(device);
 
 	input_ff_destroy(dev);
+	input_mt_destroy_slots(dev);
 	kfree(dev);
 
 	module_put(THIS_MODULE);
@@ -1536,6 +1556,45 @@ void input_free_device(struct input_dev *dev)
 }
 EXPORT_SYMBOL(input_free_device);
 
+/**
+ * input_mt_create_slots() - create MT input slots
+ * @dev: input device supporting MT events and finger tracking
+ * @num_slots: number of slots used by the device
+ *
+ * This function allocates all necessary memory for MT slot handling
+ * in the input device, and adds ABS_MT_SLOT to the device capabilities.
+ */
+int input_mt_create_slots(struct input_dev *dev, unsigned int num_slots)
+{
+	if (!num_slots)
+		return 0;
+
+	dev->mt = kcalloc(num_slots, sizeof(struct input_mt_slot), GFP_KERNEL);
+	if (!dev->mt)
+		return -ENOMEM;
+
+	dev->mtsize = num_slots;
+	input_set_abs_params(dev, ABS_MT_SLOT, 0, num_slots - 1, 0, 0);
+
+	return 0;
+}
+EXPORT_SYMBOL(input_mt_create_slots);
+
+/**
+ * input_mt_destroy_slots() - frees the MT slots of the input device
+ * @dev: input device with allocated MT slots
+ *
+ * This function is only needed in error path as the input core will
+ * automatically free the MT slots when the device is destroyed.
+ */
+void input_mt_destroy_slots(struct input_dev *dev)
+{
+	kfree(dev->mt);
+	dev->mt = NULL;
+	dev->mtsize = 0;
+}
+EXPORT_SYMBOL(input_mt_destroy_slots);
+
 /**
  * input_set_capability - mark device as capable of a certain event
  * @dev: device that is capable of emitting or accepting event
@@ -1945,20 +2004,10 @@ static const struct file_operations input_fops = {
 	.open = input_open_file,
 };
 
-static void __init input_init_abs_bypass(void)
-{
-	const unsigned int *p;
-
-	for (p = input_abs_bypass_init_data; *p; p++)
-		input_abs_bypass[BIT_WORD(*p)] |= BIT_MASK(*p);
-}
-
 static int __init input_init(void)
 {
 	int err;
 
-	input_init_abs_bypass();
-
 	err = class_register(&input_class);
 	if (err) {
 		printk(KERN_ERR "input: unable to register input_dev class\n");
diff --git a/include/linux/input.h b/include/linux/input.h
index cc524c8b670..a14de64ed16 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -691,9 +691,12 @@ struct input_absinfo {
 #define ABS_TILT_X		0x1a
 #define ABS_TILT_Y		0x1b
 #define ABS_TOOL_WIDTH		0x1c
+
 #define ABS_VOLUME		0x20
+
 #define ABS_MISC		0x28
 
+#define ABS_MT_SLOT		0x2f	/* MT slot being modified */
 #define ABS_MT_TOUCH_MAJOR	0x30	/* Major axis of touching ellipse */
 #define ABS_MT_TOUCH_MINOR	0x31	/* Minor axis (omit if circular) */
 #define ABS_MT_WIDTH_MAJOR	0x32	/* Major axis of approaching ellipse */
@@ -706,6 +709,12 @@ struct input_absinfo {
 #define ABS_MT_TRACKING_ID	0x39	/* Unique ID of initiated contact */
 #define ABS_MT_PRESSURE		0x3a	/* Pressure on contact area */
 
+#ifdef __KERNEL__
+/* Implementation details, userspace should not care about these */
+#define ABS_MT_FIRST		ABS_MT_TOUCH_MAJOR
+#define ABS_MT_LAST		ABS_MT_PRESSURE
+#endif
+
 #define ABS_MAX			0x3f
 #define ABS_CNT			(ABS_MAX+1)
 
@@ -1047,6 +1056,14 @@ struct ff_effect {
 #include <linux/timer.h>
 #include <linux/mod_devicetable.h>
 
+/**
+ * struct input_mt_slot - represents the state of an input MT slot
+ * @abs: holds current values of ABS_MT axes for this slot
+ */
+struct input_mt_slot {
+	int abs[ABS_MT_LAST - ABS_MT_FIRST + 1];
+};
+
 /**
  * struct input_dev - represents an input device
  * @name: name of the device
@@ -1085,6 +1102,10 @@ struct ff_effect {
  * @sync: set to 1 when there were no new events since last EV_SYNC
  * @abs: current values for reports from absolute axes
  * @rep: current values for autorepeat parameters (delay, rate)
+ * @mt: pointer to array of struct input_mt_slot holding current values
+ *	of tracked contacts
+ * @mtsize: number of MT slots the device uses
+ * @slot: MT slot currently being transmitted
  * @key: reflects current state of device's keys/buttons
  * @led: reflects current state of device's LEDs
  * @snd: reflects current state of sound effects
@@ -1164,6 +1185,10 @@ struct input_dev {
 	int abs[ABS_CNT];
 	int rep[REP_MAX + 1];
 
+	struct input_mt_slot *mt;
+	int mtsize;
+	int slot;
+
 	unsigned long key[BITS_TO_LONGS(KEY_CNT)];
 	unsigned long led[BITS_TO_LONGS(LED_CNT)];
 	unsigned long snd[BITS_TO_LONGS(SND_CNT)];
@@ -1412,6 +1437,11 @@ static inline void input_mt_sync(struct input_dev *dev)
 	input_event(dev, EV_SYN, SYN_MT_REPORT, 0);
 }
 
+static inline void input_mt_slot(struct input_dev *dev, int slot)
+{
+	input_event(dev, EV_ABS, ABS_MT_SLOT, slot);
+}
+
 void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int code);
 
 /**
@@ -1506,5 +1536,8 @@ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file);
 int input_ff_create_memless(struct input_dev *dev, void *data,
 		int (*play_effect)(struct input_dev *, void *, struct ff_effect *));
 
+int input_mt_create_slots(struct input_dev *dev, unsigned int num_slots);
+void input_mt_destroy_slots(struct input_dev *dev);
+
 #endif
 #endif
-- 
cgit v1.2.3-70-g09d2


From 20da92de8ec3c1d4ba7e5aca322d38b6ce634932 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 15 Jul 2010 23:27:36 -0700
Subject: Input: change input handlers to use bool when possible

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/evdev.c    | 6 +++---
 drivers/input/input.c    | 6 +++---
 drivers/input/joydev.c   | 7 +++----
 drivers/input/mousedev.c | 6 +++---
 include/linux/input.h    | 6 +++---
 5 files changed, 15 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index fc5afbd7862..70c0eb52ca9 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -24,7 +24,6 @@
 #include "input-compat.h"
 
 struct evdev {
-	int exist;
 	int open;
 	int minor;
 	struct input_handle handle;
@@ -34,6 +33,7 @@ struct evdev {
 	spinlock_t client_lock; /* protects client_list */
 	struct mutex mutex;
 	struct device dev;
+	bool exist;
 };
 
 struct evdev_client {
@@ -793,7 +793,7 @@ static void evdev_remove_chrdev(struct evdev *evdev)
 static void evdev_mark_dead(struct evdev *evdev)
 {
 	mutex_lock(&evdev->mutex);
-	evdev->exist = 0;
+	evdev->exist = false;
 	mutex_unlock(&evdev->mutex);
 }
 
@@ -842,7 +842,7 @@ static int evdev_connect(struct input_handler *handler, struct input_dev *dev,
 	init_waitqueue_head(&evdev->wait);
 
 	dev_set_name(&evdev->dev, "event%d", minor);
-	evdev->exist = 1;
+	evdev->exist = true;
 	evdev->minor = minor;
 
 	evdev->handle.dev = input_get_device(dev);
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 54109c33e36..e1243b4b32a 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -227,12 +227,12 @@ static void input_handle_event(struct input_dev *dev,
 
 		case SYN_REPORT:
 			if (!dev->sync) {
-				dev->sync = 1;
+				dev->sync = true;
 				disposition = INPUT_PASS_TO_HANDLERS;
 			}
 			break;
 		case SYN_MT_REPORT:
-			dev->sync = 0;
+			dev->sync = false;
 			disposition = INPUT_PASS_TO_HANDLERS;
 			break;
 		}
@@ -317,7 +317,7 @@ static void input_handle_event(struct input_dev *dev,
 	}
 
 	if (disposition != INPUT_IGNORE_EVENT && type != EV_SYN)
-		dev->sync = 0;
+		dev->sync = false;
 
 	if ((disposition & INPUT_PASS_TO_DEVICE) && dev->event)
 		dev->event(dev, type, code, value);
diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c
index 34157bb97ed..63834585c28 100644
--- a/drivers/input/joydev.c
+++ b/drivers/input/joydev.c
@@ -37,7 +37,6 @@ MODULE_LICENSE("GPL");
 #define JOYDEV_BUFFER_SIZE	64
 
 struct joydev {
-	int exist;
 	int open;
 	int minor;
 	struct input_handle handle;
@@ -46,6 +45,7 @@ struct joydev {
 	spinlock_t client_lock; /* protects client_list */
 	struct mutex mutex;
 	struct device dev;
+	bool exist;
 
 	struct js_corr corr[ABS_CNT];
 	struct JS_DATA_SAVE_TYPE glue;
@@ -760,7 +760,7 @@ static void joydev_remove_chrdev(struct joydev *joydev)
 static void joydev_mark_dead(struct joydev *joydev)
 {
 	mutex_lock(&joydev->mutex);
-	joydev->exist = 0;
+	joydev->exist = false;
 	mutex_unlock(&joydev->mutex);
 }
 
@@ -817,10 +817,9 @@ static int joydev_connect(struct input_handler *handler, struct input_dev *dev,
 	init_waitqueue_head(&joydev->wait);
 
 	dev_set_name(&joydev->dev, "js%d", minor);
-	joydev->exist = 1;
+	joydev->exist = true;
 	joydev->minor = minor;
 
-	joydev->exist = 1;
 	joydev->handle.dev = input_get_device(dev);
 	joydev->handle.name = dev_name(&joydev->dev);
 	joydev->handle.handler = handler;
diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c
index f34b22bce4f..d7a7a2fce74 100644
--- a/drivers/input/mousedev.c
+++ b/drivers/input/mousedev.c
@@ -57,7 +57,6 @@ struct mousedev_hw_data {
 };
 
 struct mousedev {
-	int exist;
 	int open;
 	int minor;
 	struct input_handle handle;
@@ -66,6 +65,7 @@ struct mousedev {
 	spinlock_t client_lock; /* protects client_list */
 	struct mutex mutex;
 	struct device dev;
+	bool exist;
 
 	struct list_head mixdev_node;
 	int mixdev_open;
@@ -802,7 +802,7 @@ static void mousedev_remove_chrdev(struct mousedev *mousedev)
 static void mousedev_mark_dead(struct mousedev *mousedev)
 {
 	mutex_lock(&mousedev->mutex);
-	mousedev->exist = 0;
+	mousedev->exist = false;
 	mutex_unlock(&mousedev->mutex);
 }
 
@@ -862,7 +862,7 @@ static struct mousedev *mousedev_create(struct input_dev *dev,
 		dev_set_name(&mousedev->dev, "mouse%d", minor);
 
 	mousedev->minor = minor;
-	mousedev->exist = 1;
+	mousedev->exist = true;
 	mousedev->handle.dev = input_get_device(dev);
 	mousedev->handle.name = dev_name(&mousedev->dev);
 	mousedev->handle.handler = handler;
diff --git a/include/linux/input.h b/include/linux/input.h
index a14de64ed16..339d043ccb5 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -1099,7 +1099,6 @@ struct input_mt_slot {
  * @repeat_key: stores key code of the last key pressed; used to implement
  *	software autorepeat
  * @timer: timer for software autorepeat
- * @sync: set to 1 when there were no new events since last EV_SYNC
  * @abs: current values for reports from absolute axes
  * @rep: current values for autorepeat parameters (delay, rate)
  * @mt: pointer to array of struct input_mt_slot holding current values
@@ -1144,6 +1143,7 @@ struct input_mt_slot {
  *	last user closes the device
  * @going_away: marks devices that are in a middle of unregistering and
  *	causes input_open_device*() fail with -ENODEV.
+ * @sync: set to %true when there were no new events since last EV_SYN
  * @dev: driver model's view of this device
  * @h_list: list of input handles associated with the device. When
  *	accessing the list dev->mutex must be held
@@ -1180,8 +1180,6 @@ struct input_dev {
 	unsigned int repeat_key;
 	struct timer_list timer;
 
-	int sync;
-
 	int abs[ABS_CNT];
 	int rep[REP_MAX + 1];
 
@@ -1213,6 +1211,8 @@ struct input_dev {
 	unsigned int users;
 	bool going_away;
 
+	bool sync;
+
 	struct device dev;
 
 	struct list_head	h_list;
-- 
cgit v1.2.3-70-g09d2


From af537b0a6c650ab6ff7104d8163e96866b31c835 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux-foundation.org>
Date: Fri, 9 Jul 2010 14:07:14 -0500
Subject: slub: Use kmem_cache flags to detect if slab is in debugging mode.

The cacheline with the flags is reachable from the hot paths after the
percpu allocator changes went in. So there is no need anymore to put a
flag into each slab page. Get rid of the SlubDebug flag and use
the flags in kmem_cache instead.

Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/page-flags.h |  2 --
 mm/slub.c                  | 33 ++++++++++++---------------------
 2 files changed, 12 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5b59f35dcb8..6fa317801e1 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -128,7 +128,6 @@ enum pageflags {
 
 	/* SLUB */
 	PG_slub_frozen = PG_active,
-	PG_slub_debug = PG_error,
 };
 
 #ifndef __GENERATING_BOUNDS_H
@@ -215,7 +214,6 @@ PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 __PAGEFLAG(SlobFree, slob_free)
 
 __PAGEFLAG(SlubFrozen, slub_frozen)
-__PAGEFLAG(SlubDebug, slub_debug)
 
 /*
  * Private page markings that may be used by the filesystem that owns the page
diff --git a/mm/slub.c b/mm/slub.c
index b89a7c99b2f..9cf5dae7815 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -107,11 +107,17 @@
  * 			the fast path and disables lockless freelists.
  */
 
+#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
+		SLAB_TRACE | SLAB_DEBUG_FREE)
+
+static inline int kmem_cache_debug(struct kmem_cache *s)
+{
 #ifdef CONFIG_SLUB_DEBUG
-#define SLABDEBUG 1
+	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
 #else
-#define SLABDEBUG 0
+	return 0;
 #endif
+}
 
 /*
  * Issues still to be resolved:
@@ -1157,9 +1163,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	inc_slabs_node(s, page_to_nid(page), page->objects);
 	page->slab = s;
 	page->flags |= 1 << PG_slab;
-	if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
-			SLAB_STORE_USER | SLAB_TRACE))
-		__SetPageSlubDebug(page);
 
 	start = page_address(page);
 
@@ -1186,14 +1189,13 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 	int order = compound_order(page);
 	int pages = 1 << order;
 
-	if (unlikely(SLABDEBUG && PageSlubDebug(page))) {
+	if (kmem_cache_debug(s)) {
 		void *p;
 
 		slab_pad_check(s, page);
 		for_each_object(p, s, page_address(page),
 						page->objects)
 			check_object(s, page, p, 0);
-		__ClearPageSlubDebug(page);
 	}
 
 	kmemcheck_free_shadow(page, compound_order(page));
@@ -1415,8 +1417,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 			stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
 		} else {
 			stat(s, DEACTIVATE_FULL);
-			if (SLABDEBUG && PageSlubDebug(page) &&
-						(s->flags & SLAB_STORE_USER))
+			if (kmem_cache_debug(s) && (s->flags & SLAB_STORE_USER))
 				add_full(n, page);
 		}
 		slab_unlock(page);
@@ -1624,7 +1625,7 @@ load_freelist:
 	object = c->page->freelist;
 	if (unlikely(!object))
 		goto another_slab;
-	if (unlikely(SLABDEBUG && PageSlubDebug(c->page)))
+	if (kmem_cache_debug(s))
 		goto debug;
 
 	c->freelist = get_freepointer(s, object);
@@ -1783,7 +1784,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 	stat(s, FREE_SLOWPATH);
 	slab_lock(page);
 
-	if (unlikely(SLABDEBUG && PageSlubDebug(page)))
+	if (kmem_cache_debug(s))
 		goto debug;
 
 checks_ok:
@@ -3398,16 +3399,6 @@ static void validate_slab_slab(struct kmem_cache *s, struct page *page,
 	} else
 		printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
 			s->name, page);
-
-	if (s->flags & DEBUG_DEFAULT_FLAGS) {
-		if (!PageSlubDebug(page))
-			printk(KERN_ERR "SLUB %s: SlubDebug not set "
-				"on slab 0x%p\n", s->name, page);
-	} else {
-		if (PageSlubDebug(page))
-			printk(KERN_ERR "SLUB %s: SlubDebug set on "
-				"slab 0x%p\n", s->name, page);
-	}
 }
 
 static int validate_slab_node(struct kmem_cache *s,
-- 
cgit v1.2.3-70-g09d2


From 22cb516696304a9b85892b18c483a27d97cfa51b Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 16 Jul 2010 14:08:20 +0200
Subject: netfilter: correct CHECKSUM header and export it

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/Kbuild        | 1 +
 include/linux/netfilter/xt_CHECKSUM.h | 8 +++++---
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index bb103f43afa..b93b64dc9fa 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -3,6 +3,7 @@ header-y += nf_conntrack_tuple_common.h
 header-y += nfnetlink_conntrack.h
 header-y += nfnetlink_log.h
 header-y += nfnetlink_queue.h
+header-y += xt_CHECKSUM.h
 header-y += xt_CLASSIFY.h
 header-y += xt_CONNMARK.h
 header-y += xt_CONNSECMARK.h
diff --git a/include/linux/netfilter/xt_CHECKSUM.h b/include/linux/netfilter/xt_CHECKSUM.h
index 3b4fb77acef..9a2e4661654 100644
--- a/include/linux/netfilter/xt_CHECKSUM.h
+++ b/include/linux/netfilter/xt_CHECKSUM.h
@@ -6,8 +6,10 @@
  *
  * This software is distributed under GNU GPL v2, 1991
 */
-#ifndef _IPT_CHECKSUM_TARGET_H
-#define _IPT_CHECKSUM_TARGET_H
+#ifndef _XT_CHECKSUM_TARGET_H
+#define _XT_CHECKSUM_TARGET_H
+
+#include <linux/types.h>
 
 #define XT_CHECKSUM_OP_FILL	0x01	/* fill in checksum in IP header */
 
@@ -15,4 +17,4 @@ struct xt_CHECKSUM_info {
 	__u8 operation;	/* bitset of operations */
 };
 
-#endif /* _IPT_CHECKSUM_TARGET_H */
+#endif /* _XT_CHECKSUM_TARGET_H */
-- 
cgit v1.2.3-70-g09d2


From 2f495c398edca50ac251c134f1995a2fb3c06cb7 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 21 Jun 2010 13:20:46 +1000
Subject: net/phy/marvell: Expose IDs and flags in a .h and add dns323 LEDs
 setup flag

This moves the various known Marvell PHY IDs to include/linux/marvell_phy.h
along with dev_flags definitions for use by the driver.

I then added a flag that changes the PHY init code to setup the LEDs
config to the values needed to operate a dns323 rev C1 NAS.

I moved the existing "resistance" flag to the .h as well, though I've
been unable to find whoever sets this to convert it to use that constant.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
---
 drivers/net/phy/marvell.c   | 38 ++++++++++++++++++++------------------
 include/linux/marvell_phy.h | 20 ++++++++++++++++++++
 2 files changed, 40 insertions(+), 18 deletions(-)
 create mode 100644 include/linux/marvell_phy.h

(limited to 'include')

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 78b74e83ce5..5a1bd5db2a9 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -29,6 +29,7 @@
 #include <linux/mii.h>
 #include <linux/ethtool.h>
 #include <linux/phy.h>
+#include <linux/marvell_phy.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -48,8 +49,6 @@
 #define MII_M1145_RGMII_RX_DELAY	0x0080
 #define MII_M1145_RGMII_TX_DELAY	0x0002
 
-#define M1145_DEV_FLAGS_RESISTANCE	0x00000001
-
 #define MII_M1111_PHY_LED_CONTROL	0x18
 #define MII_M1111_PHY_LED_DIRECT	0x4100
 #define MII_M1111_PHY_LED_COMBINE	0x411c
@@ -350,7 +349,10 @@ static int m88e1118_config_init(struct phy_device *phydev)
 		return err;
 
 	/* Adjust LED Control */
-	err = phy_write(phydev, 0x10, 0x021e);
+	if (phydev->dev_flags & MARVELL_PHY_M1118_DNS323_LEDS)
+		err = phy_write(phydev, 0x10, 0x1100);
+	else
+		err = phy_write(phydev, 0x10, 0x021e);
 	if (err < 0)
 		return err;
 
@@ -398,7 +400,7 @@ static int m88e1145_config_init(struct phy_device *phydev)
 		if (err < 0)
 			return err;
 
-		if (phydev->dev_flags & M1145_DEV_FLAGS_RESISTANCE) {
+		if (phydev->dev_flags & MARVELL_PHY_M1145_FLAGS_RESISTANCE) {
 			err = phy_write(phydev, 0x1d, 0x0012);
 			if (err < 0)
 				return err;
@@ -529,8 +531,8 @@ static int m88e1121_did_interrupt(struct phy_device *phydev)
 
 static struct phy_driver marvell_drivers[] = {
 	{
-		.phy_id = 0x01410c60,
-		.phy_id_mask = 0xfffffff0,
+		.phy_id = MARVELL_PHY_ID_88E1101,
+		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1101",
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
@@ -541,8 +543,8 @@ static struct phy_driver marvell_drivers[] = {
 		.driver = { .owner = THIS_MODULE },
 	},
 	{
-		.phy_id = 0x01410c90,
-		.phy_id_mask = 0xfffffff0,
+		.phy_id = MARVELL_PHY_ID_88E1112,
+		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1112",
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
@@ -554,8 +556,8 @@ static struct phy_driver marvell_drivers[] = {
 		.driver = { .owner = THIS_MODULE },
 	},
 	{
-		.phy_id = 0x01410cc0,
-		.phy_id_mask = 0xfffffff0,
+		.phy_id = MARVELL_PHY_ID_88E1111,
+		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1111",
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
@@ -567,8 +569,8 @@ static struct phy_driver marvell_drivers[] = {
 		.driver = { .owner = THIS_MODULE },
 	},
 	{
-		.phy_id = 0x01410e10,
-		.phy_id_mask = 0xfffffff0,
+		.phy_id = MARVELL_PHY_ID_88E1118,
+		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1118",
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
@@ -580,8 +582,8 @@ static struct phy_driver marvell_drivers[] = {
 		.driver = {.owner = THIS_MODULE,},
 	},
 	{
-		.phy_id = 0x01410cb0,
-		.phy_id_mask = 0xfffffff0,
+		.phy_id = MARVELL_PHY_ID_88E1121R,
+		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1121R",
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
@@ -593,8 +595,8 @@ static struct phy_driver marvell_drivers[] = {
 		.driver = { .owner = THIS_MODULE },
 	},
 	{
-		.phy_id = 0x01410cd0,
-		.phy_id_mask = 0xfffffff0,
+		.phy_id = MARVELL_PHY_ID_88E1145,
+		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1145",
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
@@ -606,8 +608,8 @@ static struct phy_driver marvell_drivers[] = {
 		.driver = { .owner = THIS_MODULE },
 	},
 	{
-		.phy_id = 0x01410e30,
-		.phy_id_mask = 0xfffffff0,
+		.phy_id = MARVELL_PHY_ID_88E1240,
+		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1240",
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
new file mode 100644
index 00000000000..2ed4fb8bbd5
--- /dev/null
+++ b/include/linux/marvell_phy.h
@@ -0,0 +1,20 @@
+#ifndef _MARVELL_PHY_H
+#define _MARVELL_PHY_H
+
+/* Mask used for ID comparisons */
+#define MARVELL_PHY_ID_MASK		0xfffffff0
+
+/* Known PHY IDs */
+#define MARVELL_PHY_ID_88E1101		0x01410c60
+#define MARVELL_PHY_ID_88E1112		0x01410c90
+#define MARVELL_PHY_ID_88E1111		0x01410cc0
+#define MARVELL_PHY_ID_88E1118		0x01410e10
+#define MARVELL_PHY_ID_88E1121R		0x01410cb0
+#define MARVELL_PHY_ID_88E1145		0x01410cd0
+#define MARVELL_PHY_ID_88E1240		0x01410e30
+
+/* struct phy_device dev_flags definitions */
+#define MARVELL_PHY_M1145_FLAGS_RESISTANCE	0x00000001
+#define MARVELL_PHY_M1118_DNS323_LEDS		0x00000002
+
+#endif /* _MARVELL_PHY_H */
-- 
cgit v1.2.3-70-g09d2


From 396e894d289d69bacf5acd983c97cd6e21a14c08 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 9 Jul 2010 15:12:27 +0200
Subject: sched: Revert nohz_ratelimit() for now

Norbert reported that nohz_ratelimit() causes his laptop to burn about
4W (40%) extra. For now back out the change and see if we can adjust
the power management code to make better decisions.

Reported-by: Norbert Preining <preining@logic.at>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h    |  6 ------
 kernel/sched.c           | 10 ----------
 kernel/time/tick-sched.c |  2 +-
 3 files changed, 1 insertion(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 747fcaedddb..6e0bb86de99 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -273,17 +273,11 @@ extern cpumask_var_t nohz_cpu_mask;
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
 extern int select_nohz_load_balancer(int cpu);
 extern int get_nohz_load_balancer(void);
-extern int nohz_ratelimit(int cpu);
 #else
 static inline int select_nohz_load_balancer(int cpu)
 {
 	return 0;
 }
-
-static inline int nohz_ratelimit(int cpu)
-{
-	return 0;
-}
 #endif
 
 /*
diff --git a/kernel/sched.c b/kernel/sched.c
index f52a8801b7a..63b4a14682f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1232,16 +1232,6 @@ void wake_up_idle_cpu(int cpu)
 		smp_send_reschedule(cpu);
 }
 
-int nohz_ratelimit(int cpu)
-{
-	struct rq *rq = cpu_rq(cpu);
-	u64 diff = rq->clock - rq->nohz_stamp;
-
-	rq->nohz_stamp = rq->clock;
-
-	return diff < (NSEC_PER_SEC / HZ) >> 1;
-}
-
 #endif /* CONFIG_NO_HZ */
 
 static u64 sched_avg_period(void)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 813993b5fb6..f898af60817 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -325,7 +325,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 	} while (read_seqretry(&xtime_lock, seq));
 
 	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
-	    arch_needs_cpu(cpu) || nohz_ratelimit(cpu)) {
+	    arch_needs_cpu(cpu)) {
 		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
 	} else {
-- 
cgit v1.2.3-70-g09d2


From 3c2ef841c0e27f37923ed15dc5d744cd6ba704ae Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 16 Jul 2010 19:51:06 +0900
Subject: ASoC: fsi: Add specified ID for soc-audio

Specified ID is necessary, when some codecs are used with FSI.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Acked-by: Liam Girdwood <lrg@slimlogic.co.uk>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/sh_fsi.h    | 3 +++
 sound/soc/sh/fsi-ak4642.c | 4 ++--
 sound/soc/sh/fsi-da7210.c | 4 ++--
 3 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/sound/sh_fsi.h b/include/sound/sh_fsi.h
index 2c6237e6c04..6ac71863c70 100644
--- a/include/sound/sh_fsi.h
+++ b/include/sound/sh_fsi.h
@@ -12,6 +12,9 @@
  * published by the Free Software Foundation.
  */
 
+#define FSI_PORT_A	0
+#define FSI_PORT_B	1
+
 /* flags format
 
  * 0xABCDEEFF
diff --git a/sound/soc/sh/fsi-ak4642.c b/sound/soc/sh/fsi-ak4642.c
index 2871a200160..dad575a2262 100644
--- a/sound/soc/sh/fsi-ak4642.c
+++ b/sound/soc/sh/fsi-ak4642.c
@@ -29,7 +29,7 @@ static int fsi_ak4642_dai_init(struct snd_soc_codec *codec)
 static struct snd_soc_dai_link fsi_dai_link = {
 	.name		= "AK4642",
 	.stream_name	= "AK4642",
-	.cpu_dai	= &fsi_soc_dai[0], /* fsi */
+	.cpu_dai	= &fsi_soc_dai[FSI_PORT_A],
 	.codec_dai	= &ak4642_dai,
 	.init		= fsi_ak4642_dai_init,
 	.ops		= NULL,
@@ -53,7 +53,7 @@ static int __init fsi_ak4642_init(void)
 {
 	int ret = -ENOMEM;
 
-	fsi_snd_device = platform_device_alloc("soc-audio", -1);
+	fsi_snd_device = platform_device_alloc("soc-audio", FSI_PORT_A);
 	if (!fsi_snd_device)
 		goto out;
 
diff --git a/sound/soc/sh/fsi-da7210.c b/sound/soc/sh/fsi-da7210.c
index 4d4fd777b45..121bbb07bb0 100644
--- a/sound/soc/sh/fsi-da7210.c
+++ b/sound/soc/sh/fsi-da7210.c
@@ -24,7 +24,7 @@ static int fsi_da7210_init(struct snd_soc_codec *codec)
 static struct snd_soc_dai_link fsi_da7210_dai = {
 	.name		= "DA7210",
 	.stream_name	= "DA7210",
-	.cpu_dai	= &fsi_soc_dai[1], /* FSI B */
+	.cpu_dai	= &fsi_soc_dai[FSI_PORT_B],
 	.codec_dai	= &da7210_dai,
 	.init		= fsi_da7210_init,
 };
@@ -47,7 +47,7 @@ static int __init fsi_da7210_sound_init(void)
 {
 	int ret;
 
-	fsi_da7210_snd_device = platform_device_alloc("soc-audio", -1);
+	fsi_da7210_snd_device = platform_device_alloc("soc-audio", FSI_PORT_B);
 	if (!fsi_da7210_snd_device)
 		return -ENOMEM;
 
-- 
cgit v1.2.3-70-g09d2


From 2430d12c94ff2bafcfe4f65edf7ee5f300d2d9c6 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Sun, 13 Jun 2010 00:36:52 +0200
Subject: PM: describe kernel policy regarding wakeup defaults (v. 2)

This patch (as1381b) updates a comment describing the kernel's policy
toward enabling wakeup by default.

It also makes device_set_wakeup_capable() actually do something when
CONFIG_PM isn't enabled.  It's not clear this is necessary; however if
it isn't then device_init_wakeup() and device_can_wakeup() should also
be do-nothing routines.  Furthermore, I don't expect this change to
have any noticeable effect -- but if it does then clearly the old
behavior was wrong.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/pm_wakeup.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 22d64c18056..76aca48722a 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -29,8 +29,11 @@
 
 #ifdef CONFIG_PM
 
-/* changes to device_may_wakeup take effect on the next pm state change.
- * by default, devices should wakeup if they can.
+/* Changes to device_may_wakeup take effect on the next pm state change.
+ *
+ * By default, most devices should leave wakeup disabled.  The exceptions
+ * are devices that everyone expects to be wakeup sources: keyboards,
+ * power buttons, possibly network interfaces, etc.
  */
 static inline void device_init_wakeup(struct device *dev, bool val)
 {
@@ -59,7 +62,7 @@ static inline bool device_may_wakeup(struct device *dev)
 
 #else /* !CONFIG_PM */
 
-/* For some reason the next two routines work even without CONFIG_PM */
+/* For some reason the following routines work even without CONFIG_PM */
 static inline void device_init_wakeup(struct device *dev, bool val)
 {
 	dev->power.can_wakeup = val;
@@ -67,6 +70,7 @@ static inline void device_init_wakeup(struct device *dev, bool val)
 
 static inline void device_set_wakeup_capable(struct device *dev, bool capable)
 {
+	dev->power.can_wakeup = capable;
 }
 
 static inline bool device_can_wakeup(struct device *dev)
-- 
cgit v1.2.3-70-g09d2


From b14e033e17d0ea0ba12668d0d2f371cd31586994 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 29 Jun 2010 22:49:24 +0200
Subject: PNPACPI: Add support for remote wakeup

This patch (as1354) adds remote-wakeup support to the pnpacpi driver.
The new can_wakeup method also allows other PNP protocol drivers
(pnpbios or isapnp) to add wakeup support, but I don't know enough
about how they work to actually do it.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reviewed-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/pnp/core.c         |  3 +++
 drivers/pnp/pnpacpi/core.c | 23 +++++++++++++++++++++++
 include/linux/pnp.h        |  1 +
 3 files changed, 27 insertions(+)

(limited to 'include')

diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c
index 5dba90995d9..88b3cde5259 100644
--- a/drivers/pnp/core.c
+++ b/drivers/pnp/core.c
@@ -164,6 +164,9 @@ int __pnp_add_device(struct pnp_dev *dev)
 	list_add_tail(&dev->global_list, &pnp_global);
 	list_add_tail(&dev->protocol_list, &dev->protocol->devices);
 	spin_unlock(&pnp_lock);
+	if (dev->protocol->can_wakeup)
+		device_set_wakeup_capable(&dev->dev,
+				dev->protocol->can_wakeup(dev));
 	return device_register(&dev->dev);
 }
 
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index f7ff628b7d9..dc4e32e031e 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -122,17 +122,37 @@ static int pnpacpi_disable_resources(struct pnp_dev *dev)
 }
 
 #ifdef CONFIG_ACPI_SLEEP
+static bool pnpacpi_can_wakeup(struct pnp_dev *dev)
+{
+	struct acpi_device *acpi_dev = dev->data;
+	acpi_handle handle = acpi_dev->handle;
+
+	return acpi_bus_can_wakeup(handle);
+}
+
 static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state)
 {
 	struct acpi_device *acpi_dev = dev->data;
 	acpi_handle handle = acpi_dev->handle;
 	int power_state;
 
+	if (device_can_wakeup(&dev->dev)) {
+		int rc = acpi_pm_device_sleep_wake(&dev->dev,
+				device_may_wakeup(&dev->dev));
+
+		if (rc)
+			return rc;
+	}
 	power_state = acpi_pm_device_sleep_state(&dev->dev, NULL);
 	if (power_state < 0)
 		power_state = (state.event == PM_EVENT_ON) ?
 				ACPI_STATE_D0 : ACPI_STATE_D3;
 
+	/* acpi_bus_set_power() often fails (keyboard port can't be
+	 * powered-down?), and in any case, our return value is ignored
+	 * by pnp_bus_suspend().  Hence we don't revert the wakeup
+	 * setting if the set_power fails.
+	 */
 	return acpi_bus_set_power(handle, power_state);
 }
 
@@ -141,6 +161,8 @@ static int pnpacpi_resume(struct pnp_dev *dev)
 	struct acpi_device *acpi_dev = dev->data;
 	acpi_handle handle = acpi_dev->handle;
 
+	if (device_may_wakeup(&dev->dev))
+		acpi_pm_device_sleep_wake(&dev->dev, false);
 	return acpi_bus_set_power(handle, ACPI_STATE_D0);
 }
 #endif
@@ -151,6 +173,7 @@ struct pnp_protocol pnpacpi_protocol = {
 	.set	 = pnpacpi_set_resources,
 	.disable = pnpacpi_disable_resources,
 #ifdef CONFIG_ACPI_SLEEP
+	.can_wakeup = pnpacpi_can_wakeup,
 	.suspend = pnpacpi_suspend,
 	.resume = pnpacpi_resume,
 #endif
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index 7c4193eb007..1bc1338b817 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -414,6 +414,7 @@ struct pnp_protocol {
 	int (*disable) (struct pnp_dev *dev);
 
 	/* protocol specific suspend/resume */
+	bool (*can_wakeup) (struct pnp_dev *dev);
 	int (*suspend) (struct pnp_dev * dev, pm_message_t state);
 	int (*resume) (struct pnp_dev * dev);
 
-- 
cgit v1.2.3-70-g09d2


From c125e96f044427f38d106fab7bc5e4a5e6a18262 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 5 Jul 2010 22:43:53 +0200
Subject: PM: Make it possible to avoid races between wakeup and system sleep

One of the arguments during the suspend blockers discussion was that
the mainline kernel didn't contain any mechanisms making it possible
to avoid races between wakeup and system suspend.

Generally, there are two problems in that area.  First, if a wakeup
event occurs exactly when /sys/power/state is being written to, it
may be delivered to user space right before the freezer kicks in, so
the user space consumer of the event may not be able to process it
before the system is suspended.  Second, if a wakeup event occurs
after user space has been frozen, it is not generally guaranteed that
the ongoing transition of the system into a sleep state will be
aborted.

To address these issues introduce a new global sysfs attribute,
/sys/power/wakeup_count, associated with a running counter of wakeup
events and three helper functions, pm_stay_awake(), pm_relax(), and
pm_wakeup_event(), that may be used by kernel subsystems to control
the behavior of this attribute and to request the PM core to abort
system transitions into a sleep state already in progress.

The /sys/power/wakeup_count file may be read from or written to by
user space.  Reads will always succeed (unless interrupted by a
signal) and return the current value of the wakeup events counter.
Writes, however, will only succeed if the written number is equal to
the current value of the wakeup events counter.  If a write is
successful, it will cause the kernel to save the current value of the
wakeup events counter and to abort the subsequent system transition
into a sleep state if any wakeup events are reported after the write
has returned.

[The assumption is that before writing to /sys/power/state user space
will first read from /sys/power/wakeup_count.  Next, user space
consumers of wakeup events will have a chance to acknowledge or
veto the upcoming system transition to a sleep state.  Finally, if
the transition is allowed to proceed, /sys/power/wakeup_count will
be written to and if that succeeds, /sys/power/state will be written
to as well.  Still, if any wakeup events are reported to the PM core
by kernel subsystems after that point, the transition will be
aborted.]

Additionally, put a wakeup events counter into struct dev_pm_info and
make these per-device wakeup event counters available via sysfs,
so that it's possible to check the activity of various wakeup event
sources within the kernel.

To illustrate how subsystems can use pm_wakeup_event(), make the
low-level PCI runtime PM wakeup-handling code use it.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: markgross <markgross@thegnar.org>
Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
---
 Documentation/ABI/testing/sysfs-power |  15 +++
 drivers/base/power/Makefile           |   2 +-
 drivers/base/power/main.c             |   1 +
 drivers/base/power/sysfs.c            |  15 +++
 drivers/base/power/wakeup.c           | 229 ++++++++++++++++++++++++++++++++++
 drivers/pci/pci-acpi.c                |   1 +
 drivers/pci/pci.c                     |  20 ++-
 drivers/pci/pci.h                     |   1 +
 drivers/pci/pcie/pme/pcie_pme.c       |   5 +-
 include/linux/pm.h                    |  10 ++
 include/linux/suspend.h               |   7 ++
 kernel/power/hibernate.c              |  20 ++-
 kernel/power/main.c                   |  55 ++++++++
 kernel/power/suspend.c                |   4 +-
 14 files changed, 375 insertions(+), 10 deletions(-)
 create mode 100644 drivers/base/power/wakeup.c

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index d6a801f45b4..2875f1f74a0 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -114,3 +114,18 @@ Description:
 		if this file contains "1", which is the default.  It may be
 		disabled by writing "0" to this file, in which case all devices
 		will be suspended and resumed synchronously.
+
+What:		/sys/power/wakeup_count
+Date:		July 2010
+Contact:	Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+		The /sys/power/wakeup_count file allows user space to put the
+		system into a sleep state while taking into account the
+		concurrent arrival of wakeup events.  Reading from it returns
+		the current number of registered wakeup events and it blocks if
+		some wakeup events are being processed at the time the file is
+		read from.  Writing to it will only succeed if the current
+		number of wakeup events is equal to the written value and, if
+		successful, will make the kernel abort a subsequent transition
+		to a sleep state if any wakeup events are reported after the
+		write has returned.
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index 89de75325ce..cbccf9a3cee 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_PM)	+= sysfs.o
-obj-$(CONFIG_PM_SLEEP)	+= main.o
+obj-$(CONFIG_PM_SLEEP)	+= main.o wakeup.o
 obj-$(CONFIG_PM_RUNTIME)	+= runtime.o
 obj-$(CONFIG_PM_OPS)	+= generic_ops.o
 obj-$(CONFIG_PM_TRACE_RTC)	+= trace.o
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 941fcb87e52..5419a49ff13 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -59,6 +59,7 @@ void device_pm_init(struct device *dev)
 {
 	dev->power.status = DPM_ON;
 	init_completion(&dev->power.completion);
+	dev->power.wakeup_count = 0;
 	pm_runtime_init(dev);
 }
 
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index a4c33bc5125..81d344e0e95 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -73,6 +73,8 @@
  *	device are known to the PM core.  However, for some devices this
  *	attribute is set to "enabled" by bus type code or device drivers and in
  *	that cases it should be safe to leave the default value.
+ *
+ *	wakeup_count - Report the number of wakeup events related to the device
  */
 
 static const char enabled[] = "enabled";
@@ -144,6 +146,16 @@ wake_store(struct device * dev, struct device_attribute *attr,
 
 static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store);
 
+#ifdef CONFIG_PM_SLEEP
+static ssize_t wakeup_count_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", dev->power.wakeup_count);
+}
+
+static DEVICE_ATTR(wakeup_count, 0444, wakeup_count_show, NULL);
+#endif
+
 #ifdef CONFIG_PM_ADVANCED_DEBUG
 #ifdef CONFIG_PM_RUNTIME
 
@@ -230,6 +242,9 @@ static struct attribute * power_attrs[] = {
 	&dev_attr_control.attr,
 #endif
 	&dev_attr_wakeup.attr,
+#ifdef CONFIG_PM_SLEEP
+	&dev_attr_wakeup_count.attr,
+#endif
 #ifdef CONFIG_PM_ADVANCED_DEBUG
 	&dev_attr_async.attr,
 #ifdef CONFIG_PM_RUNTIME
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
new file mode 100644
index 00000000000..25599077c39
--- /dev/null
+++ b/drivers/base/power/wakeup.c
@@ -0,0 +1,229 @@
+/*
+ * drivers/base/power/wakeup.c - System wakeup events framework
+ *
+ * Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/capability.h>
+#include <linux/suspend.h>
+#include <linux/pm.h>
+
+/*
+ * If set, the suspend/hibernate code will abort transitions to a sleep state
+ * if wakeup events are registered during or immediately before the transition.
+ */
+bool events_check_enabled;
+
+/* The counter of registered wakeup events. */
+static unsigned long event_count;
+/* A preserved old value of event_count. */
+static unsigned long saved_event_count;
+/* The counter of wakeup events being processed. */
+static unsigned long events_in_progress;
+
+static DEFINE_SPINLOCK(events_lock);
+
+/*
+ * The functions below use the observation that each wakeup event starts a
+ * period in which the system should not be suspended.  The moment this period
+ * will end depends on how the wakeup event is going to be processed after being
+ * detected and all of the possible cases can be divided into two distinct
+ * groups.
+ *
+ * First, a wakeup event may be detected by the same functional unit that will
+ * carry out the entire processing of it and possibly will pass it to user space
+ * for further processing.  In that case the functional unit that has detected
+ * the event may later "close" the "no suspend" period associated with it
+ * directly as soon as it has been dealt with.  The pair of pm_stay_awake() and
+ * pm_relax(), balanced with each other, is supposed to be used in such
+ * situations.
+ *
+ * Second, a wakeup event may be detected by one functional unit and processed
+ * by another one.  In that case the unit that has detected it cannot really
+ * "close" the "no suspend" period associated with it, unless it knows in
+ * advance what's going to happen to the event during processing.  This
+ * knowledge, however, may not be available to it, so it can simply specify time
+ * to wait before the system can be suspended and pass it as the second
+ * argument of pm_wakeup_event().
+ */
+
+/**
+ * pm_stay_awake - Notify the PM core that a wakeup event is being processed.
+ * @dev: Device the wakeup event is related to.
+ *
+ * Notify the PM core of a wakeup event (signaled by @dev) by incrementing the
+ * counter of wakeup events being processed.  If @dev is not NULL, the counter
+ * of wakeup events related to @dev is incremented too.
+ *
+ * Call this function after detecting a wakeup event if pm_relax() is going
+ * to be called directly after processing the event (and possibly passing it to
+ * user space for further processing).
+ *
+ * It is safe to call this function from interrupt context.
+ */
+void pm_stay_awake(struct device *dev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&events_lock, flags);
+	if (dev)
+		dev->power.wakeup_count++;
+
+	events_in_progress++;
+	spin_unlock_irqrestore(&events_lock, flags);
+}
+
+/**
+ * pm_relax - Notify the PM core that processing of a wakeup event has ended.
+ *
+ * Notify the PM core that a wakeup event has been processed by decrementing
+ * the counter of wakeup events being processed and incrementing the counter
+ * of registered wakeup events.
+ *
+ * Call this function for wakeup events whose processing started with calling
+ * pm_stay_awake().
+ *
+ * It is safe to call it from interrupt context.
+ */
+void pm_relax(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&events_lock, flags);
+	if (events_in_progress) {
+		events_in_progress--;
+		event_count++;
+	}
+	spin_unlock_irqrestore(&events_lock, flags);
+}
+
+/**
+ * pm_wakeup_work_fn - Deferred closing of a wakeup event.
+ *
+ * Execute pm_relax() for a wakeup event detected in the past and free the
+ * work item object used for queuing up the work.
+ */
+static void pm_wakeup_work_fn(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+
+	pm_relax();
+	kfree(dwork);
+}
+
+/**
+ * pm_wakeup_event - Notify the PM core of a wakeup event.
+ * @dev: Device the wakeup event is related to.
+ * @msec: Anticipated event processing time (in milliseconds).
+ *
+ * Notify the PM core of a wakeup event (signaled by @dev) that will take
+ * approximately @msec milliseconds to be processed by the kernel.  Increment
+ * the counter of wakeup events being processed and queue up a work item
+ * that will execute pm_relax() for the event after @msec milliseconds.  If @dev
+ * is not NULL, the counter of wakeup events related to @dev is incremented too.
+ *
+ * It is safe to call this function from interrupt context.
+ */
+void pm_wakeup_event(struct device *dev, unsigned int msec)
+{
+	unsigned long flags;
+	struct delayed_work *dwork;
+
+	dwork = msec ? kzalloc(sizeof(*dwork), GFP_ATOMIC) : NULL;
+
+	spin_lock_irqsave(&events_lock, flags);
+	if (dev)
+		dev->power.wakeup_count++;
+
+	if (dwork) {
+		INIT_DELAYED_WORK(dwork, pm_wakeup_work_fn);
+		schedule_delayed_work(dwork, msecs_to_jiffies(msec));
+
+		events_in_progress++;
+	} else {
+		event_count++;
+	}
+	spin_unlock_irqrestore(&events_lock, flags);
+}
+
+/**
+ * pm_check_wakeup_events - Check for new wakeup events.
+ *
+ * Compare the current number of registered wakeup events with its preserved
+ * value from the past to check if new wakeup events have been registered since
+ * the old value was stored.  Check if the current number of wakeup events being
+ * processed is zero.
+ */
+bool pm_check_wakeup_events(void)
+{
+	unsigned long flags;
+	bool ret = true;
+
+	spin_lock_irqsave(&events_lock, flags);
+	if (events_check_enabled) {
+		ret = (event_count == saved_event_count) && !events_in_progress;
+		events_check_enabled = ret;
+	}
+	spin_unlock_irqrestore(&events_lock, flags);
+	return ret;
+}
+
+/**
+ * pm_get_wakeup_count - Read the number of registered wakeup events.
+ * @count: Address to store the value at.
+ *
+ * Store the number of registered wakeup events at the address in @count.  Block
+ * if the current number of wakeup events being processed is nonzero.
+ *
+ * Return false if the wait for the number of wakeup events being processed to
+ * drop down to zero has been interrupted by a signal (and the current number
+ * of wakeup events being processed is still nonzero).  Otherwise return true.
+ */
+bool pm_get_wakeup_count(unsigned long *count)
+{
+	bool ret;
+
+	spin_lock_irq(&events_lock);
+	if (capable(CAP_SYS_ADMIN))
+		events_check_enabled = false;
+
+	while (events_in_progress && !signal_pending(current)) {
+		spin_unlock_irq(&events_lock);
+
+		schedule_timeout_interruptible(msecs_to_jiffies(100));
+
+		spin_lock_irq(&events_lock);
+	}
+	*count = event_count;
+	ret = !events_in_progress;
+	spin_unlock_irq(&events_lock);
+	return ret;
+}
+
+/**
+ * pm_save_wakeup_count - Save the current number of registered wakeup events.
+ * @count: Value to compare with the current number of registered wakeup events.
+ *
+ * If @count is equal to the current number of registered wakeup events and the
+ * current number of wakeup events being processed is zero, store @count as the
+ * old number of registered wakeup events to be used by pm_check_wakeup_events()
+ * and return true.  Otherwise return false.
+ */
+bool pm_save_wakeup_count(unsigned long count)
+{
+	bool ret = false;
+
+	spin_lock_irq(&events_lock);
+	if (count == event_count && !events_in_progress) {
+		saved_event_count = count;
+		events_check_enabled = true;
+		ret = true;
+	}
+	spin_unlock_irq(&events_lock);
+	return ret;
+}
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 2e7a3bf1382..1ab98bbe58d 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -48,6 +48,7 @@ static void pci_acpi_wake_dev(acpi_handle handle, u32 event, void *context)
 	if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_dev) {
 		pci_check_pme_status(pci_dev);
 		pm_runtime_resume(&pci_dev->dev);
+		pci_wakeup_event(pci_dev);
 		if (pci_dev->subordinate)
 			pci_pme_wakeup_bus(pci_dev->subordinate);
 	}
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 740fb4ea966..130ed1daf0f 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1275,6 +1275,22 @@ bool pci_check_pme_status(struct pci_dev *dev)
 	return ret;
 }
 
+/*
+ * Time to wait before the system can be put into a sleep state after reporting
+ * a wakeup event signaled by a PCI device.
+ */
+#define PCI_WAKEUP_COOLDOWN	100
+
+/**
+ * pci_wakeup_event - Report a wakeup event related to a given PCI device.
+ * @dev: Device to report the wakeup event for.
+ */
+void pci_wakeup_event(struct pci_dev *dev)
+{
+	if (device_may_wakeup(&dev->dev))
+		pm_wakeup_event(&dev->dev, PCI_WAKEUP_COOLDOWN);
+}
+
 /**
  * pci_pme_wakeup - Wake up a PCI device if its PME Status bit is set.
  * @dev: Device to handle.
@@ -1285,8 +1301,10 @@ bool pci_check_pme_status(struct pci_dev *dev)
  */
 static int pci_pme_wakeup(struct pci_dev *dev, void *ign)
 {
-	if (pci_check_pme_status(dev))
+	if (pci_check_pme_status(dev)) {
 		pm_request_resume(&dev->dev);
+		pci_wakeup_event(dev);
+	}
 	return 0;
 }
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index f8077b3c8c8..c8b7fd056cc 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -56,6 +56,7 @@ extern void pci_update_current_state(struct pci_dev *dev, pci_power_t state);
 extern void pci_disable_enabled_device(struct pci_dev *dev);
 extern bool pci_check_pme_status(struct pci_dev *dev);
 extern int pci_finish_runtime_suspend(struct pci_dev *dev);
+extern void pci_wakeup_event(struct pci_dev *dev);
 extern int __pci_pme_wakeup(struct pci_dev *dev, void *ign);
 extern void pci_pme_wakeup_bus(struct pci_bus *bus);
 extern void pci_pm_init(struct pci_dev *dev);
diff --git a/drivers/pci/pcie/pme/pcie_pme.c b/drivers/pci/pcie/pme/pcie_pme.c
index d672a0a6381..bbdea18693d 100644
--- a/drivers/pci/pcie/pme/pcie_pme.c
+++ b/drivers/pci/pcie/pme/pcie_pme.c
@@ -154,6 +154,7 @@ static bool pcie_pme_walk_bus(struct pci_bus *bus)
 		/* Skip PCIe devices in case we started from a root port. */
 		if (!pci_is_pcie(dev) && pci_check_pme_status(dev)) {
 			pm_request_resume(&dev->dev);
+			pci_wakeup_event(dev);
 			ret = true;
 		}
 
@@ -254,8 +255,10 @@ static void pcie_pme_handle_request(struct pci_dev *port, u16 req_id)
 	if (found) {
 		/* The device is there, but we have to check its PME status. */
 		found = pci_check_pme_status(dev);
-		if (found)
+		if (found) {
 			pm_request_resume(&dev->dev);
+			pci_wakeup_event(dev);
+		}
 		pci_dev_put(dev);
 	} else if (devfn) {
 		/*
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 8e258c72797..b417fc46f3f 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -457,6 +457,7 @@ struct dev_pm_info {
 #ifdef CONFIG_PM_SLEEP
 	struct list_head	entry;
 	struct completion	completion;
+	unsigned long		wakeup_count;
 #endif
 #ifdef CONFIG_PM_RUNTIME
 	struct timer_list	suspend_timer;
@@ -552,6 +553,11 @@ extern void __suspend_report_result(const char *function, void *fn, int ret);
 	} while (0)
 
 extern void device_pm_wait_for_dev(struct device *sub, struct device *dev);
+
+/* drivers/base/power/wakeup.c */
+extern void pm_wakeup_event(struct device *dev, unsigned int msec);
+extern void pm_stay_awake(struct device *dev);
+extern void pm_relax(void);
 #else /* !CONFIG_PM_SLEEP */
 
 #define device_pm_lock() do {} while (0)
@@ -565,6 +571,10 @@ static inline int dpm_suspend_start(pm_message_t state)
 #define suspend_report_result(fn, ret)		do {} while (0)
 
 static inline void device_pm_wait_for_dev(struct device *a, struct device *b) {}
+
+static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {}
+static inline void pm_stay_awake(struct device *dev) {}
+static inline void pm_relax(void) {}
 #endif /* !CONFIG_PM_SLEEP */
 
 /* How to reorder dpm_list after device_move() */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index bc7d6bb4cd8..bf1bab7b059 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -286,6 +286,13 @@ extern int unregister_pm_notifier(struct notifier_block *nb);
 		{ .notifier_call = fn, .priority = pri };	\
 	register_pm_notifier(&fn##_nb);			\
 }
+
+/* drivers/base/power/wakeup.c */
+extern bool events_check_enabled;
+
+extern bool pm_check_wakeup_events(void);
+extern bool pm_get_wakeup_count(unsigned long *count);
+extern bool pm_save_wakeup_count(unsigned long count);
 #else /* !CONFIG_PM_SLEEP */
 
 static inline int register_pm_notifier(struct notifier_block *nb)
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index aa9e916da4d..f6120291663 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -277,7 +277,7 @@ static int create_image(int platform_mode)
 		goto Enable_irqs;
 	}
 
-	if (hibernation_test(TEST_CORE))
+	if (hibernation_test(TEST_CORE) || !pm_check_wakeup_events())
 		goto Power_up;
 
 	in_suspend = 1;
@@ -288,8 +288,10 @@ static int create_image(int platform_mode)
 			error);
 	/* Restore control flow magically appears here */
 	restore_processor_state();
-	if (!in_suspend)
+	if (!in_suspend) {
+		events_check_enabled = false;
 		platform_leave(platform_mode);
+	}
 
  Power_up:
 	sysdev_resume();
@@ -511,14 +513,20 @@ int hibernation_platform_enter(void)
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_HIBERNATE);
+	if (!pm_check_wakeup_events()) {
+		error = -EAGAIN;
+		goto Power_up;
+	}
+
 	hibernation_ops->enter();
 	/* We should never get here */
 	while (1);
 
-	/*
-	 * We don't need to reenable the nonboot CPUs or resume consoles, since
-	 * the system is going to be halted anyway.
-	 */
+ Power_up:
+	sysdev_resume();
+	local_irq_enable();
+	enable_nonboot_cpus();
+
  Platform_finish:
 	hibernation_ops->finish();
 
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b58800b21fc..62b0bc6e498 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -204,6 +204,60 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
 
 power_attr(state);
 
+#ifdef CONFIG_PM_SLEEP
+/*
+ * The 'wakeup_count' attribute, along with the functions defined in
+ * drivers/base/power/wakeup.c, provides a means by which wakeup events can be
+ * handled in a non-racy way.
+ *
+ * If a wakeup event occurs when the system is in a sleep state, it simply is
+ * woken up.  In turn, if an event that would wake the system up from a sleep
+ * state occurs when it is undergoing a transition to that sleep state, the
+ * transition should be aborted.  Moreover, if such an event occurs when the
+ * system is in the working state, an attempt to start a transition to the
+ * given sleep state should fail during a certain period after the detection of
+ * the event.  Using the 'state' attribute alone is not sufficient to satisfy
+ * these requirements, because a wakeup event may occur exactly when 'state'
+ * is being written to and may be delivered to user space right before it is
+ * frozen, so the event will remain only partially processed until the system is
+ * woken up by another event.  In particular, it won't cause the transition to
+ * a sleep state to be aborted.
+ *
+ * This difficulty may be overcome if user space uses 'wakeup_count' before
+ * writing to 'state'.  It first should read from 'wakeup_count' and store
+ * the read value.  Then, after carrying out its own preparations for the system
+ * transition to a sleep state, it should write the stored value to
+ * 'wakeup_count'.  If that fails, at least one wakeup event has occurred since
+ * 'wakeup_count' was read and 'state' should not be written to.  Otherwise, it
+ * is allowed to write to 'state', but the transition will be aborted if there
+ * are any wakeup events detected after 'wakeup_count' was written to.
+ */
+
+static ssize_t wakeup_count_show(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				char *buf)
+{
+	unsigned long val;
+
+	return pm_get_wakeup_count(&val) ? sprintf(buf, "%lu\n", val) : -EINTR;
+}
+
+static ssize_t wakeup_count_store(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				const char *buf, size_t n)
+{
+	unsigned long val;
+
+	if (sscanf(buf, "%lu", &val) == 1) {
+		if (pm_save_wakeup_count(val))
+			return n;
+	}
+	return -EINVAL;
+}
+
+power_attr(wakeup_count);
+#endif /* CONFIG_PM_SLEEP */
+
 #ifdef CONFIG_PM_TRACE
 int pm_trace_enabled;
 
@@ -236,6 +290,7 @@ static struct attribute * g[] = {
 #endif
 #ifdef CONFIG_PM_SLEEP
 	&pm_async_attr.attr,
+	&wakeup_count_attr.attr,
 #ifdef CONFIG_PM_DEBUG
 	&pm_test_attr.attr,
 #endif
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index f37cb7dd440..5f8d09f9432 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -163,8 +163,10 @@ static int suspend_enter(suspend_state_t state)
 
 	error = sysdev_suspend(PMSG_SUSPEND);
 	if (!error) {
-		if (!suspend_test(TEST_CORE))
+		if (!suspend_test(TEST_CORE) && pm_check_wakeup_events()) {
 			error = suspend_ops->enter(state);
+			events_check_enabled = false;
+		}
 		sysdev_resume();
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 12e4d0cc2e0a776a526c93bb2fcb9267abc6e0b1 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@suse.de>
Date: Thu, 1 Jul 2010 21:46:36 +0200
Subject: plist: Add plist_last

plist is currently used by the scheduler, which only needs to know the
highest item in the list.  This adds plist_last which allows you to
find the lowest.  This is necessary for using plists to implement a
fast search of dynamic ranges in pm_qos which can have both highest
and lowest criteria.

Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/plist.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include')

diff --git a/include/linux/plist.h b/include/linux/plist.h
index 6898985e7b3..7254eda078e 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -259,6 +259,23 @@ static inline int plist_node_empty(const struct plist_node *node)
 	container_of(plist_first(head), type, member)
 #endif
 
+/**
+ * plist_last_entry - get the struct for the last entry
+ * @head:	the &struct plist_head pointer
+ * @type:	the type of the struct this is embedded in
+ * @member:	the name of the list_struct within the struct
+ */
+#ifdef CONFIG_DEBUG_PI_LIST
+# define plist_last_entry(head, type, member)	\
+({ \
+	WARN_ON(plist_head_empty(head)); \
+	container_of(plist_last(head), type, member); \
+})
+#else
+# define plist_last_entry(head, type, member)	\
+	container_of(plist_last(head), type, member)
+#endif
+
 /**
  * plist_first - return the first node (and thus, highest priority)
  * @head:	the &struct plist_head pointer
@@ -271,4 +288,16 @@ static inline struct plist_node *plist_first(const struct plist_head *head)
 			  struct plist_node, plist.node_list);
 }
 
+/**
+ * plist_last - return the last node (and thus, lowest priority)
+ * @head:	the &struct plist_head pointer
+ *
+ * Assumes the plist is _not_ empty.
+ */
+static inline struct plist_node *plist_last(const struct plist_head *head)
+{
+	return list_entry(head->node_list.prev,
+			  struct plist_node, plist.node_list);
+}
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From 82f682514a5df89ffb3890627eebf0897b7a84ec Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@suse.de>
Date: Mon, 5 Jul 2010 22:53:06 +0200
Subject: pm_qos: Get rid of the allocation in pm_qos_add_request()

All current users of pm_qos_add_request() have the ability to supply
the memory required by the pm_qos routines, so make them do this and
eliminate the kmalloc() within pm_qos_add_request().  This has the
double benefit of making the call never fail and allowing it to be
called from atomic context.

Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Signed-off-by: mark gross <markgross@thegnar.org>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/net/e1000e/netdev.c            | 17 ++++-----
 drivers/net/igbvf/netdev.c             |  9 ++---
 drivers/net/wireless/ipw2x00/ipw2100.c | 12 +++---
 include/linux/netdevice.h              |  2 +-
 include/linux/pm_qos_params.h          | 13 +++++--
 include/sound/pcm.h                    |  2 +-
 kernel/pm_qos_params.c                 | 67 ++++++++++++++++++++--------------
 sound/core/pcm_native.c                | 13 +++----
 8 files changed, 74 insertions(+), 61 deletions(-)

(limited to 'include')

diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 57a7e41da69..9f13b660b80 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -2901,10 +2901,10 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
 			 * dropped transactions.
 			 */
 			pm_qos_update_request(
-				adapter->netdev->pm_qos_req, 55);
+				&adapter->netdev->pm_qos_req, 55);
 		} else {
 			pm_qos_update_request(
-				adapter->netdev->pm_qos_req,
+				&adapter->netdev->pm_qos_req,
 				PM_QOS_DEFAULT_VALUE);
 		}
 	}
@@ -3196,9 +3196,9 @@ int e1000e_up(struct e1000_adapter *adapter)
 
 	/* DMA latency requirement to workaround early-receive/jumbo issue */
 	if (adapter->flags & FLAG_HAS_ERT)
-		adapter->netdev->pm_qos_req =
-			pm_qos_add_request(PM_QOS_CPU_DMA_LATENCY,
-				       PM_QOS_DEFAULT_VALUE);
+		pm_qos_add_request(&adapter->netdev->pm_qos_req,
+				   PM_QOS_CPU_DMA_LATENCY,
+				   PM_QOS_DEFAULT_VALUE);
 
 	/* hardware has been reset, we need to reload some things */
 	e1000_configure(adapter);
@@ -3263,11 +3263,8 @@ void e1000e_down(struct e1000_adapter *adapter)
 	e1000_clean_tx_ring(adapter);
 	e1000_clean_rx_ring(adapter);
 
-	if (adapter->flags & FLAG_HAS_ERT) {
-		pm_qos_remove_request(
-			      adapter->netdev->pm_qos_req);
-		adapter->netdev->pm_qos_req = NULL;
-	}
+	if (adapter->flags & FLAG_HAS_ERT)
+		pm_qos_remove_request(&adapter->netdev->pm_qos_req);
 
 	/*
 	 * TODO: for power management, we could drop the link and
diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c
index 5e2b2a8c56c..add6197d3bc 100644
--- a/drivers/net/igbvf/netdev.c
+++ b/drivers/net/igbvf/netdev.c
@@ -48,7 +48,7 @@
 #define DRV_VERSION "1.0.0-k0"
 char igbvf_driver_name[] = "igbvf";
 const char igbvf_driver_version[] = DRV_VERSION;
-struct pm_qos_request_list *igbvf_driver_pm_qos_req;
+static struct pm_qos_request_list igbvf_driver_pm_qos_req;
 static const char igbvf_driver_string[] =
 				"Intel(R) Virtual Function Network Driver";
 static const char igbvf_copyright[] = "Copyright (c) 2009 Intel Corporation.";
@@ -2902,8 +2902,8 @@ static int __init igbvf_init_module(void)
 	printk(KERN_INFO "%s\n", igbvf_copyright);
 
 	ret = pci_register_driver(&igbvf_driver);
-	igbvf_driver_pm_qos_req = pm_qos_add_request(PM_QOS_CPU_DMA_LATENCY,
-	                       PM_QOS_DEFAULT_VALUE);
+	pm_qos_add_request(&igbvf_driver_pm_qos_req, PM_QOS_CPU_DMA_LATENCY,
+			   PM_QOS_DEFAULT_VALUE);
 
 	return ret;
 }
@@ -2918,8 +2918,7 @@ module_init(igbvf_init_module);
 static void __exit igbvf_exit_module(void)
 {
 	pci_unregister_driver(&igbvf_driver);
-	pm_qos_remove_request(igbvf_driver_pm_qos_req);
-	igbvf_driver_pm_qos_req = NULL;
+	pm_qos_remove_request(&igbvf_driver_pm_qos_req);
 }
 module_exit(igbvf_exit_module);
 
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
index 0bd4dfa59a8..7f0d98b885b 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/ipw2x00/ipw2100.c
@@ -174,7 +174,7 @@ that only one external action is invoked at a time.
 #define DRV_DESCRIPTION	"Intel(R) PRO/Wireless 2100 Network Driver"
 #define DRV_COPYRIGHT	"Copyright(c) 2003-2006 Intel Corporation"
 
-struct pm_qos_request_list *ipw2100_pm_qos_req;
+struct pm_qos_request_list ipw2100_pm_qos_req;
 
 /* Debugging stuff */
 #ifdef CONFIG_IPW2100_DEBUG
@@ -1741,7 +1741,7 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred)
 	/* the ipw2100 hardware really doesn't want power management delays
 	 * longer than 175usec
 	 */
-	pm_qos_update_request(ipw2100_pm_qos_req, 175);
+	pm_qos_update_request(&ipw2100_pm_qos_req, 175);
 
 	/* If the interrupt is enabled, turn it off... */
 	spin_lock_irqsave(&priv->low_lock, flags);
@@ -1889,7 +1889,7 @@ static void ipw2100_down(struct ipw2100_priv *priv)
 	ipw2100_disable_interrupts(priv);
 	spin_unlock_irqrestore(&priv->low_lock, flags);
 
-	pm_qos_update_request(ipw2100_pm_qos_req, PM_QOS_DEFAULT_VALUE);
+	pm_qos_update_request(&ipw2100_pm_qos_req, PM_QOS_DEFAULT_VALUE);
 
 	/* We have to signal any supplicant if we are disassociating */
 	if (associated)
@@ -6669,8 +6669,8 @@ static int __init ipw2100_init(void)
 	if (ret)
 		goto out;
 
-	ipw2100_pm_qos_req = pm_qos_add_request(PM_QOS_CPU_DMA_LATENCY,
-			PM_QOS_DEFAULT_VALUE);
+	pm_qos_add_request(&ipw2100_pm_qos_req, PM_QOS_CPU_DMA_LATENCY,
+			   PM_QOS_DEFAULT_VALUE);
 #ifdef CONFIG_IPW2100_DEBUG
 	ipw2100_debug_level = debug;
 	ret = driver_create_file(&ipw2100_pci_driver.driver,
@@ -6692,7 +6692,7 @@ static void __exit ipw2100_exit(void)
 			   &driver_attr_debug_level);
 #endif
 	pci_unregister_driver(&ipw2100_pci_driver);
-	pm_qos_remove_request(ipw2100_pm_qos_req);
+	pm_qos_remove_request(&ipw2100_pm_qos_req);
 }
 
 module_init(ipw2100_init);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b21e4054c12..2f22119b4b0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -779,7 +779,7 @@ struct net_device {
 	 */
 	char			name[IFNAMSIZ];
 
-	struct pm_qos_request_list *pm_qos_req;
+	struct pm_qos_request_list pm_qos_req;
 
 	/* device name hash chain */
 	struct hlist_node	name_hlist;
diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h
index 8ba440e5eb7..77cbddb3784 100644
--- a/include/linux/pm_qos_params.h
+++ b/include/linux/pm_qos_params.h
@@ -1,8 +1,10 @@
+#ifndef _LINUX_PM_QOS_PARAMS_H
+#define _LINUX_PM_QOS_PARAMS_H
 /* interface for the pm_qos_power infrastructure of the linux kernel.
  *
  * Mark Gross <mgross@linux.intel.com>
  */
-#include <linux/list.h>
+#include <linux/plist.h>
 #include <linux/notifier.h>
 #include <linux/miscdevice.h>
 
@@ -14,9 +16,12 @@
 #define PM_QOS_NUM_CLASSES 4
 #define PM_QOS_DEFAULT_VALUE -1
 
-struct pm_qos_request_list;
+struct pm_qos_request_list {
+	struct plist_node list;
+	int pm_qos_class;
+};
 
-struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value);
+void pm_qos_add_request(struct pm_qos_request_list *l, int pm_qos_class, s32 value);
 void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req,
 		s32 new_value);
 void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req);
@@ -24,4 +29,6 @@ void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req);
 int pm_qos_request(int pm_qos_class);
 int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier);
 int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier);
+int pm_qos_request_active(struct pm_qos_request_list *req);
 
+#endif
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index dd76cdede64..6e3a29732dc 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -366,7 +366,7 @@ struct snd_pcm_substream {
 	int number;
 	char name[32];			/* substream name */
 	int stream;			/* stream (direction) */
-	struct pm_qos_request_list *latency_pm_qos_req; /* pm_qos request */
+	struct pm_qos_request_list latency_pm_qos_req; /* pm_qos request */
 	size_t buffer_bytes_max;	/* limit ring buffer size */
 	struct snd_dma_buffer dma_buffer;
 	unsigned int dma_buf_id;
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index db8e51d7f39..996a4dec5f9 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -30,7 +30,6 @@
 /*#define DEBUG*/
 
 #include <linux/pm_qos_params.h>
-#include <linux/plist.h>
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
@@ -49,11 +48,6 @@
  * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
  * held, taken with _irqsave.  One lock to rule them all
  */
-struct pm_qos_request_list {
-	struct plist_node list;
-	int pm_qos_class;
-};
-
 enum pm_qos_type {
 	PM_QOS_MAX,		/* return the largest value */
 	PM_QOS_MIN		/* return the smallest value */
@@ -210,6 +204,12 @@ int pm_qos_request(int pm_qos_class)
 }
 EXPORT_SYMBOL_GPL(pm_qos_request);
 
+int pm_qos_request_active(struct pm_qos_request_list *req)
+{
+	return req->pm_qos_class != 0;
+}
+EXPORT_SYMBOL_GPL(pm_qos_request_active);
+
 /**
  * pm_qos_add_request - inserts new qos request into the list
  * @pm_qos_class: identifies which list of qos request to us
@@ -221,25 +221,23 @@ EXPORT_SYMBOL_GPL(pm_qos_request);
  * element as a handle for use in updating and removal.  Call needs to save
  * this handle for later use.
  */
-struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value)
+void pm_qos_add_request(struct pm_qos_request_list *dep,
+			int pm_qos_class, s32 value)
 {
-	struct pm_qos_request_list *dep;
-
-	dep = kzalloc(sizeof(struct pm_qos_request_list), GFP_KERNEL);
-	if (dep) {
-		struct pm_qos_object *o =  pm_qos_array[pm_qos_class];
-		int new_value;
-
-		if (value == PM_QOS_DEFAULT_VALUE)
-			new_value = o->default_value;
-		else
-			new_value = value;
-		plist_node_init(&dep->list, new_value);
-		dep->pm_qos_class = pm_qos_class;
-		update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE);
-	}
+	struct pm_qos_object *o =  pm_qos_array[pm_qos_class];
+	int new_value;
 
-	return dep;
+	if (pm_qos_request_active(dep)) {
+		WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n");
+		return;
+	}
+	if (value == PM_QOS_DEFAULT_VALUE)
+		new_value = o->default_value;
+	else
+		new_value = value;
+	plist_node_init(&dep->list, new_value);
+	dep->pm_qos_class = pm_qos_class;
+	update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE);
 }
 EXPORT_SYMBOL_GPL(pm_qos_add_request);
 
@@ -262,6 +260,11 @@ void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req,
 	if (!pm_qos_req) /*guard against callers passing in null */
 		return;
 
+	if (!pm_qos_request_active(pm_qos_req)) {
+		WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n");
+		return;
+	}
+
 	o = pm_qos_array[pm_qos_req->pm_qos_class];
 
 	if (new_value == PM_QOS_DEFAULT_VALUE)
@@ -290,9 +293,14 @@ void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req)
 		return;
 		/* silent return to keep pcm code cleaner */
 
+	if (!pm_qos_request_active(pm_qos_req)) {
+		WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n");
+		return;
+	}
+
 	o = pm_qos_array[pm_qos_req->pm_qos_class];
 	update_target(o, &pm_qos_req->list, 1, PM_QOS_DEFAULT_VALUE);
-	kfree(pm_qos_req);
+	memset(pm_qos_req, 0, sizeof(*pm_qos_req));
 }
 EXPORT_SYMBOL_GPL(pm_qos_remove_request);
 
@@ -340,8 +348,12 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp)
 
 	pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
 	if (pm_qos_class >= 0) {
-		filp->private_data = (void *) pm_qos_add_request(pm_qos_class,
-				PM_QOS_DEFAULT_VALUE);
+		/* Zeroed, so pm_qos_request_active() sees it as unused. */
+		struct pm_qos_request_list *req = kzalloc(sizeof(*req), GFP_KERNEL);
+		if (!req)
+			return -ENOMEM;
+		pm_qos_add_request(req, pm_qos_class, PM_QOS_DEFAULT_VALUE);
+		filp->private_data = req;
 
 		if (filp->private_data)
 			return 0;
@@ -353,8 +365,9 @@ static int pm_qos_power_release(struct inode *inode, struct file *filp)
 {
 	struct pm_qos_request_list *req;
 
-	req = (struct pm_qos_request_list *)filp->private_data;
+	req = filp->private_data;
 	pm_qos_remove_request(req);
+	kfree(req);
 
 	return 0;
 }
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 303ac04ff6e..a3b2a647924 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -451,13 +451,11 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream,
 	snd_pcm_timer_resolution_change(substream);
 	runtime->status->state = SNDRV_PCM_STATE_SETUP;
 
-	if (substream->latency_pm_qos_req) {
-		pm_qos_remove_request(substream->latency_pm_qos_req);
-		substream->latency_pm_qos_req = NULL;
-	}
+	if (pm_qos_request_active(&substream->latency_pm_qos_req))
+		pm_qos_remove_request(&substream->latency_pm_qos_req);
 	if ((usecs = period_to_usecs(runtime)) >= 0)
-		substream->latency_pm_qos_req = pm_qos_add_request(
-					PM_QOS_CPU_DMA_LATENCY, usecs);
+		pm_qos_add_request(&substream->latency_pm_qos_req,
+				   PM_QOS_CPU_DMA_LATENCY, usecs);
 	return 0;
  _error:
 	/* hardware might be unuseable from this time,
@@ -512,8 +510,7 @@ static int snd_pcm_hw_free(struct snd_pcm_substream *substream)
 	if (substream->ops->hw_free)
 		result = substream->ops->hw_free(substream);
 	runtime->status->state = SNDRV_PCM_STATE_OPEN;
-	pm_qos_remove_request(substream->latency_pm_qos_req);
-	substream->latency_pm_qos_req = NULL;
+	pm_qos_remove_request(&substream->latency_pm_qos_req);
 	return result;
 }
 
-- 
cgit v1.2.3-70-g09d2


From ce4410116c5debfb0e049f5db4b5cd6211e05b80 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 7 Jul 2010 23:43:45 +0200
Subject: PM / Suspend: Fix ordering of calls in suspend error paths

The ACPI suspend code calls suspend_nvs_free() at a wrong place,
which may lead to a memory leak if there's an error executing
acpi_pm_prepare(), because acpi_pm_finish() will not be called in
that case.  However, the root cause of this problem is the
apparently confusing ordering of calls in suspend error paths that
needs to be fixed.

In addition to that, fix a typo in a label name in suspend.c.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Len Brown <len.brown@intel.com>
---
 include/linux/suspend.h | 10 ++++++----
 kernel/power/suspend.c  |  9 ++++-----
 2 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index bf1bab7b059..4af270ec220 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -61,14 +61,15 @@ typedef int __bitwise suspend_state_t;
  *	before device drivers' late suspend callbacks are executed.  It returns
  *	0 on success or a negative error code otherwise, in which case the
  *	system cannot enter the desired sleep state (@prepare_late(), @enter(),
- *	@wake(), and @finish() will not be called in that case).
+ *	and @wake() will not be called in that case).
  *
  * @prepare_late: Finish preparing the platform for entering the system sleep
  *	state indicated by @begin().
  *	@prepare_late is called before disabling nonboot CPUs and after
  *	device drivers' late suspend callbacks have been executed.  It returns
  *	0 on success or a negative error code otherwise, in which case the
- *	system cannot enter the desired sleep state (@enter() and @wake()).
+ *	system cannot enter the desired sleep state (@enter() will not be
+ *	executed).
  *
  * @enter: Enter the system sleep state indicated by @begin() or represented by
  *	the argument if @begin() is not implemented.
@@ -81,14 +82,15 @@ typedef int __bitwise suspend_state_t;
  *	resume callbacks are executed.
  *	This callback is optional, but should be implemented by the platforms
  *	that implement @prepare_late().  If implemented, it is always called
- *	after @enter(), even if @enter() fails.
+ *	after @prepare_late and @enter(), even if one of them fails.
  *
  * @finish: Finish wake-up of the platform.
  *	@finish is called right prior to calling device drivers' regular suspend
  *	callbacks.
  *	This callback is optional, but should be implemented by the platforms
  *	that implement @prepare().  If implemented, it is always called after
- *	@enter() and @wake(), if implemented, even if any of them fails.
+ *	@enter() and @wake(), even if any of them fails.  It is executed after
+ *	a failing @prepare.
  *
  * @end: Called by the PM core right after resuming devices, to indicate to
  *	the platform that the system has returned to the working state or
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 5f8d09f9432..7335952ee47 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -136,19 +136,19 @@ static int suspend_enter(suspend_state_t state)
 	if (suspend_ops->prepare) {
 		error = suspend_ops->prepare();
 		if (error)
-			return error;
+			goto Platform_finish;
 	}
 
 	error = dpm_suspend_noirq(PMSG_SUSPEND);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down\n");
-		goto Platfrom_finish;
+		goto Platform_finish;
 	}
 
 	if (suspend_ops->prepare_late) {
 		error = suspend_ops->prepare_late();
 		if (error)
-			goto Power_up_devices;
+			goto Platform_wake;
 	}
 
 	if (suspend_test(TEST_PLATFORM))
@@ -180,10 +180,9 @@ static int suspend_enter(suspend_state_t state)
 	if (suspend_ops->wake)
 		suspend_ops->wake();
 
- Power_up_devices:
 	dpm_resume_noirq(PMSG_RESUME);
 
- Platfrom_finish:
+ Platform_finish:
 	if (suspend_ops->finish)
 		suspend_ops->finish();
 
-- 
cgit v1.2.3-70-g09d2


From 8d4b9d1bfef117862a2889dec4dac227068544c9 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 19 Jul 2010 02:01:06 +0200
Subject: PM / Runtime: Add runtime PM statistics (v3)

In order for PowerTOP to be able to report how well the new runtime PM is
working for the various drivers, the kernel needs to export some basic
statistics in sysfs.

This patch adds two sysfs files in the runtime PM domain that expose the
total time a device has been active, and the time a device has been
suspended.

With this, PowerTOP can compute the activity percentage:

Active %age = 100 * (delta active) / (delta active + delta suspended)

and present the information to the user.

I've written the PowerTOP code (slated for version 1.12) already, and the
output looks like this:

Runtime Device Power Management statistics
Active  Device name
 10.0%	06:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL8101E/RTL8102E PCI Express Fast Ethernet controller

[version 2: fix stat update bugs noticed by Alan Stern]
[version 3: rebase to -next and move the sysfs declaration]

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/runtime.c | 54 ++++++++++++++++++++++++++++++++++++++------
 drivers/base/power/sysfs.c   | 30 ++++++++++++++++++++++++
 include/linux/pm.h           |  6 +++++
 3 files changed, 83 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index b0ec0e9f27e..b78c401ffa7 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -123,6 +123,45 @@ int pm_runtime_idle(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(pm_runtime_idle);
 
+
+/**
+ * update_pm_runtime_accounting - Update the time accounting of power states
+ * @dev: Device to update the accounting for
+ *
+ * In order to be able to have time accounting of the various power states
+ * (as used by programs such as PowerTOP to show the effectiveness of runtime
+ * PM), we need to track the time spent in each state.
+ * update_pm_runtime_accounting must be called each time before the
+ * runtime_status field is updated, to account the time in the old state
+ * correctly.
+ */
+void update_pm_runtime_accounting(struct device *dev)
+{
+	unsigned long now = jiffies;
+	unsigned long delta;
+
+	/* Unsigned subtraction stays correct across a jiffies wrap-around. */
+	delta = now - dev->power.accounting_timestamp;
+
+	dev->power.accounting_timestamp = now;
+
+	/* Nothing is accumulated while disable_depth is non-zero. */
+	if (dev->power.disable_depth > 0)
+		return;
+
+	/* Every status other than RPM_SUSPENDED is charged as active. */
+	if (dev->power.runtime_status == RPM_SUSPENDED)
+		dev->power.suspended_jiffies += delta;
+	else
+		dev->power.active_jiffies += delta;
+}
+
+static void __update_runtime_status(struct device *dev, enum rpm_status status)
+{
+	update_pm_runtime_accounting(dev);
+	dev->power.runtime_status = status;
+}
+
 /**
  * __pm_runtime_suspend - Carry out run-time suspend of given device.
  * @dev: Device to suspend.
@@ -197,7 +236,7 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq)
 		goto repeat;
 	}
 
-	dev->power.runtime_status = RPM_SUSPENDING;
+	__update_runtime_status(dev, RPM_SUSPENDING);
 	dev->power.deferred_resume = false;
 
 	if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) {
@@ -228,7 +267,7 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq)
 	}
 
 	if (retval) {
-		dev->power.runtime_status = RPM_ACTIVE;
+		__update_runtime_status(dev, RPM_ACTIVE);
 		if (retval == -EAGAIN || retval == -EBUSY) {
 			if (dev->power.timer_expires == 0)
 				notify = true;
@@ -237,7 +276,7 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq)
 			pm_runtime_cancel_pending(dev);
 		}
 	} else {
-		dev->power.runtime_status = RPM_SUSPENDED;
+		__update_runtime_status(dev, RPM_SUSPENDED);
 		pm_runtime_deactivate_timer(dev);
 
 		if (dev->parent) {
@@ -381,7 +420,7 @@ int __pm_runtime_resume(struct device *dev, bool from_wq)
 		goto repeat;
 	}
 
-	dev->power.runtime_status = RPM_RESUMING;
+	__update_runtime_status(dev, RPM_RESUMING);
 
 	if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) {
 		spin_unlock_irq(&dev->power.lock);
@@ -411,10 +450,10 @@ int __pm_runtime_resume(struct device *dev, bool from_wq)
 	}
 
 	if (retval) {
-		dev->power.runtime_status = RPM_SUSPENDED;
+		__update_runtime_status(dev, RPM_SUSPENDED);
 		pm_runtime_cancel_pending(dev);
 	} else {
-		dev->power.runtime_status = RPM_ACTIVE;
+		__update_runtime_status(dev, RPM_ACTIVE);
 		if (parent)
 			atomic_inc(&parent->power.child_count);
 	}
@@ -848,7 +887,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
 	}
 
  out_set:
-	dev->power.runtime_status = status;
+	__update_runtime_status(dev, status);
 	dev->power.runtime_error = 0;
  out:
 	spin_unlock_irqrestore(&dev->power.lock, flags);
@@ -1077,6 +1116,7 @@ void pm_runtime_init(struct device *dev)
 	dev->power.request_pending = false;
 	dev->power.request = RPM_REQ_NONE;
 	dev->power.deferred_resume = false;
+	dev->power.accounting_timestamp = jiffies;
 	INIT_WORK(&dev->power.work, pm_runtime_work);
 
 	dev->power.timer_expires = 0;
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index 1eca50c8e7c..e56b4388fe6 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -6,6 +6,7 @@
 #include <linux/string.h>
 #include <linux/pm_runtime.h>
 #include <asm/atomic.h>
+#include <linux/jiffies.h>
 #include "power.h"
 
 /*
@@ -111,6 +112,33 @@ static ssize_t control_store(struct device * dev, struct device_attribute *attr,
 
 static DEVICE_ATTR(control, 0644, control_show, control_store);
 
+static ssize_t rtpm_active_time_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int ret;
+	spin_lock_irq(&dev->power.lock);
+	update_pm_runtime_accounting(dev); /* account time up to now */
+	ret = sprintf(buf, "%u\n", jiffies_to_msecs(dev->power.active_jiffies));
+	spin_unlock_irq(&dev->power.lock);
+	return ret;
+}
+
+static DEVICE_ATTR(runtime_active_time, 0444, rtpm_active_time_show, NULL);
+
+static ssize_t rtpm_suspended_time_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int ret;
+	spin_lock_irq(&dev->power.lock);
+	update_pm_runtime_accounting(dev); /* account time up to now */
+	ret = sprintf(buf, "%u\n",
+		jiffies_to_msecs(dev->power.suspended_jiffies));
+	spin_unlock_irq(&dev->power.lock);
+	return ret;
+}
+
+static DEVICE_ATTR(runtime_suspended_time, 0444, rtpm_suspended_time_show, NULL);
+
 static ssize_t rtpm_status_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
@@ -254,6 +282,8 @@ static struct attribute * power_attrs[] = {
 #ifdef CONFIG_PM_RUNTIME
 	&dev_attr_control.attr,
 	&dev_attr_runtime_status.attr,
+	&dev_attr_runtime_suspended_time.attr,
+	&dev_attr_runtime_active_time.attr,
 #endif
 	&dev_attr_wakeup.attr,
 #ifdef CONFIG_PM_SLEEP
diff --git a/include/linux/pm.h b/include/linux/pm.h
index b417fc46f3f..52e8c55ff31 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -477,9 +477,15 @@ struct dev_pm_info {
 	enum rpm_request	request;
 	enum rpm_status		runtime_status;
 	int			runtime_error;
+	unsigned long		active_jiffies;
+	unsigned long		suspended_jiffies;
+	unsigned long		accounting_timestamp;
 #endif
 };
 
+extern void update_pm_runtime_accounting(struct device *dev);
+
+
 /*
  * The PM_EVENT_ messages are also used by drivers implementing the legacy
  * suspend framework, based on the ->suspend() and ->resume() callbacks common
-- 
cgit v1.2.3-70-g09d2


From 4507a71507d4ff37e9a499c4241b7701ed1feab4 Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Sat, 17 Jul 2010 08:48:28 +0000
Subject: net: add driver hook for tx time stamping.

This patch adds a hook for transmit time stamps. The transmit hook
allows a software fallback for transmit time stamps, for MACs
lacking time stamping hardware. Using the hook will still require
adding an inline function call to each MAC driver.

Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ac74ee085d7..a1b0400c8d8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1947,6 +1947,27 @@ static inline ktime_t net_invalid_timestamp(void)
 extern void skb_tstamp_tx(struct sk_buff *orig_skb,
 			struct skb_shared_hwtstamps *hwtstamps);
 
+static inline void sw_tx_timestamp(struct sk_buff *skb)
+{
+	union skb_shared_tx *shtx = skb_tx(skb);
+	if (shtx->software && !shtx->in_progress)
+		skb_tstamp_tx(skb, NULL);
+}
+
+/**
+ * skb_tx_timestamp() - Driver hook for transmit timestamping
+ *
+ * Ethernet MAC Drivers should call this function in their hard_xmit()
+ * function as soon as possible after giving the sk_buff to the MAC
+ * hardware, but before freeing the sk_buff.
+ *
+ * @skb: A socket buffer.
+ */
+static inline void skb_tx_timestamp(struct sk_buff *skb)
+{
+	sw_tx_timestamp(skb);
+}
+
 extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
 extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
 
-- 
cgit v1.2.3-70-g09d2


From 28b041139e344ecd0f144d6205b004ae354cfa1e Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Sat, 17 Jul 2010 08:48:55 +0000
Subject: net: preserve ifreq parameter when calling generic phy_mii_ioctl().

The phy_mii_ioctl() function unnecessarily throws away the original ifreq.
We need access to the ifreq in order to support PHYs that can perform
hardware time stamping.

Two maverick drivers filter the ioctl commands passed to phy_mii_ioctl().
This is unnecessary since phylib will check the command in any case.

Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/arm/ixp4xx_eth.c           |  3 ++-
 drivers/net/au1000_eth.c               |  2 +-
 drivers/net/bcm63xx_enet.c             |  2 +-
 drivers/net/cpmac.c                    |  5 +----
 drivers/net/dnet.c                     |  2 +-
 drivers/net/ethoc.c                    |  2 +-
 drivers/net/fec.c                      |  2 +-
 drivers/net/fec_mpc52xx.c              |  2 +-
 drivers/net/fs_enet/fs_enet-main.c     |  3 +--
 drivers/net/gianfar.c                  |  2 +-
 drivers/net/macb.c                     |  2 +-
 drivers/net/mv643xx_eth.c              |  2 +-
 drivers/net/octeon/octeon_mgmt.c       |  2 +-
 drivers/net/phy/phy.c                  |  3 ++-
 drivers/net/sb1250-mac.c               |  2 +-
 drivers/net/sh_eth.c                   |  2 +-
 drivers/net/smsc911x.c                 |  2 +-
 drivers/net/smsc9420.c                 |  2 +-
 drivers/net/stmmac/stmmac_main.c       | 22 ++++++++--------------
 drivers/net/tc35815.c                  |  2 +-
 drivers/net/tg3.c                      |  2 +-
 drivers/net/ucc_geth.c                 |  2 +-
 drivers/staging/octeon/ethernet-mdio.c |  2 +-
 include/linux/phy.h                    |  2 +-
 net/dsa/slave.c                        |  3 +--
 25 files changed, 34 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/drivers/net/arm/ixp4xx_eth.c b/drivers/net/arm/ixp4xx_eth.c
index ee2f8425dbe..4f1cc7164ad 100644
--- a/drivers/net/arm/ixp4xx_eth.c
+++ b/drivers/net/arm/ixp4xx_eth.c
@@ -782,7 +782,8 @@ static int eth_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
 
 	if (!netif_running(dev))
 		return -EINVAL;
-	return phy_mii_ioctl(port->phydev, if_mii(req), cmd);
+
+	return phy_mii_ioctl(port->phydev, req, cmd);
 }
 
 /* ethtool support */
diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c
index ece6128bef1..386d4feec65 100644
--- a/drivers/net/au1000_eth.c
+++ b/drivers/net/au1000_eth.c
@@ -978,7 +978,7 @@ static int au1000_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!aup->phy_dev)
 		return -EINVAL; /* PHY not controllable */
 
-	return phy_mii_ioctl(aup->phy_dev, if_mii(rq), cmd);
+	return phy_mii_ioctl(aup->phy_dev, rq, cmd);
 }
 
 static const struct net_device_ops au1000_netdev_ops = {
diff --git a/drivers/net/bcm63xx_enet.c b/drivers/net/bcm63xx_enet.c
index faf5add894d..0d2c5da0893 100644
--- a/drivers/net/bcm63xx_enet.c
+++ b/drivers/net/bcm63xx_enet.c
@@ -1496,7 +1496,7 @@ static int bcm_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (priv->has_phy) {
 		if (!priv->phydev)
 			return -ENODEV;
-		return phy_mii_ioctl(priv->phydev, if_mii(rq), cmd);
+		return phy_mii_ioctl(priv->phydev, rq, cmd);
 	} else {
 		struct mii_if_info mii;
 
diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c
index 38de1a4f825..e1f6156b371 100644
--- a/drivers/net/cpmac.c
+++ b/drivers/net/cpmac.c
@@ -846,11 +846,8 @@ static int cpmac_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		return -EINVAL;
 	if (!priv->phy)
 		return -EINVAL;
-	if ((cmd == SIOCGMIIPHY) || (cmd == SIOCGMIIREG) ||
-	    (cmd == SIOCSMIIREG))
-		return phy_mii_ioctl(priv->phy, if_mii(ifr), cmd);
 
-	return -EOPNOTSUPP;
+	return phy_mii_ioctl(priv->phy, ifr, cmd);
 }
 
 static int cpmac_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
diff --git a/drivers/net/dnet.c b/drivers/net/dnet.c
index 8b0f50bbf3e..4ea7141f525 100644
--- a/drivers/net/dnet.c
+++ b/drivers/net/dnet.c
@@ -797,7 +797,7 @@ static int dnet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!phydev)
 		return -ENODEV;
 
-	return phy_mii_ioctl(phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(phydev, rq, cmd);
 }
 
 static void dnet_get_drvinfo(struct net_device *dev,
diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c
index 5bb6bb74c40..38c282e6565 100644
--- a/drivers/net/ethoc.c
+++ b/drivers/net/ethoc.c
@@ -730,7 +730,7 @@ static int ethoc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		phy = priv->phy;
 	}
 
-	return phy_mii_ioctl(phy, mdio, cmd);
+	return phy_mii_ioctl(phy, ifr, cmd);
 }
 
 static int ethoc_config(struct net_device *dev, struct ifmap *map)
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 937f1b4a348..391a553a3ad 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -825,7 +825,7 @@ static int fec_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!phydev)
 		return -ENODEV;
 
-	return phy_mii_ioctl(phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(phydev, rq, cmd);
 }
 
 static void fec_enet_free_buffers(struct net_device *dev)
diff --git a/drivers/net/fec_mpc52xx.c b/drivers/net/fec_mpc52xx.c
index 5f8346369b8..d1a5b17b2a9 100644
--- a/drivers/net/fec_mpc52xx.c
+++ b/drivers/net/fec_mpc52xx.c
@@ -826,7 +826,7 @@ static int mpc52xx_fec_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!priv->phydev)
 		return -ENOTSUPP;
 
-	return phy_mii_ioctl(priv->phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(priv->phydev, rq, cmd);
 }
 
 static const struct net_device_ops mpc52xx_fec_netdev_ops = {
diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c
index 309a0eaddd8..f08cff9020b 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -963,12 +963,11 @@ static const struct ethtool_ops fs_ethtool_ops = {
 static int fs_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
 	struct fs_enet_private *fep = netdev_priv(dev);
-	struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&rq->ifr_data;
 
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	return phy_mii_ioctl(fep->phydev, mii, cmd);
+	return phy_mii_ioctl(fep->phydev, rq, cmd);
 }
 
 extern int fs_mii_connect(struct net_device *dev);
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 746a776a165..27f02970d89 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -847,7 +847,7 @@ static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!priv->phydev)
 		return -ENODEV;
 
-	return phy_mii_ioctl(priv->phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(priv->phydev, rq, cmd);
 }
 
 static unsigned int reverse_bitmap(unsigned int bit_map, unsigned int max_qs)
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index 40797fbdca9..ff2f158ab0b 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -1082,7 +1082,7 @@ static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!phydev)
 		return -ENODEV;
 
-	return phy_mii_ioctl(phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(phydev, rq, cmd);
 }
 
 static const struct net_device_ops macb_netdev_ops = {
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index af075af20e0..2fcdb1e1b99 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -2461,7 +2461,7 @@ static int mv643xx_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	struct mv643xx_eth_private *mp = netdev_priv(dev);
 
 	if (mp->phy != NULL)
-		return phy_mii_ioctl(mp->phy, if_mii(ifr), cmd);
+		return phy_mii_ioctl(mp->phy, ifr, cmd);
 
 	return -EOPNOTSUPP;
 }
diff --git a/drivers/net/octeon/octeon_mgmt.c b/drivers/net/octeon/octeon_mgmt.c
index f4a0f08e14e..b264f0f4560 100644
--- a/drivers/net/octeon/octeon_mgmt.c
+++ b/drivers/net/octeon/octeon_mgmt.c
@@ -620,7 +620,7 @@ static int octeon_mgmt_ioctl(struct net_device *netdev,
 	if (!p->phydev)
 		return -EINVAL;
 
-	return phy_mii_ioctl(p->phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(p->phydev, rq, cmd);
 }
 
 static void octeon_mgmt_adjust_link(struct net_device *netdev)
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 64be4664cca..bd88d818f08 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -309,8 +309,9 @@ EXPORT_SYMBOL(phy_ethtool_gset);
  * current state.  Use at own risk.
  */
 int phy_mii_ioctl(struct phy_device *phydev,
-		struct mii_ioctl_data *mii_data, int cmd)
+		struct ifreq *ifr, int cmd)
 {
+	struct mii_ioctl_data *mii_data = if_mii(ifr);
 	u16 val = mii_data->val_in;
 
 	switch (cmd) {
diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c
index 79eee306208..8e6bd45b9f3 100644
--- a/drivers/net/sb1250-mac.c
+++ b/drivers/net/sb1250-mac.c
@@ -2532,7 +2532,7 @@ static int sbmac_mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!netif_running(dev) || !sc->phy_dev)
 		return -EINVAL;
 
-	return phy_mii_ioctl(sc->phy_dev, if_mii(rq), cmd);
+	return phy_mii_ioctl(sc->phy_dev, rq, cmd);
 }
 
 static int sbmac_close(struct net_device *dev)
diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c
index 7ac814d932b..32f2deaa38b 100644
--- a/drivers/net/sh_eth.c
+++ b/drivers/net/sh_eth.c
@@ -1284,7 +1284,7 @@ static int sh_eth_do_ioctl(struct net_device *ndev, struct ifreq *rq,
 	if (!phydev)
 		return -ENODEV;
 
-	return phy_mii_ioctl(phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(phydev, rq, cmd);
 }
 
 #if defined(SH_ETH_HAS_TSU)
diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index cc559741b0f..56dc2ff75ee 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -1538,7 +1538,7 @@ static int smsc911x_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	if (!netif_running(dev) || !pdata->phy_dev)
 		return -EINVAL;
 
-	return phy_mii_ioctl(pdata->phy_dev, if_mii(ifr), cmd);
+	return phy_mii_ioctl(pdata->phy_dev, ifr, cmd);
 }
 
 static int
diff --git a/drivers/net/smsc9420.c b/drivers/net/smsc9420.c
index 6cdee6a15f9..b09ee1c319e 100644
--- a/drivers/net/smsc9420.c
+++ b/drivers/net/smsc9420.c
@@ -245,7 +245,7 @@ static int smsc9420_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	if (!netif_running(dev) || !pd->phy_dev)
 		return -EINVAL;
 
-	return phy_mii_ioctl(pd->phy_dev, if_mii(ifr), cmd);
+	return phy_mii_ioctl(pd->phy_dev, ifr, cmd);
 }
 
 static int smsc9420_ethtool_get_settings(struct net_device *dev,
diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c
index a31d580f306..acf06168694 100644
--- a/drivers/net/stmmac/stmmac_main.c
+++ b/drivers/net/stmmac/stmmac_main.c
@@ -1437,24 +1437,18 @@ static void stmmac_poll_controller(struct net_device *dev)
 static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	int ret = -EOPNOTSUPP;
+	int ret;
 
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	switch (cmd) {
-	case SIOCGMIIPHY:
-	case SIOCGMIIREG:
-	case SIOCSMIIREG:
-		if (!priv->phydev)
-			return -EINVAL;
-
-		spin_lock(&priv->lock);
-		ret = phy_mii_ioctl(priv->phydev, if_mii(rq), cmd);
-		spin_unlock(&priv->lock);
-	default:
-		break;
-	}
+	if (!priv->phydev)
+		return -EINVAL;
+
+	spin_lock(&priv->lock);
+	ret = phy_mii_ioctl(priv->phydev, rq, cmd);
+	spin_unlock(&priv->lock);
+
 	return ret;
 }
 
diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c
index be08b75dbc1..99e423a5b9f 100644
--- a/drivers/net/tc35815.c
+++ b/drivers/net/tc35815.c
@@ -2066,7 +2066,7 @@ static int tc35815_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 		return -EINVAL;
 	if (!lp->phy_dev)
 		return -ENODEV;
-	return phy_mii_ioctl(lp->phy_dev, if_mii(rq), cmd);
+	return phy_mii_ioctl(lp->phy_dev, rq, cmd);
 }
 
 static void tc35815_chip_reset(struct net_device *dev)
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 5769e1507d2..b26a5778293 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -10932,7 +10932,7 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		if (!(tp->tg3_flags3 & TG3_FLG3_PHY_CONNECTED))
 			return -EAGAIN;
 		phydev = tp->mdio_bus->phy_map[TG3_PHY_MII_ADDR];
-		return phy_mii_ioctl(phydev, data, cmd);
+		return phy_mii_ioctl(phydev, ifr, cmd);
 	}
 
 	switch (cmd) {
diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
index dc32a62e611..e17dd743091 100644
--- a/drivers/net/ucc_geth.c
+++ b/drivers/net/ucc_geth.c
@@ -3714,7 +3714,7 @@ static int ucc_geth_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!ugeth->phydev)
 		return -ENODEV;
 
-	return phy_mii_ioctl(ugeth->phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(ugeth->phydev, rq, cmd);
 }
 
 static const struct net_device_ops ucc_geth_netdev_ops = {
diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c
index 7e0be8d00dc..10a82ef3021 100644
--- a/drivers/staging/octeon/ethernet-mdio.c
+++ b/drivers/staging/octeon/ethernet-mdio.c
@@ -113,7 +113,7 @@ int cvm_oct_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!priv->phydev)
 		return -EINVAL;
 
-	return phy_mii_ioctl(priv->phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(priv->phydev, rq, cmd);
 }
 
 static void cvm_oct_adjust_link(struct net_device *dev)
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 987e111f7b1..d63736a8400 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -498,7 +498,7 @@ void phy_stop_machine(struct phy_device *phydev);
 int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
 int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd);
 int phy_mii_ioctl(struct phy_device *phydev,
-		struct mii_ioctl_data *mii_data, int cmd);
+		struct ifreq *ifr, int cmd);
 int phy_start_interrupts(struct phy_device *phydev);
 void phy_print_status(struct phy_device *phydev);
 struct phy_device* phy_device_create(struct mii_bus *bus, int addr, int phy_id);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 8fdca56bb08..64ca2a6fa0d 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -164,10 +164,9 @@ out:
 static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct mii_ioctl_data *mii_data = if_mii(ifr);
 
 	if (p->phy != NULL)
-		return phy_mii_ioctl(p->phy, mii_data, cmd);
+		return phy_mii_ioctl(p->phy, ifr, cmd);
 
 	return -EOPNOTSUPP;
 }
-- 
cgit v1.2.3-70-g09d2


From 15f0127d1d189fda3294b7823e3e654afca54055 Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Sat, 17 Jul 2010 08:49:17 +0000
Subject: net: added a BPF to help drivers detect PTP packets.

Certain kinds of hardware time stamping units in both MACs and PHYs have
the limitation that they can only time stamp PTP packets. Drivers for such
hardware are left with the task of correctly matching skbs to time stamps.
This patch adds a BPF that drivers can use to classify PTP packets when
needed.

Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptp_classify.h | 126 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 include/linux/ptp_classify.h

(limited to 'include')

diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
new file mode 100644
index 00000000000..943a85ab002
--- /dev/null
+++ b/include/linux/ptp_classify.h
@@ -0,0 +1,126 @@
+/*
+ * PTP 1588 support
+ *
+ * This file implements a BPF that recognizes PTP event messages.
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _PTP_CLASSIFY_H_
+#define _PTP_CLASSIFY_H_
+
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/filter.h>
+#ifdef __KERNEL__
+#include <linux/in.h>
+#else
+#include <netinet/in.h>
+#endif
+
+#define PTP_CLASS_NONE  0x00 /* not a PTP event message */
+#define PTP_CLASS_V1    0x01 /* protocol version 1 */
+#define PTP_CLASS_V2    0x02 /* protocol version 2 */
+#define PTP_CLASS_VMASK 0x0f /* max protocol version is 15 */
+#define PTP_CLASS_IPV4  0x10 /* event in an IPV4 UDP packet */
+#define PTP_CLASS_IPV6  0x20 /* event in an IPV6 UDP packet */
+#define PTP_CLASS_L2    0x30 /* event in a L2 packet */
+#define PTP_CLASS_VLAN  0x40 /* event in a VLAN tagged L2 packet */
+#define PTP_CLASS_PMASK 0xf0 /* mask for the packet type field */
+
+#define PTP_CLASS_V1_IPV4 (PTP_CLASS_V1 | PTP_CLASS_IPV4)
+#define PTP_CLASS_V1_IPV6 (PTP_CLASS_V1 | PTP_CLASS_IPV6) /*probably DNE*/
+#define PTP_CLASS_V2_IPV4 (PTP_CLASS_V2 | PTP_CLASS_IPV4)
+#define PTP_CLASS_V2_IPV6 (PTP_CLASS_V2 | PTP_CLASS_IPV6)
+#define PTP_CLASS_V2_L2   (PTP_CLASS_V2 | PTP_CLASS_L2)
+#define PTP_CLASS_V2_VLAN (PTP_CLASS_V2 | PTP_CLASS_VLAN)
+
+#define PTP_EV_PORT 319
+
+#define OFF_ETYPE	12
+#define OFF_IHL		14
+#define OFF_FRAG	20
+#define OFF_PROTO4	23
+#define OFF_NEXT	6
+#define OFF_UDP_DST	2
+
+#define IP6_HLEN	40
+#define UDP_HLEN	8
+
+#define RELOFF_DST4	(ETH_HLEN + OFF_UDP_DST)
+#define OFF_DST6	(ETH_HLEN + IP6_HLEN + OFF_UDP_DST)
+#define OFF_PTP6	(ETH_HLEN + IP6_HLEN + UDP_HLEN)
+
+#define OP_AND	(BPF_ALU | BPF_AND  | BPF_K)
+#define OP_JEQ	(BPF_JMP | BPF_JEQ  | BPF_K)
+#define OP_JSET	(BPF_JMP | BPF_JSET | BPF_K)
+#define OP_LDB	(BPF_LD  | BPF_B    | BPF_ABS)
+#define OP_LDH	(BPF_LD  | BPF_H    | BPF_ABS)
+#define OP_LDHI	(BPF_LD  | BPF_H    | BPF_IND)
+#define OP_LDX	(BPF_LDX | BPF_B    | BPF_MSH)
+#define OP_OR	(BPF_ALU | BPF_OR   | BPF_K)
+#define OP_RETA	(BPF_RET | BPF_A)
+#define OP_RETK	(BPF_RET | BPF_K)
+
+static inline int ptp_filter_init(struct sock_filter *f, int len)
+{
+	if (OP_LDH == f[0].code)
+		return sk_chk_filter(f, len);
+	else
+		return 0;
+}
+
+#define PTP_FILTER \
+	{OP_LDH,	0,   0, OFF_ETYPE		}, /*              */ \
+	{OP_JEQ,	0,  12, ETH_P_IP		}, /* f goto L20   */ \
+	{OP_LDB,	0,   0, OFF_PROTO4		}, /*              */ \
+	{OP_JEQ,	0,   9, IPPROTO_UDP		}, /* f goto L10   */ \
+	{OP_LDH,	0,   0, OFF_FRAG		}, /*              */ \
+	{OP_JSET,	7,   0, 0x1fff			}, /* t goto L11   */ \
+	{OP_LDX,	0,   0, OFF_IHL			}, /*              */ \
+	{OP_LDHI,	0,   0, RELOFF_DST4		}, /*              */ \
+	{OP_JEQ,	0,   4, PTP_EV_PORT		}, /* f goto L12   */ \
+	{OP_LDHI,	0,   0, ETH_HLEN + UDP_HLEN	}, /*              */ \
+	{OP_AND,	0,   0, PTP_CLASS_VMASK		}, /*              */ \
+	{OP_OR,		0,   0, PTP_CLASS_IPV4		}, /*              */ \
+	{OP_RETA,	0,   0, 0			}, /*              */ \
+/*L1x*/	{OP_RETK,	0,   0, PTP_CLASS_NONE		}, /*              */ \
+/*L20*/	{OP_JEQ,	0,   9, ETH_P_IPV6		}, /* f goto L40   */ \
+	{OP_LDB,	0,   0, ETH_HLEN + OFF_NEXT	}, /*              */ \
+	{OP_JEQ,	0,   6, IPPROTO_UDP		}, /* f goto L30   */ \
+	{OP_LDH,	0,   0, OFF_DST6		}, /*              */ \
+	{OP_JEQ,	0,   4, PTP_EV_PORT		}, /* f goto L31   */ \
+	{OP_LDH,	0,   0, OFF_PTP6		}, /*              */ \
+	{OP_AND,	0,   0, PTP_CLASS_VMASK		}, /*              */ \
+	{OP_OR,		0,   0, PTP_CLASS_IPV6		}, /*              */ \
+	{OP_RETA,	0,   0, 0			}, /*              */ \
+/*L3x*/	{OP_RETK,	0,   0, PTP_CLASS_NONE		}, /*              */ \
+/*L40*/	{OP_JEQ,	0,   6, ETH_P_8021Q		}, /* f goto L50   */ \
+	{OP_LDH,	0,   0, OFF_ETYPE + 4		}, /*              */ \
+	{OP_JEQ,	0,   9, ETH_P_1588		}, /* f goto L60   */ \
+	{OP_LDH,	0,   0, ETH_HLEN + VLAN_HLEN	}, /*              */ \
+	{OP_AND,	0,   0, PTP_CLASS_VMASK		}, /*              */ \
+	{OP_OR,		0,   0, PTP_CLASS_VLAN		}, /*              */ \
+	{OP_RETA,	0,   0, 0			}, /*              */ \
+/*L50*/	{OP_JEQ,	0,   4, ETH_P_1588		}, /* f goto L61   */ \
+	{OP_LDH,	0,   0, ETH_HLEN		}, /*              */ \
+	{OP_AND,	0,   0, PTP_CLASS_VMASK		}, /*              */ \
+	{OP_OR,		0,   0, PTP_CLASS_L2		}, /*              */ \
+	{OP_RETA,	0,   0, 0			}, /*              */ \
+/*L6x*/	{OP_RETK,	0,   0, PTP_CLASS_NONE		},
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From c1f19b51d1d87f3e3bb7e6648f43f7d57ed2da6b Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Sat, 17 Jul 2010 08:49:36 +0000
Subject: net: support time stamping in phy devices.

This patch adds a new networking option to allow hardware time stamps
from PHY devices. When enabled, likely candidates among incoming and
outgoing network packets are offered to the PHY driver for possible
time stamping. When accepted by the PHY driver, incoming packets are
deferred for later delivery by the driver.

The patch also adds phylib driver methods for the SIOCSHWTSTAMP ioctl
and callbacks for transmit and receive time stamping. Drivers may
optionally implement these functions.

Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c        |   5 ++
 drivers/net/phy/phy_device.c |   2 +
 include/linux/netdevice.h    |   4 ++
 include/linux/phy.h          |  22 ++++++++
 include/linux/skbuff.h       |  31 +++++++++++
 net/Kconfig                  |  10 ++++
 net/core/Makefile            |   2 +-
 net/core/dev.c               |   3 ++
 net/core/timestamping.c      | 126 +++++++++++++++++++++++++++++++++++++++++++
 net/socket.c                 |   4 ++
 10 files changed, 208 insertions(+), 1 deletion(-)
 create mode 100644 net/core/timestamping.c

(limited to 'include')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index bd88d818f08..5130db8f5c4 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -361,6 +361,11 @@ int phy_mii_ioctl(struct phy_device *phydev,
 		}
 		break;
 
+	case SIOCSHWTSTAMP:
+		if (phydev->drv->hwtstamp)
+			return phydev->drv->hwtstamp(phydev, ifr);
+		/* fall through */
+
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 1a99bb24410..c0761197c07 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -460,6 +460,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 	}
 
 	phydev->attached_dev = dev;
+	dev->phydev = phydev;
 
 	phydev->dev_flags = flags;
 
@@ -513,6 +514,7 @@ EXPORT_SYMBOL(phy_attach);
  */
 void phy_detach(struct phy_device *phydev)
 {
+	phydev->attached_dev->phydev = NULL;
 	phydev->attached_dev = NULL;
 
 	/* If the device had no specific driver before (i.e. - it
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c4fedf00054..fdc3f299223 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -54,6 +54,7 @@
 
 struct vlan_group;
 struct netpoll_info;
+struct phy_device;
 /* 802.11 specific */
 struct wireless_dev;
 					/* source back-compat hooks */
@@ -1065,6 +1066,9 @@ struct net_device {
 #endif
 	/* n-tuple filter list attached to this device */
 	struct ethtool_rx_ntuple_list ethtool_ntuple_list;
+
+	/* phy device may attach itself for hardware timestamping */
+	struct phy_device *phydev;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index d63736a8400..6b0a782c622 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -234,6 +234,8 @@ enum phy_state {
 	PHY_RESUMING
 };
 
+struct sk_buff;
+
 /* phy_device: An instance of a PHY
  *
  * drv: Pointer to the driver for this PHY instance
@@ -402,6 +404,26 @@ struct phy_driver {
 	/* Clears up any memory if needed */
 	void (*remove)(struct phy_device *phydev);
 
+	/* Handles SIOCSHWTSTAMP ioctl for hardware time stamping. */
+	int  (*hwtstamp)(struct phy_device *phydev, struct ifreq *ifr);
+
+	/*
+	 * Requests a Rx timestamp for 'skb'. If the skb is accepted,
+	 * the phy driver promises to deliver it using netif_rx() as
+	 * soon as a timestamp becomes available. One of the
+	 * PTP_CLASS_ values is passed in 'type'. The function must
+	 * return true if the skb is accepted for delivery.
+	 */
+	bool (*rxtstamp)(struct phy_device *dev, struct sk_buff *skb, int type);
+
+	/*
+	 * Requests a Tx timestamp for 'skb'. The phy driver promises
+	 * to deliver it to the socket's error queue as soon as a
+	 * timestamp becomes available. One of the PTP_CLASS_ values
+	 * is passed in 'type'.
+	 */
+	void (*txtstamp)(struct phy_device *dev, struct sk_buff *skb, int type);
+
 	struct device_driver driver;
 };
 #define to_phy_driver(d) container_of(d, struct phy_driver, driver)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a1b0400c8d8..f5aa87e1e0c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1933,6 +1933,36 @@ static inline ktime_t net_invalid_timestamp(void)
 	return ktime_set(0, 0);
 }
 
+extern void skb_timestamping_init(void);
+
+#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
+
+extern void skb_clone_tx_timestamp(struct sk_buff *skb);
+extern bool skb_defer_rx_timestamp(struct sk_buff *skb);
+
+#else /* CONFIG_NETWORK_PHY_TIMESTAMPING */
+
+static inline void skb_clone_tx_timestamp(struct sk_buff *skb)
+{
+}
+
+static inline bool skb_defer_rx_timestamp(struct sk_buff *skb)
+{
+	return false;
+}
+
+#endif /* !CONFIG_NETWORK_PHY_TIMESTAMPING */
+
+/**
+ * skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps
+ *
+ * @skb: clone of the original outgoing packet
+ * @hwtstamps: hardware time stamps
+ *
+ */
+void skb_complete_tx_timestamp(struct sk_buff *skb,
+			       struct skb_shared_hwtstamps *hwtstamps);
+
 /**
  * skb_tstamp_tx - queue clone of skb with send time stamps
  * @orig_skb:	the original outgoing packet
@@ -1965,6 +1995,7 @@ static inline void sw_tx_timestamp(struct sk_buff *skb)
  */
 static inline void skb_tx_timestamp(struct sk_buff *skb)
 {
+	skb_clone_tx_timestamp(skb);
 	sw_tx_timestamp(skb);
 }
 
diff --git a/net/Kconfig b/net/Kconfig
index 0d68b40fc0e..b3250944cde 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -86,6 +86,16 @@ config NETWORK_SECMARK
 	  to nfmark, but designated for security purposes.
 	  If you are unsure how to answer this question, answer N.
 
+config NETWORK_PHY_TIMESTAMPING
+	bool "Timestamping in PHY devices"
+	depends on EXPERIMENTAL
+	help
+	  This allows timestamping of network packets by PHYs with
+	  hardware timestamping capabilities. This option adds some
+	  overhead in the transmit and receive paths.
+
+	  If you are unsure how to answer this question, answer N.
+
 menuconfig NETFILTER
 	bool "Network packet filtering framework (Netfilter)"
 	---help---
diff --git a/net/core/Makefile b/net/core/Makefile
index 51c3eec850e..8a04dd22cf7 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -18,4 +18,4 @@ obj-$(CONFIG_NET_DMA) += user_dma.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
 obj-$(CONFIG_TRACEPOINTS) += net-traces.o
 obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
-
+obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
diff --git a/net/core/dev.c b/net/core/dev.c
index e2b9fa2c917..1c002c7ef5d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2957,6 +2957,9 @@ int netif_receive_skb(struct sk_buff *skb)
 	if (netdev_tstamp_prequeue)
 		net_timestamp_check(skb);
 
+	if (skb_defer_rx_timestamp(skb))
+		return NET_RX_SUCCESS;
+
 #ifdef CONFIG_RPS
 	{
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
new file mode 100644
index 00000000000..0ae6c22da85
--- /dev/null
+++ b/net/core/timestamping.c
@@ -0,0 +1,126 @@
+/*
+ * PTP 1588 clock support - support for timestamping in PHY devices
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/errqueue.h>
+#include <linux/phy.h>
+#include <linux/ptp_classify.h>
+#include <linux/skbuff.h>
+
+static struct sock_filter ptp_filter[] = {
+	PTP_FILTER
+};
+
+static unsigned int classify(struct sk_buff *skb)
+{
+	if (likely(skb->dev &&
+		   skb->dev->phydev &&
+		   skb->dev->phydev->drv))
+		return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter));
+	else
+		return PTP_CLASS_NONE;
+}
+
+void skb_clone_tx_timestamp(struct sk_buff *skb)
+{
+	struct phy_device *phydev;
+	struct sk_buff *clone;
+	struct sock *sk = skb->sk;
+	unsigned int type;
+
+	if (!sk)
+		return;
+
+	type = classify(skb);
+
+	switch (type) {
+	case PTP_CLASS_V1_IPV4:
+	case PTP_CLASS_V1_IPV6:
+	case PTP_CLASS_V2_IPV4:
+	case PTP_CLASS_V2_IPV6:
+	case PTP_CLASS_V2_L2:
+	case PTP_CLASS_V2_VLAN:
+		phydev = skb->dev->phydev;
+		if (likely(phydev->drv->txtstamp)) {
+			clone = skb_clone(skb, GFP_ATOMIC);
+			if (!clone)
+				return;
+			clone->sk = sk;
+			phydev->drv->txtstamp(phydev, clone, type);
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+void skb_complete_tx_timestamp(struct sk_buff *skb,
+			       struct skb_shared_hwtstamps *hwtstamps)
+{
+	struct sock *sk = skb->sk;
+	struct sock_exterr_skb *serr;
+	int err;
+
+	if (!hwtstamps)
+		return;
+
+	*skb_hwtstamps(skb) = *hwtstamps;
+	serr = SKB_EXT_ERR(skb);
+	memset(serr, 0, sizeof(*serr));
+	serr->ee.ee_errno = ENOMSG;
+	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+	skb->sk = NULL;
+	err = sock_queue_err_skb(sk, skb);
+	if (err)
+		kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
+
+bool skb_defer_rx_timestamp(struct sk_buff *skb)
+{
+	struct phy_device *phydev;
+	unsigned int type;
+
+	skb_push(skb, ETH_HLEN);
+
+	type = classify(skb);
+
+	skb_pull(skb, ETH_HLEN);
+
+	switch (type) {
+	case PTP_CLASS_V1_IPV4:
+	case PTP_CLASS_V1_IPV6:
+	case PTP_CLASS_V2_IPV4:
+	case PTP_CLASS_V2_IPV6:
+	case PTP_CLASS_V2_L2:
+	case PTP_CLASS_V2_VLAN:
+		phydev = skb->dev->phydev;
+		if (likely(phydev->drv->rxtstamp))
+			return phydev->drv->rxtstamp(phydev, skb, type);
+		break;
+	default:
+		break;
+	}
+
+	return false;
+}
+
+void __init skb_timestamping_init(void)
+{
+	BUG_ON(sk_chk_filter(ptp_filter, ARRAY_SIZE(ptp_filter)));
+}
diff --git a/net/socket.c b/net/socket.c
index 6fe484122a4..2270b941bcc 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2394,6 +2394,10 @@ static int __init sock_init(void)
 	netfilter_init();
 #endif
 
+#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
+	skb_timestamping_init();
+#endif
+
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 7f8275d0d660c146de6ee3017e1e2e594c49e820 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 19 Jul 2010 14:56:17 +1000
Subject: mm: add context argument to shrinker callback

The current shrinker implementation requires the registered callback
to have global state to work from. This makes it difficult to shrink
caches that are not global (e.g. per-filesystem caches). Pass the shrinker
structure to the callback so that users can embed the shrinker structure
in the context the shrinker needs to operate on and get back to it in the
callback via container_of().

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/kvm/mmu.c              | 2 +-
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 fs/dcache.c                     | 2 +-
 fs/gfs2/glock.c                 | 2 +-
 fs/gfs2/quota.c                 | 2 +-
 fs/gfs2/quota.h                 | 2 +-
 fs/inode.c                      | 2 +-
 fs/mbcache.c                    | 5 +++--
 fs/nfs/dir.c                    | 2 +-
 fs/nfs/internal.h               | 3 ++-
 fs/quota/dquot.c                | 2 +-
 fs/ubifs/shrinker.c             | 2 +-
 fs/ubifs/ubifs.h                | 2 +-
 fs/xfs/linux-2.6/xfs_buf.c      | 5 +++--
 fs/xfs/linux-2.6/xfs_sync.c     | 1 +
 fs/xfs/quota/xfs_qm.c           | 7 +++++--
 include/linux/mm.h              | 2 +-
 mm/vmscan.c                     | 8 +++++---
 18 files changed, 31 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3699613e883..b1ed0a1a591 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2926,7 +2926,7 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm)
 	return kvm_mmu_zap_page(kvm, page) + 1;
 }
 
-static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
+static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
 	struct kvm *kvm;
 	struct kvm *kvm_freed = NULL;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8757ecf6e96..e7018708cc3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4978,7 +4978,7 @@ i915_gpu_is_active(struct drm_device *dev)
 }
 
 static int
-i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask)
+i915_gem_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
 	drm_i915_private_t *dev_priv, *next_dev;
 	struct drm_i915_gem_object *obj_priv, *next_obj;
diff --git a/fs/dcache.c b/fs/dcache.c
index c8c78ba0782..86d4db15473 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -896,7 +896,7 @@ EXPORT_SYMBOL(shrink_dcache_parent);
  *
  * In this case we return -1 to tell the caller that we baled.
  */
-static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index dbab3fdc258..0898f3ec821 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1358,7 +1358,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
 }
 
 
-static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
+static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
 	struct gfs2_glock *gl;
 	int may_demote;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b256d6f2428..8f02d3db8f4 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -77,7 +77,7 @@ static LIST_HEAD(qd_lru_list);
 static atomic_t qd_lru_count = ATOMIC_INIT(0);
 static DEFINE_SPINLOCK(qd_lru_lock);
 
-int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
+int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
 	struct gfs2_quota_data *qd;
 	struct gfs2_sbd *sdp;
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 195f60c8bd1..e7d236ca48b 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -51,7 +51,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
 	return ret;
 }
 
-extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask);
+extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
 extern const struct quotactl_ops gfs2_quotactl_ops;
 
 #endif /* __QUOTA_DOT_H__ */
diff --git a/fs/inode.c b/fs/inode.c
index 2bee20ae3d6..722860b323a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -512,7 +512,7 @@ static void prune_icache(int nr_to_scan)
  * This function is passed the number of inodes to scan, and it returns the
  * total number of remaining possibly-reclaimable inodes.
  */
-static int shrink_icache_memory(int nr, gfp_t gfp_mask)
+static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		/*
diff --git a/fs/mbcache.c b/fs/mbcache.c
index ec88ff3d04a..e28f21b9534 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -115,7 +115,7 @@ mb_cache_indexes(struct mb_cache *cache)
  * What the mbcache registers as to get shrunk dynamically.
  */
 
-static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask);
+static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
 
 static struct shrinker mb_cache_shrinker = {
 	.shrink = mb_cache_shrink_fn,
@@ -191,13 +191,14 @@ forget:
  * This function is called by the kernel memory management when memory
  * gets low.
  *
+ * @shrink: (ignored)
  * @nr_to_scan: Number of objects to scan
  * @gfp_mask: (ignored)
  *
  * Returns the number of objects which are present in the cache.
  */
 static int
-mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask)
+mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
 	LIST_HEAD(free_list);
 	struct list_head *l, *ltmp;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 782b431ef91..e60416d3f81 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1710,7 +1710,7 @@ static void nfs_access_free_list(struct list_head *head)
 	}
 }
 
-int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
 	LIST_HEAD(head);
 	struct nfs_inode *nfsi;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d8bd619e386..e70f44b9b3f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -205,7 +205,8 @@ extern struct rpc_procinfo nfs4_procedures[];
 void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
 
 /* dir.c */
-extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
+extern int nfs_access_cache_shrinker(struct shrinker *shrink,
+					int nr_to_scan, gfp_t gfp_mask);
 
 /* inode.c */
 extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 12c233da1b6..437d2ca2de9 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -676,7 +676,7 @@ static void prune_dqcache(int count)
  * This is called from kswapd when we think we need some
  * more memory
  */
-static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		spin_lock(&dq_list_lock);
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 02feb59cefc..0b201114a5a 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -277,7 +277,7 @@ static int kick_a_thread(void)
 	return 0;
 }
 
-int ubifs_shrinker(int nr, gfp_t gfp_mask)
+int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
 	int freed, contention = 0;
 	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 2eef553d50c..04310878f44 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1575,7 +1575,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
 int ubifs_tnc_end_commit(struct ubifs_info *c);
 
 /* shrinker.c */
-int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask);
+int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
 
 /* commit.c */
 int ubifs_bg_thread(void *info);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 649ade8ef59..2ee3f7a6016 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -45,7 +45,7 @@
 
 static kmem_zone_t *xfs_buf_zone;
 STATIC int xfsbufd(void *);
-STATIC int xfsbufd_wakeup(int, gfp_t);
+STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
 STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
 static struct shrinker xfs_buf_shake = {
 	.shrink = xfsbufd_wakeup,
@@ -340,7 +340,7 @@ _xfs_buf_lookup_pages(
 					__func__, gfp_mask);
 
 			XFS_STATS_INC(xb_page_retries);
-			xfsbufd_wakeup(0, gfp_mask);
+			xfsbufd_wakeup(NULL, 0, gfp_mask);
 			congestion_wait(BLK_RW_ASYNC, HZ/50);
 			goto retry;
 		}
@@ -1762,6 +1762,7 @@ xfs_buf_runall_queues(
 
 STATIC int
 xfsbufd_wakeup(
+	struct shrinker		*shrink,
 	int			priority,
 	gfp_t			mask)
 {
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index ef7f0218bcc..be375827af9 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -838,6 +838,7 @@ static struct rw_semaphore xfs_mount_list_lock;
 
 static int
 xfs_reclaim_inode_shrink(
+	struct shrinker	*shrink,
 	int		nr_to_scan,
 	gfp_t		gfp_mask)
 {
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 8c117ff2e3a..67c018392d6 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -69,7 +69,7 @@ STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);
 
 STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
 STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int	xfs_qm_shake(int, gfp_t);
+STATIC int	xfs_qm_shake(struct shrinker *, int, gfp_t);
 
 static struct shrinker xfs_qm_shaker = {
 	.shrink = xfs_qm_shake,
@@ -2117,7 +2117,10 @@ xfs_qm_shake_freelist(
  */
 /* ARGSUSED */
 STATIC int
-xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
+xfs_qm_shake(
+	struct shrinker	*shrink,
+	int		nr_to_scan,
+	gfp_t		gfp_mask)
 {
 	int	ndqused, nfree, n;
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b969efb0378..a2b48041b91 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -999,7 +999,7 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
  * querying the cache size, so a fastpath for that case is appropriate.
  */
 struct shrinker {
-	int (*shrink)(int nr_to_scan, gfp_t gfp_mask);
+	int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask);
 	int seeks;	/* seeks to recreate an obj */
 
 	/* These are for internal use */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9c7e57cc63a..199fa436c0d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -213,8 +213,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 	list_for_each_entry(shrinker, &shrinker_list, list) {
 		unsigned long long delta;
 		unsigned long total_scan;
-		unsigned long max_pass = (*shrinker->shrink)(0, gfp_mask);
+		unsigned long max_pass;
 
+		max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
 		delta = (4 * scanned) / shrinker->seeks;
 		delta *= max_pass;
 		do_div(delta, lru_pages + 1);
@@ -242,8 +243,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 			int shrink_ret;
 			int nr_before;
 
-			nr_before = (*shrinker->shrink)(0, gfp_mask);
-			shrink_ret = (*shrinker->shrink)(this_scan, gfp_mask);
+			nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask);
+			shrink_ret = (*shrinker->shrink)(shrinker, this_scan,
+								gfp_mask);
 			if (shrink_ret == -1)
 				break;
 			if (shrink_ret < nr_before)
-- 
cgit v1.2.3-70-g09d2


From e15bacbebb9dcc95f148f28dfc83a6d5e48b60b8 Mon Sep 17 00:00:00 2001
From: Dan Kruchinin <dkruchinin@acm.org>
Date: Wed, 14 Jul 2010 14:31:57 +0400
Subject: padata: Make two separate cpumasks

The aim of this patch is to provide two separate cpumasks
for the padata parallel and serial workers respectively.
This allows the user to create finer-grained and more sophisticated
configurations of the padata framework. For example, the user may bind
parallel and serial workers to non-intersecting CPU groups to gain better
performance. Each padata instance now also has a notifier chain for its
cpumasks. If the parallel mask, the serial mask, or both are changed, all
interested subsystems are notified. This is especially useful
if a padata user selects the callback CPU according to the serial cpumask.

Signed-off-by: Dan Kruchinin <dkruchinin@acm.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/pcrypt.c        | 191 ++++++++++++++------
 include/linux/padata.h | 116 ++++++++----
 kernel/padata.c        | 471 ++++++++++++++++++++++++++++++++++++-------------
 3 files changed, 564 insertions(+), 214 deletions(-)

(limited to 'include')

diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 6036b6de907..c9662e25595 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -24,12 +24,38 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/notifier.h>
 #include <crypto/pcrypt.h>
 
-static struct padata_instance *pcrypt_enc_padata;
-static struct padata_instance *pcrypt_dec_padata;
-static struct workqueue_struct *encwq;
-static struct workqueue_struct *decwq;
+struct pcrypt_instance {
+	struct padata_instance *pinst;
+	struct workqueue_struct *wq;
+
+	/*
+	 * Cpumask for callback CPUs. It should be
+	 * equal to serial cpumask of corresponding padata instance,
+	 * so it is updated when padata notifies us about serial
+	 * cpumask change.
+	 *
+	 * cb_cpumask is protected by RCU. This fact prevents us from
+	 * using cpumask_var_t directly because the actual type of
+	 * cpumask_var_t depends on kernel configuration (particularly on
+	 * CONFIG_CPUMASK_OFFSTACK macro). Depending on the configuration
+	 * cpumask_var_t may be either a pointer to the struct cpumask
+	 * or a variable allocated on the stack. Thus we can not safely use
+	 * cpumask_var_t with RCU operations such as rcu_assign_pointer or
+	 * rcu_dereference. So cpumask_var_t is wrapped with struct
+	 * pcrypt_cpumask which makes possible to use it with RCU.
+	 */
+	struct pcrypt_cpumask {
+		cpumask_var_t mask;
+	} *cb_cpumask;
+	struct notifier_block nblock;
+};
+
+static struct pcrypt_instance pencrypt;
+static struct pcrypt_instance pdecrypt;
+
 
 struct pcrypt_instance_ctx {
 	struct crypto_spawn spawn;
@@ -42,25 +68,29 @@ struct pcrypt_aead_ctx {
 };
 
 static int pcrypt_do_parallel(struct padata_priv *padata, unsigned int *cb_cpu,
-			      struct padata_instance *pinst)
+			      struct pcrypt_instance *pcrypt)
 {
 	unsigned int cpu_index, cpu, i;
+	struct pcrypt_cpumask *cpumask;
 
 	cpu = *cb_cpu;
 
-	if (cpumask_test_cpu(cpu, cpu_active_mask))
+	rcu_read_lock_bh();
+	cpumask = rcu_dereference(pcrypt->cb_cpumask);
+	if (cpumask_test_cpu(cpu, cpumask->mask))
 			goto out;
 
-	cpu_index = cpu % cpumask_weight(cpu_active_mask);
+	cpu_index = cpu % cpumask_weight(cpumask->mask);
 
-	cpu = cpumask_first(cpu_active_mask);
+	cpu = cpumask_first(cpumask->mask);
 	for (i = 0; i < cpu_index; i++)
-		cpu = cpumask_next(cpu, cpu_active_mask);
+		cpu = cpumask_next(cpu, cpumask->mask);
 
 	*cb_cpu = cpu;
 
 out:
-	return padata_do_parallel(pinst, padata, cpu);
+	rcu_read_unlock_bh();
+	return padata_do_parallel(pcrypt->pinst, padata, cpu);
 }
 
 static int pcrypt_aead_setkey(struct crypto_aead *parent,
@@ -142,7 +172,7 @@ static int pcrypt_aead_encrypt(struct aead_request *req)
 			       req->cryptlen, req->iv);
 	aead_request_set_assoc(creq, req->assoc, req->assoclen);
 
-	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, pcrypt_enc_padata);
+	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pencrypt);
 	if (!err)
 		return -EINPROGRESS;
 
@@ -184,7 +214,7 @@ static int pcrypt_aead_decrypt(struct aead_request *req)
 			       req->cryptlen, req->iv);
 	aead_request_set_assoc(creq, req->assoc, req->assoclen);
 
-	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, pcrypt_dec_padata);
+	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pdecrypt);
 	if (!err)
 		return -EINPROGRESS;
 
@@ -228,7 +258,7 @@ static int pcrypt_aead_givencrypt(struct aead_givcrypt_request *req)
 	aead_givcrypt_set_assoc(creq, areq->assoc, areq->assoclen);
 	aead_givcrypt_set_giv(creq, req->giv, req->seq);
 
-	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, pcrypt_enc_padata);
+	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pencrypt);
 	if (!err)
 		return -EINPROGRESS;
 
@@ -370,6 +400,88 @@ static void pcrypt_free(struct crypto_instance *inst)
 	kfree(inst);
 }
 
+static int pcrypt_cpumask_change_notify(struct notifier_block *self,
+					unsigned long val, void *data)
+{
+	struct pcrypt_instance *pcrypt;
+	struct pcrypt_cpumask *new_mask, *old_mask;
+
+	if (!(val & PADATA_CPU_SERIAL))
+		return 0;
+
+	pcrypt = container_of(self, struct pcrypt_instance, nblock);
+	new_mask = kmalloc(sizeof(*new_mask), GFP_KERNEL);
+	if (!new_mask)
+		return -ENOMEM;
+	if (!alloc_cpumask_var(&new_mask->mask, GFP_KERNEL)) {
+		kfree(new_mask);
+		return -ENOMEM;
+	}
+
+	old_mask = pcrypt->cb_cpumask;
+
+	padata_get_cpumask(pcrypt->pinst, PADATA_CPU_SERIAL, new_mask->mask);
+	rcu_assign_pointer(pcrypt->cb_cpumask, new_mask);
+	synchronize_rcu_bh();
+
+	free_cpumask_var(old_mask->mask);
+	kfree(old_mask);
+	return 0;
+}
+
+static int __pcrypt_init_instance(struct pcrypt_instance *pcrypt,
+				  const char *name)
+{
+	int ret = -ENOMEM;
+	struct pcrypt_cpumask *mask;
+
+	pcrypt->wq = create_workqueue(name);
+	if (!pcrypt->wq)
+		goto err;
+
+	pcrypt->pinst = padata_alloc(pcrypt->wq);
+	if (!pcrypt->pinst)
+		goto err_destroy_workqueue;
+
+	mask = kmalloc(sizeof(*mask), GFP_KERNEL);
+	if (!mask)
+		goto err_free_padata;
+	if (!alloc_cpumask_var(&mask->mask, GFP_KERNEL)) {
+		kfree(mask);
+		goto err_free_padata;
+	}
+
+	padata_get_cpumask(pcrypt->pinst, PADATA_CPU_SERIAL, mask->mask);
+	rcu_assign_pointer(pcrypt->cb_cpumask, mask);
+
+	pcrypt->nblock.notifier_call = pcrypt_cpumask_change_notify;
+	ret = padata_register_cpumask_notifier(pcrypt->pinst, &pcrypt->nblock);
+	if (ret)
+		goto err_free_cpumask;
+
+	return ret;
+err_free_cpumask:
+	free_cpumask_var(mask->mask);
+	kfree(mask);
+err_free_padata:
+	padata_free(pcrypt->pinst);
+err_destroy_workqueue:
+	destroy_workqueue(pcrypt->wq);
+err:
+	return ret;
+}
+
+static void __pcrypt_deinit_instance(struct pcrypt_instance *pcrypt)
+{
+	free_cpumask_var(pcrypt->cb_cpumask->mask);
+	kfree(pcrypt->cb_cpumask);
+
+	padata_stop(pcrypt->pinst);
+	padata_unregister_cpumask_notifier(pcrypt->pinst, &pcrypt->nblock);
+	destroy_workqueue(pcrypt->wq);
+	padata_free(pcrypt->pinst);
+}
+
 static struct crypto_template pcrypt_tmpl = {
 	.name = "pcrypt",
 	.alloc = pcrypt_alloc,
@@ -379,60 +491,31 @@ static struct crypto_template pcrypt_tmpl = {
 
 static int __init pcrypt_init(void)
 {
-	int err = -ENOMEM;
-	encwq = create_workqueue("pencrypt");
-	if (!encwq)
-		goto err;
-
-	decwq = create_workqueue("pdecrypt");
-	if (!decwq)
-		goto err_destroy_encwq;
-
-
-	pcrypt_enc_padata = padata_alloc(cpu_possible_mask, encwq);
-	if (!pcrypt_enc_padata)
-		goto err_destroy_decwq;
-
-	pcrypt_dec_padata = padata_alloc(cpu_possible_mask, decwq);
-	if (!pcrypt_dec_padata)
-		goto err_free_enc_padata;
+	int err;
 
-	err = padata_start(pcrypt_enc_padata);
+	err = __pcrypt_init_instance(&pencrypt, "pencrypt");
 	if (err)
-		goto err_free_dec_padata;
+		goto err;
 
-	err = padata_start(pcrypt_dec_padata);
+	err = __pcrypt_init_instance(&pdecrypt, "pdecrypt");
 	if (err)
-		goto err_free_dec_padata;
-
-	return crypto_register_template(&pcrypt_tmpl);
-
-err_free_dec_padata:
-	padata_free(pcrypt_dec_padata);
+		goto err_deinit_pencrypt;
 
-err_free_enc_padata:
-	padata_free(pcrypt_enc_padata);
+	padata_start(pencrypt.pinst);
+	padata_start(pdecrypt.pinst);
 
-err_destroy_decwq:
-	destroy_workqueue(decwq);
-
-err_destroy_encwq:
-	destroy_workqueue(encwq);
+	return crypto_register_template(&pcrypt_tmpl);
 
+err_deinit_pencrypt:
+	__pcrypt_deinit_instance(&pencrypt);
 err:
 	return err;
 }
 
 static void __exit pcrypt_exit(void)
 {
-	padata_stop(pcrypt_enc_padata);
-	padata_stop(pcrypt_dec_padata);
-
-	destroy_workqueue(encwq);
-	destroy_workqueue(decwq);
-
-	padata_free(pcrypt_enc_padata);
-	padata_free(pcrypt_dec_padata);
+	__pcrypt_deinit_instance(&pencrypt);
+	__pcrypt_deinit_instance(&pdecrypt);
 
 	crypto_unregister_template(&pcrypt_tmpl);
 }
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 8844b851191..621e7736690 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -25,6 +25,10 @@
 #include <linux/spinlock.h>
 #include <linux/list.h>
 #include <linux/timer.h>
+#include <linux/notifier.h>
+
+#define PADATA_CPU_SERIAL   0x01
+#define PADATA_CPU_PARALLEL 0x02
 
 /**
  * struct padata_priv -  Embedded to the users data structure.
@@ -59,7 +63,20 @@ struct padata_list {
 };
 
 /**
- * struct padata_queue - The percpu padata queues.
+* struct padata_serial_queue - The percpu padata serial queue
+*
+* @serial: List to wait for serialization after reordering.
+* @work: work struct for serialization.
+* @pd: Backpointer to the internal control structure.
+*/
+struct padata_serial_queue {
+       struct padata_list    serial;
+       struct work_struct    work;
+       struct parallel_data *pd;
+};
+
+/**
+ * struct padata_parallel_queue - The percpu padata parallel queue
  *
  * @parallel: List to wait for parallelization.
  * @reorder: List to wait for reordering after parallel processing.
@@ -67,44 +84,52 @@ struct padata_list {
  * @pwork: work struct for parallelization.
  * @swork: work struct for serialization.
  * @pd: Backpointer to the internal control structure.
+ * @work: work struct for parallelization.
+ * @num_obj: Number of objects that are processed by this cpu.
  * @cpu_index: Index of the cpu.
  */
-struct padata_queue {
-	struct padata_list	parallel;
-	struct padata_list	reorder;
-	struct padata_list	serial;
-	struct work_struct	pwork;
-	struct work_struct	swork;
-	struct parallel_data    *pd;
-	int			cpu_index;
+struct padata_parallel_queue {
+       struct padata_list    parallel;
+       struct padata_list    reorder;
+       struct parallel_data *pd;
+       struct work_struct    work;
+       atomic_t              num_obj;
+       int                   cpu_index;
 };
 
+
 /**
  * struct parallel_data - Internal control structure, covers everything
  * that depends on the cpumask in use.
  *
  * @pinst: padata instance.
- * @queue: percpu padata queues.
+ * @pqueue: percpu padata queues used for parallelization.
+ * @squeue: percpu padata queues used for serialization.
  * @seq_nr: The sequence number that will be attached to the next object.
  * @reorder_objects: Number of objects waiting in the reorder queues.
  * @refcnt: Number of objects holding a reference on this parallel_data.
  * @max_seq_nr:  Maximal used sequence number.
- * @cpumask: cpumask in use.
+ * @cpumask: Contains two cpumasks: pcpu and cbcpu for
+ *           parallel and serial workers respectively.
  * @lock: Reorder lock.
  * @processed: Number of already processed objects.
  * @timer: Reorder timer.
  */
 struct parallel_data {
-	struct padata_instance	*pinst;
-	struct padata_queue	*queue;
-	atomic_t		seq_nr;
-	atomic_t		reorder_objects;
-	atomic_t                refcnt;
-	unsigned int		max_seq_nr;
-	cpumask_var_t		cpumask;
-	spinlock_t              lock ____cacheline_aligned;
-	unsigned int            processed;
-	struct timer_list       timer;
+	struct padata_instance		*pinst;
+	struct padata_parallel_queue	*pqueue;
+	struct padata_serial_queue	*squeue;
+	atomic_t			 seq_nr;
+	atomic_t			 reorder_objects;
+	atomic_t			 refcnt;
+	unsigned int			 max_seq_nr;
+	struct {
+		cpumask_var_t		 pcpu;
+		cpumask_var_t		 cbcpu;
+	} cpumask;
+	spinlock_t                       lock ____cacheline_aligned;
+	unsigned int			 processed;
+	struct timer_list		 timer;
 };
 
 /**
@@ -113,32 +138,51 @@ struct parallel_data {
  * @cpu_notifier: cpu hotplug notifier.
  * @wq: The workqueue in use.
  * @pd: The internal control structure.
- * @cpumask: User supplied cpumask.
+ * @cpumask: User supplied cpumask. Contains two cpumasks: pcpu and
+ *           cbcpu for parallel and serial works respectively.
+ * @cpumask_change_notifier: Notifiers chain for user-defined notify
+ *            callbacks that will be called when either @pcpu or @cbcpu
+ *             or both cpumasks change.
  * @lock: padata instance lock.
  * @flags: padata flags.
  */
 struct padata_instance {
-	struct notifier_block   cpu_notifier;
-	struct workqueue_struct *wq;
-	struct parallel_data	*pd;
-	cpumask_var_t           cpumask;
-	struct mutex		lock;
-	u8			flags;
-#define	PADATA_INIT		1
-#define	PADATA_RESET		2
-#define	PADATA_INVALID		4
+	struct notifier_block		 cpu_notifier;
+	struct workqueue_struct		*wq;
+	struct parallel_data		*pd;
+	struct {
+		cpumask_var_t		 pcpu;
+		cpumask_var_t		 cbcpu;
+	} cpumask;
+	struct blocking_notifier_head	 cpumask_change_notifier;
+	struct mutex			 lock;
+	u8				 flags;
+#define	PADATA_INIT	1
+#define	PADATA_RESET	2
+#define	PADATA_INVALID	4
 };
 
-extern struct padata_instance *padata_alloc(const struct cpumask *cpumask,
-					    struct workqueue_struct *wq);
+extern struct padata_instance *padata_alloc(struct workqueue_struct *wq);
+extern struct padata_instance *__padata_alloc(struct workqueue_struct *wq,
+					      const struct cpumask *pcpumask,
+					      const struct cpumask *cbcpumask);
 extern void padata_free(struct padata_instance *pinst);
 extern int padata_do_parallel(struct padata_instance *pinst,
 			      struct padata_priv *padata, int cb_cpu);
 extern void padata_do_serial(struct padata_priv *padata);
-extern int padata_set_cpumask(struct padata_instance *pinst,
+extern int padata_get_cpumask(struct padata_instance *pinst,
+			      int cpumask_type, struct cpumask *out_mask);
+extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
 			      cpumask_var_t cpumask);
-extern int padata_add_cpu(struct padata_instance *pinst, int cpu);
-extern int padata_remove_cpu(struct padata_instance *pinst, int cpu);
+extern int __padata_set_cpumasks(struct padata_instance *pinst,
+				 cpumask_var_t pcpumask,
+				 cpumask_var_t cbcpumask);
+extern int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask);
+extern int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask);
 extern int padata_start(struct padata_instance *pinst);
 extern void padata_stop(struct padata_instance *pinst);
+extern int padata_register_cpumask_notifier(struct padata_instance *pinst,
+					    struct notifier_block *nblock);
+extern int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
+					      struct notifier_block *nblock);
 #endif
diff --git a/kernel/padata.c b/kernel/padata.c
index 450d67d394b..84d0ca9dac9 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -35,9 +35,9 @@ static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
 {
 	int cpu, target_cpu;
 
-	target_cpu = cpumask_first(pd->cpumask);
+	target_cpu = cpumask_first(pd->cpumask.pcpu);
 	for (cpu = 0; cpu < cpu_index; cpu++)
-		target_cpu = cpumask_next(target_cpu, pd->cpumask);
+		target_cpu = cpumask_next(target_cpu, pd->cpumask.pcpu);
 
 	return target_cpu;
 }
@@ -53,26 +53,27 @@ static int padata_cpu_hash(struct padata_priv *padata)
 	 * Hash the sequence numbers to the cpus by taking
 	 * seq_nr mod. number of cpus in use.
 	 */
-	cpu_index =  padata->seq_nr % cpumask_weight(pd->cpumask);
+	cpu_index =  padata->seq_nr % cpumask_weight(pd->cpumask.pcpu);
 
 	return padata_index_to_cpu(pd, cpu_index);
 }
 
-static void padata_parallel_worker(struct work_struct *work)
+static void padata_parallel_worker(struct work_struct *parallel_work)
 {
-	struct padata_queue *queue;
+	struct padata_parallel_queue *pqueue;
 	struct parallel_data *pd;
 	struct padata_instance *pinst;
 	LIST_HEAD(local_list);
 
 	local_bh_disable();
-	queue = container_of(work, struct padata_queue, pwork);
-	pd = queue->pd;
+	pqueue = container_of(parallel_work,
+			      struct padata_parallel_queue, work);
+	pd = pqueue->pd;
 	pinst = pd->pinst;
 
-	spin_lock(&queue->parallel.lock);
-	list_replace_init(&queue->parallel.list, &local_list);
-	spin_unlock(&queue->parallel.lock);
+	spin_lock(&pqueue->parallel.lock);
+	list_replace_init(&pqueue->parallel.list, &local_list);
+	spin_unlock(&pqueue->parallel.lock);
 
 	while (!list_empty(&local_list)) {
 		struct padata_priv *padata;
@@ -94,7 +95,7 @@ static void padata_parallel_worker(struct work_struct *work)
  * @pinst: padata instance
  * @padata: object to be parallelized
  * @cb_cpu: cpu the serialization callback function will run on,
- *          must be in the cpumask of padata.
+ *          must be in the serial cpumask of padata(i.e. cpumask.cbcpu).
  *
  * The parallelization callback function will run with BHs off.
  * Note: Every object which is parallelized by padata_do_parallel
@@ -104,7 +105,7 @@ int padata_do_parallel(struct padata_instance *pinst,
 		       struct padata_priv *padata, int cb_cpu)
 {
 	int target_cpu, err;
-	struct padata_queue *queue;
+	struct padata_parallel_queue *queue;
 	struct parallel_data *pd;
 
 	rcu_read_lock_bh();
@@ -115,7 +116,7 @@ int padata_do_parallel(struct padata_instance *pinst,
 	if (!(pinst->flags & PADATA_INIT))
 		goto out;
 
-	if (!cpumask_test_cpu(cb_cpu, pd->cpumask))
+	if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
 		goto out;
 
 	err =  -EBUSY;
@@ -136,13 +137,13 @@ int padata_do_parallel(struct padata_instance *pinst,
 	padata->seq_nr = atomic_inc_return(&pd->seq_nr);
 
 	target_cpu = padata_cpu_hash(padata);
-	queue = per_cpu_ptr(pd->queue, target_cpu);
+	queue = per_cpu_ptr(pd->pqueue, target_cpu);
 
 	spin_lock(&queue->parallel.lock);
 	list_add_tail(&padata->list, &queue->parallel.list);
 	spin_unlock(&queue->parallel.lock);
 
-	queue_work_on(target_cpu, pinst->wq, &queue->pwork);
+	queue_work_on(target_cpu, pinst->wq, &queue->work);
 
 out:
 	rcu_read_unlock_bh();
@@ -172,11 +173,11 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
 {
 	int cpu, num_cpus;
 	int next_nr, next_index;
-	struct padata_queue *queue, *next_queue;
+	struct padata_parallel_queue *queue, *next_queue;
 	struct padata_priv *padata;
 	struct padata_list *reorder;
 
-	num_cpus = cpumask_weight(pd->cpumask);
+	num_cpus = cpumask_weight(pd->cpumask.pcpu);
 
 	/*
 	 * Calculate the percpu reorder queue and the sequence
@@ -185,13 +186,13 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
 	next_nr = pd->processed;
 	next_index = next_nr % num_cpus;
 	cpu = padata_index_to_cpu(pd, next_index);
-	next_queue = per_cpu_ptr(pd->queue, cpu);
+	next_queue = per_cpu_ptr(pd->pqueue, cpu);
 
 	if (unlikely(next_nr > pd->max_seq_nr)) {
 		next_nr = next_nr - pd->max_seq_nr - 1;
 		next_index = next_nr % num_cpus;
 		cpu = padata_index_to_cpu(pd, next_index);
-		next_queue = per_cpu_ptr(pd->queue, cpu);
+		next_queue = per_cpu_ptr(pd->pqueue, cpu);
 		pd->processed = 0;
 	}
 
@@ -215,7 +216,7 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
 		goto out;
 	}
 
-	queue = per_cpu_ptr(pd->queue, smp_processor_id());
+	queue = per_cpu_ptr(pd->pqueue, smp_processor_id());
 	if (queue->cpu_index == next_queue->cpu_index) {
 		padata = ERR_PTR(-ENODATA);
 		goto out;
@@ -229,7 +230,7 @@ out:
 static void padata_reorder(struct parallel_data *pd)
 {
 	struct padata_priv *padata;
-	struct padata_queue *queue;
+	struct padata_serial_queue *squeue;
 	struct padata_instance *pinst = pd->pinst;
 
 	/*
@@ -268,13 +269,13 @@ static void padata_reorder(struct parallel_data *pd)
 			return;
 		}
 
-		queue = per_cpu_ptr(pd->queue, padata->cb_cpu);
+		squeue = per_cpu_ptr(pd->squeue, padata->cb_cpu);
 
-		spin_lock(&queue->serial.lock);
-		list_add_tail(&padata->list, &queue->serial.list);
-		spin_unlock(&queue->serial.lock);
+		spin_lock(&squeue->serial.lock);
+		list_add_tail(&padata->list, &squeue->serial.list);
+		spin_unlock(&squeue->serial.lock);
 
-		queue_work_on(padata->cb_cpu, pinst->wq, &queue->swork);
+		queue_work_on(padata->cb_cpu, pinst->wq, &squeue->work);
 	}
 
 	spin_unlock_bh(&pd->lock);
@@ -300,19 +301,19 @@ static void padata_reorder_timer(unsigned long arg)
 	padata_reorder(pd);
 }
 
-static void padata_serial_worker(struct work_struct *work)
+static void padata_serial_worker(struct work_struct *serial_work)
 {
-	struct padata_queue *queue;
+	struct padata_serial_queue *squeue;
 	struct parallel_data *pd;
 	LIST_HEAD(local_list);
 
 	local_bh_disable();
-	queue = container_of(work, struct padata_queue, swork);
-	pd = queue->pd;
+	squeue = container_of(serial_work, struct padata_serial_queue, work);
+	pd = squeue->pd;
 
-	spin_lock(&queue->serial.lock);
-	list_replace_init(&queue->serial.list, &local_list);
-	spin_unlock(&queue->serial.lock);
+	spin_lock(&squeue->serial.lock);
+	list_replace_init(&squeue->serial.list, &local_list);
+	spin_unlock(&squeue->serial.lock);
 
 	while (!list_empty(&local_list)) {
 		struct padata_priv *padata;
@@ -339,18 +340,18 @@ static void padata_serial_worker(struct work_struct *work)
 void padata_do_serial(struct padata_priv *padata)
 {
 	int cpu;
-	struct padata_queue *queue;
+	struct padata_parallel_queue *pqueue;
 	struct parallel_data *pd;
 
 	pd = padata->pd;
 
 	cpu = get_cpu();
-	queue = per_cpu_ptr(pd->queue, cpu);
+	pqueue = per_cpu_ptr(pd->pqueue, cpu);
 
-	spin_lock(&queue->reorder.lock);
+	spin_lock(&pqueue->reorder.lock);
 	atomic_inc(&pd->reorder_objects);
-	list_add_tail(&padata->list, &queue->reorder.list);
-	spin_unlock(&queue->reorder.lock);
+	list_add_tail(&padata->list, &pqueue->reorder.list);
+	spin_unlock(&pqueue->reorder.lock);
 
 	put_cpu();
 
@@ -358,51 +359,88 @@ void padata_do_serial(struct padata_priv *padata)
 }
 EXPORT_SYMBOL(padata_do_serial);
 
-/* Allocate and initialize the internal cpumask dependend resources. */
-static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
-					     const struct cpumask *cpumask)
+static int padata_setup_cpumasks(struct parallel_data *pd,
+				 const struct cpumask *pcpumask,
+				 const struct cpumask *cbcpumask)
 {
-	int cpu, cpu_index, num_cpus;
-	struct padata_queue *queue;
-	struct parallel_data *pd;
+	if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
+		return -ENOMEM;
 
-	cpu_index = 0;
+	cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_active_mask);
+	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
+		free_cpumask_var(pd->cpumask.pcpu); /* undo 1st alloc */
+		return -ENOMEM;
+	}
 
-	pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
-	if (!pd)
-		goto err;
+	cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_active_mask);
+	return 0;
+}
 
-	pd->queue = alloc_percpu(struct padata_queue);
-	if (!pd->queue)
-		goto err_free_pd;
+static void __padata_list_init(struct padata_list *pd_list)
+{
+	INIT_LIST_HEAD(&pd_list->list);
+	spin_lock_init(&pd_list->lock);
+}
 
-	if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL))
-		goto err_free_queue;
+/* Initialize all percpu queues used by serial workers */
+static void padata_init_squeues(struct parallel_data *pd)
+{
+	int cpu;
+	struct padata_serial_queue *squeue;
 
-	cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
+	for_each_cpu(cpu, pd->cpumask.cbcpu) {
+		squeue = per_cpu_ptr(pd->squeue, cpu);
+		squeue->pd = pd;
+		__padata_list_init(&squeue->serial);
+		INIT_WORK(&squeue->work, padata_serial_worker);
+	}
+}
 
-	for_each_cpu(cpu, pd->cpumask) {
-		queue = per_cpu_ptr(pd->queue, cpu);
+/* Initialize the percpu parallel queues, giving each a consecutive cpu_index */
+static void padata_init_pqueues(struct parallel_data *pd)
+{
+	int cpu_index, num_cpus, cpu;
+	struct padata_parallel_queue *pqueue;
 
-		queue->pd = pd;
+	cpu_index = 0;
+	for_each_cpu(cpu, pd->cpumask.pcpu) {
+		pqueue = per_cpu_ptr(pd->pqueue, cpu);
+		pqueue->pd = pd;
+		pqueue->cpu_index = cpu_index++; /* number queues 0..n-1 */
+
+		__padata_list_init(&pqueue->reorder);
+		__padata_list_init(&pqueue->parallel);
+		INIT_WORK(&pqueue->work, padata_parallel_worker);
+		atomic_set(&pqueue->num_obj, 0);
+	}
 
-		queue->cpu_index = cpu_index;
-		cpu_index++;
+	num_cpus = cpumask_weight(pd->cpumask.pcpu);
+	pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;
+}
 
-		INIT_LIST_HEAD(&queue->reorder.list);
-		INIT_LIST_HEAD(&queue->parallel.list);
-		INIT_LIST_HEAD(&queue->serial.list);
-		spin_lock_init(&queue->reorder.lock);
-		spin_lock_init(&queue->parallel.lock);
-		spin_lock_init(&queue->serial.lock);
+/* Allocate and initialize the internal cpumask dependent resources. */
+static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
+					     const struct cpumask *pcpumask,
+					     const struct cpumask *cbcpumask)
+{
+	struct parallel_data *pd;
 
-		INIT_WORK(&queue->pwork, padata_parallel_worker);
-		INIT_WORK(&queue->swork, padata_serial_worker);
-	}
+	pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
+	if (!pd)
+		goto err;
 
-	num_cpus = cpumask_weight(pd->cpumask);
-	pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;
+	pd->pqueue = alloc_percpu(struct padata_parallel_queue);
+	if (!pd->pqueue)
+		goto err_free_pd;
+
+	pd->squeue = alloc_percpu(struct padata_serial_queue);
+	if (!pd->squeue)
+		goto err_free_pqueue;
+	if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
+		goto err_free_squeue;
 
+	padata_init_pqueues(pd);
+	padata_init_squeues(pd);
 	setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
 	atomic_set(&pd->seq_nr, -1);
 	atomic_set(&pd->reorder_objects, 0);
@@ -412,8 +450,10 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 
 	return pd;
 
-err_free_queue:
-	free_percpu(pd->queue);
+err_free_squeue:
+	free_percpu(pd->squeue);
+err_free_pqueue:
+	free_percpu(pd->pqueue);
 err_free_pd:
 	kfree(pd);
 err:
@@ -422,8 +462,10 @@ err:
 
 static void padata_free_pd(struct parallel_data *pd)
 {
-	free_cpumask_var(pd->cpumask);
-	free_percpu(pd->queue);
+	free_cpumask_var(pd->cpumask.pcpu);
+	free_cpumask_var(pd->cpumask.cbcpu);
+	free_percpu(pd->pqueue);
+	free_percpu(pd->squeue);
 	kfree(pd);
 }
 
@@ -431,11 +473,12 @@ static void padata_free_pd(struct parallel_data *pd)
 static void padata_flush_queues(struct parallel_data *pd)
 {
 	int cpu;
-	struct padata_queue *queue;
+	struct padata_parallel_queue *pqueue;
+	struct padata_serial_queue *squeue;
 
-	for_each_cpu(cpu, pd->cpumask) {
-		queue = per_cpu_ptr(pd->queue, cpu);
-		flush_work(&queue->pwork);
+	for_each_cpu(cpu, pd->cpumask.pcpu) {
+		pqueue = per_cpu_ptr(pd->pqueue, cpu);
+		flush_work(&pqueue->work);
 	}
 
 	del_timer_sync(&pd->timer);
@@ -443,9 +486,9 @@ static void padata_flush_queues(struct parallel_data *pd)
 	if (atomic_read(&pd->reorder_objects))
 		padata_reorder(pd);
 
-	for_each_cpu(cpu, pd->cpumask) {
-		queue = per_cpu_ptr(pd->queue, cpu);
-		flush_work(&queue->swork);
+	for_each_cpu(cpu, pd->cpumask.cbcpu) {
+		squeue = per_cpu_ptr(pd->squeue, cpu);
+		flush_work(&squeue->work);
 	}
 
 	BUG_ON(atomic_read(&pd->refcnt) != 0);
@@ -475,21 +518,63 @@ static void padata_replace(struct padata_instance *pinst,
 			   struct parallel_data *pd_new)
 {
 	struct parallel_data *pd_old = pinst->pd;
+	int notification_mask = 0;
 
 	pinst->flags |= PADATA_RESET;
 
 	rcu_assign_pointer(pinst->pd, pd_new);
 
 	synchronize_rcu();
+	if (!pd_old)
+		goto out;
 
-	if (pd_old) {
-		padata_flush_queues(pd_old);
-		padata_free_pd(pd_old);
-	}
+	padata_flush_queues(pd_old);
+	if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu))
+		notification_mask |= PADATA_CPU_PARALLEL;
+	if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
+		notification_mask |= PADATA_CPU_SERIAL;
+
+	padata_free_pd(pd_old);
+	if (notification_mask)
+		blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
+					     notification_mask, pinst);
 
+out:
 	pinst->flags &= ~PADATA_RESET;
 }
 
+/**
+ * padata_register_cpumask_notifier - Registers a notifier that will be called
+ *                             if either pcpu or cbcpu or both cpumasks change.
+ *
+ * @pinst: A pointer to padata instance
+ * @nblock: A pointer to notifier block.
+ */
+int padata_register_cpumask_notifier(struct padata_instance *pinst,
+				     struct notifier_block *nblock)
+{
+	return blocking_notifier_chain_register(&pinst->cpumask_change_notifier,
+						nblock);
+}
+EXPORT_SYMBOL(padata_register_cpumask_notifier);
+
+/**
+ * padata_unregister_cpumask_notifier - Unregisters cpumask notifier
+ *        registered earlier  using padata_register_cpumask_notifier
+ *
+ * @pinst: A pointer to padata instance.
+ * @nblock: A pointer to notifier block.
+ */
+int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
+				       struct notifier_block *nblock)
+{
+	return blocking_notifier_chain_unregister(
+		&pinst->cpumask_change_notifier,
+		nblock);
+}
+EXPORT_SYMBOL(padata_unregister_cpumask_notifier);
+
+
 /* If cpumask contains no active cpu, we mark the instance as invalid. */
 static bool padata_validate_cpumask(struct padata_instance *pinst,
 				    const struct cpumask *cpumask)
@@ -504,13 +589,82 @@ static bool padata_validate_cpumask(struct padata_instance *pinst,
 }
 
 /**
- * padata_set_cpumask - set the cpumask that padata should use
+ * padata_get_cpumask: Fetch serial or parallel cpumask from the
+ *                     given padata instance and copy it to @out_mask
+ *
+ * @pinst: A pointer to padata instance
+ * @cpumask_type: Specifies which cpumask will be copied.
+ *                Possible values are PADATA_CPU_SERIAL *or* PADATA_CPU_PARALLEL
+ *                corresponding to serial and parallel cpumask respectively.
+ * @out_mask: A pointer to cpumask structure where selected
+ *            cpumask will be copied.
+ */
+int padata_get_cpumask(struct padata_instance *pinst,
+		       int cpumask_type, struct cpumask *out_mask)
+{
+	struct parallel_data *pd;
+	int ret = 0;
+
+	rcu_read_lock_bh();
+	pd = rcu_dereference(pinst->pd);
+	switch (cpumask_type) {
+	case PADATA_CPU_SERIAL:
+		cpumask_copy(out_mask, pd->cpumask.cbcpu);
+		break;
+	case PADATA_CPU_PARALLEL:
+		cpumask_copy(out_mask, pd->cpumask.pcpu);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	rcu_read_unlock_bh();
+	return ret;
+}
+EXPORT_SYMBOL(padata_get_cpumask);
+
+/**
+ * padata_set_cpumask: Sets specified by @cpumask_type cpumask to the value
+ *                     equivalent to @cpumask.
  *
  * @pinst: padata instance
+ * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL corresponding
+ *                to serial and parallel cpumasks respectively.
  * @cpumask: the cpumask to use
  */
-int padata_set_cpumask(struct padata_instance *pinst,
-			cpumask_var_t cpumask)
+int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
+		       cpumask_var_t cpumask)
+{
+	struct cpumask *serial_mask, *parallel_mask;
+
+	switch (cpumask_type) {
+	case PADATA_CPU_PARALLEL:
+		serial_mask = pinst->cpumask.cbcpu;
+		parallel_mask = cpumask;
+		break;
+	case PADATA_CPU_SERIAL:
+		parallel_mask = pinst->cpumask.pcpu;
+		serial_mask = cpumask;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return __padata_set_cpumasks(pinst, parallel_mask, serial_mask);
+}
+EXPORT_SYMBOL(padata_set_cpumask);
+
+/**
+ * __padata_set_cpumasks - Set both parallel and serial cpumasks. The first
+ *                         one is used by parallel workers and the second one
+ *                         by the workers doing serialization.
+ *
+ * @pinst: padata instance
+ * @pcpumask: the cpumask to use for parallel workers
+ * @cbcpumask: the cpumask to use for serial workers
+ */
+int __padata_set_cpumasks(struct padata_instance *pinst,
+			  cpumask_var_t pcpumask, cpumask_var_t cbcpumask)
 {
 	int valid;
 	int err = 0;
@@ -518,7 +672,13 @@ int padata_set_cpumask(struct padata_instance *pinst,
 
 	mutex_lock(&pinst->lock);
 
-	valid = padata_validate_cpumask(pinst, cpumask);
+	valid = padata_validate_cpumask(pinst, pcpumask);
+	if (!valid) {
+		__padata_stop(pinst);
+		goto out_replace;
+	}
+
+	valid = padata_validate_cpumask(pinst, cbcpumask);
 	if (!valid) {
 		__padata_stop(pinst);
 		goto out_replace;
@@ -526,14 +686,15 @@ int padata_set_cpumask(struct padata_instance *pinst,
 
 	get_online_cpus();
 
-	pd = padata_alloc_pd(pinst, cpumask);
+	pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
 	if (!pd) {
 		err = -ENOMEM;
 		goto out;
 	}
 
 out_replace:
-	cpumask_copy(pinst->cpumask, cpumask);
+	cpumask_copy(pinst->cpumask.pcpu, pcpumask);
+	cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
 
 	padata_replace(pinst, pd);
 
@@ -546,41 +707,57 @@ out:
 	mutex_unlock(&pinst->lock);
 
 	return err;
+
 }
-EXPORT_SYMBOL(padata_set_cpumask);
+EXPORT_SYMBOL(__padata_set_cpumasks);
 
 static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
 {
 	struct parallel_data *pd;
 
 	if (cpumask_test_cpu(cpu, cpu_active_mask)) {
-		pd = padata_alloc_pd(pinst, pinst->cpumask);
+		pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
+				     pinst->cpumask.cbcpu);
 		if (!pd)
 			return -ENOMEM;
 
 		padata_replace(pinst, pd);
 
-		if (padata_validate_cpumask(pinst, pinst->cpumask))
+		if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
+		    padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
 			__padata_start(pinst);
 	}
 
 	return 0;
 }
 
-/**
- * padata_add_cpu - add a cpu to the padata cpumask
+ /**
+ * padata_add_cpu - add a cpu to one or both(parallel and serial)
+ *                  padata cpumasks.
  *
  * @pinst: padata instance
  * @cpu: cpu to add
+ * @mask: bitmask of flags specifying to which cpumask @cpu should be added.
+ *        The @mask may be any combination of the following flags:
+ *          PADATA_CPU_SERIAL   - serial cpumask
+ *          PADATA_CPU_PARALLEL - parallel cpumask
  */
-int padata_add_cpu(struct padata_instance *pinst, int cpu)
+
+int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask)
 {
 	int err;
 
+	if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
+		return -EINVAL;
+
 	mutex_lock(&pinst->lock);
 
 	get_online_cpus();
-	cpumask_set_cpu(cpu, pinst->cpumask);
+	if (mask & PADATA_CPU_SERIAL)
+		cpumask_set_cpu(cpu, pinst->cpumask.cbcpu);
+	if (mask & PADATA_CPU_PARALLEL)
+		cpumask_set_cpu(cpu, pinst->cpumask.pcpu);
+
 	err = __padata_add_cpu(pinst, cpu);
 	put_online_cpus();
 
@@ -596,13 +773,15 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
 
 	if (cpumask_test_cpu(cpu, cpu_online_mask)) {
 
-		if (!padata_validate_cpumask(pinst, pinst->cpumask)) {
+		if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
+		    !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu)) {
 			__padata_stop(pinst);
 			padata_replace(pinst, pd);
 			goto out;
 		}
 
-		pd = padata_alloc_pd(pinst, pinst->cpumask);
+		pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
+				     pinst->cpumask.cbcpu);
 		if (!pd)
 			return -ENOMEM;
 
@@ -613,20 +792,32 @@ out:
 	return 0;
 }
 
-/**
- * padata_remove_cpu - remove a cpu from the padata cpumask
+ /**
+ * padata_remove_cpu - remove a cpu from one or both(serial and parallel)
+ *                     padata cpumasks.
  *
  * @pinst: padata instance
  * @cpu: cpu to remove
+ * @mask: bitmask specifying from which cpumask @cpu should be removed
+ *        The @mask may be any combination of the following flags:
+ *          PADATA_CPU_SERIAL   - serial cpumask
+ *          PADATA_CPU_PARALLEL - parallel cpumask
  */
-int padata_remove_cpu(struct padata_instance *pinst, int cpu)
+int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask)
 {
 	int err;
 
+	if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
+		return -EINVAL;
+
 	mutex_lock(&pinst->lock);
 
 	get_online_cpus();
-	cpumask_clear_cpu(cpu, pinst->cpumask);
+	if (mask & PADATA_CPU_SERIAL)
+		cpumask_clear_cpu(cpu, pinst->cpumask.cbcpu);
+	if (mask & PADATA_CPU_PARALLEL)
+		cpumask_clear_cpu(cpu, pinst->cpumask.pcpu);
+
 	err = __padata_remove_cpu(pinst, cpu);
 	put_online_cpus();
 
@@ -672,6 +863,14 @@ void padata_stop(struct padata_instance *pinst)
 EXPORT_SYMBOL(padata_stop);
 
 #ifdef CONFIG_HOTPLUG_CPU
+
+static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
+{
+	return cpumask_test_cpu(cpu, pinst->cpumask.pcpu) ||
+		cpumask_test_cpu(cpu, pinst->cpumask.cbcpu);
+}
+
+
 static int padata_cpu_callback(struct notifier_block *nfb,
 			       unsigned long action, void *hcpu)
 {
@@ -684,7 +883,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+		if (!pinst_has_cpu(pinst, cpu))
 			break;
 		mutex_lock(&pinst->lock);
 		err = __padata_add_cpu(pinst, cpu);
@@ -695,7 +894,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
 
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
-		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+		if (!pinst_has_cpu(pinst, cpu))
 			break;
 		mutex_lock(&pinst->lock);
 		err = __padata_remove_cpu(pinst, cpu);
@@ -706,7 +905,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
 
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
-		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+		if (!pinst_has_cpu(pinst, cpu))
 			break;
 		mutex_lock(&pinst->lock);
 		__padata_remove_cpu(pinst, cpu);
@@ -714,7 +913,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
 
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
-		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+		if (!pinst_has_cpu(pinst, cpu))
 			break;
 		mutex_lock(&pinst->lock);
 		__padata_add_cpu(pinst, cpu);
@@ -726,13 +925,29 @@ static int padata_cpu_callback(struct notifier_block *nfb,
 #endif
 
 /**
- * padata_alloc - allocate and initialize a padata instance
+ * padata_alloc - Allocate and initialize padata instance.
+ *                Use default cpumask(cpu_possible_mask)
+ *                for serial and parallel workers.
+ *
+ * @wq: workqueue to use for the allocated padata instance
+ */
+struct padata_instance *padata_alloc(struct workqueue_struct *wq)
+{
+	return __padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
+}
+EXPORT_SYMBOL(padata_alloc);
+
+/**
+ * __padata_alloc - allocate and initialize a padata instance
+ *                  and specify cpumasks for serial and parallel workers.
  *
- * @cpumask: cpumask that padata uses for parallelization
  * @wq: workqueue to use for the allocated padata instance
+ * @pcpumask: cpumask that will be used for padata parallelization
+ * @cbcpumask: cpumask that will be used for padata serialization
  */
-struct padata_instance *padata_alloc(const struct cpumask *cpumask,
-				     struct workqueue_struct *wq)
+struct padata_instance *__padata_alloc(struct workqueue_struct *wq,
+				       const struct cpumask *pcpumask,
+				       const struct cpumask *cbcpumask)
 {
 	struct padata_instance *pinst;
 	struct parallel_data *pd = NULL;
@@ -742,21 +957,26 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask,
 		goto err;
 
 	get_online_cpus();
-
-	if (!alloc_cpumask_var(&pinst->cpumask, GFP_KERNEL))
+	if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
+		goto err_free_inst;
+	if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
+		free_cpumask_var(pinst->cpumask.pcpu);
 		goto err_free_inst;
-
-	if (padata_validate_cpumask(pinst, cpumask)) {
-		pd = padata_alloc_pd(pinst, cpumask);
-		if (!pd)
-			goto err_free_mask;
 	}
+	if (!padata_validate_cpumask(pinst, pcpumask) ||
+	    !padata_validate_cpumask(pinst, cbcpumask))
+		goto err_free_masks;
+
+	pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
+	if (!pd)
+		goto err_free_masks;
 
 	rcu_assign_pointer(pinst->pd, pd);
 
 	pinst->wq = wq;
 
-	cpumask_copy(pinst->cpumask, cpumask);
+	cpumask_copy(pinst->cpumask.pcpu, pcpumask);
+	cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
 
 	pinst->flags = 0;
 
@@ -768,19 +988,21 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask,
 
 	put_online_cpus();
 
+	BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
 	mutex_init(&pinst->lock);
 
 	return pinst;
 
-err_free_mask:
-	free_cpumask_var(pinst->cpumask);
+err_free_masks:
+	free_cpumask_var(pinst->cpumask.pcpu);
+	free_cpumask_var(pinst->cpumask.cbcpu);
 err_free_inst:
 	kfree(pinst);
 	put_online_cpus();
 err:
 	return NULL;
 }
-EXPORT_SYMBOL(padata_alloc);
+EXPORT_SYMBOL(__padata_alloc);
 
 /**
  * padata_free - free a padata instance
@@ -795,7 +1017,8 @@ void padata_free(struct padata_instance *pinst)
 
 	padata_stop(pinst);
 	padata_free_pd(pinst->pd);
-	free_cpumask_var(pinst->cpumask);
+	free_cpumask_var(pinst->cpumask.pcpu);
+	free_cpumask_var(pinst->cpumask.cbcpu);
 	kfree(pinst);
 }
 EXPORT_SYMBOL(padata_free);
-- 
cgit v1.2.3-70-g09d2


From 5e017dc3f8bc9e4a28983666e6bc00114a2018bb Mon Sep 17 00:00:00 2001
From: Dan Kruchinin <dkruchinin@acm.org>
Date: Wed, 14 Jul 2010 14:33:08 +0400
Subject: padata: Added sysfs primitives to padata subsystem

Added sysfs primitives to the padata subsystem. An API user may now
embed the kobject that each padata instance contains into any sysfs
hierarchy. For now the padata sysfs interface provides only
two objects:
    serial_cpumask   [RW] - cpumask for serial workers
    parallel_cpumask [RW] - cpumask for parallel workers

Signed-off-by: Dan Kruchinin <dkruchinin@acm.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/padata.h |   5 +-
 kernel/padata.c        | 155 ++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 150 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/padata.h b/include/linux/padata.h
index 621e7736690..293ad46ffce 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -26,6 +26,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/notifier.h>
+#include <linux/kobject.h>
 
 #define PADATA_CPU_SERIAL   0x01
 #define PADATA_CPU_PARALLEL 0x02
@@ -142,7 +143,8 @@ struct parallel_data {
  *           cbcpu for parallel and serial works respectivly.
  * @cpumask_change_notifier: Notifiers chain for user-defined notify
  *            callbacks that will be called when either @pcpu or @cbcpu
- *             or both cpumasks change.
+ *            or both cpumasks change.
+ * @kobj: padata instance kernel object.
  * @lock: padata instance lock.
  * @flags: padata flags.
  */
@@ -155,6 +157,7 @@ struct padata_instance {
 		cpumask_var_t		 cbcpu;
 	} cpumask;
 	struct blocking_notifier_head	 cpumask_change_notifier;
+	struct kobject                   kobj;
 	struct mutex			 lock;
 	u8				 flags;
 #define	PADATA_INIT	1
diff --git a/kernel/padata.c b/kernel/padata.c
index 84d0ca9dac9..526f9ea2fcc 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -26,6 +26,7 @@
 #include <linux/mutex.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/sysfs.h>
 #include <linux/rcupdate.h>
 
 #define MAX_SEQ_NR (INT_MAX - NR_CPUS)
@@ -924,6 +925,149 @@ static int padata_cpu_callback(struct notifier_block *nfb,
 }
 #endif
 
+static void __padata_free(struct padata_instance *pinst)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	unregister_hotcpu_notifier(&pinst->cpu_notifier);
+#endif
+
+	padata_stop(pinst);
+	padata_free_pd(pinst->pd);
+	free_cpumask_var(pinst->cpumask.pcpu);
+	free_cpumask_var(pinst->cpumask.cbcpu);
+	kfree(pinst);
+}
+
+#define kobj2pinst(_kobj)					\
+	container_of(_kobj, struct padata_instance, kobj)
+#define attr2pentry(_attr)					\
+	container_of(_attr, struct padata_sysfs_entry, attr)
+
+static void padata_sysfs_release(struct kobject *kobj)
+{
+	struct padata_instance *pinst = kobj2pinst(kobj);
+	__padata_free(pinst);
+}
+
+struct padata_sysfs_entry {
+	struct attribute attr;
+	ssize_t (*show)(struct padata_instance *, struct attribute *, char *);
+	ssize_t (*store)(struct padata_instance *, struct attribute *,
+			 const char *, size_t);
+};
+
+static ssize_t show_cpumask(struct padata_instance *pinst,
+			    struct attribute *attr,  char *buf)
+{
+	struct cpumask *cpumask;
+	ssize_t len;
+
+	mutex_lock(&pinst->lock);
+	if (!strcmp(attr->name, "serial_cpumask"))
+		cpumask = pinst->cpumask.cbcpu;
+	else
+		cpumask = pinst->cpumask.pcpu;
+
+	len = bitmap_scnprintf(buf, PAGE_SIZE, cpumask_bits(cpumask),
+			       nr_cpu_ids);
+	if (PAGE_SIZE - len < 2)
+		len = -EINVAL;
+	else
+		len += sprintf(buf + len, "\n");
+
+	mutex_unlock(&pinst->lock);
+	return len;
+}
+
+static ssize_t store_cpumask(struct padata_instance *pinst,
+			     struct attribute *attr,
+			     const char *buf, size_t count)
+{
+	cpumask_var_t new_cpumask;
+	ssize_t ret;
+	int mask_type;
+
+	if (!alloc_cpumask_var(&new_cpumask, GFP_KERNEL))
+		return -ENOMEM;
+
+	ret = bitmap_parse(buf, count, cpumask_bits(new_cpumask),
+			   nr_cpumask_bits);
+	if (ret < 0)
+		goto out;
+
+	mask_type = !strcmp(attr->name, "serial_cpumask") ?
+		PADATA_CPU_SERIAL : PADATA_CPU_PARALLEL;
+	ret = padata_set_cpumask(pinst, mask_type, new_cpumask);
+	if (!ret)
+		ret = count;
+
+out:
+	free_cpumask_var(new_cpumask);
+	return ret;
+}
+
+#define PADATA_ATTR_RW(_name, _show_name, _store_name)		\
+	static struct padata_sysfs_entry _name##_attr =		\
+		__ATTR(_name, 0644, _show_name, _store_name)
+#define PADATA_ATTR_RO(_name, _show_name)		\
+	static struct padata_sysfs_entry _name##_attr = \
+		__ATTR(_name, 0400, _show_name, NULL)
+
+PADATA_ATTR_RW(serial_cpumask, show_cpumask, store_cpumask);
+PADATA_ATTR_RW(parallel_cpumask, show_cpumask, store_cpumask);
+
+/*
+ * Padata sysfs provides the following objects:
+ * serial_cpumask   [RW] - cpumask for serial workers
+ * parallel_cpumask [RW] - cpumask for parallel workers
+ */
+static struct attribute *padata_default_attrs[] = {
+	&serial_cpumask_attr.attr,
+	&parallel_cpumask_attr.attr,
+	NULL,
+};
+
+static ssize_t padata_sysfs_show(struct kobject *kobj,
+				 struct attribute *attr, char *buf)
+{
+	struct padata_instance *pinst;
+	struct padata_sysfs_entry *pentry;
+	ssize_t ret = -EIO;
+
+	pinst = kobj2pinst(kobj);
+	pentry = attr2pentry(attr);
+	if (pentry->show)
+		ret = pentry->show(pinst, attr, buf);
+
+	return ret;
+}
+
+/* sysfs write dispatcher: forward the buffer to the entry's ->store handler. */
+static ssize_t padata_sysfs_store(struct kobject *kobj, struct attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct padata_instance *pinst = kobj2pinst(kobj);
+	struct padata_sysfs_entry *pentry = attr2pentry(attr);
+	ssize_t ret = -EIO;
+
+	/* Test ->store, not ->show: read-only entries have a NULL ->store. */
+	if (pentry->store)
+		ret = pentry->store(pinst, attr, buf, count);
+
+	return ret;
+}
+
+static const struct sysfs_ops padata_sysfs_ops = {
+	.show = padata_sysfs_show,
+	.store = padata_sysfs_store,
+};
+
+static struct kobj_type padata_attr_type = {
+	.sysfs_ops = &padata_sysfs_ops,
+	.default_attrs = padata_default_attrs,
+	.release = padata_sysfs_release,
+};
+
 /**
  * padata_alloc - Allocate and initialize padata instance.
  *                Use default cpumask(cpu_possible_mask)
@@ -989,6 +1133,7 @@ struct padata_instance *__padata_alloc(struct workqueue_struct *wq,
 	put_online_cpus();
 
 	BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
+	kobject_init(&pinst->kobj, &padata_attr_type);
 	mutex_init(&pinst->lock);
 
 	return pinst;
@@ -1011,14 +1156,6 @@ EXPORT_SYMBOL(__padata_alloc);
  */
 void padata_free(struct padata_instance *pinst)
 {
-#ifdef CONFIG_HOTPLUG_CPU
-	unregister_hotcpu_notifier(&pinst->cpu_notifier);
-#endif
-
-	padata_stop(pinst);
-	padata_free_pd(pinst->pd);
-	free_cpumask_var(pinst->cpumask.pcpu);
-	free_cpumask_var(pinst->cpumask.cbcpu);
-	kfree(pinst);
+	kobject_put(&pinst->kobj);
 }
 EXPORT_SYMBOL(padata_free);
-- 
cgit v1.2.3-70-g09d2


From 7a2e3659b6ffbee4e742cd6f6a60359ad9148720 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave.bueso@gmail.com>
Date: Tue, 13 Jul 2010 05:53:44 -0400
Subject: reiserfs: typo comment fix

Fix trivial typo in code comment (change adn for and), also change comment
style for proper coding style.

Signed-off-by: Davidlohr Bueso <dave@gnu.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/reiserfs_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 3b603f47418..ba394163dea 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -360,7 +360,7 @@ int is_reiserfs_jr(struct reiserfs_super_block *rs);
 /* the spot for the super in versions 3.5 - 3.5.10 (inclusive) */
 #define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)
 
-// reiserfs internal error code (used by search_by_key adn fix_nodes))
+/* reiserfs internal error code (used by search_by_key and fix_nodes)) */
 #define CARRY_ON      0
 #define REPEAT_SEARCH -1
 #define IO_ERROR      -2
-- 
cgit v1.2.3-70-g09d2


From 3a343ee4509c982552b35fbc99d3213f3bb1acde Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Mon, 12 Jul 2010 19:28:27 +0200
Subject: HID: add HID_QUIRK_HIDINPUT_FORCE

For devices with exotic HID report descriptors, it might be necessary to
make the HID core force the registration of an input device. Make that
possible by introducing a new quirk type.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 2 ++
 include/linux/hid.h    | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 866e54ec5fb..7ccee899b59 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1157,6 +1157,8 @@ int hid_connect(struct hid_device *hdev, unsigned int connect_mask)
 
 	if (hdev->quirks & HID_QUIRK_HIDDEV_FORCE)
 		connect_mask |= (HID_CONNECT_HIDDEV_FORCE | HID_CONNECT_HIDDEV);
+	if (hdev->quirks & HID_QUIRK_HIDINPUT_FORCE)
+		connect_mask |= HID_CONNECT_HIDINPUT_FORCE;
 	if (hdev->bus != BUS_USB)
 		connect_mask &= ~HID_CONNECT_HIDDEV;
 	if (hid_hiddev(hdev))
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 895001f7f4b..42a0f1d1136 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -311,6 +311,7 @@ struct hid_item {
 #define HID_QUIRK_HIDDEV_FORCE			0x00000010
 #define HID_QUIRK_BADPAD			0x00000020
 #define HID_QUIRK_MULTI_INPUT			0x00000040
+#define HID_QUIRK_HIDINPUT_FORCE		0x00000080
 #define HID_QUIRK_SKIP_OUTPUT_REPORTS		0x00010000
 #define HID_QUIRK_FULLSPEED_INTERVAL		0x10000000
 #define HID_QUIRK_NO_INIT_REPORTS		0x20000000
-- 
cgit v1.2.3-70-g09d2


From 323f99cbc35c52a65dea9d072b3ecf1e662240d2 Mon Sep 17 00:00:00 2001
From: Tom Lyon <pugs@cisco.com>
Date: Fri, 2 Jul 2010 16:56:14 -0400
Subject: iommu-api: Extension to check for interrupt remapping

This patch allows IOMMU users to determine whether the
hardware and software support safe, isolated interrupt
remapping.  Not all Intel IOMMUs have the hardware, and the
software for AMD is not there yet.

Signed-off-by: Tom Lyon <pugs@cisco.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/pci/intel-iommu.c | 2 ++
 include/linux/iommu.h     | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index c9171be7456..6a5af18faf6 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -3698,6 +3698,8 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
 
 	if (cap == IOMMU_CAP_CACHE_COHERENCY)
 		return dmar_domain->iommu_snooping;
+	if (cap == IOMMU_CAP_INTR_REMAP)
+		return intr_remapping_enabled;
 
 	return 0;
 }
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index be22ad83689..0a2ba409899 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -30,6 +30,7 @@ struct iommu_domain {
 };
 
 #define IOMMU_CAP_CACHE_COHERENCY	0x1
+#define IOMMU_CAP_INTR_REMAP		0x2	/* isolates device intrs */
 
 struct iommu_ops {
 	int (*domain_init)(struct iommu_domain *domain);
-- 
cgit v1.2.3-70-g09d2


From bd27290a593f80cb99e95287cb29c72c0d57608b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 19 Jul 2010 09:35:40 -0700
Subject: net: 64bit stats for netdev_queue

Since struct netdev_queue tx_bytes/tx_packets/tx_dropped are already
protected by _xmit_lock, it's easy to convert these fields to u64 instead
of unsigned long.
This completes 64bit stats for devices using them (vlan, macvlan, ...)

Strictly, we could avoid the locking in dev_txq_stats_fold() on 64bit
arches, but it's a slow path and we prefer to keep it simple.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 6 +++---
 net/core/dev.c            | 4 +++-
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fdc3f299223..b6262898ece 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -501,9 +501,9 @@ struct netdev_queue {
 	 * please use this field instead of dev->trans_start
 	 */
 	unsigned long		trans_start;
-	unsigned long		tx_bytes;
-	unsigned long		tx_packets;
-	unsigned long		tx_dropped;
+	u64			tx_bytes;
+	u64			tx_packets;
+	u64			tx_dropped;
 } ____cacheline_aligned_in_smp;
 
 #ifdef CONFIG_RPS
diff --git a/net/core/dev.c b/net/core/dev.c
index 1c002c7ef5d..9de75cdade5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5282,15 +5282,17 @@ void netdev_run_todo(void)
 void dev_txq_stats_fold(const struct net_device *dev,
 			struct rtnl_link_stats64 *stats)
 {
-	unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
+	u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
 	unsigned int i;
 	struct netdev_queue *txq;
 
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		txq = netdev_get_tx_queue(dev, i);
+		spin_lock_bh(&txq->_xmit_lock);
 		tx_bytes   += txq->tx_bytes;
 		tx_packets += txq->tx_packets;
 		tx_dropped += txq->tx_dropped;
+		spin_unlock_bh(&txq->_xmit_lock);
 	}
 	if (tx_bytes || tx_packets || tx_dropped) {
 		stats->tx_bytes   = tx_bytes;
-- 
cgit v1.2.3-70-g09d2


From e7c38157c61649e66f853d7b9f109119b8361448 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 19 Jul 2010 22:01:26 -0700
Subject: ipv6: Make IP6CB(skb)->nhoff 16-bit.

Even with jumbograms I cannot see any way in which we would need
to record a next-header offset larger than 65535.

The maximum extension header length is (256 << 3) == 2048.
There are only a handful of extension headers specified which
we'd even accept (say 5 or 6), therefore the largest next-header
offset we'd ever have to contend with is something less than
say 16k.

Therefore make it a u16 instead of a u32.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 940e2159535..ab9e9e89e40 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -246,7 +246,7 @@ struct inet6_skb_parm {
 	__u16			srcrt;
 	__u16			dst1;
 	__u16			lastopt;
-	__u32			nhoff;
+	__u16			nhoff;
 	__u16			flags;
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 	__u16			dsthao;
-- 
cgit v1.2.3-70-g09d2


From 772a2f9b488f4d27c314da5eeabde750b9ead41b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Thu, 15 Jul 2010 10:39:47 +0200
Subject: fb: handle allocation failure in alloc_apertures()

If the kzalloc() fails we should return NULL.  All the places that call
alloc_apertures() check for this already.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: James Simmons <jsimmons@infradead.org>
Acked-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/fb.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index 8e5a9dfb76b..e7445df44d6 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -873,6 +873,8 @@ struct fb_info {
 static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
 	struct apertures_struct *a = kzalloc(sizeof(struct apertures_struct)
 			+ max_num * sizeof(struct aperture), GFP_KERNEL);
+	if (!a)
+		return NULL;
 	a->count = max_num;
 	return a;
 }
-- 
cgit v1.2.3-70-g09d2


From eb7beb5c09af75494234ea6acd09d0a647cf7338 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 16 Jul 2010 00:50:03 +0200
Subject: tracing: Remove special traces

The special trace type was only used by sysprof. Let's remove it now
that the sysprof ftrace plugin has been dropped.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Soeren Sandmann <sandmann@daimi.au.dk>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
---
 include/linux/kernel.h        |  5 ----
 include/linux/sched.h         | 12 --------
 kernel/trace/trace.c          | 55 ------------------------------------
 kernel/trace/trace.h          |  7 -----
 kernel/trace/trace_entries.h  | 17 -----------
 kernel/trace/trace_output.c   | 66 -------------------------------------------
 kernel/trace/trace_selftest.c |  1 -
 7 files changed, 163 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 8317ec4b9f3..adee958b598 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -508,9 +508,6 @@ extern void tracing_start(void);
 extern void tracing_stop(void);
 extern void ftrace_off_permanent(void);
 
-extern void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
-
 static inline void __attribute__ ((format (printf, 1, 2)))
 ____trace_printk_check_format(const char *fmt, ...)
 {
@@ -586,8 +583,6 @@ __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
 
 extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
 #else
-static inline void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 static inline int
 trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 747fcaedddb..f751ea9dcb7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2434,18 +2434,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 #endif /* CONFIG_SMP */
 
-#ifdef CONFIG_TRACING
-extern void
-__trace_special(void *__tr, void *__data,
-		unsigned long arg1, unsigned long arg2, unsigned long arg3);
-#else
-static inline void
-__trace_special(void *__tr, void *__data,
-		unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
-}
-#endif
-
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 78a49e67f7d..d9a4aa02c38 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1331,61 +1331,6 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
 
 #endif /* CONFIG_STACKTRACE */
 
-static void
-ftrace_trace_special(void *__tr,
-		     unsigned long arg1, unsigned long arg2, unsigned long arg3,
-		     int pc)
-{
-	struct ftrace_event_call *call = &event_special;
-	struct ring_buffer_event *event;
-	struct trace_array *tr = __tr;
-	struct ring_buffer *buffer = tr->buffer;
-	struct special_entry *entry;
-
-	event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL,
-					  sizeof(*entry), 0, pc);
-	if (!event)
-		return;
-	entry	= ring_buffer_event_data(event);
-	entry->arg1			= arg1;
-	entry->arg2			= arg2;
-	entry->arg3			= arg3;
-
-	if (!filter_check_discard(call, entry, buffer, event))
-		trace_buffer_unlock_commit(buffer, event, 0, pc);
-}
-
-void
-__trace_special(void *__tr, void *__data,
-		unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
-	ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
-}
-
-void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
-	struct trace_array *tr = &global_trace;
-	struct trace_array_cpu *data;
-	unsigned long flags;
-	int cpu;
-	int pc;
-
-	if (tracing_disabled)
-		return;
-
-	pc = preempt_count();
-	local_irq_save(flags);
-	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
-
-	if (likely(atomic_inc_return(&data->disabled) == 1))
-		ftrace_trace_special(tr, arg1, arg2, arg3, pc);
-
-	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
-}
-
 /**
  * trace_vbprintk - write binary msg to tracing buffer
  *
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2114b4c1150..638a5887e2e 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -22,7 +22,6 @@ enum trace_type {
 	TRACE_STACK,
 	TRACE_PRINT,
 	TRACE_BPRINT,
-	TRACE_SPECIAL,
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
 	TRACE_BRANCH,
@@ -189,7 +188,6 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
 		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
 		IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT);	\
-		IF_ASSIGN(var, ent, struct special_entry, 0);		\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
 			  TRACE_MMIO_RW);				\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_map,		\
@@ -332,11 +330,6 @@ void tracing_sched_wakeup_trace(struct trace_array *tr,
 				struct task_struct *wakee,
 				struct task_struct *cur,
 				unsigned long flags, int pc);
-void trace_special(struct trace_array *tr,
-		   struct trace_array_cpu *data,
-		   unsigned long arg1,
-		   unsigned long arg2,
-		   unsigned long arg3, int pc);
 void trace_function(struct trace_array *tr,
 		    unsigned long ip,
 		    unsigned long parent_ip,
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 84128371f25..e3dfecaf13e 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -150,23 +150,6 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
 		)
 );
 
-/*
- * Special (free-form) trace entry:
- */
-FTRACE_ENTRY(special, special_entry,
-
-	TRACE_SPECIAL,
-
-	F_STRUCT(
-		__field(	unsigned long,	arg1	)
-		__field(	unsigned long,	arg2	)
-		__field(	unsigned long,	arg3	)
-	),
-
-	F_printk("(%08lx) (%08lx) (%08lx)",
-		 __entry->arg1, __entry->arg2, __entry->arg3)
-);
-
 /*
  * Stack-trace entry:
  */
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 57c1b459647..a46197b80b7 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1069,65 +1069,6 @@ static struct trace_event trace_wake_event = {
 	.funcs		= &trace_wake_funcs,
 };
 
-/* TRACE_SPECIAL */
-static enum print_line_t trace_special_print(struct trace_iterator *iter,
-					     int flags, struct trace_event *event)
-{
-	struct special_entry *field;
-
-	trace_assign_type(field, iter->ent);
-
-	if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n",
-			      field->arg1,
-			      field->arg2,
-			      field->arg3))
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t trace_special_hex(struct trace_iterator *iter,
-					   int flags, struct trace_event *event)
-{
-	struct special_entry *field;
-	struct trace_seq *s = &iter->seq;
-
-	trace_assign_type(field, iter->ent);
-
-	SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
-	SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
-	SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
-
-	return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t trace_special_bin(struct trace_iterator *iter,
-					   int flags, struct trace_event *event)
-{
-	struct special_entry *field;
-	struct trace_seq *s = &iter->seq;
-
-	trace_assign_type(field, iter->ent);
-
-	SEQ_PUT_FIELD_RET(s, field->arg1);
-	SEQ_PUT_FIELD_RET(s, field->arg2);
-	SEQ_PUT_FIELD_RET(s, field->arg3);
-
-	return TRACE_TYPE_HANDLED;
-}
-
-static struct trace_event_functions trace_special_funcs = {
-	.trace		= trace_special_print,
-	.raw		= trace_special_print,
-	.hex		= trace_special_hex,
-	.binary		= trace_special_bin,
-};
-
-static struct trace_event trace_special_event = {
-	.type		= TRACE_SPECIAL,
-	.funcs		= &trace_special_funcs,
-};
-
 /* TRACE_STACK */
 
 static enum print_line_t trace_stack_print(struct trace_iterator *iter,
@@ -1161,9 +1102,6 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 
 static struct trace_event_functions trace_stack_funcs = {
 	.trace		= trace_stack_print,
-	.raw		= trace_special_print,
-	.hex		= trace_special_hex,
-	.binary		= trace_special_bin,
 };
 
 static struct trace_event trace_stack_event = {
@@ -1194,9 +1132,6 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
 
 static struct trace_event_functions trace_user_stack_funcs = {
 	.trace		= trace_user_stack_print,
-	.raw		= trace_special_print,
-	.hex		= trace_special_hex,
-	.binary		= trace_special_bin,
 };
 
 static struct trace_event trace_user_stack_event = {
@@ -1314,7 +1249,6 @@ static struct trace_event *events[] __initdata = {
 	&trace_fn_event,
 	&trace_ctx_event,
 	&trace_wake_event,
-	&trace_special_event,
 	&trace_stack_event,
 	&trace_user_stack_event,
 	&trace_bprint_event,
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 6ed05ee6cbc..155a415b320 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -13,7 +13,6 @@ static inline int trace_valid_entry(struct trace_entry *entry)
 	case TRACE_WAKE:
 	case TRACE_STACK:
 	case TRACE_PRINT:
-	case TRACE_SPECIAL:
 	case TRACE_BRANCH:
 	case TRACE_GRAPH_ENT:
 	case TRACE_GRAPH_RET:
-- 
cgit v1.2.3-70-g09d2


From 4ced3f74dae18715920cb680098ec7ff4345d0a3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 19 Jul 2010 16:39:04 +0200
Subject: mac80211: move QoS-enable to BSS info

Ever since

commit e1b3ec1a2a336c328c336cfa5485a5f0484cc90d
Author: Stanislaw Gruszka <sgruszka@redhat.com>
Date:   Mon Mar 29 12:18:34 2010 +0200

    mac80211: explicitly disable/enable QoS

mac80211 is telling drivers, in particular
iwlwifi, whether QoS is enabled or not.

However, this is only relevant for station mode,
since only then will any device send nullfunc
frames and need to know whether they should be
QoS frames or not. In other modes, there are
(currently) no frames the device is supposed to
send.

When you now consider virtual interfaces, it
becomes apparent that the current mechanism is
inadequate since it enables/disables QoS on a
global scale, where for nullfunc frames it has
to be on a per-interface scale.

Due to the above considerations, we can change
the way mac80211 advertises the QoS state to
drivers to only ever advertise it as "off" in
station mode, and make it a per-BSS setting.

Tested-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-core.c | 18 +++++++++---------
 include/net/mac80211.h                  | 11 +++++------
 net/mac80211/cfg.c                      |  4 ----
 net/mac80211/mlme.c                     | 11 ++++++-----
 net/mac80211/util.c                     |  7 ++++---
 5 files changed, 24 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index f73eb08a949..676d49df77e 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -1763,6 +1763,15 @@ void iwl_bss_info_changed(struct ieee80211_hw *hw,
 
 	mutex_lock(&priv->mutex);
 
+	if (changes & BSS_CHANGED_QOS) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&priv->lock, flags);
+		priv->qos_data.qos_active = bss_conf->qos;
+		iwl_update_qos(priv);
+		spin_unlock_irqrestore(&priv->lock, flags);
+	}
+
 	if (changes & BSS_CHANGED_BEACON && vif->type == NL80211_IFTYPE_AP) {
 		dev_kfree_skb(priv->ibss_beacon);
 		priv->ibss_beacon = ieee80211_beacon_get(hw, vif);
@@ -2134,15 +2143,6 @@ int iwl_mac_config(struct ieee80211_hw *hw, u32 changed)
 		iwl_set_tx_power(priv, conf->power_level, false);
 	}
 
-	if (changed & IEEE80211_CONF_CHANGE_QOS) {
-		bool qos_active = !!(conf->flags & IEEE80211_CONF_QOS);
-
-		spin_lock_irqsave(&priv->lock, flags);
-		priv->qos_data.qos_active = qos_active;
-		iwl_update_qos(priv);
-		spin_unlock_irqrestore(&priv->lock, flags);
-	}
-
 	if (!iwl_is_ready(priv)) {
 		IWL_DEBUG_MAC80211(priv, "leave - not ready\n");
 		goto out;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7f256e23c57..20d372edec2 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -147,6 +147,8 @@ struct ieee80211_low_level_stats {
  * @BSS_CHANGED_CQM: Connection quality monitor config changed
  * @BSS_CHANGED_IBSS: IBSS join status changed
  * @BSS_CHANGED_ARP_FILTER: Hardware ARP filter address list or state changed.
+ * @BSS_CHANGED_QOS: QoS for this association was enabled/disabled. Note
+ *	that it is only ever disabled for station mode.
  */
 enum ieee80211_bss_change {
 	BSS_CHANGED_ASSOC		= 1<<0,
@@ -162,6 +164,7 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_CQM			= 1<<10,
 	BSS_CHANGED_IBSS		= 1<<11,
 	BSS_CHANGED_ARP_FILTER		= 1<<12,
+	BSS_CHANGED_QOS			= 1<<13,
 
 	/* when adding here, make sure to change ieee80211_reconfig */
 };
@@ -217,6 +220,7 @@ enum ieee80211_bss_change {
  *	filter ARP queries based on the @arp_addr_list, if disabled, the
  *	hardware must not perform any ARP filtering. Note, that the filter will
  *	be enabled also in promiscuous mode.
+ * @qos: This is a QoS-enabled BSS.
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
@@ -240,6 +244,7 @@ struct ieee80211_bss_conf {
 	__be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN];
 	u8 arp_addr_cnt;
 	bool arp_filter_enabled;
+	bool qos;
 };
 
 /**
@@ -620,15 +625,11 @@ struct ieee80211_rx_status {
  *	may turn the device off as much as possible. Typically, this flag will
  *	be set when an interface is set UP but not associated or scanning, but
  *	it can also be unset in that case when monitor interfaces are active.
- * @IEEE80211_CONF_QOS: Enable 802.11e QoS also know as WMM (Wireless
- *      Multimedia). On some drivers (iwlwifi is one of know) we have
- *      to enable/disable QoS explicitly.
  */
 enum ieee80211_conf_flags {
 	IEEE80211_CONF_MONITOR		= (1<<0),
 	IEEE80211_CONF_PS		= (1<<1),
 	IEEE80211_CONF_IDLE		= (1<<2),
-	IEEE80211_CONF_QOS		= (1<<3),
 };
 
 
@@ -643,7 +644,6 @@ enum ieee80211_conf_flags {
  * @IEEE80211_CONF_CHANGE_RETRY_LIMITS: retry limits changed
  * @IEEE80211_CONF_CHANGE_IDLE: Idle flag changed
  * @IEEE80211_CONF_CHANGE_SMPS: Spatial multiplexing powersave mode changed
- * @IEEE80211_CONF_CHANGE_QOS: Quality of service was enabled or disabled
  */
 enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_SMPS		= BIT(1),
@@ -654,7 +654,6 @@ enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_CHANNEL		= BIT(6),
 	IEEE80211_CONF_CHANGE_RETRY_LIMITS	= BIT(7),
 	IEEE80211_CONF_CHANGE_IDLE		= BIT(8),
-	IEEE80211_CONF_CHANGE_QOS		= BIT(9),
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 5b8b4460b69..35b07ea0633 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1154,10 +1154,6 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
 		return -EINVAL;
 	}
 
-	/* enable WMM or activate new settings */
-	local->hw.conf.flags |= IEEE80211_CONF_QOS;
-	drv_config(local, IEEE80211_CONF_CHANGE_QOS);
-
 	return 0;
 }
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d1962650b25..7a4e4bffbc7 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -698,10 +698,11 @@ void ieee80211_dynamic_ps_timer(unsigned long data)
 
 /* MLME */
 static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
-				     struct ieee80211_if_managed *ifmgd,
+				     struct ieee80211_sub_if_data *sdata,
 				     u8 *wmm_param, size_t wmm_param_len)
 {
 	struct ieee80211_tx_queue_params params;
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	size_t left;
 	int count;
 	u8 *pos, uapsd_queues = 0;
@@ -790,8 +791,8 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
 	}
 
 	/* enable WMM or activate new settings */
-	local->hw.conf.flags |=	IEEE80211_CONF_QOS;
-	drv_config(local, IEEE80211_CONF_CHANGE_QOS);
+	sdata->vif.bss_conf.qos = true;
+	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_QOS);
 }
 
 static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
@@ -1325,7 +1326,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
 	}
 
 	if (elems.wmm_param)
-		ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param,
+		ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
 					 elems.wmm_param_len);
 	else
 		ieee80211_set_wmm_default(sdata);
@@ -1597,7 +1598,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 		ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems,
 				      true);
 
-		ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param,
+		ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
 					 elems.wmm_param_len);
 	}
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index a54cf146ed5..79479217737 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -803,8 +803,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata)
 
 	/* after reinitialize QoS TX queues setting to default,
 	 * disable QoS at all */
-	local->hw.conf.flags &=	~IEEE80211_CONF_QOS;
-	drv_config(local, IEEE80211_CONF_CHANGE_QOS);
+	sdata->vif.bss_conf.qos = sdata->vif.type != NL80211_IFTYPE_STATION;
+	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_QOS);
 }
 
 void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
@@ -1161,7 +1161,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 			  BSS_CHANGED_BASIC_RATES |
 			  BSS_CHANGED_BEACON_INT |
 			  BSS_CHANGED_BSSID |
-			  BSS_CHANGED_CQM;
+			  BSS_CHANGED_CQM |
+			  BSS_CHANGED_QOS;
 
 		switch (sdata->vif.type) {
 		case NL80211_IFTYPE_STATION:
-- 
cgit v1.2.3-70-g09d2


From 4f366c5dabcb936dd5754a35188bd699181fe1ce Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Thu, 15 Jul 2010 14:57:33 -0400
Subject: wireless: only use alpha2 regulatory information from country IE

The meaning and/or usage of the country IE is somewhat poorly defined.
In practice, this means that regulatory rulesets in a country IE are
often incomplete and might be untrustworthy.  This removes the code
associated with interpreting those rulesets while preserving respect
for country "alpha2" codes also contained in the country IE.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/regulatory.h |   1 -
 net/wireless/reg.c       | 625 +----------------------------------------------
 2 files changed, 12 insertions(+), 614 deletions(-)

(limited to 'include')

diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index f873ee37f7e..9e103a4e91e 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -54,7 +54,6 @@ struct regulatory_request {
 	enum nl80211_reg_initiator initiator;
 	char alpha2[2];
 	bool intersect;
-	u32 country_ie_checksum;
 	enum environment_cap country_ie_env;
 	struct list_head list;
 };
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 1ac2bdd46ec..678d0bd433f 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -66,18 +66,10 @@ static struct platform_device *reg_pdev;
  */
 const struct ieee80211_regdomain *cfg80211_regdomain;
 
-/*
- * We use this as a place for the rd structure built from the
- * last parsed country IE to rest until CRDA gets back to us with
- * what it thinks should apply for the same country
- */
-static const struct ieee80211_regdomain *country_ie_regdomain;
-
 /*
  * Protects static reg.c components:
  *     - cfg80211_world_regdom
  *     - cfg80211_regdom
- *     - country_ie_regdomain
  *     - last_request
  */
 static DEFINE_MUTEX(reg_mutex);
@@ -275,25 +267,6 @@ static bool is_user_regdom_saved(void)
 	return true;
 }
 
-/**
- * country_ie_integrity_changes - tells us if the country IE has changed
- * @checksum: checksum of country IE of fields we are interested in
- *
- * If the country IE has not changed you can ignore it safely. This is
- * useful to determine if two devices are seeing two different country IEs
- * even on the same alpha2. Note that this will return false if no IE has
- * been set on the wireless core yet.
- */
-static bool country_ie_integrity_changes(u32 checksum)
-{
-	/* If no IE has been set then the checksum doesn't change */
-	if (unlikely(!last_request->country_ie_checksum))
-		return false;
-	if (unlikely(last_request->country_ie_checksum != checksum))
-		return true;
-	return false;
-}
-
 static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd,
 			 const struct ieee80211_regdomain *src_regd)
 {
@@ -505,471 +478,6 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range,
 #undef ONE_GHZ_IN_KHZ
 }
 
-/*
- * This is a work around for sanity checking ieee80211_channel_to_frequency()'s
- * work. ieee80211_channel_to_frequency() can for example currently provide a
- * 2 GHz channel when in fact a 5 GHz channel was desired. An example would be
- * an AP providing channel 8 on a country IE triplet when it sent this on the
- * 5 GHz band, that channel is designed to be channel 8 on 5 GHz, not a 2 GHz
- * channel.
- *
- * This can be removed once ieee80211_channel_to_frequency() takes in a band.
- */
-static bool chan_in_band(int chan, enum ieee80211_band band)
-{
-	int center_freq = ieee80211_channel_to_frequency(chan);
-
-	switch (band) {
-	case IEEE80211_BAND_2GHZ:
-		if (center_freq <= 2484)
-			return true;
-		return false;
-	case IEEE80211_BAND_5GHZ:
-		if (center_freq >= 5005)
-			return true;
-		return false;
-	default:
-		return false;
-	}
-}
-
-/*
- * Some APs may send a country IE triplet for each channel they
- * support and while this is completely overkill and silly we still
- * need to support it. We avoid making a single rule for each channel
- * though and to help us with this we use this helper to find the
- * actual subband end channel. These type of country IE triplet
- * scenerios are handled then, all yielding two regulaotry rules from
- * parsing a country IE:
- *
- * [1]
- * [2]
- * [36]
- * [40]
- *
- * [1]
- * [2-4]
- * [5-12]
- * [36]
- * [40-44]
- *
- * [1-4]
- * [5-7]
- * [36-44]
- * [48-64]
- *
- * [36-36]
- * [40-40]
- * [44-44]
- * [48-48]
- * [52-52]
- * [56-56]
- * [60-60]
- * [64-64]
- * [100-100]
- * [104-104]
- * [108-108]
- * [112-112]
- * [116-116]
- * [120-120]
- * [124-124]
- * [128-128]
- * [132-132]
- * [136-136]
- * [140-140]
- *
- * Returns 0 if the IE has been found to be invalid in the middle
- * somewhere.
- */
-static int max_subband_chan(enum ieee80211_band band,
-			    int orig_cur_chan,
-			    int orig_end_channel,
-			    s8 orig_max_power,
-			    u8 **country_ie,
-			    u8 *country_ie_len)
-{
-	u8 *triplets_start = *country_ie;
-	u8 len_at_triplet = *country_ie_len;
-	int end_subband_chan = orig_end_channel;
-
-	/*
-	 * We'll deal with padding for the caller unless
-	 * its not immediate and we don't process any channels
-	 */
-	if (*country_ie_len == 1) {
-		*country_ie += 1;
-		*country_ie_len -= 1;
-		return orig_end_channel;
-	}
-
-	/* Move to the next triplet and then start search */
-	*country_ie += 3;
-	*country_ie_len -= 3;
-
-	if (!chan_in_band(orig_cur_chan, band))
-		return 0;
-
-	while (*country_ie_len >= 3) {
-		int end_channel = 0;
-		struct ieee80211_country_ie_triplet *triplet =
-			(struct ieee80211_country_ie_triplet *) *country_ie;
-		int cur_channel = 0, next_expected_chan;
-
-		/* means last triplet is completely unrelated to this one */
-		if (triplet->ext.reg_extension_id >=
-				IEEE80211_COUNTRY_EXTENSION_ID) {
-			*country_ie -= 3;
-			*country_ie_len += 3;
-			break;
-		}
-
-		if (triplet->chans.first_channel == 0) {
-			*country_ie += 1;
-			*country_ie_len -= 1;
-			if (*country_ie_len != 0)
-				return 0;
-			break;
-		}
-
-		if (triplet->chans.num_channels == 0)
-			return 0;
-
-		/* Monitonically increasing channel order */
-		if (triplet->chans.first_channel <= end_subband_chan)
-			return 0;
-
-		if (!chan_in_band(triplet->chans.first_channel, band))
-			return 0;
-
-		/* 2 GHz */
-		if (triplet->chans.first_channel <= 14) {
-			end_channel = triplet->chans.first_channel +
-				triplet->chans.num_channels - 1;
-		}
-		else {
-			end_channel =  triplet->chans.first_channel +
-				(4 * (triplet->chans.num_channels - 1));
-		}
-
-		if (!chan_in_band(end_channel, band))
-			return 0;
-
-		if (orig_max_power != triplet->chans.max_power) {
-			*country_ie -= 3;
-			*country_ie_len += 3;
-			break;
-		}
-
-		cur_channel = triplet->chans.first_channel;
-
-		/* The key is finding the right next expected channel */
-		if (band == IEEE80211_BAND_2GHZ)
-			next_expected_chan = end_subband_chan + 1;
-		 else
-			next_expected_chan = end_subband_chan + 4;
-
-		if (cur_channel != next_expected_chan) {
-			*country_ie -= 3;
-			*country_ie_len += 3;
-			break;
-		}
-
-		end_subband_chan = end_channel;
-
-		/* Move to the next one */
-		*country_ie += 3;
-		*country_ie_len -= 3;
-
-		/*
-		 * Padding needs to be dealt with if we processed
-		 * some channels.
-		 */
-		if (*country_ie_len == 1) {
-			*country_ie += 1;
-			*country_ie_len -= 1;
-			break;
-		}
-
-		/* If seen, the IE is invalid */
-		if (*country_ie_len == 2)
-			return 0;
-	}
-
-	if (end_subband_chan == orig_end_channel) {
-		*country_ie = triplets_start;
-		*country_ie_len = len_at_triplet;
-		return orig_end_channel;
-	}
-
-	return end_subband_chan;
-}
-
-/*
- * Converts a country IE to a regulatory domain. A regulatory domain
- * structure has a lot of information which the IE doesn't yet have,
- * so for the other values we use upper max values as we will intersect
- * with our userspace regulatory agent to get lower bounds.
- */
-static struct ieee80211_regdomain *country_ie_2_rd(
-				enum ieee80211_band band,
-				u8 *country_ie,
-				u8 country_ie_len,
-				u32 *checksum)
-{
-	struct ieee80211_regdomain *rd = NULL;
-	unsigned int i = 0;
-	char alpha2[2];
-	u32 flags = 0;
-	u32 num_rules = 0, size_of_regd = 0;
-	u8 *triplets_start = NULL;
-	u8 len_at_triplet = 0;
-	/* the last channel we have registered in a subband (triplet) */
-	int last_sub_max_channel = 0;
-
-	*checksum = 0xDEADBEEF;
-
-	/* Country IE requirements */
-	BUG_ON(country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN ||
-		country_ie_len & 0x01);
-
-	alpha2[0] = country_ie[0];
-	alpha2[1] = country_ie[1];
-
-	/*
-	 * Third octet can be:
-	 *    'I' - Indoor
-	 *    'O' - Outdoor
-	 *
-	 *  anything else we assume is no restrictions
-	 */
-	if (country_ie[2] == 'I')
-		flags = NL80211_RRF_NO_OUTDOOR;
-	else if (country_ie[2] == 'O')
-		flags = NL80211_RRF_NO_INDOOR;
-
-	country_ie += 3;
-	country_ie_len -= 3;
-
-	triplets_start = country_ie;
-	len_at_triplet = country_ie_len;
-
-	*checksum ^= ((flags ^ alpha2[0] ^ alpha2[1]) << 8);
-
-	/*
-	 * We need to build a reg rule for each triplet, but first we must
-	 * calculate the number of reg rules we will need. We will need one
-	 * for each channel subband
-	 */
-	while (country_ie_len >= 3) {
-		int end_channel = 0;
-		struct ieee80211_country_ie_triplet *triplet =
-			(struct ieee80211_country_ie_triplet *) country_ie;
-		int cur_sub_max_channel = 0, cur_channel = 0;
-
-		if (triplet->ext.reg_extension_id >=
-				IEEE80211_COUNTRY_EXTENSION_ID) {
-			country_ie += 3;
-			country_ie_len -= 3;
-			continue;
-		}
-
-		/*
-		 * APs can add padding to make length divisible
-		 * by two, required by the spec.
-		 */
-		if (triplet->chans.first_channel == 0) {
-			country_ie++;
-			country_ie_len--;
-			/* This is expected to be at the very end only */
-			if (country_ie_len != 0)
-				return NULL;
-			break;
-		}
-
-		if (triplet->chans.num_channels == 0)
-			return NULL;
-
-		if (!chan_in_band(triplet->chans.first_channel, band))
-			return NULL;
-
-		/* 2 GHz */
-		if (band == IEEE80211_BAND_2GHZ)
-			end_channel = triplet->chans.first_channel +
-				triplet->chans.num_channels - 1;
-		else
-			/*
-			 * 5 GHz -- For example in country IEs if the first
-			 * channel given is 36 and the number of channels is 4
-			 * then the individual channel numbers defined for the
-			 * 5 GHz PHY by these parameters are: 36, 40, 44, and 48
-			 * and not 36, 37, 38, 39.
-			 *
-			 * See: http://tinyurl.com/11d-clarification
-			 */
-			end_channel =  triplet->chans.first_channel +
-				(4 * (triplet->chans.num_channels - 1));
-
-		cur_channel = triplet->chans.first_channel;
-
-		/*
-		 * Enhancement for APs that send a triplet for every channel
-		 * or for whatever reason sends triplets with multiple channels
-		 * separated when in fact they should be together.
-		 */
-		end_channel = max_subband_chan(band,
-					       cur_channel,
-					       end_channel,
-					       triplet->chans.max_power,
-					       &country_ie,
-					       &country_ie_len);
-		if (!end_channel)
-			return NULL;
-
-		if (!chan_in_band(end_channel, band))
-			return NULL;
-
-		cur_sub_max_channel = end_channel;
-
-		/* Basic sanity check */
-		if (cur_sub_max_channel < cur_channel)
-			return NULL;
-
-		/*
-		 * Do not allow overlapping channels. Also channels
-		 * passed in each subband must be monotonically
-		 * increasing
-		 */
-		if (last_sub_max_channel) {
-			if (cur_channel <= last_sub_max_channel)
-				return NULL;
-			if (cur_sub_max_channel <= last_sub_max_channel)
-				return NULL;
-		}
-
-		/*
-		 * When dot11RegulatoryClassesRequired is supported
-		 * we can throw ext triplets as part of this soup,
-		 * for now we don't care when those change as we
-		 * don't support them
-		 */
-		*checksum ^= ((cur_channel ^ cur_sub_max_channel) << 8) |
-		  ((cur_sub_max_channel ^ cur_sub_max_channel) << 16) |
-		  ((triplet->chans.max_power ^ cur_sub_max_channel) << 24);
-
-		last_sub_max_channel = cur_sub_max_channel;
-
-		num_rules++;
-
-		if (country_ie_len >= 3) {
-			country_ie += 3;
-			country_ie_len -= 3;
-		}
-
-		/*
-		 * Note: this is not a IEEE requirement but
-		 * simply a memory requirement
-		 */
-		if (num_rules > NL80211_MAX_SUPP_REG_RULES)
-			return NULL;
-	}
-
-	country_ie = triplets_start;
-	country_ie_len = len_at_triplet;
-
-	size_of_regd = sizeof(struct ieee80211_regdomain) +
-		(num_rules * sizeof(struct ieee80211_reg_rule));
-
-	rd = kzalloc(size_of_regd, GFP_KERNEL);
-	if (!rd)
-		return NULL;
-
-	rd->n_reg_rules = num_rules;
-	rd->alpha2[0] = alpha2[0];
-	rd->alpha2[1] = alpha2[1];
-
-	/* This time around we fill in the rd */
-	while (country_ie_len >= 3) {
-		int end_channel = 0;
-		struct ieee80211_country_ie_triplet *triplet =
-			(struct ieee80211_country_ie_triplet *) country_ie;
-		struct ieee80211_reg_rule *reg_rule = NULL;
-		struct ieee80211_freq_range *freq_range = NULL;
-		struct ieee80211_power_rule *power_rule = NULL;
-
-		/*
-		 * Must parse if dot11RegulatoryClassesRequired is true,
-		 * we don't support this yet
-		 */
-		if (triplet->ext.reg_extension_id >=
-				IEEE80211_COUNTRY_EXTENSION_ID) {
-			country_ie += 3;
-			country_ie_len -= 3;
-			continue;
-		}
-
-		if (triplet->chans.first_channel == 0) {
-			country_ie++;
-			country_ie_len--;
-			break;
-		}
-
-		reg_rule = &rd->reg_rules[i];
-		freq_range = &reg_rule->freq_range;
-		power_rule = &reg_rule->power_rule;
-
-		reg_rule->flags = flags;
-
-		/* 2 GHz */
-		if (band == IEEE80211_BAND_2GHZ)
-			end_channel = triplet->chans.first_channel +
-				triplet->chans.num_channels -1;
-		else
-			end_channel =  triplet->chans.first_channel +
-				(4 * (triplet->chans.num_channels - 1));
-
-		end_channel = max_subband_chan(band,
-					       triplet->chans.first_channel,
-					       end_channel,
-					       triplet->chans.max_power,
-					       &country_ie,
-					       &country_ie_len);
-
-		/*
-		 * The +10 is since the regulatory domain expects
-		 * the actual band edge, not the center of freq for
-		 * its start and end freqs, assuming 20 MHz bandwidth on
-		 * the channels passed
-		 */
-		freq_range->start_freq_khz =
-			MHZ_TO_KHZ(ieee80211_channel_to_frequency(
-				triplet->chans.first_channel) - 10);
-		freq_range->end_freq_khz =
-			MHZ_TO_KHZ(ieee80211_channel_to_frequency(
-				end_channel) + 10);
-
-		/*
-		 * These are large arbitrary values we use to intersect later.
-		 * Increment this if we ever support >= 40 MHz channels
-		 * in IEEE 802.11
-		 */
-		freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40);
-		power_rule->max_antenna_gain = DBI_TO_MBI(100);
-		power_rule->max_eirp = DBM_TO_MBM(triplet->chans.max_power);
-
-		i++;
-
-		if (country_ie_len >= 3) {
-			country_ie += 3;
-			country_ie_len -= 3;
-		}
-
-		BUG_ON(i > NL80211_MAX_SUPP_REG_RULES);
-	}
-
-	return rd;
-}
-
-
 /*
  * Helper for regdom_intersect(), this does the real
  * mathematical intersection fun
@@ -1191,7 +699,6 @@ static int freq_reg_info_regd(struct wiphy *wiphy,
 
 	return -EINVAL;
 }
-EXPORT_SYMBOL(freq_reg_info);
 
 int freq_reg_info(struct wiphy *wiphy,
 		  u32 center_freq,
@@ -1205,6 +712,7 @@ int freq_reg_info(struct wiphy *wiphy,
 				  reg_rule,
 				  NULL);
 }
+EXPORT_SYMBOL(freq_reg_info);
 
 /*
  * Note that right now we assume the desired channel bandwidth
@@ -1243,41 +751,8 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
 			  desired_bw_khz,
 			  &reg_rule);
 
-	if (r) {
-		/*
-		 * This means no regulatory rule was found in the country IE
-		 * with a frequency range on the center_freq's band, since
-		 * IEEE-802.11 allows for a country IE to have a subset of the
-		 * regulatory information provided in a country we ignore
-		 * disabling the channel unless at least one reg rule was
-		 * found on the center_freq's band. For details see this
-		 * clarification:
-		 *
-		 * http://tinyurl.com/11d-clarification
-		 */
-		if (r == -ERANGE &&
-		    last_request->initiator ==
-		    NL80211_REGDOM_SET_BY_COUNTRY_IE) {
-			REG_DBG_PRINT("cfg80211: Leaving channel %d MHz "
-				"intact on %s - no rule found in band on "
-				"Country IE\n",
-			chan->center_freq, wiphy_name(wiphy));
-		} else {
-		/*
-		 * In this case we know the country IE has at least one reg rule
-		 * for the band so we respect its band definitions
-		 */
-			if (last_request->initiator ==
-			    NL80211_REGDOM_SET_BY_COUNTRY_IE)
-				REG_DBG_PRINT("cfg80211: Disabling "
-					"channel %d MHz on %s due to "
-					"Country IE\n",
-					chan->center_freq, wiphy_name(wiphy));
-			flags |= IEEE80211_CHAN_DISABLED;
-			chan->flags = flags;
-		}
+	if (r)
 		return;
-	}
 
 	power_rule = &reg_rule->power_rule;
 	freq_range = &reg_rule->freq_range;
@@ -2010,7 +1485,7 @@ EXPORT_SYMBOL(regulatory_hint);
 
 /* Caller must hold reg_mutex */
 static bool reg_same_country_ie_hint(struct wiphy *wiphy,
-			u32 country_ie_checksum)
+			char *alpha2, enum environment_cap env)
 {
 	struct wiphy *request_wiphy;
 
@@ -2026,13 +1501,17 @@ static bool reg_same_country_ie_hint(struct wiphy *wiphy,
 		return false;
 
 	if (likely(request_wiphy != wiphy))
-		return !country_ie_integrity_changes(country_ie_checksum);
+		return (last_request->alpha2[0] == alpha2[0] &&
+			last_request->alpha2[1] == alpha2[1] &&
+			last_request->country_ie_env == env);
 	/*
 	 * We should not have let these through at this point, they
 	 * should have been picked up earlier by the first alpha2 check
 	 * on the device
 	 */
-	if (WARN_ON(!country_ie_integrity_changes(country_ie_checksum)))
+	if (WARN_ON((last_request->alpha2[0] == alpha2[0] &&
+			last_request->alpha2[1] == alpha2[1] &&
+			last_request->country_ie_env == env )))
 		return true;
 	return false;
 }
@@ -2048,7 +1527,6 @@ void regulatory_hint_11d(struct wiphy *wiphy,
 {
 	struct ieee80211_regdomain *rd = NULL;
 	char alpha2[2];
-	u32 checksum = 0;
 	enum environment_cap env = ENVIRON_ANY;
 	struct regulatory_request *request;
 
@@ -2064,14 +1542,6 @@ void regulatory_hint_11d(struct wiphy *wiphy,
 	if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN)
 		goto out;
 
-	/*
-	 * Pending country IE processing, this can happen after we
-	 * call CRDA and wait for a response if a beacon was received before
-	 * we were able to process the last regulatory_hint_11d() call
-	 */
-	if (country_ie_regdomain)
-		goto out;
-
 	alpha2[0] = country_ie[0];
 	alpha2[1] = country_ie[1];
 
@@ -2090,12 +1560,6 @@ void regulatory_hint_11d(struct wiphy *wiphy,
 	    wiphy_idx_valid(last_request->wiphy_idx)))
 		goto out;
 
-	rd = country_ie_2_rd(band, country_ie, country_ie_len, &checksum);
-	if (!rd) {
-		REG_DBG_PRINT("cfg80211: Ignoring bogus country IE\n");
-		goto out;
-	}
-
 	/*
 	 * This will not happen right now but we leave it here for the
 	 * the future when we want to add suspend/resume support and having
@@ -2105,24 +1569,17 @@ void regulatory_hint_11d(struct wiphy *wiphy,
 	 * If we hit this before we add this support we want to be informed of
 	 * it as it would indicate a mistake in the current design
 	 */
-	if (WARN_ON(reg_same_country_ie_hint(wiphy, checksum)))
+	if (WARN_ON(reg_same_country_ie_hint(wiphy, alpha2, env)))
 		goto free_rd_out;
 
 	request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL);
 	if (!request)
 		goto free_rd_out;
 
-	/*
-	 * We keep this around for when CRDA comes back with a response so
-	 * we can intersect with that
-	 */
-	country_ie_regdomain = rd;
-
 	request->wiphy_idx = get_wiphy_idx(wiphy);
-	request->alpha2[0] = rd->alpha2[0];
-	request->alpha2[1] = rd->alpha2[1];
+	request->alpha2[0] = alpha2[0];
+	request->alpha2[1] = alpha2[1];
 	request->initiator = NL80211_REGDOM_SET_BY_COUNTRY_IE;
-	request->country_ie_checksum = checksum;
 	request->country_ie_env = env;
 
 	mutex_unlock(&reg_mutex);
@@ -2383,33 +1840,6 @@ static void print_regdomain_info(const struct ieee80211_regdomain *rd)
 	print_rd_rules(rd);
 }
 
-#ifdef CONFIG_CFG80211_REG_DEBUG
-static void reg_country_ie_process_debug(
-	const struct ieee80211_regdomain *rd,
-	const struct ieee80211_regdomain *country_ie_regdomain,
-	const struct ieee80211_regdomain *intersected_rd)
-{
-	printk(KERN_DEBUG "cfg80211: Received country IE:\n");
-	print_regdomain_info(country_ie_regdomain);
-	printk(KERN_DEBUG "cfg80211: CRDA thinks this should applied:\n");
-	print_regdomain_info(rd);
-	if (intersected_rd) {
-		printk(KERN_DEBUG "cfg80211: We intersect both of these "
-			"and get:\n");
-		print_regdomain_info(intersected_rd);
-		return;
-	}
-	printk(KERN_DEBUG "cfg80211: Intersection between both failed\n");
-}
-#else
-static inline void reg_country_ie_process_debug(
-	const struct ieee80211_regdomain *rd,
-	const struct ieee80211_regdomain *country_ie_regdomain,
-	const struct ieee80211_regdomain *intersected_rd)
-{
-}
-#endif
-
 /* Takes ownership of rd only if it doesn't fail */
 static int __set_regdom(const struct ieee80211_regdomain *rd)
 {
@@ -2521,34 +1951,6 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
 		return 0;
 	}
 
-	/*
-	 * Country IE requests are handled a bit differently, we intersect
-	 * the country IE rd with what CRDA believes that country should have
-	 */
-
-	/*
-	 * Userspace could have sent two replies with only
-	 * one kernel request. By the second reply we would have
-	 * already processed and consumed the country_ie_regdomain.
-	 */
-	if (!country_ie_regdomain)
-		return -EALREADY;
-	BUG_ON(rd == country_ie_regdomain);
-
-	/*
-	 * Intersect what CRDA returned and our what we
-	 * had built from the Country IE received
-	 */
-
-	intersected_rd = regdom_intersect(rd, country_ie_regdomain);
-
-	reg_country_ie_process_debug(rd,
-				     country_ie_regdomain,
-				     intersected_rd);
-
-	kfree(country_ie_regdomain);
-	country_ie_regdomain = NULL;
-
 	if (!intersected_rd)
 		return -EINVAL;
 
@@ -2688,9 +2090,6 @@ void /* __init_or_exit */ regulatory_exit(void)
 
 	reset_regdomains();
 
-	kfree(country_ie_regdomain);
-	country_ie_regdomain = NULL;
-
 	kfree(last_request);
 
 	platform_device_unregister(reg_pdev);
-- 
cgit v1.2.3-70-g09d2


From c28991a02caec1f3bfe4638ccf4e494c3e9418a3 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Tue, 20 Jul 2010 12:22:40 -0400
Subject: wireless: correct sparse warning in wext-compat.c

  CHECK   net/wireless/wext-compat.c
net/wireless/wext-compat.c:1434:5: warning: symbol 'cfg80211_wext_siwpmksa' was not declared. Should it be static?

Add declaration in cfg80211.h.  Also add an EXPORT_SYMBOL_GPL, since all
the peer functions have it.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h     | 4 ++++
 net/wireless/wext-compat.c | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 9b8b3f486ec..f68ae54cdae 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1963,6 +1963,10 @@ int cfg80211_wext_giwap(struct net_device *dev,
 			struct iw_request_info *info,
 			struct sockaddr *ap_addr, char *extra);
 
+int cfg80211_wext_siwpmksa(struct net_device *dev,
+			   struct iw_request_info *info,
+			   struct iw_point *data, char *extra);
+
 /*
  * callbacks for asynchronous cfg80211 methods, notification
  * functions and BSS handling helpers
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 1ff1e9f4913..bb5e0a5ecfa 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1471,6 +1471,7 @@ int cfg80211_wext_siwpmksa(struct net_device *dev,
 		return -EOPNOTSUPP;
 	}
 }
+EXPORT_SYMBOL_GPL(cfg80211_wext_siwpmksa);
 
 static const iw_handler cfg80211_handlers[] = {
 	[IW_IOCTL_IDX(SIOCGIWNAME)]	= (iw_handler) cfg80211_wext_giwname,
-- 
cgit v1.2.3-70-g09d2


From 07fca0e57fca925032526349f4370f97ed580cc9 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sat, 10 Jul 2010 08:35:00 +0200
Subject: tracing: Properly align linker defined symbols

We define a number of symbols in the linker script like this:

    __start_syscalls_metadata = .;
    *(__syscalls_metadata)

But we do not know the alignment of "." when we assign
the __start_syscalls_metadata symbol.
gcc started to use a bigger alignment for structs (32 bytes),
so we saw situations where the linker due to alignment
constraints increased the value of "." after the symbol assignment.

This resulted in boot failures.

Fix this by forcing a 32 byte alignment of "." before the
assignment.

This patch introduces the forced alignment for
ftrace_events and syscalls_metadata.
It may be required in more places.

Reported-by: Zeev Tarantov <zeev.tarantov@gmail.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
LKML-Reference: <20100710063459.GA14596@merkur.ravnborg.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/asm-generic/vmlinux.lds.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 48c5299cbf2..4b5902ad0d5 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -63,6 +63,12 @@
 /* Align . to a 8 byte boundary equals to maximum function alignment. */
 #define ALIGN_FUNCTION()  . = ALIGN(8)
 
+/*
+ * Align to a 32 byte boundary equal to the
+ * alignment gcc 4.5 uses for a struct
+ */
+#define STRUCT_ALIGN() . = ALIGN(32)
+
 /* The actual configuration determine if the init/exit sections
  * are handled as text/data or they can be discarded (which
  * often happens at runtime)
@@ -166,7 +172,11 @@
 	LIKELY_PROFILE()		       				\
 	BRANCH_PROFILE()						\
 	TRACE_PRINTKS()							\
+									\
+	STRUCT_ALIGN();							\
 	FTRACE_EVENTS()							\
+									\
+	STRUCT_ALIGN();							\
 	TRACE_SYSCALLS()
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 844b9a8707f1fcf0482e0c52f44a555e799ccda6 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 20 Jul 2010 13:24:34 -0700
Subject: vfs: fix RCU-lockdep false positive due to /proc

If a single-threaded process does a file-descriptor operation, and some
other process accesses that same file descriptor via /proc, the current
rcu_dereference_check_fdtable() can give a false-positive RCU-lockdep
splat due to the reference count being increased by the /proc access after
the reference-count check in fget_light() but before the check in
rcu_dereference_check_fdtable().

This commit prevents this false positive by checking for a single-threaded
process.  To avoid #include hell, this commit uses the wrapper for
thread_group_empty(current) defined by rcu_my_thread_group_empty()
provided in a separate commit.

Located-by: Miles Lane <miles.lane@gmail.com>
Located-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fdtable.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 013dc529e95..d147461bc27 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -61,7 +61,8 @@ struct files_struct {
 	(rcu_dereference_check((fdtfd), \
 			       rcu_read_lock_held() || \
 			       lockdep_is_held(&(files)->file_lock) || \
-			       atomic_read(&(files)->count) == 1))
+			       atomic_read(&(files)->count) == 1 || \
+			       rcu_my_thread_group_empty()))
 
 #define files_fdtable(files) \
 		(rcu_dereference_check_fdtable((files), (files)->fdt))
-- 
cgit v1.2.3-70-g09d2


From a6a1a095ec8ace2912fc280d371eee8ff5da5736 Mon Sep 17 00:00:00 2001
From: Doug Goldstein <cardoe@gentoo.org>
Date: Tue, 20 Jul 2010 15:22:25 -0700
Subject: include/linux/vgaarb.h: add missing part of include guard

vgaarb.h was missing the #define of the #ifndef at the top for the guard
to prevent multiple #include's from causing re-define errors

Signed-off-by: Doug Goldstein <cardoe@gentoo.org>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/vgaarb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h
index c9a97597699..814f294d4cd 100644
--- a/include/linux/vgaarb.h
+++ b/include/linux/vgaarb.h
@@ -29,6 +29,7 @@
  */
 
 #ifndef LINUX_VGA_H
+#define LINUX_VGA_H
 
 #include <asm/vga.h>
 
-- 
cgit v1.2.3-70-g09d2


From 92897b5c669f5e819ff2596fe6228ca2e4904981 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Fri, 16 Jul 2010 15:09:17 +1000
Subject: drm: add "auto" dithering method

There's no convenient/reliable way for drivers to both obey the dithering
mode property, and to be able to attempt to provide a good default in all
cases.

This commit adds an "auto" method to the property which drivers can default
to if they wish, whilst still allowing the user to override the choice as
they do now.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_crtc.c | 1 +
 include/drm/drm_mode.h     | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index b5802cf6664..d8d65f4232a 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -80,6 +80,7 @@ static struct drm_prop_enum_list drm_dithering_mode_enum_list[] =
 {
 	{ DRM_MODE_DITHERING_OFF, "Off" },
 	{ DRM_MODE_DITHERING_ON, "On" },
+	{ DRM_MODE_DITHERING_AUTO, "Automatic" },
 };
 
 /*
diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h
index c5ba1636613..0fc7397c8f1 100644
--- a/include/drm/drm_mode.h
+++ b/include/drm/drm_mode.h
@@ -74,6 +74,7 @@
 /* Dithering mode options */
 #define DRM_MODE_DITHERING_OFF	0
 #define DRM_MODE_DITHERING_ON	1
+#define DRM_MODE_DITHERING_AUTO 2
 
 /* Dirty info options */
 #define DRM_MODE_DIRTY_OFF      0
-- 
cgit v1.2.3-70-g09d2


From f8324e20f8289dffc646d64366332e05eaacab25 Mon Sep 17 00:00:00 2001
From: Mikael Pettersson <mikpe@it.uu.se>
Date: Tue, 20 Jul 2010 18:45:14 -0700
Subject: math-emu: correct test for downshifting fraction in _FP_FROM_INT()

The kernel's math-emu code contains a macro _FP_FROM_INT() which is
used to convert an integer to a raw normalized floating-point value.
It does this basically in three steps:

1. Compute the exponent from the number of leading zero bits.
2. Downshift large fractions to put the MSB in the right position
   for normalized fractions.
3. Upshift small fractions to put the MSB in the right position.

There is a boundary error in step 2, causing a fraction with its
MSB exactly one bit above the normalized MSB position to not be
downshifted.  This results in a non-normalized raw float, which when
packed becomes a massively inaccurate representation for that input.

The impact of this depends on a number of arch-specific factors,
but it is known to have broken emulation of FXTOD instructions
on UltraSPARC III, which was originally reported as GCC bug 44631
<http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44631>.

Any arch which uses math-emu to emulate conversions from integers to
same-size floats may be affected.

The fix is simple: the exponent comparison used to determine if the
fraction should be downshifted must be "<=" not "<".

I'm sending a kernel module to test this as a reply to this message.
There are also SPARC user-space test cases in the GCC bug entry.

Signed-off-by: Mikael Pettersson <mikpe@it.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/math-emu/op-common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/math-emu/op-common.h b/include/math-emu/op-common.h
index fd882261225..9696a5e2c43 100644
--- a/include/math-emu/op-common.h
+++ b/include/math-emu/op-common.h
@@ -799,7 +799,7 @@ do {									\
 		X##_e -= (_FP_W_TYPE_SIZE - rsize);			\
 	X##_e = rsize - X##_e - 1;					\
 									\
-	if (_FP_FRACBITS_##fs < rsize && _FP_WFRACBITS_##fs < X##_e)	\
+	if (_FP_FRACBITS_##fs < rsize && _FP_WFRACBITS_##fs <= X##_e)	\
 	  __FP_FRAC_SRS_1(ur_, (X##_e - _FP_WFRACBITS_##fs + 1), rsize);\
 	_FP_FRAC_DISASSEMBLE_##wc(X, ur_, rsize);			\
 	if ((_FP_WFRACBITS_##fs - X##_e - 1) > 0)			\
-- 
cgit v1.2.3-70-g09d2


From e870e9a1240bcef1157ffaaf71dac63362e71904 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 2 Jul 2010 11:07:32 +0800
Subject: tracing: Allow to disable cmdline recording

We found that even enabling a single trace event that will rarely be
triggered can add big overhead to context switch.

(lmbench context switch test)
 -------------------------------------------------
 2p/0K 2p/16K 2p/64K 8p/16K 8p/64K 16p/16K 16p/64K
 ctxsw  ctxsw  ctxsw ctxsw  ctxsw   ctxsw   ctxsw
------ ------ ------ ------ ------ ------- -------
  2.19   2.3   2.21   2.56   2.13     2.54    2.07
  2.39   2.51  2.35   2.75   2.27     2.81    2.24

The overhead is 6% ~ 11%.

It's because when a trace event is enabled 3 tracepoints (sched_switch,
sched_wakeup, sched_wakeup_new) will be activated to map pid to cmdname.

We'd like to avoid this overhead, so add a trace option '(no)record-cmd'
to allow disabling cmdline recording.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4C2D57F4.2050204@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |  7 +++++--
 kernel/trace/trace.c         |  6 +++++-
 kernel/trace/trace.h         |  3 +++
 kernel/trace/trace_events.c  | 30 ++++++++++++++++++++++++++++--
 4 files changed, 41 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 01df7ca4ead..2b7b1395b4d 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -152,11 +152,13 @@ extern int ftrace_event_reg(struct ftrace_event_call *event,
 enum {
 	TRACE_EVENT_FL_ENABLED_BIT,
 	TRACE_EVENT_FL_FILTERED_BIT,
+	TRACE_EVENT_FL_RECORDED_CMD_BIT,
 };
 
 enum {
-	TRACE_EVENT_FL_ENABLED	= (1 << TRACE_EVENT_FL_ENABLED_BIT),
-	TRACE_EVENT_FL_FILTERED	= (1 << TRACE_EVENT_FL_FILTERED_BIT),
+	TRACE_EVENT_FL_ENABLED		= (1 << TRACE_EVENT_FL_ENABLED_BIT),
+	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
+	TRACE_EVENT_FL_RECORDED_CMD	= (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
 };
 
 struct ftrace_event_call {
@@ -174,6 +176,7 @@ struct ftrace_event_call {
 	 * 32 bit flags:
 	 *   bit 1:		enabled
 	 *   bit 2:		filter_active
+	 *   bit 3:		enabled cmd record
 	 *
 	 * Changes to flags must hold the event_mutex.
 	 *
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8683dec6946..af9042977c0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -344,7 +344,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
 	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
-	TRACE_ITER_GRAPH_TIME;
+	TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD;
 
 static int trace_stop_count;
 static DEFINE_SPINLOCK(tracing_start_lock);
@@ -428,6 +428,7 @@ static const char *trace_options[] = {
 	"latency-format",
 	"sleep-time",
 	"graph-time",
+	"record-cmd",
 	NULL
 };
 
@@ -2561,6 +2562,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)
 		trace_flags |= mask;
 	else
 		trace_flags &= ~mask;
+
+	if (mask == TRACE_ITER_RECORD_CMD)
+		trace_event_enable_cmd_record(enabled);
 }
 
 static ssize_t
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 84d3f123e86..7778f067fc8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -591,6 +591,7 @@ enum trace_iterator_flags {
 	TRACE_ITER_LATENCY_FMT		= 0x20000,
 	TRACE_ITER_SLEEP_TIME		= 0x40000,
 	TRACE_ITER_GRAPH_TIME		= 0x80000,
+	TRACE_ITER_RECORD_CMD		= 0x100000,
 };
 
 /*
@@ -723,6 +724,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
 	return 0;
 }
 
+extern void trace_event_enable_cmd_record(bool enable);
+
 extern struct mutex event_mutex;
 extern struct list_head ftrace_events;
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index e8e6043f4d2..09b4fa6e4d3 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -170,6 +170,26 @@ int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
 }
 EXPORT_SYMBOL_GPL(ftrace_event_reg);
 
+void trace_event_enable_cmd_record(bool enable)
+{
+	struct ftrace_event_call *call;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(call, &ftrace_events, list) {
+		if (!(call->flags & TRACE_EVENT_FL_ENABLED))
+			continue;
+
+		if (enable) {
+			tracing_start_cmdline_record();
+			call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
+		} else {
+			tracing_stop_cmdline_record();
+			call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
+		}
+	}
+	mutex_unlock(&event_mutex);
+}
+
 static int ftrace_event_enable_disable(struct ftrace_event_call *call,
 					int enable)
 {
@@ -179,13 +199,19 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
 	case 0:
 		if (call->flags & TRACE_EVENT_FL_ENABLED) {
 			call->flags &= ~TRACE_EVENT_FL_ENABLED;
-			tracing_stop_cmdline_record();
+			if (call->flags & TRACE_EVENT_FL_RECORDED_CMD) {
+				tracing_stop_cmdline_record();
+				call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
+			}
 			call->class->reg(call, TRACE_REG_UNREGISTER);
 		}
 		break;
 	case 1:
 		if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {
-			tracing_start_cmdline_record();
+			if (trace_flags & TRACE_ITER_RECORD_CMD) {
+				tracing_start_cmdline_record();
+				call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
+			}
 			ret = call->class->reg(call, TRACE_REG_REGISTER);
 			if (ret) {
 				tracing_stop_cmdline_record();
-- 
cgit v1.2.3-70-g09d2


From bc289ae98b75d93228d24f521ef02a076e506e94 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Thu, 3 Jun 2010 18:26:24 +0800
Subject: tracing: Reduce latency and remove percpu trace_seq

__print_flags() and __print_symbolic() use percpu trace_seq:

1) Its memory is allocated at compile time, it wastes memory if we don't use tracing.
2) It is percpu data and it wastes more memory on multi-CPU systems.
3) It disables preemption when it executes its core routine
   "trace_seq_printf(s, "%s: ", #call);" and introduces latency.

So we move this trace_seq to struct trace_iterator.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <4C078350.7090106@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |  5 +++--
 include/trace/ftrace.h       | 12 +++---------
 kernel/trace/trace_output.c  |  3 ---
 3 files changed, 6 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 2b7b1395b4d..02b8b24f8f5 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -11,8 +11,6 @@ struct trace_array;
 struct tracer;
 struct dentry;
 
-DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
-
 struct trace_print_flags {
 	unsigned long		mask;
 	const char		*name;
@@ -58,6 +56,9 @@ struct trace_iterator {
 	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
 	unsigned long		iter_flags;
 
+	/* trace_seq for __print_flags() and __print_symbolic() etc. */
+	struct trace_seq	tmp_seq;
+
 	/* The below is zeroed out in pipe_read */
 	struct trace_seq	seq;
 	struct trace_entry	*ent;
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 55c1fd1bbc3..fb783d94fc5 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -145,7 +145,7 @@
  *	struct trace_seq *s = &iter->seq;
  *	struct ftrace_raw_<call> *field; <-- defined in stage 1
  *	struct trace_entry *entry;
- *	struct trace_seq *p;
+ *	struct trace_seq *p = &iter->tmp_seq;
  *	int ret;
  *
  *	entry = iter->ent;
@@ -157,12 +157,10 @@
  *
  *	field = (typeof(field))entry;
  *
- *	p = &get_cpu_var(ftrace_event_seq);
  *	trace_seq_init(p);
  *	ret = trace_seq_printf(s, "%s: ", <call>);
  *	if (ret)
  *		ret = trace_seq_printf(s, <TP_printk> "\n");
- *	put_cpu();
  *	if (!ret)
  *		return TRACE_TYPE_PARTIAL_LINE;
  *
@@ -216,7 +214,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,	\
 	struct trace_seq *s = &iter->seq;				\
 	struct ftrace_raw_##call *field;				\
 	struct trace_entry *entry;					\
-	struct trace_seq *p;						\
+	struct trace_seq *p = &iter->tmp_seq;				\
 	int ret;							\
 									\
 	event = container_of(trace_event, struct ftrace_event_call,	\
@@ -231,12 +229,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,	\
 									\
 	field = (typeof(field))entry;					\
 									\
-	p = &get_cpu_var(ftrace_event_seq);				\
 	trace_seq_init(p);						\
 	ret = trace_seq_printf(s, "%s: ", event->name);			\
 	if (ret)							\
 		ret = trace_seq_printf(s, print);			\
-	put_cpu();							\
 	if (!ret)							\
 		return TRACE_TYPE_PARTIAL_LINE;				\
 									\
@@ -255,7 +251,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,	\
 	struct trace_seq *s = &iter->seq;				\
 	struct ftrace_raw_##template *field;				\
 	struct trace_entry *entry;					\
-	struct trace_seq *p;						\
+	struct trace_seq *p = &iter->tmp_seq;				\
 	int ret;							\
 									\
 	entry = iter->ent;						\
@@ -267,12 +263,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,	\
 									\
 	field = (typeof(field))entry;					\
 									\
-	p = &get_cpu_var(ftrace_event_seq);				\
 	trace_seq_init(p);						\
 	ret = trace_seq_printf(s, "%s: ", #call);			\
 	if (ret)							\
 		ret = trace_seq_printf(s, print);			\
-	put_cpu();							\
 	if (!ret)							\
 		return TRACE_TYPE_PARTIAL_LINE;				\
 									\
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 57c1b459647..1ba64d3cc56 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -16,9 +16,6 @@
 
 DECLARE_RWSEM(trace_event_mutex);
 
-DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
-EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
-
 static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
 static int next_event_type = __TRACE_LAST_TYPE + 1;
-- 
cgit v1.2.3-70-g09d2


From ade7ce31c22e961dfbe1a6d57fd362c90c187cbd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jun 2010 10:56:01 +0200
Subject: quota: Clean up the namespace in dqblk_xfs.h

Almost all identifiers use the FS_* namespace, so rename the missing few
XFS_* ones to FS_* as well.  Without this some people might get upset
about having too many XFS names in generic code.

Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/gfs2/quota.c                 | 10 +++++-----
 fs/quota/dquot.c                |  2 +-
 fs/xfs/linux-2.6/xfs_quotaops.c | 10 +++++-----
 fs/xfs/quota/xfs_qm_syscalls.c  | 32 ++++++++++++++++----------------
 include/linux/dqblk_xfs.h       | 24 ++++++++++++------------
 5 files changed, 39 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b256d6f2428..ce345f8c69c 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1455,10 +1455,10 @@ static int gfs2_quota_get_xstate(struct super_block *sb,
 
 	switch (sdp->sd_args.ar_quota) {
 	case GFS2_QUOTA_ON:
-		fqs->qs_flags |= (XFS_QUOTA_UDQ_ENFD | XFS_QUOTA_GDQ_ENFD);
+		fqs->qs_flags |= (FS_QUOTA_UDQ_ENFD | FS_QUOTA_GDQ_ENFD);
 		/*FALLTHRU*/
 	case GFS2_QUOTA_ACCOUNT:
-		fqs->qs_flags |= (XFS_QUOTA_UDQ_ACCT | XFS_QUOTA_GDQ_ACCT);
+		fqs->qs_flags |= (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT);
 		break;
 	case GFS2_QUOTA_OFF:
 		break;
@@ -1504,7 +1504,7 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
 
 	qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
 	fdq->d_version = FS_DQUOT_VERSION;
-	fdq->d_flags = (type == QUOTA_USER) ? XFS_USER_QUOTA : XFS_GROUP_QUOTA;
+	fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
 	fdq->d_id = id;
 	fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit);
 	fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn);
@@ -1539,12 +1539,12 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
 	switch(type) {
 	case USRQUOTA:
 		type = QUOTA_USER;
-		if (fdq->d_flags != XFS_USER_QUOTA)
+		if (fdq->d_flags != FS_USER_QUOTA)
 			return -EINVAL;
 		break;
 	case GRPQUOTA:
 		type = QUOTA_GROUP;
-		if (fdq->d_flags != XFS_GROUP_QUOTA)
+		if (fdq->d_flags != FS_GROUP_QUOTA)
 			return -EINVAL;
 		break;
 	default:
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index a5974c49a78..2857fd67ff3 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2281,7 +2281,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
 	memset(di, 0, sizeof(*di));
 	di->d_version = FS_DQUOT_VERSION;
 	di->d_flags = dquot->dq_type == USRQUOTA ?
-			XFS_USER_QUOTA : XFS_GROUP_QUOTA;
+			FS_USER_QUOTA : FS_GROUP_QUOTA;
 	di->d_id = dquot->dq_id;
 
 	spin_lock(&dq_data_lock);
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 067cafbfc63..b9ba7536f4b 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -69,15 +69,15 @@ xfs_fs_set_xstate(
 	if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
 		return -ENOSYS;
 
-	if (uflags & XFS_QUOTA_UDQ_ACCT)
+	if (uflags & FS_QUOTA_UDQ_ACCT)
 		flags |= XFS_UQUOTA_ACCT;
-	if (uflags & XFS_QUOTA_PDQ_ACCT)
+	if (uflags & FS_QUOTA_PDQ_ACCT)
 		flags |= XFS_PQUOTA_ACCT;
-	if (uflags & XFS_QUOTA_GDQ_ACCT)
+	if (uflags & FS_QUOTA_GDQ_ACCT)
 		flags |= XFS_GQUOTA_ACCT;
-	if (uflags & XFS_QUOTA_UDQ_ENFD)
+	if (uflags & FS_QUOTA_UDQ_ENFD)
 		flags |= XFS_UQUOTA_ENFD;
-	if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD))
+	if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD))
 		flags |= XFS_OQUOTA_ENFD;
 
 	switch (op) {
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index b4487764e92..41b04b96975 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -786,9 +786,9 @@ xfs_qm_export_dquot(
 	}
 
 #ifdef DEBUG
-	if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == XFS_USER_QUOTA) ||
+	if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) ||
 	     (XFS_IS_OQUOTA_ENFORCED(mp) &&
-			(dst->d_flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)))) &&
+			(dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
 	    dst->d_id != 0) {
 		if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
 		    (dst->d_blk_softlimit > 0)) {
@@ -809,17 +809,17 @@ xfs_qm_export_qtype_flags(
 	/*
 	 * Can't be more than one, or none.
 	 */
-	ASSERT((flags & (XFS_PROJ_QUOTA | XFS_USER_QUOTA)) !=
-		(XFS_PROJ_QUOTA | XFS_USER_QUOTA));
-	ASSERT((flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)) !=
-		(XFS_PROJ_QUOTA | XFS_GROUP_QUOTA));
-	ASSERT((flags & (XFS_USER_QUOTA | XFS_GROUP_QUOTA)) !=
-		(XFS_USER_QUOTA | XFS_GROUP_QUOTA));
-	ASSERT((flags & (XFS_PROJ_QUOTA|XFS_USER_QUOTA|XFS_GROUP_QUOTA)) != 0);
+	ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) !=
+		(FS_PROJ_QUOTA | FS_USER_QUOTA));
+	ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) !=
+		(FS_PROJ_QUOTA | FS_GROUP_QUOTA));
+	ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) !=
+		(FS_USER_QUOTA | FS_GROUP_QUOTA));
+	ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0);
 
 	return (flags & XFS_DQ_USER) ?
-		XFS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
-			XFS_PROJ_QUOTA : XFS_GROUP_QUOTA;
+		FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
+			FS_PROJ_QUOTA : FS_GROUP_QUOTA;
 }
 
 STATIC uint
@@ -830,16 +830,16 @@ xfs_qm_export_flags(
 
 	uflags = 0;
 	if (flags & XFS_UQUOTA_ACCT)
-		uflags |= XFS_QUOTA_UDQ_ACCT;
+		uflags |= FS_QUOTA_UDQ_ACCT;
 	if (flags & XFS_PQUOTA_ACCT)
-		uflags |= XFS_QUOTA_PDQ_ACCT;
+		uflags |= FS_QUOTA_PDQ_ACCT;
 	if (flags & XFS_GQUOTA_ACCT)
-		uflags |= XFS_QUOTA_GDQ_ACCT;
+		uflags |= FS_QUOTA_GDQ_ACCT;
 	if (flags & XFS_UQUOTA_ENFD)
-		uflags |= XFS_QUOTA_UDQ_ENFD;
+		uflags |= FS_QUOTA_UDQ_ENFD;
 	if (flags & (XFS_OQUOTA_ENFD)) {
 		uflags |= (flags & XFS_GQUOTA_ACCT) ?
-			XFS_QUOTA_GDQ_ENFD : XFS_QUOTA_PDQ_ENFD;
+			FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD;
 	}
 	return (uflags);
 }
diff --git a/include/linux/dqblk_xfs.h b/include/linux/dqblk_xfs.h
index 4389ae72024..86552807aed 100644
--- a/include/linux/dqblk_xfs.h
+++ b/include/linux/dqblk_xfs.h
@@ -49,7 +49,7 @@
 #define FS_DQUOT_VERSION	1	/* fs_disk_quota.d_version */
 typedef struct fs_disk_quota {
 	__s8		d_version;	/* version of this structure */
-	__s8		d_flags;	/* XFS_{USER,PROJ,GROUP}_QUOTA */
+	__s8		d_flags;	/* FS_{USER,PROJ,GROUP}_QUOTA */
 	__u16		d_fieldmask;	/* field specifier */
 	__u32		d_id;		/* user, project, or group ID */
 	__u64		d_blk_hardlimit;/* absolute limit on disk blks */
@@ -119,18 +119,18 @@ typedef struct fs_disk_quota {
 #define FS_DQ_ACCT_MASK		(FS_DQ_BCOUNT | FS_DQ_ICOUNT | FS_DQ_RTBCOUNT)
 
 /*
- * Various flags related to quotactl(2).  Only relevant to XFS filesystems.
+ * Various flags related to quotactl(2).
  */
-#define XFS_QUOTA_UDQ_ACCT	(1<<0)  /* user quota accounting */
-#define XFS_QUOTA_UDQ_ENFD	(1<<1)  /* user quota limits enforcement */
-#define XFS_QUOTA_GDQ_ACCT	(1<<2)  /* group quota accounting */
-#define XFS_QUOTA_GDQ_ENFD	(1<<3)  /* group quota limits enforcement */
-#define XFS_QUOTA_PDQ_ACCT	(1<<4)  /* project quota accounting */
-#define XFS_QUOTA_PDQ_ENFD	(1<<5)  /* project quota limits enforcement */
+#define FS_QUOTA_UDQ_ACCT	(1<<0)  /* user quota accounting */
+#define FS_QUOTA_UDQ_ENFD	(1<<1)  /* user quota limits enforcement */
+#define FS_QUOTA_GDQ_ACCT	(1<<2)  /* group quota accounting */
+#define FS_QUOTA_GDQ_ENFD	(1<<3)  /* group quota limits enforcement */
+#define FS_QUOTA_PDQ_ACCT	(1<<4)  /* project quota accounting */
+#define FS_QUOTA_PDQ_ENFD	(1<<5)  /* project quota limits enforcement */
 
-#define XFS_USER_QUOTA		(1<<0)	/* user quota type */
-#define XFS_PROJ_QUOTA		(1<<1)	/* project quota type */
-#define XFS_GROUP_QUOTA		(1<<2)	/* group quota type */
+#define FS_USER_QUOTA		(1<<0)	/* user quota type */
+#define FS_PROJ_QUOTA		(1<<1)	/* project quota type */
+#define FS_GROUP_QUOTA		(1<<2)	/* group quota type */
 
 /*
  * fs_quota_stat is the struct returned in Q_XGETQSTAT for a given file system.
@@ -151,7 +151,7 @@ typedef struct fs_qfilestat {
 
 typedef struct fs_quota_stat {
 	__s8		qs_version;	/* version number for future changes */
-	__u16		qs_flags;	/* XFS_QUOTA_{U,P,G}DQ_{ACCT,ENFD} */
+	__u16		qs_flags;	/* FS_QUOTA_{U,P,G}DQ_{ACCT,ENFD} */
 	__s8		qs_pad;		/* unused */
 	fs_qfilestat_t	qs_uquota;	/* user quota storage information */
 	fs_qfilestat_t	qs_gquota;	/* group quota storage information */
-- 
cgit v1.2.3-70-g09d2


From 189eef59e70e3e56edf726864629f310d114eefb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jun 2010 10:56:29 +0200
Subject: quota: clean up quota active checks

The various quota operations check for any quota being active on
a superblock, and the inode not having the noquota flag.

Merge these two checks into a dquot_active check and move that
into dquot.c as that's the only place where it's needed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c         | 23 ++++++++++++++++-------
 include/linux/quotaops.h | 10 ----------
 2 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 2857fd67ff3..2eebf72d07c 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1315,6 +1315,15 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space)
 	return QUOTA_NL_NOWARN;
 }
 
+static int dquot_active(const struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	if (IS_NOQUOTA(inode))
+		return 0;
+	return sb_any_quota_loaded(sb) & ~sb_any_quota_suspended(sb);
+}
+
 /*
  * Initialize quota pointers in inode
  *
@@ -1334,7 +1343,7 @@ static void __dquot_initialize(struct inode *inode, int type)
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
-	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode))
+	if (!dquot_active(inode))
 		return;
 
 	/* First get references to structures we might need. */
@@ -1518,7 +1527,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
 	 * First test before acquiring mutex - solves deadlocks when we
 	 * re-enter the quota code and are already holding the mutex
 	 */
-	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) {
+	if (!dquot_active(inode)) {
 		inode_incr_space(inode, number, reserve);
 		goto out;
 	}
@@ -1570,7 +1579,7 @@ int dquot_alloc_inode(const struct inode *inode)
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
-	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode))
+	if (!dquot_active(inode))
 		return 0;
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		warntype[cnt] = QUOTA_NL_NOWARN;
@@ -1607,7 +1616,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
 {
 	int cnt;
 
-	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) {
+	if (!dquot_active(inode)) {
 		inode_claim_rsv_space(inode, number);
 		return 0;
 	}
@@ -1640,7 +1649,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
-	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) {
+	if (!dquot_active(inode)) {
 		inode_decr_space(inode, number, reserve);
 		return;
 	}
@@ -1678,7 +1687,7 @@ void dquot_free_inode(const struct inode *inode)
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
-	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode))
+	if (!dquot_active(inode))
 		return;
 
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1801,7 +1810,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 	struct super_block *sb = inode->i_sb;
 	int ret;
 
-	if (!sb_any_quota_active(sb) || IS_NOQUOTA(inode))
+	if (!dquot_active(inode))
 		return 0;
 
 	if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid)
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index aa36793b48b..126193c1a5c 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -145,11 +145,6 @@ static inline bool sb_has_quota_active(struct super_block *sb, int type)
 	       !sb_has_quota_suspended(sb, type);
 }
 
-static inline unsigned sb_any_quota_active(struct super_block *sb)
-{
-	return sb_any_quota_loaded(sb) & ~sb_any_quota_suspended(sb);
-}
-
 /*
  * Operations supported for diskquotas.
  */
@@ -194,11 +189,6 @@ static inline int sb_has_quota_active(struct super_block *sb, int type)
 	return 0;
 }
 
-static inline int sb_any_quota_active(struct super_block *sb)
-{
-	return 0;
-}
-
 static inline void dquot_initialize(struct inode *inode)
 {
 }
-- 
cgit v1.2.3-70-g09d2


From 4c4d3901225518ed1a4c938ba15ba09842a00770 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 7 Jun 2010 10:20:39 +0200
Subject: ext3: remove vestiges of nobh support

The nobh option was only supported for writeback mode, but given that all
write paths (except mmapped writes) actually create buffer heads, it
effectively was a no-op already.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c         | 16 +---------------
 fs/ext3/super.c         | 17 ++++-------------
 include/linux/ext3_fs.h |  1 -
 3 files changed, 5 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 735f0190ec2..a786db403ef 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1625,10 +1625,7 @@ static int ext3_writeback_writepage(struct page *page,
 		goto out_fail;
 	}
 
-	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
-		ret = nobh_writepage(page, ext3_get_block, wbc);
-	else
-		ret = block_write_full_page(page, ext3_get_block, wbc);
+	ret = block_write_full_page(page, ext3_get_block, wbc);
 
 	err = ext3_journal_stop(handle);
 	if (!ret)
@@ -1922,17 +1919,6 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
 	length = blocksize - (offset & (blocksize - 1));
 	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 
-	/*
-	 * For "nobh" option,  we can only work if we don't need to
-	 * read-in the page - otherwise we create buffers to do the IO.
-	 */
-	if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
-	     ext3_should_writeback_data(inode) && PageUptodate(page)) {
-		zero_user(page, offset, length);
-		set_page_dirty(page);
-		goto unlock;
-	}
-
 	if (!page_has_buffers(page))
 		create_empty_buffers(page, blocksize, 0);
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 6c953bb255e..9650a956fd0 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -661,9 +661,6 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	 */
 	seq_puts(seq, ",barrier=");
 	seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
-	if (test_opt(sb, NOBH))
-		seq_puts(seq, ",nobh");
-
 	seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS)));
 	if (test_opt(sb, DATA_ERR_ABORT))
 		seq_puts(seq, ",data_err=abort");
@@ -1255,10 +1252,12 @@ set_qf_format:
 			*n_blocks_count = option;
 			break;
 		case Opt_nobh:
-			set_opt(sbi->s_mount_opt, NOBH);
+			ext3_msg(sb, KERN_WARNING,
+				"warning: ignoring deprecated nobh option");
 			break;
 		case Opt_bh:
-			clear_opt(sbi->s_mount_opt, NOBH);
+			ext3_msg(sb, KERN_WARNING,
+				"warning: ignoring deprecated bh option");
 			break;
 		default:
 			ext3_msg(sb, KERN_ERR,
@@ -2001,14 +2000,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		break;
 	}
 
-	if (test_opt(sb, NOBH)) {
-		if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
-			ext3_msg(sb, KERN_WARNING,
-				"warning: ignoring nobh option - "
-				"it is supported only with writeback mode");
-			clear_opt(sbi->s_mount_opt, NOBH);
-		}
-	}
 	/*
 	 * The journal_load will have done any necessary log recovery,
 	 * so we can safely mount the rest of the filesystem now.
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 7fc62d4550b..3d3a9915dde 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -400,7 +400,6 @@ struct ext3_inode {
 #define EXT3_MOUNT_POSIX_ACL		0x08000	/* POSIX Access Control Lists */
 #define EXT3_MOUNT_RESERVATION		0x10000	/* Preallocation */
 #define EXT3_MOUNT_BARRIER		0x20000 /* Use block barriers */
-#define EXT3_MOUNT_NOBH			0x40000 /* No bufferheads */
 #define EXT3_MOUNT_QUOTA		0x80000 /* Some quota option set */
 #define EXT3_MOUNT_USRQUOTA		0x100000 /* "old" user quota */
 #define EXT3_MOUNT_GRPQUOTA		0x200000 /* "old" group quota */
-- 
cgit v1.2.3-70-g09d2


From fb5ffb0e160c93c3fe08ab83845eb9a2768af812 Mon Sep 17 00:00:00 2001
From: Jiaying Zhang <jiayingz@google.com>
Date: Tue, 20 Jul 2010 16:54:43 +0200
Subject: quota: Change quota error message to print out disk and function name

The current quota error message doesn't always print the disk name, so
it is hard to identify the "bad" disk when a quota error happens.

This patch changes the standardized quota error message to print out disk name
and function name. It also uses a combination of cpp macro and inline function
to provide better type checking and to lower the text size of the message.

[Jan Kara: Export __quota_error]

Signed-off-by: Jiaying Zhang <jiayingz@google.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c         | 39 +++++++++++++++-------
 fs/quota/quota_tree.c    | 85 ++++++++++++++++++++++++------------------------
 fs/quota/quota_tree.h    |  6 ----
 fs/quota/quota_v1.c      |  3 +-
 fs/quota/quota_v2.c      | 11 +++----
 include/linux/quotaops.h |  6 ++++
 6 files changed, 80 insertions(+), 70 deletions(-)

(limited to 'include')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 2eebf72d07c..b171221000f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -132,6 +132,22 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
 EXPORT_SYMBOL(dq_data_lock);
 
+void __quota_error(struct super_block *sb, const char *func,
+		  const char *fmt, ...)
+{
+	va_list args;
+
+	if (printk_ratelimit()) {
+		va_start(args, fmt);
+		printk(KERN_ERR "Quota error (device %s): %s: ",
+		       sb->s_id, func);
+		vprintk(fmt, args);
+		printk("\n");
+		va_end(args);
+	}
+}
+EXPORT_SYMBOL(__quota_error);
+
 #if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING)
 static char *quotatypes[] = INITQFNAMES;
 #endif
@@ -705,11 +721,8 @@ void dqput(struct dquot *dquot)
 		return;
 #ifdef CONFIG_QUOTA_DEBUG
 	if (!atomic_read(&dquot->dq_count)) {
-		printk("VFS: dqput: trying to free free dquot\n");
-		printk("VFS: device %s, dquot of %s %d\n",
-			dquot->dq_sb->s_id,
-			quotatypes[dquot->dq_type],
-			dquot->dq_id);
+		quota_error(dquot->dq_sb, "trying to free free dquot of %s %d",
+			    quotatypes[dquot->dq_type], dquot->dq_id);
 		BUG();
 	}
 #endif
@@ -732,9 +745,9 @@ we_slept:
 		/* Commit dquot before releasing */
 		ret = dquot->dq_sb->dq_op->write_dquot(dquot);
 		if (ret < 0) {
-			printk(KERN_ERR "VFS: cannot write quota structure on "
-				"device %s (error %d). Quota may get out of "
-				"sync!\n", dquot->dq_sb->s_id, ret);
+			quota_error(dquot->dq_sb, "Can't write quota structure"
+				    " (error %d). Quota may get out of sync!",
+				    ret);
 			/*
 			 * We clear dirty bit anyway, so that we avoid
 			 * infinite loop here
@@ -914,9 +927,9 @@ static void add_dquot_ref(struct super_block *sb, int type)
 
 #ifdef CONFIG_QUOTA_DEBUG
 	if (reserved) {
-		printk(KERN_WARNING "VFS (%s): Writes happened before quota"
-			" was turned on thus quota information is probably "
-			"inconsistent. Please run quotacheck(8).\n", sb->s_id);
+		quota_error(sb, "Writes happened before quota was turned on "
+			"thus quota information is probably inconsistent. "
+			"Please run quotacheck(8)");
 	}
 #endif
 }
@@ -947,7 +960,9 @@ static int remove_inode_dquot_ref(struct inode *inode, int type,
 		if (dqput_blocks(dquot)) {
 #ifdef CONFIG_QUOTA_DEBUG
 			if (atomic_read(&dquot->dq_count) != 1)
-				printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", atomic_read(&dquot->dq_count));
+				quota_error(inode->i_sb, "Adding dquot with "
+					    "dq_count %d to dispose list",
+					    atomic_read(&dquot->dq_count));
 #endif
 			spin_lock(&dq_list_lock);
 			/* As dquot must have currently users it can't be on
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 24f03407eeb..9e48874eabc 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -65,8 +65,7 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
 	ret = sb->s_op->quota_write(sb, info->dqi_type, buf,
 	       info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
 	if (ret != info->dqi_usable_bs) {
-		q_warn(KERN_WARNING "VFS: dquota write failed on "
-			"dev %s\n", sb->s_id);
+		quota_error(sb, "dquota write failed");
 		if (ret >= 0)
 			ret = -EIO;
 	}
@@ -160,9 +159,8 @@ static int remove_free_dqentry(struct qtree_mem_dqinfo *info, char *buf,
 	dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
 	/* No matter whether write succeeds block is out of list */
 	if (write_blk(info, blk, buf) < 0)
-		q_warn(KERN_ERR
-		       "VFS: Can't write block (%u) with free entries.\n",
-		       blk);
+		quota_error(info->dqi_sb, "Can't write block (%u) "
+			    "with free entries", blk);
 	return 0;
 out_buf:
 	kfree(tmpbuf);
@@ -252,9 +250,8 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
 	if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) {
 		*err = remove_free_dqentry(info, buf, blk);
 		if (*err < 0) {
-			q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't "
-			       "remove block (%u) from entry free list.\n",
-			       blk);
+			quota_error(dquot->dq_sb, "Can't remove block (%u) "
+				    "from entry free list", blk);
 			goto out_buf;
 		}
 	}
@@ -268,16 +265,15 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
 	}
 #ifdef __QUOTA_QT_PARANOIA
 	if (i == qtree_dqstr_in_blk(info)) {
-		printk(KERN_ERR "VFS: find_free_dqentry(): Data block full "
-				"but it shouldn't.\n");
+		quota_error(dquot->dq_sb, "Data block full but it shouldn't");
 		*err = -EIO;
 		goto out_buf;
 	}
 #endif
 	*err = write_blk(info, blk, buf);
 	if (*err < 0) {
-		q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't write quota "
-				"data block %u.\n", blk);
+		quota_error(dquot->dq_sb, "Can't write quota data block %u",
+			    blk);
 		goto out_buf;
 	}
 	dquot->dq_off = (blk << info->dqi_blocksize_bits) +
@@ -311,8 +307,8 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
 	} else {
 		ret = read_blk(info, *treeblk, buf);
 		if (ret < 0) {
-			q_warn(KERN_ERR "VFS: Can't read tree quota block "
-					"%u.\n", *treeblk);
+			quota_error(dquot->dq_sb, "Can't read tree quota "
+				    "block %u", *treeblk);
 			goto out_buf;
 		}
 	}
@@ -323,9 +319,9 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
 	if (depth == info->dqi_qtree_depth - 1) {
 #ifdef __QUOTA_QT_PARANOIA
 		if (newblk) {
-			printk(KERN_ERR "VFS: Inserting already present quota "
-					"entry (block %u).\n",
-			       le32_to_cpu(ref[get_index(info,
+			quota_error(dquot->dq_sb, "Inserting already present "
+				    "quota entry (block %u)",
+				    le32_to_cpu(ref[get_index(info,
 						dquot->dq_id, depth)]));
 			ret = -EIO;
 			goto out_buf;
@@ -373,8 +369,8 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
 	if (!dquot->dq_off) {
 		ret = dq_insert_tree(info, dquot);
 		if (ret < 0) {
-			q_warn(KERN_ERR "VFS: Error %zd occurred while "
-					"creating quota.\n", ret);
+			quota_error(sb, "Error %zd occurred while creating "
+				    "quota", ret);
 			kfree(ddquot);
 			return ret;
 		}
@@ -385,8 +381,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
 	ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size,
 				    dquot->dq_off);
 	if (ret != info->dqi_entry_size) {
-		q_warn(KERN_WARNING "VFS: dquota write failed on dev %s\n",
-		       sb->s_id);
+		quota_error(sb, "dquota write failed");
 		if (ret >= 0)
 			ret = -ENOSPC;
 	} else {
@@ -410,14 +405,15 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
 	if (!buf)
 		return -ENOMEM;
 	if (dquot->dq_off >> info->dqi_blocksize_bits != blk) {
-		q_warn(KERN_ERR "VFS: Quota structure has offset to other "
-		  "block (%u) than it should (%u).\n", blk,
-		  (uint)(dquot->dq_off >> info->dqi_blocksize_bits));
+		quota_error(dquot->dq_sb, "Quota structure has offset to "
+			"other block (%u) than it should (%u)", blk,
+			(uint)(dquot->dq_off >> info->dqi_blocksize_bits));
 		goto out_buf;
 	}
 	ret = read_blk(info, blk, buf);
 	if (ret < 0) {
-		q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
+		quota_error(dquot->dq_sb, "Can't read quota data block %u",
+			    blk);
 		goto out_buf;
 	}
 	dh = (struct qt_disk_dqdbheader *)buf;
@@ -427,8 +423,8 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
 		if (ret >= 0)
 			ret = put_free_dqblk(info, buf, blk);
 		if (ret < 0) {
-			q_warn(KERN_ERR "VFS: Can't move quota data block (%u) "
-			  "to free list.\n", blk);
+			quota_error(dquot->dq_sb, "Can't move quota data block "
+				    "(%u) to free list", blk);
 			goto out_buf;
 		}
 	} else {
@@ -440,15 +436,15 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
 			/* Insert will write block itself */
 			ret = insert_free_dqentry(info, buf, blk);
 			if (ret < 0) {
-				q_warn(KERN_ERR "VFS: Can't insert quota data "
-				       "block (%u) to free entry list.\n", blk);
+				quota_error(dquot->dq_sb, "Can't insert quota "
+				    "data block (%u) to free entry list", blk);
 				goto out_buf;
 			}
 		} else {
 			ret = write_blk(info, blk, buf);
 			if (ret < 0) {
-				q_warn(KERN_ERR "VFS: Can't write quota data "
-				  "block %u\n", blk);
+				quota_error(dquot->dq_sb, "Can't write quota "
+					    "data block %u", blk);
 				goto out_buf;
 			}
 		}
@@ -472,7 +468,8 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
 		return -ENOMEM;
 	ret = read_blk(info, *blk, buf);
 	if (ret < 0) {
-		q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
+		quota_error(dquot->dq_sb, "Can't read quota data "
+			    "block %u", *blk);
 		goto out_buf;
 	}
 	newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
@@ -496,8 +493,8 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
 		} else {
 			ret = write_blk(info, *blk, buf);
 			if (ret < 0)
-				q_warn(KERN_ERR "VFS: Can't write quota tree "
-				  "block %u.\n", *blk);
+				quota_error(dquot->dq_sb, "Can't write quota "
+					    "tree block %u", *blk);
 		}
 	}
 out_buf:
@@ -529,7 +526,8 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
 		return -ENOMEM;
 	ret = read_blk(info, blk, buf);
 	if (ret < 0) {
-		q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+		quota_error(dquot->dq_sb, "Can't read quota tree "
+			    "block %u", blk);
 		goto out_buf;
 	}
 	ddquot = buf + sizeof(struct qt_disk_dqdbheader);
@@ -539,8 +537,8 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
 		ddquot += info->dqi_entry_size;
 	}
 	if (i == qtree_dqstr_in_blk(info)) {
-		q_warn(KERN_ERR "VFS: Quota for id %u referenced "
-		  "but not present.\n", dquot->dq_id);
+		quota_error(dquot->dq_sb, "Quota for id %u referenced "
+			    "but not present", dquot->dq_id);
 		ret = -EIO;
 		goto out_buf;
 	} else {
@@ -564,7 +562,8 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
 		return -ENOMEM;
 	ret = read_blk(info, blk, buf);
 	if (ret < 0) {
-		q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+		quota_error(dquot->dq_sb, "Can't read quota tree block %u",
+			    blk);
 		goto out_buf;
 	}
 	ret = 0;
@@ -598,7 +597,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
 #ifdef __QUOTA_QT_PARANOIA
 	/* Invalidated quota? */
 	if (!sb_dqopt(dquot->dq_sb)->files[type]) {
-		printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
+		quota_error(sb, "Quota invalidated while reading!");
 		return -EIO;
 	}
 #endif
@@ -607,8 +606,8 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
 		offset = find_dqentry(info, dquot);
 		if (offset <= 0) {	/* Entry not present? */
 			if (offset < 0)
-				q_warn(KERN_ERR "VFS: Can't read quota "
-				  "structure for id %u.\n", dquot->dq_id);
+				quota_error(sb, "Can't read quota structure "
+					    "for id %u", dquot->dq_id);
 			dquot->dq_off = 0;
 			set_bit(DQ_FAKE_B, &dquot->dq_flags);
 			memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
@@ -625,8 +624,8 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
 	if (ret != info->dqi_entry_size) {
 		if (ret >= 0)
 			ret = -EIO;
-		q_warn(KERN_ERR "VFS: Error while reading quota "
-				"structure for id %u.\n", dquot->dq_id);
+		quota_error(sb, "Error while reading quota structure for id %u",
+			    dquot->dq_id);
 		set_bit(DQ_FAKE_B, &dquot->dq_flags);
 		memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
 		kfree(ddquot);
diff --git a/fs/quota/quota_tree.h b/fs/quota/quota_tree.h
index ccc3e71fb1d..a1ab8db81a5 100644
--- a/fs/quota/quota_tree.h
+++ b/fs/quota/quota_tree.h
@@ -22,10 +22,4 @@ struct qt_disk_dqdbheader {
 
 #define QT_TREEOFF	1		/* Offset of tree in file in blocks */
 
-#define q_warn(fmt, args...) \
-do { \
-	if (printk_ratelimit()) \
-		printk(fmt, ## args); \
-} while(0)
-
 #endif /* _LINUX_QUOTAIO_TREE_H */
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 4af344c5852..34b37a67bb1 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -95,8 +95,7 @@ static int v1_commit_dqblk(struct dquot *dquot)
 			(char *)&dqblk, sizeof(struct v1_disk_dqblk),
 			v1_dqoff(dquot->dq_id));
 	if (ret != sizeof(struct v1_disk_dqblk)) {
-		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
-			dquot->dq_sb->s_id);
+		quota_error(dquot->dq_sb, "dquota write failed");
 		if (ret >= 0)
 			ret = -EIO;
 		goto out;
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 135206af145..65444d29406 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -63,9 +63,8 @@ static int v2_read_header(struct super_block *sb, int type,
 	size = sb->s_op->quota_read(sb, type, (char *)dqhead,
 				    sizeof(struct v2_disk_dqheader), 0);
 	if (size != sizeof(struct v2_disk_dqheader)) {
-		q_warn(KERN_WARNING "quota_v2: Failed header read:"
-		       " expected=%zd got=%zd\n",
-			sizeof(struct v2_disk_dqheader), size);
+		quota_error(sb, "Failed header read: expected=%zd got=%zd",
+			    sizeof(struct v2_disk_dqheader), size);
 		return 0;
 	}
 	return 1;
@@ -106,8 +105,7 @@ static int v2_read_file_info(struct super_block *sb, int type)
 	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
 	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
 	if (size != sizeof(struct v2_disk_dqinfo)) {
-		q_warn(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n",
-			sb->s_id);
+		quota_error(sb, "Can't read info structure");
 		return -1;
 	}
 	info->dqi_priv = kmalloc(sizeof(struct qtree_mem_dqinfo), GFP_NOFS);
@@ -167,8 +165,7 @@ static int v2_write_file_info(struct super_block *sb, int type)
 	size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
 	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
 	if (size != sizeof(struct v2_disk_dqinfo)) {
-		q_warn(KERN_WARNING "Can't write info structure on device %s.\n",
-			sb->s_id);
+		quota_error(sb, "Can't write info structure");
 		return -1;
 	}
 	return 0;
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 126193c1a5c..4881b49b1a9 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -28,6 +28,12 @@ static inline bool is_quota_modification(struct inode *inode, struct iattr *ia)
 
 #if defined(CONFIG_QUOTA)
 
+#define quota_error(sb, fmt, args...) \
+	__quota_error((sb), __func__, fmt , ## args)
+
+extern void __quota_error(struct super_block *sb, const char *func,
+			 const char *fmt, ...);
+
 /*
  * declaration of quota_function calls in kernel.
  */
-- 
cgit v1.2.3-70-g09d2


From 9849ed4d72251d273524efb8b70be0be9aecb1df Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 20 Jul 2010 03:13:35 -0400
Subject: tracing/documentation: Document dynamic ftracer internals

Add more details to the dynamic function tracing design implementation.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
LKML-Reference: <1279610015-10250-1-git-send-email-vapier@gentoo.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 Documentation/trace/ftrace-design.txt | 153 ++++++++++++++++++++++++++++++++--
 include/linux/ftrace.h                |   5 ++
 2 files changed, 153 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
index f1f81afee8a..dc52bd442c9 100644
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -13,6 +13,9 @@ Note that this focuses on architecture implementation details only.  If you
 want more explanation of a feature in terms of common code, review the common
 ftrace.txt file.
 
+Ideally, everyone who wishes to retain performance while supporting tracing in
+their kernel should make it all the way to dynamic ftrace support.
+
 
 Prerequisites
 -------------
@@ -215,7 +218,7 @@ An arch may pass in a unique value (frame pointer) to both the entering and
 exiting of a function.  On exit, the value is compared and if it does not
 match, then it will panic the kernel.  This is largely a sanity check for bad
 code generation with gcc.  If gcc for your port sanely updates the frame
-pointer under different opitmization levels, then ignore this option.
+pointer under different optimization levels, then ignore this option.
 
 However, adding support for it isn't terribly difficult.  In your assembly code
 that calls prepare_ftrace_return(), pass the frame pointer as the 3rd argument.
@@ -234,7 +237,7 @@ If you can't trace NMI functions, then skip this option.
 
 
 HAVE_SYSCALL_TRACEPOINTS
----------------------
+------------------------
 
 You need very few things to get the syscalls tracing in an arch.
 
@@ -250,12 +253,152 @@ You need very few things to get the syscalls tracing in an arch.
 HAVE_FTRACE_MCOUNT_RECORD
 -------------------------
 
-See scripts/recordmcount.pl for more info.
+See scripts/recordmcount.pl for more info.  Just fill in the arch-specific
+details for how to locate the addresses of mcount call sites via objdump.
+This option doesn't make much sense without also implementing dynamic ftrace.
 
+
+HAVE_DYNAMIC_FTRACE
+-------------------
+
+You will first need HAVE_FTRACE_MCOUNT_RECORD and HAVE_FUNCTION_TRACER, so
+scroll your reader back up if you got overeager.
+
+Once those are out of the way, you will need to implement:
+	- asm/ftrace.h:
+		- MCOUNT_ADDR
+		- ftrace_call_adjust()
+		- struct dyn_arch_ftrace{}
+	- asm code:
+		- mcount() (new stub)
+		- ftrace_caller()
+		- ftrace_call()
+		- ftrace_stub()
+	- C code:
+		- ftrace_dyn_arch_init()
+		- ftrace_make_nop()
+		- ftrace_make_call()
+		- ftrace_update_ftrace_func()
+
+First you will need to fill out some arch details in your asm/ftrace.h.
+
+Define MCOUNT_ADDR as the address of your mcount symbol similar to:
+	#define MCOUNT_ADDR ((unsigned long)mcount)
+Since no one else will have a decl for that function, you will need to:
+	extern void mcount(void);
+
+You will also need the helper function ftrace_call_adjust().  Most people
+will be able to stub it out like so:
+	static inline unsigned long ftrace_call_adjust(unsigned long addr)
+	{
+		return addr;
+	}
 <details to be filled>
 
+Lastly you will need the custom dyn_arch_ftrace structure.  If you need
+some extra state when runtime patching arbitrary call sites, this is the
+place.  For now though, create an empty struct:
+	struct dyn_arch_ftrace {
+		/* No extra data needed */
+	};
+
+With the header out of the way, we can fill out the assembly code.  While we
+did already create a mcount() function earlier, dynamic ftrace only wants a
+stub function.  This is because the mcount() will only be used during boot
+and then all references to it will be patched out never to return.  Instead,
+the guts of the old mcount() will be used to create a new ftrace_caller()
+function.  Because the two are hard to merge, it will most likely be a lot
+easier to have two separate definitions split up by #ifdefs.  Same goes for
+the ftrace_stub() as that will now be inlined in ftrace_caller().
+
+Before we get any more confused, let's check out some pseudo code so you can
+implement your own stuff in assembly:
 
-HAVE_DYNAMIC_FTRACE
----------------------
+void mcount(void)
+{
+	return;
+}
+
+void ftrace_caller(void)
+{
+	/* implement HAVE_FUNCTION_TRACE_MCOUNT_TEST if you desire */
+
+	/* save all state needed by the ABI (see paragraph above) */
+
+	unsigned long frompc = ...;
+	unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE;
+
+ftrace_call:
+	ftrace_stub(frompc, selfpc);
+
+	/* restore all state needed by the ABI */
+
+ftrace_stub:
+	return;
+}
+
+This might look a little odd at first, but keep in mind that we will be runtime
+patching multiple things.  First, only functions that we actually want to trace
+will be patched to call ftrace_caller().  Second, since we only have one tracer
+active at a time, we will patch the ftrace_caller() function itself to call the
+specific tracer in question.  That is the point of the ftrace_call label.
+
+With that in mind, let's move on to the C code that will actually be doing the
+runtime patching.  You'll need a little knowledge of your arch's opcodes in
+order to make it through the next section.
+
+Every arch has an init callback function.  If you need to do something early on
+to initialize some state, this is the time to do that.  Otherwise, this simple
+function below should be sufficient for most people:
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+	/* return value is done indirectly via data */
+	*(unsigned long *)data = 0;
+
+	return 0;
+}
+
+There are two functions that are used to do runtime patching of arbitrary
+functions.  The first is used to turn the mcount call site into a nop (which
+is what helps us retain runtime performance when not tracing).  The second is
+used to turn the mcount call site into a call to an arbitrary location (but
+typically that is ftrace_caller()).  See the general function definition in
+linux/ftrace.h for the functions:
+	ftrace_make_nop()
+	ftrace_make_call()
+The rec->ip value is the address of the mcount call site that was collected
+by the scripts/recordmcount.pl during build time.
+
+The last function is used to do runtime patching of the active tracer.  This
+will be modifying the assembly code at the location of the ftrace_call symbol
+inside of the ftrace_caller() function.  So you should have sufficient padding
+at that location to support the new function calls you'll be inserting.  Some
+people will be using a "call" type instruction while others will be using a
+"branch" type instruction.  Specifically, the function is:
+	ftrace_update_ftrace_func()
+
+
+HAVE_DYNAMIC_FTRACE + HAVE_FUNCTION_GRAPH_TRACER
+------------------------------------------------
+
+The function grapher needs a few tweaks in order to work with dynamic ftrace.
+Basically, you will need to:
+	- update:
+		- ftrace_caller()
+		- ftrace_graph_call()
+		- ftrace_graph_caller()
+	- implement:
+		- ftrace_enable_ftrace_graph_caller()
+		- ftrace_disable_ftrace_graph_caller()
 
 <details to be filled>
+Quick notes:
+	- add a nop stub after the ftrace_call location named ftrace_graph_call;
+	  stub needs to be large enough to support a call to ftrace_graph_caller()
+	- update ftrace_graph_caller() to work with being called by the new
+	  ftrace_caller() since some semantics may have changed
+	- ftrace_enable_ftrace_graph_caller() will runtime patch the
+	  ftrace_graph_call location with a call to ftrace_graph_caller()
+	- ftrace_disable_ftrace_graph_caller() will runtime patch the
+	  ftrace_graph_call location with nops
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 41e46330d9b..dcd6a7c3a43 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1,3 +1,8 @@
+/*
+ * Ftrace header.  For implementation details beyond the random comments
+ * scattered below, see: Documentation/trace/ftrace-design.txt
+ */
+
 #ifndef _LINUX_FTRACE_H
 #define _LINUX_FTRACE_H
 
-- 
cgit v1.2.3-70-g09d2


From f03585689fdff4ae256edd45a35bc2dd83d3684a Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Tue, 18 May 2010 13:20:32 +0200
Subject: Bluetooth: Add blacklist support for incoming connections

In some circumstances it could be desirable to reject incoming
connections on the baseband level. This patch adds this feature through
two new ioctls: HCIBLOCKADDR and HCIUNBLOCKADDR. Both take a simple
Bluetooth address as a parameter. BDADDR_ANY can be used with
HCIUNBLOCKADDR to remove all devices from the blacklist.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 fs/compat_ioctl.c                |  2 +
 include/net/bluetooth/hci.h      |  3 ++
 include/net/bluetooth/hci_core.h |  9 ++++
 net/bluetooth/hci_core.c         |  3 ++
 net/bluetooth/hci_event.c        |  2 +-
 net/bluetooth/hci_sock.c         | 90 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 108 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 641640dc7ae..18638969a65 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1328,6 +1328,8 @@ COMPATIBLE_IOCTL(HCISETLINKPOL)
 COMPATIBLE_IOCTL(HCISETLINKMODE)
 COMPATIBLE_IOCTL(HCISETACLMTU)
 COMPATIBLE_IOCTL(HCISETSCOMTU)
+COMPATIBLE_IOCTL(HCIBLOCKADDR)
+COMPATIBLE_IOCTL(HCIUNBLOCKADDR)
 COMPATIBLE_IOCTL(HCIINQUIRY)
 COMPATIBLE_IOCTL(HCIUARTSETPROTO)
 COMPATIBLE_IOCTL(HCIUARTGETPROTO)
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index fc0c502d9fd..ca2518e0574 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -100,6 +100,9 @@ enum {
 #define HCISETACLMTU	_IOW('H', 227, int)
 #define HCISETSCOMTU	_IOW('H', 228, int)
 
+#define HCIBLOCKADDR	_IOW('H', 230, int)
+#define HCIUNBLOCKADDR	_IOW('H', 231, int)
+
 #define HCIINQUIRY	_IOR('H', 240, int)
 
 /* HCI timeouts */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index e42f6ed5421..ffc637748b8 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -62,6 +62,11 @@ struct hci_conn_hash {
 	unsigned int     sco_num;
 };
 
+struct bdaddr_list {
+	struct list_head list;
+	bdaddr_t bdaddr;
+};
+
 struct hci_dev {
 	struct list_head list;
 	spinlock_t	lock;
@@ -127,6 +132,7 @@ struct hci_dev {
 
 	struct inquiry_cache	inq_cache;
 	struct hci_conn_hash	conn_hash;
+	struct bdaddr_list	blacklist;
 
 	struct hci_dev_stats	stat;
 
@@ -424,6 +430,9 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg);
 int hci_get_auth_info(struct hci_dev *hdev, void __user *arg);
 int hci_inquiry(void __user *arg);
 
+struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr);
+int hci_blacklist_clear(struct hci_dev *hdev);
+
 void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb);
 
 int hci_recv_frame(struct sk_buff *skb);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 2f768de8701..aeb2982310a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -562,6 +562,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	hci_dev_lock_bh(hdev);
 	inquiry_cache_flush(hdev);
 	hci_conn_hash_flush(hdev);
+	hci_blacklist_clear(hdev);
 	hci_dev_unlock_bh(hdev);
 
 	hci_notify(hdev, HCI_DEV_DOWN);
@@ -923,6 +924,8 @@ int hci_register_dev(struct hci_dev *hdev)
 
 	hci_conn_hash_init(hdev);
 
+	INIT_LIST_HEAD(&hdev->blacklist.list);
+
 	memset(&hdev->stat, 0, sizeof(struct hci_dev_stats));
 
 	atomic_set(&hdev->promisc, 0);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 786b5de0bac..43feeef3c49 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -952,7 +952,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
 
 	mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type);
 
-	if (mask & HCI_LM_ACCEPT) {
+	if ((mask & HCI_LM_ACCEPT) && !hci_blacklist_lookup(hdev, &ev->bdaddr)) {
 		/* Connection accepted */
 		struct inquiry_entry *ie;
 		struct hci_conn *conn;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 38f08f6b86f..4f170a59593 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -165,6 +165,86 @@ static int hci_sock_release(struct socket *sock)
 	return 0;
 }
 
+struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr)
+{
+	struct list_head *p;
+	struct bdaddr_list *blacklist = &hdev->blacklist;
+
+	list_for_each(p, &blacklist->list) {
+		struct bdaddr_list *b;
+
+		b = list_entry(p, struct bdaddr_list, list);
+
+		if (bacmp(bdaddr, &b->bdaddr) == 0)
+			return b;
+	}
+
+	return NULL;
+}
+
+static int hci_blacklist_add(struct hci_dev *hdev, void __user *arg)
+{
+	bdaddr_t bdaddr;
+	struct bdaddr_list *entry;
+
+	if (copy_from_user(&bdaddr, arg, sizeof(bdaddr)))
+		return -EFAULT;
+
+	if (bacmp(&bdaddr, BDADDR_ANY) == 0)
+		return -EBADF;
+
+	if (hci_blacklist_lookup(hdev, &bdaddr))
+		return -EEXIST;
+
+	entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	bacpy(&entry->bdaddr, &bdaddr);
+
+	list_add(&entry->list, &hdev->blacklist.list);
+
+	return 0;
+}
+
+int hci_blacklist_clear(struct hci_dev *hdev)
+{
+	struct list_head *p, *n;
+	struct bdaddr_list *blacklist = &hdev->blacklist;
+
+	list_for_each_safe(p, n, &blacklist->list) {
+		struct bdaddr_list *b;
+
+		b = list_entry(p, struct bdaddr_list, list);
+
+		list_del(p);
+		kfree(b);
+	}
+
+	return 0;
+}
+
+static int hci_blacklist_del(struct hci_dev *hdev, void __user *arg)
+{
+	bdaddr_t bdaddr;
+	struct bdaddr_list *entry;
+
+	if (copy_from_user(&bdaddr, arg, sizeof(bdaddr)))
+		return -EFAULT;
+
+	if (bacmp(&bdaddr, BDADDR_ANY) == 0)
+		return hci_blacklist_clear(hdev);
+
+	entry = hci_blacklist_lookup(hdev, &bdaddr);
+	if (!entry)
+		return -ENOENT;
+
+	list_del(&entry->list);
+	kfree(entry);
+
+	return 0;
+}
+
 /* Ioctls that require bound socket */
 static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
 {
@@ -194,6 +274,16 @@ static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsign
 	case HCIGETAUTHINFO:
 		return hci_get_auth_info(hdev, (void __user *) arg);
 
+	case HCIBLOCKADDR:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+		return hci_blacklist_add(hdev, (void __user *) arg);
+
+	case HCIUNBLOCKADDR:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+		return hci_blacklist_del(hdev, (void __user *) arg);
+
 	default:
 		if (hdev->ioctl)
 			return hdev->ioctl(hdev, cmd, arg);
-- 
cgit v1.2.3-70-g09d2


From 04fafe4ed76f5547856f4c464bec5bf948afb498 Mon Sep 17 00:00:00 2001
From: Ron Shaffer <rshaffer@codeaurora.org>
Date: Fri, 28 May 2010 11:53:45 -0400
Subject: Bluetooth: Remove extraneous white space

Deleted extraneous white space from the end of several lines

Signed-off-by: Ron Shaffer <rshaffer@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ffc637748b8..ab6d6186a46 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1,4 +1,4 @@
-/* 
+/*
    BlueZ - Bluetooth protocol stack for Linux
    Copyright (C) 2000-2001 Qualcomm Incorporated
 
@@ -12,13 +12,13 @@
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
    IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
-   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
-   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
-   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
-   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
-   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
    SOFTWARE IS DISCLAIMED.
 */
 
@@ -386,7 +386,7 @@ static inline void __hci_dev_put(struct hci_dev *d)
 }
 
 static inline void hci_dev_put(struct hci_dev *d)
-{ 
+{
 	__hci_dev_put(d);
 	module_put(d->owner);
 }
-- 
cgit v1.2.3-70-g09d2


From 2d0a03460a8a0c611843500735096ff799aa8510 Mon Sep 17 00:00:00 2001
From: Ron Shaffer <rshaffer@codeaurora.org>
Date: Fri, 28 May 2010 11:53:46 -0400
Subject: Bluetooth: Reassigned copyright to Code Aurora Forum

Qualcomm, Inc. has reassigned rights to Code Aurora Forum. Accordingly,
as files are modified by Code Aurora Forum members, the copyright
statement will be updated.

Signed-off-by: Ron Shaffer <rshaffer@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 2 +-
 net/bluetooth/hci_conn.c         | 2 +-
 net/bluetooth/hci_event.c        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ab6d6186a46..600372d4457 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1,6 +1,6 @@
 /*
    BlueZ - Bluetooth protocol stack for Linux
-   Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 800b6b9fbba..e9fef83449f 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1,6 +1,6 @@
 /*
    BlueZ - Bluetooth protocol stack for Linux
-   Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 43feeef3c49..a9698002230 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1,6 +1,6 @@
 /*
    BlueZ - Bluetooth protocol stack for Linux
-   Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
-- 
cgit v1.2.3-70-g09d2


From cf6c2c0b9f47ee3cd12684b905725c8376d52135 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 7 Jun 2010 20:54:45 -0300
Subject: Bluetooth: Disconnect early if mode is not supported

When the requested mode is mandatory but not supported, we shall not send
the connect request, and we report the failure to userspace as well.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h |  5 ++++
 net/bluetooth/l2cap.c         | 56 ++++++++++++++++++++++++++++++-------------
 2 files changed, 45 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 7c695bfd853..f8bae541543 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -287,6 +287,11 @@ struct l2cap_conn {
 	struct l2cap_chan_list chan_list;
 };
 
+struct sock_del_list {
+	struct sock *sk;
+	struct list_head list;
+};
+
 #define L2CAP_INFO_CL_MTU_REQ_SENT	0x01
 #define L2CAP_INFO_FEAT_MASK_REQ_SENT	0x04
 #define L2CAP_INFO_FEAT_MASK_REQ_DONE	0x08
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 2fb45c48176..6a33d269389 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -456,6 +456,22 @@ static void l2cap_do_start(struct sock *sk)
 	}
 }
 
+static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
+{
+	u32 local_feat_mask = l2cap_feat_mask;
+	if (enable_ertm)
+		local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING;
+
+	switch (mode) {
+	case L2CAP_MODE_ERTM:
+		return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask;
+	case L2CAP_MODE_STREAMING:
+		return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask;
+	default:
+		return 0x00;
+	}
+}
+
 static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err)
 {
 	struct l2cap_disconn_req req;
@@ -484,10 +500,13 @@ static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int
 static void l2cap_conn_start(struct l2cap_conn *conn)
 {
 	struct l2cap_chan_list *l = &conn->chan_list;
+	struct sock_del_list del, *tmp1, *tmp2;
 	struct sock *sk;
 
 	BT_DBG("conn %p", conn);
 
+	INIT_LIST_HEAD(&del.list);
+
 	read_lock(&l->lock);
 
 	for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
@@ -503,6 +522,19 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 			if (l2cap_check_security(sk) &&
 					__l2cap_no_conn_pending(sk)) {
 				struct l2cap_conn_req req;
+
+				if (!l2cap_mode_supported(l2cap_pi(sk)->mode,
+						conn->feat_mask)
+						&& l2cap_pi(sk)->conf_state &
+						L2CAP_CONF_STATE2_DEVICE) {
+					tmp1 = kzalloc(sizeof(struct srej_list),
+							GFP_ATOMIC);
+					tmp1->sk = sk;
+					list_add_tail(&tmp1->list, &del.list);
+					bh_unlock_sock(sk);
+					continue;
+				}
+
 				req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 				req.psm  = l2cap_pi(sk)->psm;
 
@@ -542,6 +574,14 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 	}
 
 	read_unlock(&l->lock);
+
+	list_for_each_entry_safe(tmp1, tmp2, &del.list, list) {
+		bh_lock_sock(tmp1->sk);
+		__l2cap_sock_close(tmp1->sk, ECONNRESET);
+		bh_unlock_sock(tmp1->sk);
+		list_del(&tmp1->list);
+		kfree(tmp1);
+	}
 }
 
 static void l2cap_conn_ready(struct l2cap_conn *conn)
@@ -2429,22 +2469,6 @@ static inline void l2cap_ertm_init(struct sock *sk)
 	INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work);
 }
 
-static int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
-{
-	u32 local_feat_mask = l2cap_feat_mask;
-	if (enable_ertm)
-		local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING;
-
-	switch (mode) {
-	case L2CAP_MODE_ERTM:
-		return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask;
-	case L2CAP_MODE_STREAMING:
-		return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask;
-	default:
-		return 0x00;
-	}
-}
-
 static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask)
 {
 	switch (mode) {
-- 
cgit v1.2.3-70-g09d2


From e0f66218b3a7d0bcf37ca95186123c257fda0ba5 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 21 Jun 2010 18:50:49 -0300
Subject: Bluetooth: Remove the send_lock spinlock from ERTM

Using a lock to deal with the ERTM race condition - interruption with
new data from the hci layer - is wrong. We should use the native skb
backlog queue.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h |  1 -
 net/bluetooth/l2cap.c         | 28 +---------------------------
 2 files changed, 1 insertion(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index f8bae541543..7df70e47c87 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -358,7 +358,6 @@ struct l2cap_pinfo {
 
 	__le16		sport;
 
-	spinlock_t		send_lock;
 	struct timer_list	retrans_timer;
 	struct timer_list	monitor_timer;
 	struct timer_list	ack_timer;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index f6e46fdddd2..dc8601fc240 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1562,16 +1562,11 @@ static int l2cap_retransmit_frames(struct sock *sk)
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	int ret;
 
-	spin_lock_bh(&pi->send_lock);
-
 	if (!skb_queue_empty(TX_QUEUE(sk)))
 		sk->sk_send_head = TX_QUEUE(sk)->next;
 
 	pi->next_tx_seq = pi->expected_ack_seq;
 	ret = l2cap_ertm_send(sk);
-
-	spin_unlock_bh(&pi->send_lock);
-
 	return ret;
 }
 
@@ -1579,7 +1574,6 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi)
 {
 	struct sock *sk = (struct sock *)pi;
 	u16 control = 0;
-	int nframes;
 
 	control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
 
@@ -1590,11 +1584,7 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi)
 		return;
 	}
 
-	spin_lock_bh(&pi->send_lock);
-	nframes = l2cap_ertm_send(sk);
-	spin_unlock_bh(&pi->send_lock);
-
-	if (nframes > 0)
+	if (l2cap_ertm_send(sk) > 0)
 		return;
 
 	control |= L2CAP_SUPER_RCV_READY;
@@ -1789,10 +1779,8 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz
 		size += buflen;
 	}
 	skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk));
-	spin_lock_bh(&pi->send_lock);
 	if (sk->sk_send_head == NULL)
 		sk->sk_send_head = sar_queue.next;
-	spin_unlock_bh(&pi->send_lock);
 
 	return size;
 }
@@ -1864,14 +1852,9 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 			}
 			__skb_queue_tail(TX_QUEUE(sk), skb);
 
-			if (pi->mode == L2CAP_MODE_ERTM)
-				spin_lock_bh(&pi->send_lock);
-
 			if (sk->sk_send_head == NULL)
 				sk->sk_send_head = skb;
 
-			if (pi->mode == L2CAP_MODE_ERTM)
-				spin_unlock_bh(&pi->send_lock);
 		} else {
 		/* Segment SDU into multiples PDUs */
 			err = l2cap_sar_segment_sdu(sk, msg, len);
@@ -1887,9 +1870,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 				err = len;
 				break;
 			}
-			spin_lock_bh(&pi->send_lock);
 			err = l2cap_ertm_send(sk);
-			spin_unlock_bh(&pi->send_lock);
 		}
 
 		if (err >= 0)
@@ -2464,7 +2445,6 @@ static inline void l2cap_ertm_init(struct sock *sk)
 
 	__skb_queue_head_init(SREJ_QUEUE(sk));
 	__skb_queue_head_init(BUSY_QUEUE(sk));
-	spin_lock_init(&l2cap_pi(sk)->send_lock);
 
 	INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work);
 }
@@ -3462,9 +3442,7 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
 	if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY)
 		l2cap_retransmit_frames(sk);
 
-	spin_lock_bh(&pi->send_lock);
 	l2cap_ertm_send(sk);
-	spin_unlock_bh(&pi->send_lock);
 
 	if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) &&
 			pi->frames_sent == 0) {
@@ -4066,9 +4044,7 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
 		if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
 			l2cap_send_ack(pi);
 		} else {
-			spin_lock_bh(&pi->send_lock);
 			l2cap_ertm_send(sk);
-			spin_unlock_bh(&pi->send_lock);
 		}
 	}
 }
@@ -4113,9 +4089,7 @@ static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control)
 		pi->conn_state |= L2CAP_CONN_SEND_FBIT;
 		l2cap_retransmit_one_frame(sk, tx_seq);
 
-		spin_lock_bh(&pi->send_lock);
 		l2cap_ertm_send(sk);
-		spin_unlock_bh(&pi->send_lock);
 
 		if (pi->conn_state & L2CAP_CONN_WAIT_F) {
 			pi->srej_save_reqseq = tx_seq;
-- 
cgit v1.2.3-70-g09d2


From ce5706bd69be6b25715ed6cd48a210b5080032bc Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Tue, 13 Jul 2010 11:57:11 -0300
Subject: Bluetooth: Add Copyright notice to L2CAP

Copyright for the time I worked on L2CAP during the Google Summer of Code
program.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 1 +
 net/bluetooth/l2cap.c         | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 7df70e47c87..d0303021aba 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -1,6 +1,7 @@
 /* 
    BlueZ - Bluetooth protocol stack for Linux
    Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 449cbdd4ddb..67a6f59873a 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1,6 +1,7 @@
 /*
    BlueZ - Bluetooth protocol stack for Linux
    Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
-- 
cgit v1.2.3-70-g09d2


From cd4c53919ed50b0d532f106aeb76e79077bece98 Mon Sep 17 00:00:00 2001
From: Suraj Sumangala <suraj@atheros.com>
Date: Wed, 14 Jul 2010 13:02:16 +0530
Subject: Bluetooth: Add one more buffer for HCI stream reassembly

Additional reassembly buffer to keep track of stream reassembly

Signed-off-by: Suraj Sumangala <suraj@atheros.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 4 ++--
 net/bluetooth/hci_core.c         | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 600372d4457..28e5eeefdec 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -66,7 +66,7 @@ struct bdaddr_list {
 	struct list_head list;
 	bdaddr_t bdaddr;
 };
-
+#define NUM_REASSEMBLY 4
 struct hci_dev {
 	struct list_head list;
 	spinlock_t	lock;
@@ -123,7 +123,7 @@ struct hci_dev {
 	struct sk_buff_head	cmd_q;
 
 	struct sk_buff		*sent_cmd;
-	struct sk_buff		*reassembly[3];
+	struct sk_buff		*reassembly[NUM_REASSEMBLY];
 
 	struct mutex		req_lock;
 	wait_queue_head_t	req_wait_q;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index aeb2982310a..0ded790bfb6 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -914,7 +914,7 @@ int hci_register_dev(struct hci_dev *hdev)
 	skb_queue_head_init(&hdev->cmd_q);
 	skb_queue_head_init(&hdev->raw_q);
 
-	for (i = 0; i < 3; i++)
+	for (i = 0; i < NUM_REASSEMBLY; i++)
 		hdev->reassembly[i] = NULL;
 
 	init_waitqueue_head(&hdev->req_wait_q);
@@ -973,7 +973,7 @@ int hci_unregister_dev(struct hci_dev *hdev)
 
 	hci_dev_do_close(hdev);
 
-	for (i = 0; i < 3; i++)
+	for (i = 0; i < NUM_REASSEMBLY; i++)
 		kfree_skb(hdev->reassembly[i]);
 
 	hci_notify(hdev, HCI_DEV_UNREG);
@@ -1034,7 +1034,7 @@ int hci_recv_frame(struct sk_buff *skb)
 EXPORT_SYMBOL(hci_recv_frame);
 
 /* Receive packet type fragment */
-#define __reassembly(hdev, type)  ((hdev)->reassembly[(type) - 2])
+#define __reassembly(hdev, type)  ((hdev)->reassembly[(type) - 1])
 
 int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count)
 {
-- 
cgit v1.2.3-70-g09d2


From 33e882a5f2301a23a85ef2994e30fd9f48d39d9b Mon Sep 17 00:00:00 2001
From: Suraj Sumangala <suraj@atheros.com>
Date: Wed, 14 Jul 2010 13:02:17 +0530
Subject: Bluetooth: Implement hci_reassembly helper to reassemble RX packets

Implements feature to reassemble received HCI frames from any input stream

Signed-off-by: Suraj Sumangala <suraj@atheros.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h |   1 +
 net/bluetooth/hci_core.c          | 109 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index ff77e8f882f..d6b150c679f 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -138,6 +138,7 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock);
 struct bt_skb_cb {
 	__u8 pkt_type;
 	__u8 incoming;
+	__u16 expect;
 	__u8 tx_seq;
 	__u8 retries;
 	__u8 sar;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 0ded790bfb6..477c4a60a07 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1033,6 +1033,115 @@ int hci_recv_frame(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(hci_recv_frame);
 
+static int hci_reassembly(struct hci_dev *hdev, int type, void *data,
+			  int count, __u8 index, gfp_t gfp_mask)
+{
+	int len = 0;
+	int hlen = 0;
+	int remain = count;
+	struct sk_buff *skb;
+	struct bt_skb_cb *scb;
+
+	if ((type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) ||
+				index >= NUM_REASSEMBLY)
+		return -EILSEQ;
+
+	skb = hdev->reassembly[index];
+
+	if (!skb) {
+		switch (type) {
+		case HCI_ACLDATA_PKT:
+			len = HCI_MAX_FRAME_SIZE;
+			hlen = HCI_ACL_HDR_SIZE;
+			break;
+		case HCI_EVENT_PKT:
+			len = HCI_MAX_EVENT_SIZE;
+			hlen = HCI_EVENT_HDR_SIZE;
+			break;
+		case HCI_SCODATA_PKT:
+			len = HCI_MAX_SCO_SIZE;
+			hlen = HCI_SCO_HDR_SIZE;
+			break;
+		}
+
+		skb = bt_skb_alloc(len, gfp_mask);
+		if (!skb)
+			return -ENOMEM;
+
+		scb = (void *) skb->cb;
+		scb->expect = hlen;
+		scb->pkt_type = type;
+
+		skb->dev = (void *) hdev;
+		hdev->reassembly[index] = skb;
+	}
+
+	while (count) {
+		scb = (void *) skb->cb;
+		len = min(scb->expect, (__u16)count);
+
+		memcpy(skb_put(skb, len), data, len);
+
+		count -= len;
+		data += len;
+		scb->expect -= len;
+		remain = count;
+
+		switch (type) {
+		case HCI_EVENT_PKT:
+			if (skb->len == HCI_EVENT_HDR_SIZE) {
+				struct hci_event_hdr *h = hci_event_hdr(skb);
+				scb->expect = h->plen;
+
+				if (skb_tailroom(skb) < scb->expect) {
+					kfree_skb(skb);
+					hdev->reassembly[index] = NULL;
+					return -ENOMEM;
+				}
+			}
+			break;
+
+		case HCI_ACLDATA_PKT:
+			if (skb->len  == HCI_ACL_HDR_SIZE) {
+				struct hci_acl_hdr *h = hci_acl_hdr(skb);
+				scb->expect = __le16_to_cpu(h->dlen);
+
+				if (skb_tailroom(skb) < scb->expect) {
+					kfree_skb(skb);
+					hdev->reassembly[index] = NULL;
+					return -ENOMEM;
+				}
+			}
+			break;
+
+		case HCI_SCODATA_PKT:
+			if (skb->len == HCI_SCO_HDR_SIZE) {
+				struct hci_sco_hdr *h = hci_sco_hdr(skb);
+				scb->expect = h->dlen;
+
+				if (skb_tailroom(skb) < scb->expect) {
+					kfree_skb(skb);
+					hdev->reassembly[index] = NULL;
+					return -ENOMEM;
+				}
+			}
+			break;
+		}
+
+		if (scb->expect == 0) {
+			/* Complete frame */
+
+			bt_cb(skb)->pkt_type = type;
+			hci_recv_frame(skb);
+
+			hdev->reassembly[index] = NULL;
+			return remain;
+		}
+	}
+
+	return remain;
+}
+
 /* Receive packet type fragment */
 #define __reassembly(hdev, type)  ((hdev)->reassembly[(type) - 1])
 
-- 
cgit v1.2.3-70-g09d2


From 9981151086385eecc2febf4ba95a14593f834b3d Mon Sep 17 00:00:00 2001
From: Suraj Sumangala <suraj@atheros.com>
Date: Wed, 14 Jul 2010 13:02:19 +0530
Subject: Bluetooth: Implemented HCI frame reassembly for RX from stream

Implemented frame reassembly for fragments received from a stream.

Signed-off-by: Suraj Sumangala <suraj@atheros.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_core.c         | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 28e5eeefdec..350b3e6964b 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -437,6 +437,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb);
 
 int hci_recv_frame(struct sk_buff *skb);
 int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count);
+int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count);
 
 int hci_register_sysfs(struct hci_dev *hdev);
 void hci_unregister_sysfs(struct hci_dev *hdev);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 451e266840a..995c9f9b84d 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1163,6 +1163,41 @@ int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count)
 }
 EXPORT_SYMBOL(hci_recv_fragment);
 
+#define STREAM_REASSEMBLY 0
+
+int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count)
+{
+	int type;
+	int rem = 0;
+
+	do {
+		struct sk_buff *skb = hdev->reassembly[STREAM_REASSEMBLY];
+
+		if (!skb) {
+			struct { char type; } *pkt;
+
+			/* Start of the frame */
+			pkt = data;
+			type = pkt->type;
+
+			data++;
+			count--;
+		} else
+			type = bt_cb(skb)->pkt_type;
+
+		rem = hci_reassembly(hdev, type, data,
+					count, STREAM_REASSEMBLY, GFP_ATOMIC);
+		if (rem < 0)
+			return rem;
+
+		data += (count - rem);
+		count = rem;
+	} while (count);
+
+	return rem;
+}
+EXPORT_SYMBOL(hci_recv_stream_fragment);
+
 /* ---- Interface to upper protocols ---- */
 
 /* Register/Unregister protocols.
-- 
cgit v1.2.3-70-g09d2


From 5d8868ff3d11e3fc5a5c07477f281a16c71714a3 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Fri, 16 Jul 2010 16:18:39 -0300
Subject: Bluetooth: Add Google's copyright to L2CAP

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 1 +
 net/bluetooth/l2cap.c         | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index d0303021aba..4b86c783976 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -2,6 +2,7 @@
    BlueZ - Bluetooth protocol stack for Linux
    Copyright (C) 2000-2001 Qualcomm Incorporated
    Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
+   Copyright (C) 2010 Google Inc.
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index cdd608d7274..d175cc26283 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2,6 +2,7 @@
    BlueZ - Bluetooth protocol stack for Linux
    Copyright (C) 2000-2001 Qualcomm Incorporated
    Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
+   Copyright (C) 2010 Google Inc.
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
-- 
cgit v1.2.3-70-g09d2


From 66c853cc21bd387a9a2109dcf3b3d53cc9ee9edf Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 19 Jul 2010 02:00:13 -0300
Subject: Bluetooth: Use __packed annotation

To make net/ and include/net/ code consistent use __packed instead of
__attribute__ ((packed)). Bluetooth subsystem was one of the last net
subsys still using __attribute__ ((packed)).

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h |   2 +-
 include/net/bluetooth/hci.h       | 184 +++++++++++++++++++-------------------
 include/net/bluetooth/l2cap.h     |  26 +++---
 include/net/bluetooth/rfcomm.h    |  14 +--
 4 files changed, 113 insertions(+), 113 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index d6b150c679f..27a902d9b3a 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -84,7 +84,7 @@ enum {
 /* BD Address */
 typedef struct {
 	__u8 b[6];
-} __attribute__((packed)) bdaddr_t;
+} __packed bdaddr_t;
 
 #define BDADDR_ANY   (&(bdaddr_t) {{0, 0, 0, 0, 0, 0}})
 #define BDADDR_LOCAL (&(bdaddr_t) {{0, 0, 0, 0xff, 0xff, 0xff}})
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index ca2518e0574..bcbdd6d4e6d 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -230,7 +230,7 @@ struct hci_cp_inquiry {
 	__u8     lap[3];
 	__u8     length;
 	__u8     num_rsp;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_INQUIRY_CANCEL		0x0402
 
@@ -244,81 +244,81 @@ struct hci_cp_create_conn {
 	__u8     pscan_mode;
 	__le16   clock_offset;
 	__u8     role_switch;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_DISCONNECT		0x0406
 struct hci_cp_disconnect {
 	__le16   handle;
 	__u8     reason;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_ADD_SCO			0x0407
 struct hci_cp_add_sco {
 	__le16   handle;
 	__le16   pkt_type;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_CREATE_CONN_CANCEL	0x0408
 struct hci_cp_create_conn_cancel {
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_ACCEPT_CONN_REQ		0x0409
 struct hci_cp_accept_conn_req {
 	bdaddr_t bdaddr;
 	__u8     role;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_REJECT_CONN_REQ		0x040a
 struct hci_cp_reject_conn_req {
 	bdaddr_t bdaddr;
 	__u8     reason;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_LINK_KEY_REPLY		0x040b
 struct hci_cp_link_key_reply {
 	bdaddr_t bdaddr;
 	__u8     link_key[16];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_LINK_KEY_NEG_REPLY	0x040c
 struct hci_cp_link_key_neg_reply {
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_PIN_CODE_REPLY		0x040d
 struct hci_cp_pin_code_reply {
 	bdaddr_t bdaddr;
 	__u8     pin_len;
 	__u8     pin_code[16];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_PIN_CODE_NEG_REPLY	0x040e
 struct hci_cp_pin_code_neg_reply {
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_CHANGE_CONN_PTYPE	0x040f
 struct hci_cp_change_conn_ptype {
 	__le16   handle;
 	__le16   pkt_type;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_AUTH_REQUESTED		0x0411
 struct hci_cp_auth_requested {
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_SET_CONN_ENCRYPT		0x0413
 struct hci_cp_set_conn_encrypt {
 	__le16   handle;
 	__u8     encrypt;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_CHANGE_CONN_LINK_KEY	0x0415
 struct hci_cp_change_conn_link_key {
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_REMOTE_NAME_REQ		0x0419
 struct hci_cp_remote_name_req {
@@ -326,28 +326,28 @@ struct hci_cp_remote_name_req {
 	__u8     pscan_rep_mode;
 	__u8     pscan_mode;
 	__le16   clock_offset;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_REMOTE_NAME_REQ_CANCEL	0x041a
 struct hci_cp_remote_name_req_cancel {
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_REMOTE_FEATURES	0x041b
 struct hci_cp_read_remote_features {
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_REMOTE_EXT_FEATURES	0x041c
 struct hci_cp_read_remote_ext_features {
 	__le16   handle;
 	__u8     page;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_REMOTE_VERSION	0x041d
 struct hci_cp_read_remote_version {
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_SETUP_SYNC_CONN		0x0428
 struct hci_cp_setup_sync_conn {
@@ -358,7 +358,7 @@ struct hci_cp_setup_sync_conn {
 	__le16   voice_setting;
 	__u8     retrans_effort;
 	__le16   pkt_type;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_ACCEPT_SYNC_CONN_REQ	0x0429
 struct hci_cp_accept_sync_conn_req {
@@ -369,13 +369,13 @@ struct hci_cp_accept_sync_conn_req {
 	__le16   content_format;
 	__u8     retrans_effort;
 	__le16   pkt_type;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_REJECT_SYNC_CONN_REQ	0x042a
 struct hci_cp_reject_sync_conn_req {
 	bdaddr_t bdaddr;
 	__u8     reason;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_SNIFF_MODE		0x0803
 struct hci_cp_sniff_mode {
@@ -384,59 +384,59 @@ struct hci_cp_sniff_mode {
 	__le16   min_interval;
 	__le16   attempt;
 	__le16   timeout;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_EXIT_SNIFF_MODE		0x0804
 struct hci_cp_exit_sniff_mode {
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_ROLE_DISCOVERY		0x0809
 struct hci_cp_role_discovery {
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 struct hci_rp_role_discovery {
 	__u8     status;
 	__le16   handle;
 	__u8     role;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_SWITCH_ROLE		0x080b
 struct hci_cp_switch_role {
 	bdaddr_t bdaddr;
 	__u8     role;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_LINK_POLICY		0x080c
 struct hci_cp_read_link_policy {
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 struct hci_rp_read_link_policy {
 	__u8     status;
 	__le16   handle;
 	__le16   policy;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_WRITE_LINK_POLICY	0x080d
 struct hci_cp_write_link_policy {
 	__le16   handle;
 	__le16   policy;
-} __attribute__ ((packed));
+} __packed;
 struct hci_rp_write_link_policy {
 	__u8     status;
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_DEF_LINK_POLICY	0x080e
 struct hci_rp_read_def_link_policy {
 	__u8     status;
 	__le16   policy;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_WRITE_DEF_LINK_POLICY	0x080f
 struct hci_cp_write_def_link_policy {
 	__le16   policy;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_SNIFF_SUBRATE		0x0811
 struct hci_cp_sniff_subrate {
@@ -444,12 +444,12 @@ struct hci_cp_sniff_subrate {
 	__le16   max_latency;
 	__le16   min_remote_timeout;
 	__le16   min_local_timeout;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_SET_EVENT_MASK		0x0c01
 struct hci_cp_set_event_mask {
 	__u8     mask[8];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_RESET			0x0c03
 
@@ -458,7 +458,7 @@ struct hci_cp_set_event_flt {
 	__u8     flt_type;
 	__u8     cond_type;
 	__u8     condition[0];
-} __attribute__ ((packed));
+} __packed;
 
 /* Filter types */
 #define HCI_FLT_CLEAR_ALL	0x00
@@ -477,13 +477,13 @@ struct hci_cp_set_event_flt {
 #define HCI_OP_WRITE_LOCAL_NAME		0x0c13
 struct hci_cp_write_local_name {
 	__u8     name[248];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_LOCAL_NAME		0x0c14
 struct hci_rp_read_local_name {
 	__u8     status;
 	__u8     name[248];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_WRITE_CA_TIMEOUT		0x0c16
 
@@ -511,23 +511,23 @@ struct hci_rp_read_local_name {
 struct hci_rp_read_class_of_dev {
 	__u8     status;
 	__u8     dev_class[3];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_WRITE_CLASS_OF_DEV	0x0c24
 struct hci_cp_write_class_of_dev {
 	__u8     dev_class[3];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_VOICE_SETTING	0x0c25
 struct hci_rp_read_voice_setting {
 	__u8     status;
 	__le16   voice_setting;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_WRITE_VOICE_SETTING	0x0c26
 struct hci_cp_write_voice_setting {
 	__le16   voice_setting;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_HOST_BUFFER_SIZE		0x0c33
 struct hci_cp_host_buffer_size {
@@ -535,18 +535,18 @@ struct hci_cp_host_buffer_size {
 	__u8     sco_mtu;
 	__le16   acl_max_pkt;
 	__le16   sco_max_pkt;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_SSP_MODE		0x0c55
 struct hci_rp_read_ssp_mode {
 	__u8     status;
 	__u8     mode;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_WRITE_SSP_MODE		0x0c56
 struct hci_cp_write_ssp_mode {
 	__u8     mode;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_LOCAL_VERSION	0x1001
 struct hci_rp_read_local_version {
@@ -556,19 +556,19 @@ struct hci_rp_read_local_version {
 	__u8     lmp_ver;
 	__le16   manufacturer;
 	__le16   lmp_subver;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_LOCAL_COMMANDS	0x1002
 struct hci_rp_read_local_commands {
 	__u8     status;
 	__u8     commands[64];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_LOCAL_FEATURES	0x1003
 struct hci_rp_read_local_features {
 	__u8     status;
 	__u8     features[8];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_LOCAL_EXT_FEATURES	0x1004
 struct hci_rp_read_local_ext_features {
@@ -576,7 +576,7 @@ struct hci_rp_read_local_ext_features {
 	__u8     page;
 	__u8     max_page;
 	__u8     features[8];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_BUFFER_SIZE		0x1005
 struct hci_rp_read_buffer_size {
@@ -585,13 +585,13 @@ struct hci_rp_read_buffer_size {
 	__u8     sco_mtu;
 	__le16   acl_max_pkt;
 	__le16   sco_max_pkt;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_BD_ADDR		0x1009
 struct hci_rp_read_bd_addr {
 	__u8     status;
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
@@ -604,7 +604,7 @@ struct inquiry_info {
 	__u8     pscan_mode;
 	__u8     dev_class[3];
 	__le16   clock_offset;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_CONN_COMPLETE		0x03
 struct hci_ev_conn_complete {
@@ -613,54 +613,54 @@ struct hci_ev_conn_complete {
 	bdaddr_t bdaddr;
 	__u8     link_type;
 	__u8     encr_mode;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_CONN_REQUEST		0x04
 struct hci_ev_conn_request {
 	bdaddr_t bdaddr;
 	__u8     dev_class[3];
 	__u8     link_type;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_DISCONN_COMPLETE		0x05
 struct hci_ev_disconn_complete {
 	__u8     status;
 	__le16   handle;
 	__u8     reason;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_AUTH_COMPLETE		0x06
 struct hci_ev_auth_complete {
 	__u8     status;
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_REMOTE_NAME		0x07
 struct hci_ev_remote_name {
 	__u8     status;
 	bdaddr_t bdaddr;
 	__u8     name[248];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_ENCRYPT_CHANGE		0x08
 struct hci_ev_encrypt_change {
 	__u8     status;
 	__le16   handle;
 	__u8     encrypt;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_CHANGE_LINK_KEY_COMPLETE	0x09
 struct hci_ev_change_link_key_complete {
 	__u8     status;
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_REMOTE_FEATURES		0x0b
 struct hci_ev_remote_features {
 	__u8     status;
 	__le16   handle;
 	__u8     features[8];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_REMOTE_VERSION		0x0c
 struct hci_ev_remote_version {
@@ -669,7 +669,7 @@ struct hci_ev_remote_version {
 	__u8     lmp_ver;
 	__le16   manufacturer;
 	__le16   lmp_subver;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_QOS_SETUP_COMPLETE	0x0d
 struct hci_qos {
@@ -678,38 +678,38 @@ struct hci_qos {
 	__u32    peak_bandwidth;
 	__u32    latency;
 	__u32    delay_variation;
-} __attribute__ ((packed));
+} __packed;
 struct hci_ev_qos_setup_complete {
 	__u8     status;
 	__le16   handle;
 	struct   hci_qos qos;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_CMD_COMPLETE		0x0e
 struct hci_ev_cmd_complete {
 	__u8     ncmd;
 	__le16   opcode;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_CMD_STATUS		0x0f
 struct hci_ev_cmd_status {
 	__u8     status;
 	__u8     ncmd;
 	__le16   opcode;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_ROLE_CHANGE		0x12
 struct hci_ev_role_change {
 	__u8     status;
 	bdaddr_t bdaddr;
 	__u8     role;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_NUM_COMP_PKTS		0x13
 struct hci_ev_num_comp_pkts {
 	__u8     num_hndl;
 	/* variable length part */
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_MODE_CHANGE		0x14
 struct hci_ev_mode_change {
@@ -717,44 +717,44 @@ struct hci_ev_mode_change {
 	__le16   handle;
 	__u8     mode;
 	__le16   interval;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_PIN_CODE_REQ		0x16
 struct hci_ev_pin_code_req {
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_LINK_KEY_REQ		0x17
 struct hci_ev_link_key_req {
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_LINK_KEY_NOTIFY		0x18
 struct hci_ev_link_key_notify {
 	bdaddr_t bdaddr;
 	__u8     link_key[16];
 	__u8     key_type;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_CLOCK_OFFSET		0x1c
 struct hci_ev_clock_offset {
 	__u8     status;
 	__le16   handle;
 	__le16   clock_offset;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_PKT_TYPE_CHANGE		0x1d
 struct hci_ev_pkt_type_change {
 	__u8     status;
 	__le16   handle;
 	__le16   pkt_type;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_PSCAN_REP_MODE		0x20
 struct hci_ev_pscan_rep_mode {
 	bdaddr_t bdaddr;
 	__u8     pscan_rep_mode;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_INQUIRY_RESULT_WITH_RSSI	0x22
 struct inquiry_info_with_rssi {
@@ -764,7 +764,7 @@ struct inquiry_info_with_rssi {
 	__u8     dev_class[3];
 	__le16   clock_offset;
 	__s8     rssi;
-} __attribute__ ((packed));
+} __packed;
 struct inquiry_info_with_rssi_and_pscan_mode {
 	bdaddr_t bdaddr;
 	__u8     pscan_rep_mode;
@@ -773,7 +773,7 @@ struct inquiry_info_with_rssi_and_pscan_mode {
 	__u8     dev_class[3];
 	__le16   clock_offset;
 	__s8     rssi;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_REMOTE_EXT_FEATURES	0x23
 struct hci_ev_remote_ext_features {
@@ -782,7 +782,7 @@ struct hci_ev_remote_ext_features {
 	__u8     page;
 	__u8     max_page;
 	__u8     features[8];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_SYNC_CONN_COMPLETE	0x2c
 struct hci_ev_sync_conn_complete {
@@ -795,7 +795,7 @@ struct hci_ev_sync_conn_complete {
 	__le16   rx_pkt_len;
 	__le16   tx_pkt_len;
 	__u8     air_mode;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_SYNC_CONN_CHANGED	0x2d
 struct hci_ev_sync_conn_changed {
@@ -805,7 +805,7 @@ struct hci_ev_sync_conn_changed {
 	__u8     retrans_window;
 	__le16   rx_pkt_len;
 	__le16   tx_pkt_len;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_SNIFF_SUBRATE		0x2e
 struct hci_ev_sniff_subrate {
@@ -815,7 +815,7 @@ struct hci_ev_sniff_subrate {
 	__le16   max_rx_latency;
 	__le16   max_remote_timeout;
 	__le16   max_local_timeout;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_EXTENDED_INQUIRY_RESULT	0x2f
 struct extended_inquiry_info {
@@ -826,37 +826,37 @@ struct extended_inquiry_info {
 	__le16   clock_offset;
 	__s8     rssi;
 	__u8     data[240];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_IO_CAPA_REQUEST		0x31
 struct hci_ev_io_capa_request {
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_SIMPLE_PAIR_COMPLETE	0x36
 struct hci_ev_simple_pair_complete {
 	__u8     status;
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_REMOTE_HOST_FEATURES	0x3d
 struct hci_ev_remote_host_features {
 	bdaddr_t bdaddr;
 	__u8     features[8];
-} __attribute__ ((packed));
+} __packed;
 
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xfd
 struct hci_ev_stack_internal {
 	__u16    type;
 	__u8     data[0];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_SI_DEVICE	0x01
 struct hci_ev_si_device {
 	__u16    event;
 	__u16    dev_id;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_SI_SECURITY	0x02
 struct hci_ev_si_security {
@@ -864,7 +864,7 @@ struct hci_ev_si_security {
 	__u16    proto;
 	__u16    subproto;
 	__u8     incoming;
-} __attribute__ ((packed));
+} __packed;
 
 /* ---- HCI Packet structures ---- */
 #define HCI_COMMAND_HDR_SIZE 3
@@ -875,22 +875,22 @@ struct hci_ev_si_security {
 struct hci_command_hdr {
 	__le16	opcode;		/* OCF & OGF */
 	__u8 	plen;
-} __attribute__ ((packed));
+} __packed;
 
 struct hci_event_hdr {
 	__u8	evt;
 	__u8	plen;
-} __attribute__ ((packed));
+} __packed;
 
 struct hci_acl_hdr {
 	__le16	handle;		/* Handle & Flags(PB, BC) */
 	__le16	dlen;
-} __attribute__ ((packed));
+} __packed;
 
 struct hci_sco_hdr {
 	__le16	handle;
 	__u8	dlen;
-} __attribute__ ((packed));
+} __packed;
 
 #ifdef __KERNEL__
 #include <linux/skbuff.h>
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 4b86c783976..636724b203e 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -131,31 +131,31 @@ struct l2cap_conninfo {
 struct l2cap_hdr {
 	__le16     len;
 	__le16     cid;
-} __attribute__ ((packed));
+} __packed;
 #define L2CAP_HDR_SIZE		4
 
 struct l2cap_cmd_hdr {
 	__u8       code;
 	__u8       ident;
 	__le16     len;
-} __attribute__ ((packed));
+} __packed;
 #define L2CAP_CMD_HDR_SIZE	4
 
 struct l2cap_cmd_rej {
 	__le16     reason;
-} __attribute__ ((packed));
+} __packed;
 
 struct l2cap_conn_req {
 	__le16     psm;
 	__le16     scid;
-} __attribute__ ((packed));
+} __packed;
 
 struct l2cap_conn_rsp {
 	__le16     dcid;
 	__le16     scid;
 	__le16     result;
 	__le16     status;
-} __attribute__ ((packed));
+} __packed;
 
 /* channel indentifier */
 #define L2CAP_CID_SIGNALING	0x0001
@@ -179,14 +179,14 @@ struct l2cap_conf_req {
 	__le16     dcid;
 	__le16     flags;
 	__u8       data[0];
-} __attribute__ ((packed));
+} __packed;
 
 struct l2cap_conf_rsp {
 	__le16     scid;
 	__le16     flags;
 	__le16     result;
 	__u8       data[0];
-} __attribute__ ((packed));
+} __packed;
 
 #define L2CAP_CONF_SUCCESS	0x0000
 #define L2CAP_CONF_UNACCEPT	0x0001
@@ -197,7 +197,7 @@ struct l2cap_conf_opt {
 	__u8       type;
 	__u8       len;
 	__u8       val[0];
-} __attribute__ ((packed));
+} __packed;
 #define L2CAP_CONF_OPT_SIZE	2
 
 #define L2CAP_CONF_HINT		0x80
@@ -218,7 +218,7 @@ struct l2cap_conf_rfc {
 	__le16     retrans_timeout;
 	__le16     monitor_timeout;
 	__le16     max_pdu_size;
-} __attribute__ ((packed));
+} __packed;
 
 #define L2CAP_MODE_BASIC	0x00
 #define L2CAP_MODE_RETRANS	0x01
@@ -229,22 +229,22 @@ struct l2cap_conf_rfc {
 struct l2cap_disconn_req {
 	__le16     dcid;
 	__le16     scid;
-} __attribute__ ((packed));
+} __packed;
 
 struct l2cap_disconn_rsp {
 	__le16     dcid;
 	__le16     scid;
-} __attribute__ ((packed));
+} __packed;
 
 struct l2cap_info_req {
 	__le16      type;
-} __attribute__ ((packed));
+} __packed;
 
 struct l2cap_info_rsp {
 	__le16      type;
 	__le16      result;
 	__u8        data[0];
-} __attribute__ ((packed));
+} __packed;
 
 /* info type */
 #define L2CAP_IT_CL_MTU     0x0001
diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 921d7b3c7f8..a140847d622 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -106,19 +106,19 @@ struct rfcomm_hdr {
 	u8 addr;
 	u8 ctrl;
 	u8 len;    // Actual size can be 2 bytes
-} __attribute__ ((packed));
+} __packed;
 
 struct rfcomm_cmd {
 	u8 addr;
 	u8 ctrl;
 	u8 len;
 	u8 fcs;
-} __attribute__ ((packed));
+} __packed;
 
 struct rfcomm_mcc {
 	u8 type;
 	u8 len;
-} __attribute__ ((packed));
+} __packed;
 
 struct rfcomm_pn {
 	u8  dlci;
@@ -128,7 +128,7 @@ struct rfcomm_pn {
 	__le16 mtu;
 	u8  max_retrans;
 	u8  credits;
-} __attribute__ ((packed));
+} __packed;
 
 struct rfcomm_rpn {
 	u8  dlci;
@@ -138,17 +138,17 @@ struct rfcomm_rpn {
 	u8  xon_char;
 	u8  xoff_char;
 	__le16 param_mask;
-} __attribute__ ((packed));
+} __packed;
 
 struct rfcomm_rls {
 	u8  dlci;
 	u8  status;
-} __attribute__ ((packed));
+} __packed;
 
 struct rfcomm_msc {
 	u8  dlci;
 	u8  v24_sig;
-} __attribute__ ((packed));
+} __packed;
 
 /* ---- Core structures, flags etc ---- */
 
-- 
cgit v1.2.3-70-g09d2


From 942875ffc102a6b9992120808e4990eda7a618f6 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Wed, 21 Jul 2010 10:59:57 +0000
Subject: irda: Use __packed annotation instead of IRDA_PACK macro

Remove the IRDA_PACK macro, which maps to __attribute__((packed)). IRDA is
one of the last users of __attribute__((packed)). Networking code uses
__packed now.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/irda/irda.h        |  4 ----
 include/net/irda/irlap_frame.h | 18 +++++++++---------
 2 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/irda/irda.h b/include/net/irda/irda.h
index 7e582061b23..3bed61d379a 100644
--- a/include/net/irda/irda.h
+++ b/include/net/irda/irda.h
@@ -53,10 +53,6 @@ typedef __u32 magic_t;
 #ifndef IRDA_ALIGN
 #  define IRDA_ALIGN __attribute__((aligned))
 #endif
-#ifndef IRDA_PACK
-#  define IRDA_PACK __attribute__((packed))
-#endif
-
 
 #ifdef CONFIG_IRDA_DEBUG
 
diff --git a/include/net/irda/irlap_frame.h b/include/net/irda/irlap_frame.h
index 641f88e848b..6b1dc4f8eca 100644
--- a/include/net/irda/irlap_frame.h
+++ b/include/net/irda/irlap_frame.h
@@ -85,7 +85,7 @@ struct discovery_t;
 struct disc_frame {
 	__u8 caddr;          /* Connection address */
 	__u8 control;
-} IRDA_PACK;
+} __packed;
 
 struct xid_frame {
 	__u8  caddr; /* Connection address */
@@ -96,41 +96,41 @@ struct xid_frame {
 	__u8  flags; /* Discovery flags */
 	__u8  slotnr;
 	__u8  version;
-} IRDA_PACK;
+} __packed;
 
 struct test_frame {
 	__u8 caddr;          /* Connection address */
 	__u8 control;
 	__le32 saddr;         /* Source device address */
 	__le32 daddr;         /* Destination device address */
-} IRDA_PACK;
+} __packed;
 
 struct ua_frame {
 	__u8 caddr;
 	__u8 control;
 	__le32 saddr; /* Source device address */
 	__le32 daddr; /* Dest device address */
-} IRDA_PACK;
+} __packed;
 
 struct dm_frame {
 	__u8 caddr;          /* Connection address */
 	__u8 control;
-} IRDA_PACK;
+} __packed;
 
 struct rd_frame {
 	__u8 caddr;          /* Connection address */
 	__u8 control;
-} IRDA_PACK;
+} __packed;
 
 struct rr_frame {
 	__u8 caddr;          /* Connection address */
 	__u8 control;
-} IRDA_PACK;
+} __packed;
 
 struct i_frame {
 	__u8 caddr;
 	__u8 control;
-} IRDA_PACK;
+} __packed;
 
 struct snrm_frame {
 	__u8  caddr;
@@ -138,7 +138,7 @@ struct snrm_frame {
 	__le32 saddr;
 	__le32 daddr;
 	__u8  ncaddr;
-} IRDA_PACK;
+} __packed;
 
 void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb);
 void irlap_send_discovery_xid_frame(struct irlap_cb *, int S, __u8 s, 
-- 
cgit v1.2.3-70-g09d2


From 3f30fc1570626f11e8f3efe5ebd41fe96e847ed1 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Wed, 21 Jul 2010 10:59:58 +0000
Subject: net: remove last uses of __attribute__((packed))

Network code uses the __packed macro instead of __attribute__((packed)).

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mac80211.h             | 2 +-
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7f256e23c57..23e46cee06f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -412,7 +412,7 @@ struct ieee80211_tx_rate {
 	s8 idx;
 	u8 count;
 	u8 flags;
-} __attribute__((packed));
+} __packed;
 
 /**
  * struct ieee80211_tx_info - skb transmit information
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 64d0875f519..3a43cf36db8 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -469,7 +469,7 @@ struct arp_payload {
 	__be32 src_ip;
 	u_int8_t dst_hw[ETH_ALEN];
 	__be32 dst_ip;
-} __attribute__ ((packed));
+} __packed;
 
 #ifdef DEBUG
 static void arp_print(struct arp_payload *payload)
-- 
cgit v1.2.3-70-g09d2


From edd63cb6b91024332d6983fc51058ac1ef0c081e Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Wed, 21 Jul 2010 19:27:07 -0500
Subject: sysrq,kdb: Use __handle_sysrq() for kdb's sysrq function

The kdb code should not toggle the sysrq state in case an end user
wants to try and resume the normal kernel execution.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/char/sysrq.c        | 2 +-
 include/linux/sysrq.h       | 1 +
 kernel/debug/kdb/kdb_main.c | 3 +--
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 5d64e3acb00..878ac0c2cc6 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -493,7 +493,7 @@ static void __sysrq_put_key_op(int key, struct sysrq_key_op *op_p)
                 sysrq_key_table[i] = op_p;
 }
 
-static void __handle_sysrq(int key, struct tty_struct *tty, int check_mask)
+void __handle_sysrq(int key, struct tty_struct *tty, int check_mask)
 {
 	struct sysrq_key_op *op_p;
 	int orig_log_level;
diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h
index 4496322e28d..609e8ca5f53 100644
--- a/include/linux/sysrq.h
+++ b/include/linux/sysrq.h
@@ -45,6 +45,7 @@ struct sysrq_key_op {
  */
 
 void handle_sysrq(int key, struct tty_struct *tty);
+void __handle_sysrq(int key, struct tty_struct *tty, int check_mask);
 int register_sysrq_key(int key, struct sysrq_key_op *op);
 int unregister_sysrq_key(int key, struct sysrq_key_op *op);
 struct sysrq_key_op *__sysrq_get_key_op(int key);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 7e9bfd54a0d..ebe4a287419 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1820,9 +1820,8 @@ static int kdb_sr(int argc, const char **argv)
 {
 	if (argc != 1)
 		return KDB_ARGCOUNT;
-	sysrq_toggle_support(1);
 	kdb_trap_printk++;
-	handle_sysrq(*argv[1], NULL);
+	__handle_sysrq(*argv[1], NULL, 0);
 	kdb_trap_printk--;
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 4c21adf26f8fcf86a755b9b9f55c2e9fd241e1fb Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Tue, 20 Jul 2010 16:59:34 -0700
Subject: x86 cpufreq, perf: Make trace_power_frequency cpufreq driver
 independent

and fix the broken case if a core's frequency depends on others.

trace_power_frequency was implemented in a rather ungeneric
way, only in the acpi-cpufreq driver's target() function.

-> Move the call to trace_power_frequency to
   cpufreq.c:cpufreq_notify_transition() where CPUFREQ_POSTCHANGE
   notifier is triggered.
   This will support power frequency tracing by all cpufreq
   drivers.

trace_power_frequency did not trace frequency changes correctly
when the userspace governor was used or when CPU cores'
frequencies depend on each other.

-> Moving this into the CPUFREQ_POSTCHANGE notifier and passing the cpu
   which gets switched automatically fixes this.

Robert Schoene provided some important fixes on top of my
initial quick shot version which are integrated in this patch:
- Forgot some changes in power_end trace (TP_printk/variable names)
- Variable dummy in power_end must now be cpu_id
- Use static 64 bit variable instead of unsigned int for cpu_id

[akpm@linux-foundation.org: build fix]
Signed-off-by: Thomas Renninger <trenn@suse.de>
Cc: davej@codemonkey.org.uk
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Dave Jones <davej@codemonkey.org.uk>
Acked-by: Arjan van de Ven <arjan@infradead.org>
Cc: Robert Schoene <robert.schoene@tu-dresden.de>
Tested-by: Robert Schoene <robert.schoene@tu-dresden.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c |  3 ---
 arch/x86/kernel/process.c                  |  8 ++++----
 drivers/cpufreq/cpufreq.c                  |  3 +++
 drivers/cpuidle/cpuidle.c                  |  2 +-
 drivers/idle/intel_idle.c                  |  2 +-
 include/trace/events/power.h               | 27 +++++++++++++++------------
 tools/perf/builtin-timechart.c             | 11 ++++++-----
 7 files changed, 30 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 1d3cddaa40e..cee5263927c 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -34,7 +34,6 @@
 #include <linux/compiler.h>
 #include <linux/dmi.h>
 #include <linux/slab.h>
-#include <trace/events/power.h>
 
 #include <linux/acpi.h>
 #include <linux/io.h>
@@ -324,8 +323,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 		}
 	}
 
-	trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency);
-
 	switch (data->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
 		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e7e35219b32..787572d43d9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -371,7 +371,7 @@ static inline int hlt_use_halt(void)
 void default_idle(void)
 {
 	if (hlt_use_halt()) {
-		trace_power_start(POWER_CSTATE, 1);
+		trace_power_start(POWER_CSTATE, 1, smp_processor_id());
 		current_thread_info()->status &= ~TS_POLLING;
 		/*
 		 * TS_POLLING-cleared state must be visible before we
@@ -441,7 +441,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
  */
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
-	trace_power_start(POWER_CSTATE, (ax>>4)+1);
+	trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
 	if (!need_resched()) {
 		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
@@ -457,7 +457,7 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 static void mwait_idle(void)
 {
 	if (!need_resched()) {
-		trace_power_start(POWER_CSTATE, 1);
+		trace_power_start(POWER_CSTATE, 1, smp_processor_id());
 		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
 
@@ -478,7 +478,7 @@ static void mwait_idle(void)
  */
 static void poll_idle(void)
 {
-	trace_power_start(POWER_CSTATE, 0);
+	trace_power_start(POWER_CSTATE, 0, smp_processor_id());
 	local_irq_enable();
 	while (!need_resched())
 		cpu_relax();
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 063b2184caf..4ed665725cc 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -29,6 +29,8 @@
 #include <linux/completion.h>
 #include <linux/mutex.h>
 
+#include <trace/events/power.h>
+
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
 						"cpufreq-core", msg)
 
@@ -354,6 +356,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
 
 	case CPUFREQ_POSTCHANGE:
 		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
+                trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
 				CPUFREQ_POSTCHANGE, freqs);
 		if (likely(policy) && likely(policy->cpu == freqs->cpu))
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 199488576a0..dbefe15bd58 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -95,7 +95,7 @@ static void cpuidle_idle_call(void)
 	/* give the governor an opportunity to reflect on the outcome */
 	if (cpuidle_curr_governor->reflect)
 		cpuidle_curr_governor->reflect(dev);
-	trace_power_end(0);
+	trace_power_end(smp_processor_id());
 }
 
 /**
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 54f0fb4cd5d..03d202b1ff2 100755
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -231,7 +231,7 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
 
 	stop_critical_timings();
 #ifndef MODULE
-	trace_power_start(POWER_CSTATE, (eax >> 4) + 1);
+	trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu);
 #endif
 	if (!need_resched()) {
 
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index c4efe9b8280..35a2a6e7bf1 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -18,52 +18,55 @@ enum {
 
 DECLARE_EVENT_CLASS(power,
 
-	TP_PROTO(unsigned int type, unsigned int state),
+	TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
 
-	TP_ARGS(type, state),
+	TP_ARGS(type, state, cpu_id),
 
 	TP_STRUCT__entry(
 		__field(	u64,		type		)
 		__field(	u64,		state		)
+		__field(	u64,		cpu_id		)
 	),
 
 	TP_fast_assign(
 		__entry->type = type;
 		__entry->state = state;
+		__entry->cpu_id = cpu_id;
 	),
 
-	TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state)
+	TP_printk("type=%lu state=%lu cpu_id=%lu", (unsigned long)__entry->type,
+		(unsigned long)__entry->state, (unsigned long)__entry->cpu_id)
 );
 
 DEFINE_EVENT(power, power_start,
 
-	TP_PROTO(unsigned int type, unsigned int state),
+	TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
 
-	TP_ARGS(type, state)
+	TP_ARGS(type, state, cpu_id)
 );
 
 DEFINE_EVENT(power, power_frequency,
 
-	TP_PROTO(unsigned int type, unsigned int state),
+	TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
 
-	TP_ARGS(type, state)
+	TP_ARGS(type, state, cpu_id)
 );
 
 TRACE_EVENT(power_end,
 
-	TP_PROTO(int dummy),
+	TP_PROTO(unsigned int cpu_id),
 
-	TP_ARGS(dummy),
+	TP_ARGS(cpu_id),
 
 	TP_STRUCT__entry(
-		__field(	u64,		dummy		)
+		__field(	u64,		cpu_id		)
 	),
 
 	TP_fast_assign(
-		__entry->dummy = 0xffff;
+		__entry->cpu_id = cpu_id;
 	),
 
-	TP_printk("dummy=%lu", (unsigned long)__entry->dummy)
+	TP_printk("cpu_id=%lu", (unsigned long)__entry->cpu_id)
 
 );
 
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 5a52ed9fc10..5161619d471 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -300,8 +300,9 @@ struct trace_entry {
 
 struct power_entry {
 	struct trace_entry te;
-	s64	type;
-	s64	value;
+	u64	type;
+	u64	value;
+	u64	cpu_id;
 };
 
 #define TASK_COMM_LEN 16
@@ -498,13 +499,13 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 			return 0;
 
 		if (strcmp(event_str, "power:power_start") == 0)
-			c_state_start(data.cpu, data.time, pe->value);
+			c_state_start(pe->cpu_id, data.time, pe->value);
 
 		if (strcmp(event_str, "power:power_end") == 0)
-			c_state_end(data.cpu, data.time);
+			c_state_end(pe->cpu_id, data.time);
 
 		if (strcmp(event_str, "power:power_frequency") == 0)
-			p_state_change(data.cpu, data.time, pe->value);
+			p_state_change(pe->cpu_id, data.time, pe->value);
 
 		if (strcmp(event_str, "sched:sched_wakeup") == 0)
 			sched_wakeup(data.cpu, data.time, data.pid, te);
-- 
cgit v1.2.3-70-g09d2


From e955cead031177b083fbf18d04a03c06e330a439 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 29 Jul 2009 10:20:10 +0200
Subject: CAN: Add Flexcan CAN controller driver

This core is found on some Freescale SoCs and also some Coldfire
SoCs. Support for Coldfire is missing though at the moment as
they have an older revision of the core which does not have RX FIFO
support.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Acked-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/Kconfig              |    9 +
 drivers/net/can/Makefile             |    1 +
 drivers/net/can/flexcan.c            | 1030 ++++++++++++++++++++++++++++++++++
 include/linux/can/platform/flexcan.h |   20 +
 4 files changed, 1060 insertions(+)
 create mode 100644 drivers/net/can/flexcan.c
 create mode 100644 include/linux/can/platform/flexcan.h

(limited to 'include')

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index 2c5227c02fa..9d9e4539443 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -73,6 +73,15 @@ config CAN_JANZ_ICAN3
 	  This driver can also be built as a module. If so, the module will be
 	  called janz-ican3.ko.
 
+config HAVE_CAN_FLEXCAN
+	bool
+
+config CAN_FLEXCAN
+	tristate "Support for Freescale FLEXCAN based chips"
+	depends on CAN_DEV && HAVE_CAN_FLEXCAN
+	---help---
+	  Say Y here if you want support for Freescale FlexCAN.
+
 source "drivers/net/can/mscan/Kconfig"
 
 source "drivers/net/can/sja1000/Kconfig"
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index 9047cd066fe..00575373bbd 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -16,5 +16,6 @@ obj-$(CONFIG_CAN_TI_HECC)	+= ti_hecc.o
 obj-$(CONFIG_CAN_MCP251X)	+= mcp251x.o
 obj-$(CONFIG_CAN_BFIN)		+= bfin_can.o
 obj-$(CONFIG_CAN_JANZ_ICAN3)	+= janz-ican3.o
+obj-$(CONFIG_CAN_FLEXCAN)	+= flexcan.o
 
 ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
new file mode 100644
index 00000000000..ef443a090ba
--- /dev/null
+++ b/drivers/net/can/flexcan.c
@@ -0,0 +1,1030 @@
+/*
+ * flexcan.c - FLEXCAN CAN controller driver
+ *
+ * Copyright (c) 2005-2006 Varma Electronics Oy
+ * Copyright (c) 2009 Sascha Hauer, Pengutronix
+ * Copyright (c) 2010 Marc Kleine-Budde, Pengutronix
+ *
+ * Based on code originally by Andrey Volkov <avolkov@varma-el.com>
+ *
+ * LICENCE:
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/netdevice.h>
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/error.h>
+#include <linux/can/platform/flexcan.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include <mach/clock.h>
+
+#define DRV_NAME			"flexcan"
+
+/* 8 for RX fifo and 2 error handling */
+#define FLEXCAN_NAPI_WEIGHT		(8 + 2)
+
+/* FLEXCAN module configuration register (CANMCR) bits */
+#define FLEXCAN_MCR_MDIS		BIT(31)
+#define FLEXCAN_MCR_FRZ			BIT(30)
+#define FLEXCAN_MCR_FEN			BIT(29)
+#define FLEXCAN_MCR_HALT		BIT(28)
+#define FLEXCAN_MCR_NOT_RDY		BIT(27)
+#define FLEXCAN_MCR_WAK_MSK		BIT(26)
+#define FLEXCAN_MCR_SOFTRST		BIT(25)
+#define FLEXCAN_MCR_FRZ_ACK		BIT(24)
+#define FLEXCAN_MCR_SUPV		BIT(23)
+#define FLEXCAN_MCR_SLF_WAK		BIT(22)
+#define FLEXCAN_MCR_WRN_EN		BIT(21)
+#define FLEXCAN_MCR_LPM_ACK		BIT(20)
+#define FLEXCAN_MCR_WAK_SRC		BIT(19)
+#define FLEXCAN_MCR_DOZE		BIT(18)
+#define FLEXCAN_MCR_SRX_DIS		BIT(17)
+#define FLEXCAN_MCR_BCC			BIT(16)
+#define FLEXCAN_MCR_LPRIO_EN		BIT(13)
+#define FLEXCAN_MCR_AEN			BIT(12)
+#define FLEXCAN_MCR_MAXMB(x)		((x) & 0xf)
+#define FLEXCAN_MCR_IDAM_A		(0 << 8)
+#define FLEXCAN_MCR_IDAM_B		(1 << 8)
+#define FLEXCAN_MCR_IDAM_C		(2 << 8)
+#define FLEXCAN_MCR_IDAM_D		(3 << 8)
+
+/* FLEXCAN control register (CANCTRL) bits */
+#define FLEXCAN_CTRL_PRESDIV(x)		(((x) & 0xff) << 24)
+#define FLEXCAN_CTRL_RJW(x)		(((x) & 0x03) << 22)
+#define FLEXCAN_CTRL_PSEG1(x)		(((x) & 0x07) << 19)
+#define FLEXCAN_CTRL_PSEG2(x)		(((x) & 0x07) << 16)
+#define FLEXCAN_CTRL_BOFF_MSK		BIT(15)
+#define FLEXCAN_CTRL_ERR_MSK		BIT(14)
+#define FLEXCAN_CTRL_CLK_SRC		BIT(13)
+#define FLEXCAN_CTRL_LPB		BIT(12)
+#define FLEXCAN_CTRL_TWRN_MSK		BIT(11)
+#define FLEXCAN_CTRL_RWRN_MSK		BIT(10)
+#define FLEXCAN_CTRL_SMP		BIT(7)
+#define FLEXCAN_CTRL_BOFF_REC		BIT(6)
+#define FLEXCAN_CTRL_TSYN		BIT(5)
+#define FLEXCAN_CTRL_LBUF		BIT(4)
+#define FLEXCAN_CTRL_LOM		BIT(3)
+#define FLEXCAN_CTRL_PROPSEG(x)		((x) & 0x07)
+#define FLEXCAN_CTRL_ERR_BUS		(FLEXCAN_CTRL_ERR_MSK)
+#define FLEXCAN_CTRL_ERR_STATE \
+	(FLEXCAN_CTRL_TWRN_MSK | FLEXCAN_CTRL_RWRN_MSK | \
+	 FLEXCAN_CTRL_BOFF_MSK)
+#define FLEXCAN_CTRL_ERR_ALL \
+	(FLEXCAN_CTRL_ERR_BUS | FLEXCAN_CTRL_ERR_STATE)
+
+/* FLEXCAN error and status register (ESR) bits */
+#define FLEXCAN_ESR_TWRN_INT		BIT(17)
+#define FLEXCAN_ESR_RWRN_INT		BIT(16)
+#define FLEXCAN_ESR_BIT1_ERR		BIT(15)
+#define FLEXCAN_ESR_BIT0_ERR		BIT(14)
+#define FLEXCAN_ESR_ACK_ERR		BIT(13)
+#define FLEXCAN_ESR_CRC_ERR		BIT(12)
+#define FLEXCAN_ESR_FRM_ERR		BIT(11)
+#define FLEXCAN_ESR_STF_ERR		BIT(10)
+#define FLEXCAN_ESR_TX_WRN		BIT(9)
+#define FLEXCAN_ESR_RX_WRN		BIT(8)
+#define FLEXCAN_ESR_IDLE		BIT(7)
+#define FLEXCAN_ESR_TXRX		BIT(6)
+#define FLEXCAN_EST_FLT_CONF_SHIFT	(4)
+#define FLEXCAN_ESR_FLT_CONF_MASK	(0x3 << FLEXCAN_EST_FLT_CONF_SHIFT)
+#define FLEXCAN_ESR_FLT_CONF_ACTIVE	(0x0 << FLEXCAN_EST_FLT_CONF_SHIFT)
+#define FLEXCAN_ESR_FLT_CONF_PASSIVE	(0x1 << FLEXCAN_EST_FLT_CONF_SHIFT)
+#define FLEXCAN_ESR_BOFF_INT		BIT(2)
+#define FLEXCAN_ESR_ERR_INT		BIT(1)
+#define FLEXCAN_ESR_WAK_INT		BIT(0)
+#define FLEXCAN_ESR_ERR_BUS \
+	(FLEXCAN_ESR_BIT1_ERR | FLEXCAN_ESR_BIT0_ERR | \
+	 FLEXCAN_ESR_ACK_ERR | FLEXCAN_ESR_CRC_ERR | \
+	 FLEXCAN_ESR_FRM_ERR | FLEXCAN_ESR_STF_ERR)
+#define FLEXCAN_ESR_ERR_STATE \
+	(FLEXCAN_ESR_TWRN_INT | FLEXCAN_ESR_RWRN_INT | FLEXCAN_ESR_BOFF_INT)
+#define FLEXCAN_ESR_ERR_ALL \
+	(FLEXCAN_ESR_ERR_BUS | FLEXCAN_ESR_ERR_STATE)
+
+/* FLEXCAN interrupt flag register (IFLAG) bits */
+#define FLEXCAN_TX_BUF_ID		8
+#define FLEXCAN_IFLAG_BUF(x)		BIT(x)
+#define FLEXCAN_IFLAG_RX_FIFO_OVERFLOW	BIT(7)
+#define FLEXCAN_IFLAG_RX_FIFO_WARN	BIT(6)
+#define FLEXCAN_IFLAG_RX_FIFO_AVAILABLE	BIT(5)
+#define FLEXCAN_IFLAG_DEFAULT \
+	(FLEXCAN_IFLAG_RX_FIFO_OVERFLOW | FLEXCAN_IFLAG_RX_FIFO_AVAILABLE | \
+	 FLEXCAN_IFLAG_BUF(FLEXCAN_TX_BUF_ID))
+
+/* FLEXCAN message buffers */
+#define FLEXCAN_MB_CNT_CODE(x)		(((x) & 0xf) << 24)
+#define FLEXCAN_MB_CNT_SRR		BIT(22)
+#define FLEXCAN_MB_CNT_IDE		BIT(21)
+#define FLEXCAN_MB_CNT_RTR		BIT(20)
+#define FLEXCAN_MB_CNT_LENGTH(x)	(((x) & 0xf) << 16)
+#define FLEXCAN_MB_CNT_TIMESTAMP(x)	((x) & 0xffff)
+
+#define FLEXCAN_MB_CODE_MASK		(0xf0ffffff)
+
+/* Structure of the message buffer */
+struct flexcan_mb {
+	u32 can_ctrl;
+	u32 can_id;
+	u32 data[2];
+};
+
+/* Structure of the hardware registers */
+struct flexcan_regs {
+	u32 mcr;		/* 0x00 */
+	u32 ctrl;		/* 0x04 */
+	u32 timer;		/* 0x08 */
+	u32 _reserved1;		/* 0x0c */
+	u32 rxgmask;		/* 0x10 */
+	u32 rx14mask;		/* 0x14 */
+	u32 rx15mask;		/* 0x18 */
+	u32 ecr;		/* 0x1c */
+	u32 esr;		/* 0x20 */
+	u32 imask2;		/* 0x24 */
+	u32 imask1;		/* 0x28 */
+	u32 iflag2;		/* 0x2c */
+	u32 iflag1;		/* 0x30 */
+	u32 _reserved2[19];
+	struct flexcan_mb cantxfg[64];
+};
+
+struct flexcan_priv {
+	struct can_priv can;
+	struct net_device *dev;
+	struct napi_struct napi;
+
+	void __iomem *base;
+	u32 reg_esr;
+	u32 reg_ctrl_default;
+
+	struct clk *clk;
+	struct flexcan_platform_data *pdata;
+};
+
+static struct can_bittiming_const flexcan_bittiming_const = {
+	.name = DRV_NAME,
+	.tseg1_min = 4,
+	.tseg1_max = 16,
+	.tseg2_min = 2,
+	.tseg2_max = 8,
+	.sjw_max = 4,
+	.brp_min = 1,
+	.brp_max = 256,
+	.brp_inc = 1,
+};
+
+/*
+ * Switch transceiver on or off
+ */
+static void flexcan_transceiver_switch(const struct flexcan_priv *priv, int on)
+{
+	if (priv->pdata && priv->pdata->transceiver_switch)
+		priv->pdata->transceiver_switch(on);
+}
+
+static inline int flexcan_has_and_handle_berr(const struct flexcan_priv *priv,
+					      u32 reg_esr)
+{
+	return (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) &&
+		(reg_esr & FLEXCAN_ESR_ERR_BUS);
+}
+
+static inline void flexcan_chip_enable(struct flexcan_priv *priv)
+{
+	struct flexcan_regs __iomem *regs = priv->base;
+	u32 reg;
+
+	reg = readl(&regs->mcr);
+	reg &= ~FLEXCAN_MCR_MDIS;
+	writel(reg, &regs->mcr);
+
+	udelay(10);
+}
+
+static inline void flexcan_chip_disable(struct flexcan_priv *priv)
+{
+	struct flexcan_regs __iomem *regs = priv->base;
+	u32 reg;
+
+	reg = readl(&regs->mcr);
+	reg |= FLEXCAN_MCR_MDIS;
+	writel(reg, &regs->mcr);
+}
+
+static int flexcan_get_berr_counter(const struct net_device *dev,
+				    struct can_berr_counter *bec)
+{
+	const struct flexcan_priv *priv = netdev_priv(dev);
+	struct flexcan_regs __iomem *regs = priv->base;
+	u32 reg = readl(&regs->ecr);
+
+	bec->txerr = (reg >> 0) & 0xff;
+	bec->rxerr = (reg >> 8) & 0xff;
+
+	return 0;
+}
+
+static int flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	const struct flexcan_priv *priv = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	struct flexcan_regs __iomem *regs = priv->base;
+	struct can_frame *cf = (struct can_frame *)skb->data;
+	u32 can_id;
+	u32 ctrl = FLEXCAN_MB_CNT_CODE(0xc) | (cf->can_dlc << 16);
+
+	if (can_dropped_invalid_skb(dev, skb))
+		return NETDEV_TX_OK;
+
+	netif_stop_queue(dev);
+
+	if (cf->can_id & CAN_EFF_FLAG) {
+		can_id = cf->can_id & CAN_EFF_MASK;
+		ctrl |= FLEXCAN_MB_CNT_IDE | FLEXCAN_MB_CNT_SRR;
+	} else {
+		can_id = (cf->can_id & CAN_SFF_MASK) << 18;
+	}
+
+	if (cf->can_id & CAN_RTR_FLAG)
+		ctrl |= FLEXCAN_MB_CNT_RTR;
+
+	if (cf->can_dlc > 0) {
+		u32 data = be32_to_cpup((__be32 *)&cf->data[0]);
+		writel(data, &regs->cantxfg[FLEXCAN_TX_BUF_ID].data[0]);
+	}
+	if (cf->can_dlc > 3) {
+		u32 data = be32_to_cpup((__be32 *)&cf->data[4]);
+		writel(data, &regs->cantxfg[FLEXCAN_TX_BUF_ID].data[1]);
+	}
+
+	writel(can_id, &regs->cantxfg[FLEXCAN_TX_BUF_ID].can_id);
+	writel(ctrl, &regs->cantxfg[FLEXCAN_TX_BUF_ID].can_ctrl);
+
+	/* tx_packets is incremented in flexcan_irq; cf dies with the skb */
+	stats->tx_bytes += cf->can_dlc;
+
+	kfree_skb(skb);
+
+	return NETDEV_TX_OK;
+}
+
+static void do_bus_err(struct net_device *dev,
+		       struct can_frame *cf, u32 reg_esr)
+{
+	struct flexcan_priv *priv = netdev_priv(dev);
+	int rx_errors = 0, tx_errors = 0;
+
+	cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
+
+	if (reg_esr & FLEXCAN_ESR_BIT1_ERR) {
+		dev_dbg(dev->dev.parent, "BIT1_ERR irq\n");
+		cf->data[2] |= CAN_ERR_PROT_BIT1;
+		tx_errors = 1;
+	}
+	if (reg_esr & FLEXCAN_ESR_BIT0_ERR) {
+		dev_dbg(dev->dev.parent, "BIT0_ERR irq\n");
+		cf->data[2] |= CAN_ERR_PROT_BIT0;
+		tx_errors = 1;
+	}
+	if (reg_esr & FLEXCAN_ESR_ACK_ERR) {
+		dev_dbg(dev->dev.parent, "ACK_ERR irq\n");
+		cf->can_id |= CAN_ERR_ACK;
+		cf->data[3] |= CAN_ERR_PROT_LOC_ACK;
+		tx_errors = 1;
+	}
+	if (reg_esr & FLEXCAN_ESR_CRC_ERR) {
+		dev_dbg(dev->dev.parent, "CRC_ERR irq\n");
+		cf->data[2] |= CAN_ERR_PROT_BIT;
+		cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ;
+		rx_errors = 1;
+	}
+	if (reg_esr & FLEXCAN_ESR_FRM_ERR) {
+		dev_dbg(dev->dev.parent, "FRM_ERR irq\n");
+		cf->data[2] |= CAN_ERR_PROT_FORM;
+		rx_errors = 1;
+	}
+	if (reg_esr & FLEXCAN_ESR_STF_ERR) {
+		dev_dbg(dev->dev.parent, "STF_ERR irq\n");
+		cf->data[2] |= CAN_ERR_PROT_STUFF;
+		rx_errors = 1;
+	}
+
+	priv->can.can_stats.bus_error++;
+	if (rx_errors)
+		dev->stats.rx_errors++;
+	if (tx_errors)
+		dev->stats.tx_errors++;
+}
+
+static int flexcan_poll_bus_err(struct net_device *dev, u32 reg_esr)
+{
+	struct sk_buff *skb;
+	struct can_frame *cf;
+
+	skb = alloc_can_err_skb(dev, &cf);
+	if (unlikely(!skb))
+		return 0;
+
+	do_bus_err(dev, cf, reg_esr);
+	netif_receive_skb(skb);
+
+	dev->stats.rx_packets++;
+	dev->stats.rx_bytes += cf->can_dlc;
+
+	return 1;
+}
+
+static void do_state(struct net_device *dev,
+		     struct can_frame *cf, enum can_state new_state)
+{
+	struct flexcan_priv *priv = netdev_priv(dev);
+	struct can_berr_counter bec;
+
+	flexcan_get_berr_counter(dev, &bec);
+
+	switch (priv->can.state) {
+	case CAN_STATE_ERROR_ACTIVE:
+		/*
+		 * from: ERROR_ACTIVE
+		 * to  : ERROR_WARNING, ERROR_PASSIVE, BUS_OFF
+		 * =>  : there was a warning int
+		 */
+		if (new_state >= CAN_STATE_ERROR_WARNING &&
+		    new_state <= CAN_STATE_BUS_OFF) {
+			dev_dbg(dev->dev.parent, "Error Warning IRQ\n");
+			priv->can.can_stats.error_warning++;
+
+			cf->can_id |= CAN_ERR_CRTL;
+			cf->data[1] = (bec.txerr > bec.rxerr) ?
+				CAN_ERR_CRTL_TX_WARNING :
+				CAN_ERR_CRTL_RX_WARNING;
+		}
+	case CAN_STATE_ERROR_WARNING:	/* fallthrough */
+		/*
+		 * from: ERROR_ACTIVE, ERROR_WARNING
+		 * to  : ERROR_PASSIVE, BUS_OFF
+		 * =>  : error passive int
+		 */
+		if (new_state >= CAN_STATE_ERROR_PASSIVE &&
+		    new_state <= CAN_STATE_BUS_OFF) {
+			dev_dbg(dev->dev.parent, "Error Passive IRQ\n");
+			priv->can.can_stats.error_passive++;
+
+			cf->can_id |= CAN_ERR_CRTL;
+			cf->data[1] = (bec.txerr > bec.rxerr) ?
+				CAN_ERR_CRTL_TX_PASSIVE :
+				CAN_ERR_CRTL_RX_PASSIVE;
+		}
+		break;
+	case CAN_STATE_BUS_OFF:
+		dev_err(dev->dev.parent,
+			"BUG! hardware recovered automatically from BUS_OFF\n");
+		break;
+	default:
+		break;
+	}
+
+	/* process state changes depending on the new state */
+	switch (new_state) {
+	case CAN_STATE_ERROR_ACTIVE:
+		dev_dbg(dev->dev.parent, "Error Active\n");
+		cf->can_id |= CAN_ERR_PROT;
+		cf->data[2] = CAN_ERR_PROT_ACTIVE;
+		break;
+	case CAN_STATE_BUS_OFF:
+		cf->can_id |= CAN_ERR_BUSOFF;
+		can_bus_off(dev);
+		break;
+	default:
+		break;
+	}
+}
+
+static int flexcan_poll_state(struct net_device *dev, u32 reg_esr)
+{
+	struct flexcan_priv *priv = netdev_priv(dev);
+	struct sk_buff *skb;
+	struct can_frame *cf;
+	enum can_state new_state;
+	int flt;
+
+	flt = reg_esr & FLEXCAN_ESR_FLT_CONF_MASK;
+	if (likely(flt == FLEXCAN_ESR_FLT_CONF_ACTIVE)) {
+		if (likely(!(reg_esr & (FLEXCAN_ESR_TX_WRN |
+					FLEXCAN_ESR_RX_WRN))))
+			new_state = CAN_STATE_ERROR_ACTIVE;
+		else
+			new_state = CAN_STATE_ERROR_WARNING;
+	} else if (unlikely(flt == FLEXCAN_ESR_FLT_CONF_PASSIVE))
+		new_state = CAN_STATE_ERROR_PASSIVE;
+	else
+		new_state = CAN_STATE_BUS_OFF;
+
+	/* state hasn't changed */
+	if (likely(new_state == priv->can.state))
+		return 0;
+
+	skb = alloc_can_err_skb(dev, &cf);
+	if (unlikely(!skb))
+		return 0;
+
+	do_state(dev, cf, new_state);
+	priv->can.state = new_state;
+	netif_receive_skb(skb);
+
+	dev->stats.rx_packets++;
+	dev->stats.rx_bytes += cf->can_dlc;
+
+	return 1;
+}
+
+static void flexcan_read_fifo(const struct net_device *dev,
+			      struct can_frame *cf)
+{
+	const struct flexcan_priv *priv = netdev_priv(dev);
+	struct flexcan_regs __iomem *regs = priv->base;
+	struct flexcan_mb __iomem *mb = &regs->cantxfg[0];
+	u32 reg_ctrl, reg_id;
+
+	reg_ctrl = readl(&mb->can_ctrl);
+	reg_id = readl(&mb->can_id);
+	if (reg_ctrl & FLEXCAN_MB_CNT_IDE)
+		cf->can_id = ((reg_id >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG;
+	else
+		cf->can_id = (reg_id >> 18) & CAN_SFF_MASK;
+
+	if (reg_ctrl & FLEXCAN_MB_CNT_RTR)
+		cf->can_id |= CAN_RTR_FLAG;
+	cf->can_dlc = get_can_dlc((reg_ctrl >> 16) & 0xf);
+
+	*(__be32 *)(cf->data + 0) = cpu_to_be32(readl(&mb->data[0]));
+	*(__be32 *)(cf->data + 4) = cpu_to_be32(readl(&mb->data[1]));
+
+	/* mark as read */
+	writel(FLEXCAN_IFLAG_RX_FIFO_AVAILABLE, &regs->iflag1);
+	readl(&regs->timer);
+}
+
+static int flexcan_read_frame(struct net_device *dev)
+{
+	struct net_device_stats *stats = &dev->stats;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+
+	skb = alloc_can_skb(dev, &cf);
+	if (unlikely(!skb)) {
+		stats->rx_dropped++;
+		return 0;
+	}
+
+	flexcan_read_fifo(dev, cf);
+	netif_receive_skb(skb);
+
+	stats->rx_packets++;
+	stats->rx_bytes += cf->can_dlc;
+
+	return 1;
+}
+
+static int flexcan_poll(struct napi_struct *napi, int quota)
+{
+	struct net_device *dev = napi->dev;
+	const struct flexcan_priv *priv = netdev_priv(dev);
+	struct flexcan_regs __iomem *regs = priv->base;
+	u32 reg_iflag1, reg_esr;
+	int work_done = 0;
+
+	/*
+	 * The error bits are cleared on read,
+	 * use saved value from irq handler.
+	 */
+	reg_esr = readl(&regs->esr) | priv->reg_esr;
+
+	/* handle state changes */
+	work_done += flexcan_poll_state(dev, reg_esr);
+
+	/* handle RX-FIFO */
+	reg_iflag1 = readl(&regs->iflag1);
+	while (reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_AVAILABLE &&
+	       work_done < quota) {
+		work_done += flexcan_read_frame(dev);
+		reg_iflag1 = readl(&regs->iflag1);
+	}
+
+	/* report bus errors */
+	if (flexcan_has_and_handle_berr(priv, reg_esr) && work_done < quota)
+		work_done += flexcan_poll_bus_err(dev, reg_esr);
+
+	if (work_done < quota) {
+		napi_complete(napi);
+		/* enable IRQs */
+		writel(FLEXCAN_IFLAG_DEFAULT, &regs->imask1);
+		writel(priv->reg_ctrl_default, &regs->ctrl);
+	}
+
+	return work_done;
+}
+
+static irqreturn_t flexcan_irq(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct net_device_stats *stats = &dev->stats;
+	struct flexcan_priv *priv = netdev_priv(dev);
+	struct flexcan_regs __iomem *regs = priv->base;
+	u32 reg_iflag1, reg_esr;
+
+	reg_iflag1 = readl(&regs->iflag1);
+	reg_esr = readl(&regs->esr);
+	writel(FLEXCAN_ESR_ERR_INT, &regs->esr);	/* ACK err IRQ */
+
+	/*
+	 * schedule NAPI in case of:
+	 * - rx IRQ
+	 * - state change IRQ
+	 * - bus error IRQ and bus error reporting is activated
+	 */
+	if ((reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_AVAILABLE) ||
+	    (reg_esr & FLEXCAN_ESR_ERR_STATE) ||
+	    flexcan_has_and_handle_berr(priv, reg_esr)) {
+		/*
+		 * The error bits are cleared on read,
+		 * save them for later use.
+		 */
+		priv->reg_esr = reg_esr & FLEXCAN_ESR_ERR_BUS;
+		writel(FLEXCAN_IFLAG_DEFAULT & ~FLEXCAN_IFLAG_RX_FIFO_AVAILABLE,
+		       &regs->imask1);
+		writel(priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_ALL,
+		       &regs->ctrl);
+		napi_schedule(&priv->napi);
+	}
+
+	/* FIFO overflow */
+	if (reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_OVERFLOW) {
+		writel(FLEXCAN_IFLAG_RX_FIFO_OVERFLOW, &regs->iflag1);
+		dev->stats.rx_over_errors++;
+		dev->stats.rx_errors++;
+	}
+
+	/* transmission complete interrupt */
+	if (reg_iflag1 & (1 << FLEXCAN_TX_BUF_ID)) {
+		/* tx_bytes is incremented in flexcan_start_xmit */
+		stats->tx_packets++;
+		writel((1 << FLEXCAN_TX_BUF_ID), &regs->iflag1);
+		netif_wake_queue(dev);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void flexcan_set_bittiming(struct net_device *dev)
+{
+	const struct flexcan_priv *priv = netdev_priv(dev);
+	const struct can_bittiming *bt = &priv->can.bittiming;
+	struct flexcan_regs __iomem *regs = priv->base;
+	u32 reg;
+
+	reg = readl(&regs->ctrl);
+	reg &= ~(FLEXCAN_CTRL_PRESDIV(0xff) |
+		 FLEXCAN_CTRL_RJW(0x3) |
+		 FLEXCAN_CTRL_PSEG1(0x7) |
+		 FLEXCAN_CTRL_PSEG2(0x7) |
+		 FLEXCAN_CTRL_PROPSEG(0x7) |
+		 FLEXCAN_CTRL_LPB |
+		 FLEXCAN_CTRL_SMP |
+		 FLEXCAN_CTRL_LOM);
+
+	reg |= FLEXCAN_CTRL_PRESDIV(bt->brp - 1) |
+		FLEXCAN_CTRL_PSEG1(bt->phase_seg1 - 1) |
+		FLEXCAN_CTRL_PSEG2(bt->phase_seg2 - 1) |
+		FLEXCAN_CTRL_RJW(bt->sjw - 1) |
+		FLEXCAN_CTRL_PROPSEG(bt->prop_seg - 1);
+
+	if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK)
+		reg |= FLEXCAN_CTRL_LPB;
+	if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)
+		reg |= FLEXCAN_CTRL_LOM;
+	if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES)
+		reg |= FLEXCAN_CTRL_SMP;
+
+	dev_info(dev->dev.parent, "writing ctrl=0x%08x\n", reg);
+	writel(reg, &regs->ctrl);
+
+	/* print chip status */
+	dev_dbg(dev->dev.parent, "%s: mcr=0x%08x ctrl=0x%08x\n", __func__,
+		readl(&regs->mcr), readl(&regs->ctrl));
+}
+
+/*
+ * flexcan_chip_start
+ *
+ * this function is entered with clocks enabled
+ *
+ */
+static int flexcan_chip_start(struct net_device *dev)
+{
+	struct flexcan_priv *priv = netdev_priv(dev);
+	struct flexcan_regs __iomem *regs = priv->base;
+	unsigned int i;
+	int err;
+	u32 reg_mcr, reg_ctrl;
+
+	/* enable module */
+	flexcan_chip_enable(priv);
+
+	/* soft reset */
+	writel(FLEXCAN_MCR_SOFTRST, &regs->mcr);
+	udelay(10);
+
+	reg_mcr = readl(&regs->mcr);
+	if (reg_mcr & FLEXCAN_MCR_SOFTRST) {
+		dev_err(dev->dev.parent,
+			"Failed to softreset can module (mcr=0x%08x)\n",
+			reg_mcr);
+		err = -ENODEV;
+		goto out;
+	}
+
+	flexcan_set_bittiming(dev);
+
+	/*
+	 * MCR
+	 *
+	 * enable freeze
+	 * enable fifo
+	 * halt now
+	 * only supervisor access
+	 * enable warning int
+	 * choose format C
+	 *
+	 */
+	reg_mcr = readl(&regs->mcr);
+	reg_mcr |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_FEN | FLEXCAN_MCR_HALT |
+		FLEXCAN_MCR_SUPV | FLEXCAN_MCR_WRN_EN |
+		FLEXCAN_MCR_IDAM_C;
+	dev_dbg(dev->dev.parent, "%s: writing mcr=0x%08x", __func__, reg_mcr);
+	writel(reg_mcr, &regs->mcr);
+
+	/*
+	 * CTRL
+	 *
+	 * disable timer sync feature
+	 *
+	 * disable auto busoff recovery
+	 * transmit lowest buffer first
+	 *
+	 * enable tx and rx warning interrupt
+	 * enable bus off interrupt
+	 * (== FLEXCAN_CTRL_ERR_STATE)
+	 *
+	 * _note_: we enable the "error interrupt"
+	 * (FLEXCAN_CTRL_ERR_MSK), too. Otherwise we don't get any
+	 * warning or bus passive interrupts.
+	 */
+	reg_ctrl = readl(&regs->ctrl);
+	reg_ctrl &= ~FLEXCAN_CTRL_TSYN;
+	reg_ctrl |= FLEXCAN_CTRL_BOFF_REC | FLEXCAN_CTRL_LBUF |
+		FLEXCAN_CTRL_ERR_STATE | FLEXCAN_CTRL_ERR_MSK;
+
+	/* save for later use */
+	priv->reg_ctrl_default = reg_ctrl;
+	dev_dbg(dev->dev.parent, "%s: writing ctrl=0x%08x", __func__, reg_ctrl);
+	writel(reg_ctrl, &regs->ctrl);
+
+	for (i = 0; i < ARRAY_SIZE(regs->cantxfg); i++) {
+		writel(0, &regs->cantxfg[i].can_ctrl);
+		writel(0, &regs->cantxfg[i].can_id);
+		writel(0, &regs->cantxfg[i].data[0]);
+		writel(0, &regs->cantxfg[i].data[1]);
+
+		/* put MB into rx queue */
+		writel(FLEXCAN_MB_CNT_CODE(0x4), &regs->cantxfg[i].can_ctrl);
+	}
+
+	/* acceptance mask/acceptance code (accept everything) */
+	writel(0x0, &regs->rxgmask);
+	writel(0x0, &regs->rx14mask);
+	writel(0x0, &regs->rx15mask);
+
+	flexcan_transceiver_switch(priv, 1);
+
+	/* synchronize with the can bus */
+	reg_mcr = readl(&regs->mcr);
+	reg_mcr &= ~FLEXCAN_MCR_HALT;
+	writel(reg_mcr, &regs->mcr);
+
+	priv->can.state = CAN_STATE_ERROR_ACTIVE;
+
+	/* enable FIFO interrupts */
+	writel(FLEXCAN_IFLAG_DEFAULT, &regs->imask1);
+
+	/* print chip status */
+	dev_dbg(dev->dev.parent, "%s: reading mcr=0x%08x ctrl=0x%08x\n",
+		__func__, readl(&regs->mcr), readl(&regs->ctrl));
+
+	return 0;
+
+ out:
+	flexcan_chip_disable(priv);
+	return err;
+}
+
+/*
+ * flexcan_chip_stop
+ *
+ * this function is entered with clocks enabled
+ *
+ */
+static void flexcan_chip_stop(struct net_device *dev)
+{
+	struct flexcan_priv *priv = netdev_priv(dev);
+	struct flexcan_regs __iomem *regs = priv->base;
+	u32 reg;
+
+	/* Disable all interrupts */
+	writel(0, &regs->imask1);
+
+	/* Disable + halt module */
+	reg = readl(&regs->mcr);
+	reg |= FLEXCAN_MCR_MDIS | FLEXCAN_MCR_HALT;
+	writel(reg, &regs->mcr);
+
+	flexcan_transceiver_switch(priv, 0);
+	priv->can.state = CAN_STATE_STOPPED;
+
+	return;
+}
+
+static int flexcan_open(struct net_device *dev)
+{
+	struct flexcan_priv *priv = netdev_priv(dev);
+	int err;
+
+	clk_enable(priv->clk);
+
+	err = open_candev(dev);
+	if (err)
+		goto out;
+
+	err = request_irq(dev->irq, flexcan_irq, IRQF_SHARED, dev->name, dev);
+	if (err)
+		goto out_close;
+
+	/* start chip and queuing; on failure the IRQ must be released again */
+	err = flexcan_chip_start(dev);
+	if (err)
+		goto out_free_irq;
+	napi_enable(&priv->napi);
+	netif_start_queue(dev);
+	return 0;
+
+ out_free_irq:
+	free_irq(dev->irq, dev);
+ out_close:
+	close_candev(dev);
+ out:
+	clk_disable(priv->clk);
+	return err;
+}
+
+static int flexcan_close(struct net_device *dev)
+{
+	struct flexcan_priv *priv = netdev_priv(dev);
+
+	netif_stop_queue(dev);
+	napi_disable(&priv->napi);
+	flexcan_chip_stop(dev);
+
+	free_irq(dev->irq, dev);
+	clk_disable(priv->clk);
+
+	close_candev(dev);
+
+	return 0;
+}
+
+static int flexcan_set_mode(struct net_device *dev, enum can_mode mode)
+{
+	int err;
+
+	switch (mode) {
+	case CAN_MODE_START:
+		err = flexcan_chip_start(dev);
+		if (err)
+			return err;
+
+		netif_wake_queue(dev);
+		break;
+
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static const struct net_device_ops flexcan_netdev_ops = {
+	.ndo_open	= flexcan_open,
+	.ndo_stop	= flexcan_close,
+	.ndo_start_xmit	= flexcan_start_xmit,
+};
+
+static int __devinit register_flexcandev(struct net_device *dev)
+{
+	struct flexcan_priv *priv = netdev_priv(dev);
+	struct flexcan_regs __iomem *regs = priv->base;
+	u32 reg;
+	int err;	/* signed: holds -ENODEV or register_candev()'s result */
+
+	clk_enable(priv->clk);
+
+	/* select "bus clock", chip must be disabled */
+	flexcan_chip_disable(priv);
+	reg = readl(&regs->ctrl);
+	reg |= FLEXCAN_CTRL_CLK_SRC;
+	writel(reg, &regs->ctrl);
+	flexcan_chip_enable(priv);
+
+	/* set freeze, halt and activate FIFO, restrict register access */
+	reg = readl(&regs->mcr);
+	reg |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT |
+		FLEXCAN_MCR_FEN | FLEXCAN_MCR_SUPV;
+	writel(reg, &regs->mcr);
+
+	/*
+	 * Currently we only support newer versions of this core
+	 * featuring a RX FIFO. Older cores found on some Coldfire
+	 * derivatives are not yet supported.
+	 */
+	reg = readl(&regs->mcr);
+	if (!(reg & FLEXCAN_MCR_FEN)) {
+		dev_err(dev->dev.parent,
+			"Could not enable RX FIFO, unsupported core\n");
+		err = -ENODEV;
+		goto out;
+	}
+
+	err = register_candev(dev);
+
+ out:
+	/* disable core and turn off clocks */
+	flexcan_chip_disable(priv);
+	clk_disable(priv->clk);
+
+	return err;
+}
+
+static void __devexit unregister_flexcandev(struct net_device *dev)
+{
+	unregister_candev(dev);
+}
+
+static int __devinit flexcan_probe(struct platform_device *pdev)
+{
+	struct net_device *dev;
+	struct flexcan_priv *priv;
+	struct resource *mem;
+	struct clk *clk;
+	void __iomem *base;
+	resource_size_t mem_size;
+	int err, irq;
+
+	clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "no clock defined\n");
+		err = PTR_ERR(clk);
+		goto failed_clock;
+	}
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	irq = platform_get_irq(pdev, 0);
+	if (!mem || irq <= 0) {
+		err = -ENODEV;
+		goto failed_get;
+	}
+
+	mem_size = resource_size(mem);
+	if (!request_mem_region(mem->start, mem_size, pdev->name)) {
+		err = -EBUSY;
+		goto failed_req;
+	}
+
+	base = ioremap(mem->start, mem_size);
+	if (!base) {
+		err = -ENOMEM;
+		goto failed_map;
+	}
+
+	dev = alloc_candev(sizeof(struct flexcan_priv), 0);
+	if (!dev) {
+		err = -ENOMEM;
+		goto failed_alloc;
+	}
+
+	dev->netdev_ops = &flexcan_netdev_ops;
+	dev->irq = irq;
+	dev->flags |= IFF_ECHO; /* we support local echo in hardware */
+
+	priv = netdev_priv(dev);
+	priv->can.clock.freq = clk_get_rate(clk);
+	priv->can.bittiming_const = &flexcan_bittiming_const;
+	priv->can.do_set_mode = flexcan_set_mode;
+	priv->can.do_get_berr_counter = flexcan_get_berr_counter;
+	priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK |
+		CAN_CTRLMODE_LISTENONLY	| CAN_CTRLMODE_3_SAMPLES |
+		CAN_CTRLMODE_BERR_REPORTING;
+	priv->base = base;
+	priv->dev = dev;
+	priv->clk = clk;
+	priv->pdata = pdev->dev.platform_data;
+
+	netif_napi_add(dev, &priv->napi, flexcan_poll, FLEXCAN_NAPI_WEIGHT);
+
+	dev_set_drvdata(&pdev->dev, dev);
+	SET_NETDEV_DEV(dev, &pdev->dev);
+
+	err = register_flexcandev(dev);
+	if (err) {
+		dev_err(&pdev->dev, "registering netdev failed\n");
+		goto failed_register;
+	}
+
+	dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%d)\n",
+		 priv->base, dev->irq);
+
+	return 0;
+
+ failed_register:
+	free_candev(dev);
+ failed_alloc:
+	iounmap(base);
+ failed_map:
+	release_mem_region(mem->start, mem_size);
+ failed_req:
+ failed_get:
+	clk_put(clk);	/* failed_get is reached with the clock already held */
+ failed_clock:
+	return err;
+}
+
+static int __devexit flexcan_remove(struct platform_device *pdev)
+{
+	struct net_device *dev = platform_get_drvdata(pdev);
+	struct flexcan_priv *priv = netdev_priv(dev);
+	struct resource *mem;
+
+	unregister_flexcandev(dev);
+	platform_set_drvdata(pdev, NULL);
+	iounmap(priv->base);
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(mem->start, resource_size(mem));
+	clk_put(priv->clk);
+
+	/* priv points into dev; free dev only after the last use of priv */
+	free_candev(dev);
+	return 0;
+}
+
+static struct platform_driver flexcan_driver = {
+	.driver.name = DRV_NAME,
+	.probe = flexcan_probe,
+	.remove = __devexit_p(flexcan_remove),
+};
+
+static int __init flexcan_init(void)
+{
+	pr_info("%s netdevice driver\n", DRV_NAME);
+	return platform_driver_register(&flexcan_driver);
+}
+
+static void __exit flexcan_exit(void)
+{
+	platform_driver_unregister(&flexcan_driver);
+	pr_info("%s: driver removed\n", DRV_NAME);
+}
+
+module_init(flexcan_init);
+module_exit(flexcan_exit);
+
+MODULE_AUTHOR("Sascha Hauer <kernel@pengutronix.de>, "
+	      "Marc Kleine-Budde <kernel@pengutronix.de>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("CAN port driver for flexcan based chip");
diff --git a/include/linux/can/platform/flexcan.h b/include/linux/can/platform/flexcan.h
new file mode 100644
index 00000000000..72b713ab57e
--- /dev/null
+++ b/include/linux/can/platform/flexcan.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2010 Marc Kleine-Budde <kernel@pengutronix.de>
+ *
+ * This file is released under the GPLv2
+ *
+ */
+
+#ifndef __CAN_PLATFORM_FLEXCAN_H
+#define __CAN_PLATFORM_FLEXCAN_H
+
+/**
+ * struct flexcan_platform_data - flex CAN controller platform data
+ * @transceiver_switch:         - called to power on/off the transceiver
+ *
+ */
+struct flexcan_platform_data {
+	void (*transceiver_switch)(int enable);
+};
+
+#endif /* __CAN_PLATFORM_FLEXCAN_H */
-- 
cgit v1.2.3-70-g09d2


From 8a35747a5d13b99e076b0222729e0caa48cb69b6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 21 Jul 2010 21:44:31 +0000
Subject: macvtap: Limit packet queue length

Mark Wagner reported OOM symptoms when sending UDP traffic over
a macvtap link to a kvm receiver.

This appears to be caused by the fact that macvtap packet queues
are unlimited in length.  This means that if the receiver can't
keep up with the rate of flow, then we will hit OOM. Of course
it gets worse if the OOM killer then decides to kill the receiver.

This patch imposes a cap on the packet queue length, in the same
way as the tuntap driver, using the device TX queue length.

Please note that macvtap currently has no way of giving congestion
notification, that means the software device TX queue cannot be
used and packets will always be dropped once the macvtap driver
queue fills up.

This shouldn't be a great problem for the scenario where macvtap
is used to feed a kvm receiver, as the traffic is most likely
external in origin so congestion notification can't be applied
anyway.

Of course, if anybody decides to complain about guest-to-guest
UDP packet loss down the track, then we may have to revisit this.

Incidentally, this patch also fixes a real memory leak when
macvtap_get_queue fails.

Chris Wright noticed that for this patch to work, we need a
non-zero TX queue length.  This patch includes his work to change
the default macvtap TX queue length to 500.

Reported-by: Mark Wagner <mwagner@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Chris Wright <chrisw@sous-sol.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c      | 10 ++++++++--
 drivers/net/macvtap.c      | 18 ++++++++++++++++--
 include/linux/if_macvlan.h |  2 ++
 3 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 87e8d4cb405..f15fe2cf72a 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -499,7 +499,7 @@ static const struct net_device_ops macvlan_netdev_ops = {
 	.ndo_validate_addr	= eth_validate_addr,
 };
 
-static void macvlan_setup(struct net_device *dev)
+void macvlan_common_setup(struct net_device *dev)
 {
 	ether_setup(dev);
 
@@ -508,6 +508,12 @@ static void macvlan_setup(struct net_device *dev)
 	dev->destructor		= free_netdev;
 	dev->header_ops		= &macvlan_hard_header_ops,
 	dev->ethtool_ops	= &macvlan_ethtool_ops;
+}
+EXPORT_SYMBOL_GPL(macvlan_common_setup);
+
+static void macvlan_setup(struct net_device *dev)
+{
+	macvlan_common_setup(dev);
 	dev->tx_queue_len	= 0;
 }
 
@@ -705,7 +711,6 @@ int macvlan_link_register(struct rtnl_link_ops *ops)
 	/* common fields */
 	ops->priv_size		= sizeof(struct macvlan_dev);
 	ops->get_tx_queues	= macvlan_get_tx_queues;
-	ops->setup		= macvlan_setup;
 	ops->validate		= macvlan_validate;
 	ops->maxtype		= IFLA_MACVLAN_MAX;
 	ops->policy		= macvlan_policy;
@@ -719,6 +724,7 @@ EXPORT_SYMBOL_GPL(macvlan_link_register);
 
 static struct rtnl_link_ops macvlan_link_ops = {
 	.kind		= "macvlan",
+	.setup		= macvlan_setup,
 	.newlink	= macvlan_newlink,
 	.dellink	= macvlan_dellink,
 };
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index a8a94e2f6dd..ff02b836c3c 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -180,11 +180,18 @@ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)
 {
 	struct macvtap_queue *q = macvtap_get_queue(dev, skb);
 	if (!q)
-		return -ENOLINK;
+		goto drop;
+
+	if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len)
+		goto drop;
 
 	skb_queue_tail(&q->sk.sk_receive_queue, skb);
 	wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND);
-	return 0;
+	return NET_RX_SUCCESS;
+
+drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
 }
 
 /*
@@ -235,8 +242,15 @@ static void macvtap_dellink(struct net_device *dev,
 	macvlan_dellink(dev, head);
 }
 
+static void macvtap_setup(struct net_device *dev)
+{
+	macvlan_common_setup(dev);
+	dev->tx_queue_len = TUN_READQ_SIZE;
+}
+
 static struct rtnl_link_ops macvtap_link_ops __read_mostly = {
 	.kind		= "macvtap",
+	.setup		= macvtap_setup,
 	.newlink	= macvtap_newlink,
 	.dellink	= macvtap_dellink,
 };
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 9ea047aca79..1ffaeffeff7 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -67,6 +67,8 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
 	}
 }
 
+extern void macvlan_common_setup(struct net_device *dev);
+
 extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 				  struct nlattr *tb[], struct nlattr *data[],
 				  int (*receive)(struct sk_buff *skb),
-- 
cgit v1.2.3-70-g09d2


From e120153ddf8620fd0a194d301e9c5a8b28483bb5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 22 Jul 2010 14:14:25 +0200
Subject: workqueue: fix how cpu number is stored in work->data

Once a work starts execution, its data contains the cpu number it was
on instead of pointing to cwq.  This is added by commit 7a22ad75
(workqueue: carry cpu number in work data once execution starts) to
reliably determine the cpu the work was last on even if the workqueue
itself was destroyed in between.

Whether data points to a cwq or contains a cpu number was
distinguished by comparing the value against PAGE_OFFSET.  The
assumption was that a cpu number should be below PAGE_OFFSET while a
pointer to cwq should be above it.  However, on architectures which
use separate address spaces for user and kernel spaces, this doesn't
hold as PAGE_OFFSET is zero.

Fix it by using an explicit flag, WORK_STRUCT_CWQ, to mark what the
data field contains.  If the flag is set, it's pointing to a cwq;
otherwise, it contains a cpu number.

Reported on s390 and microblaze during linux-next testing.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Sachin Sant <sachinp@in.ibm.com>
Reported-by: Michal Simek <michal.simek@petalogix.com>
Reported-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Tested-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Tested-by: Michal Simek <monstr@monstr.eu>
---
 include/linux/workqueue.h | 14 ++++++++------
 kernel/workqueue.c        | 36 +++++++++++++-----------------------
 2 files changed, 21 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index d74a529ed13..5f76001c4e6 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -25,17 +25,19 @@ typedef void (*work_func_t)(struct work_struct *work);
 
 enum {
 	WORK_STRUCT_PENDING_BIT	= 0,	/* work item is pending execution */
-	WORK_STRUCT_LINKED_BIT	= 1,	/* next work is linked to this one */
+	WORK_STRUCT_CWQ_BIT	= 1,	/* data points to cwq */
+	WORK_STRUCT_LINKED_BIT	= 2,	/* next work is linked to this one */
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
-	WORK_STRUCT_STATIC_BIT	= 2,	/* static initializer (debugobjects) */
-	WORK_STRUCT_COLOR_SHIFT	= 3,	/* color for workqueue flushing */
+	WORK_STRUCT_STATIC_BIT	= 3,	/* static initializer (debugobjects) */
+	WORK_STRUCT_COLOR_SHIFT	= 4,	/* color for workqueue flushing */
 #else
-	WORK_STRUCT_COLOR_SHIFT	= 2,	/* color for workqueue flushing */
+	WORK_STRUCT_COLOR_SHIFT	= 3,	/* color for workqueue flushing */
 #endif
 
 	WORK_STRUCT_COLOR_BITS	= 4,
 
 	WORK_STRUCT_PENDING	= 1 << WORK_STRUCT_PENDING_BIT,
+	WORK_STRUCT_CWQ		= 1 << WORK_STRUCT_CWQ_BIT,
 	WORK_STRUCT_LINKED	= 1 << WORK_STRUCT_LINKED_BIT,
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 	WORK_STRUCT_STATIC	= 1 << WORK_STRUCT_STATIC_BIT,
@@ -56,8 +58,8 @@ enum {
 	WORK_CPU_LAST		= WORK_CPU_NONE,
 
 	/*
-	 * Reserve 6 bits off of cwq pointer w/ debugobjects turned
-	 * off.  This makes cwqs aligned to 64 bytes which isn't too
+	 * Reserve 7 bits off of cwq pointer w/ debugobjects turned
+	 * off.  This makes cwqs aligned to 128 bytes which isn't too
 	 * excessive while allowing 15 workqueue flush colors.
 	 */
 	WORK_STRUCT_FLAG_BITS	= WORK_STRUCT_COLOR_SHIFT +
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c11edc9c936..e5cb7faac58 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -468,10 +468,9 @@ static int work_next_color(int color)
 }
 
 /*
- * Work data points to the cwq while a work is on queue.  Once
- * execution starts, it points to the cpu the work was last on.  This
- * can be distinguished by comparing the data value against
- * PAGE_OFFSET.
+ * A work's data points to the cwq with WORK_STRUCT_CWQ set while the
+ * work is on queue.  Once execution starts, WORK_STRUCT_CWQ is
+ * cleared and the work data contains the cpu number it was last on.
  *
  * set_work_{cwq|cpu}() and clear_work_data() can be used to set the
  * cwq, cpu or clear work->data.  These functions should only be
@@ -494,7 +493,7 @@ static void set_work_cwq(struct work_struct *work,
 			 unsigned long extra_flags)
 {
 	set_work_data(work, (unsigned long)cwq,
-		      WORK_STRUCT_PENDING | extra_flags);
+		      WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags);
 }
 
 static void set_work_cpu(struct work_struct *work, unsigned int cpu)
@@ -507,25 +506,24 @@ static void clear_work_data(struct work_struct *work)
 	set_work_data(work, WORK_STRUCT_NO_CPU, 0);
 }
 
-static inline unsigned long get_work_data(struct work_struct *work)
-{
-	return atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK;
-}
-
 static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work)
 {
-	unsigned long data = get_work_data(work);
+	unsigned long data = atomic_long_read(&work->data);
 
-	return data >= PAGE_OFFSET ? (void *)data : NULL;
+	if (data & WORK_STRUCT_CWQ)
+		return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
+	else
+		return NULL;
 }
 
 static struct global_cwq *get_work_gcwq(struct work_struct *work)
 {
-	unsigned long data = get_work_data(work);
+	unsigned long data = atomic_long_read(&work->data);
 	unsigned int cpu;
 
-	if (data >= PAGE_OFFSET)
-		return ((struct cpu_workqueue_struct *)data)->gcwq;
+	if (data & WORK_STRUCT_CWQ)
+		return ((struct cpu_workqueue_struct *)
+			(data & WORK_STRUCT_WQ_DATA_MASK))->gcwq;
 
 	cpu = data >> WORK_STRUCT_FLAG_BITS;
 	if (cpu == WORK_CPU_NONE)
@@ -3501,14 +3499,6 @@ void __init init_workqueues(void)
 	unsigned int cpu;
 	int i;
 
-	/*
-	 * The pointer part of work->data is either pointing to the
-	 * cwq or contains the cpu number the work ran last on.  Make
-	 * sure cpu number won't overflow into kernel pointer area so
-	 * that they can be distinguished.
-	 */
-	BUILD_BUG_ON(WORK_CPU_LAST << WORK_STRUCT_FLAG_BITS >= PAGE_OFFSET);
-
 	hotcpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE);
 
 	/* initialize gcwqs */
-- 
cgit v1.2.3-70-g09d2


From 963bfeeeec913d135c15dc400f2f86cb62655d81 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 20 Jul 2010 22:03:14 +0000
Subject: net: RTA_MARK addition

Add a new rt attribute, RTA_MARK, and use it in
rt_fill_info()/inet_rtm_getroute() to support following commands :

ip route get 192.168.20.110 mark NUMBER
ip route get 192.168.20.108 from 192.168.20.110 iif eth1 mark NUMBER
ip route list cache [192.168.20.110] mark NUMBER

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 1 +
 net/ipv4/route.c          | 7 +++++++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index fbc8cb0d48c..58d44491880 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -282,6 +282,7 @@ enum rtattr_type_t {
 	RTA_SESSION, /* no longer used */
 	RTA_MP_ALGO, /* no longer used */
 	RTA_TABLE,
+	RTA_MARK,
 	__RTA_MAX
 };
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 562ce92de2a..3f56b6e6c6a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2878,6 +2878,9 @@ static int rt_fill_info(struct net *net,
 	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
 		goto nla_put_failure;
 
+	if (rt->fl.mark)
+		NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark);
+
 	error = rt->dst.error;
 	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
 	if (rt->peer) {
@@ -2933,6 +2936,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	__be32 src = 0;
 	u32 iif;
 	int err;
+	int mark;
 	struct sk_buff *skb;
 
 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
@@ -2960,6 +2964,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
 	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
 	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
+	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
 
 	if (iif) {
 		struct net_device *dev;
@@ -2972,6 +2977,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 
 		skb->protocol	= htons(ETH_P_IP);
 		skb->dev	= dev;
+		skb->mark	= mark;
 		local_bh_disable();
 		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
 		local_bh_enable();
@@ -2989,6 +2995,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 				},
 			},
 			.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
+			.mark = mark,
 		};
 		err = ip_route_output_key(net, &rt, &fl);
 	}
-- 
cgit v1.2.3-70-g09d2


From 718be4aaf3613cf7c2d097f925abc3d3553c0605 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Thu, 22 Jul 2010 16:54:27 -0400
Subject: ACPI: skip checking BM_STS if the BIOS doesn't ask for it

It turns out that there is a bit in the _CST for Intel FFH C3
that tells the OS if we should be checking BM_STS or not.

Linux has been unconditionally checking BM_STS.
If the chip-set is configured to enable BM_STS,
it can retard or completely prevent entry into
deep C-states -- as illustrated by turbostat:

http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/

ref: Intel Processor Vendor-Specific ACPI Interface Specification
table 4 "_CST FFH GAS Field Encoding"
Bit 1: Set to 1 if OSPM should use Bus Master avoidance for this C-state

https://bugzilla.kernel.org/show_bug.cgi?id=15886

Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/acpi/cstate.c | 9 +++++++++
 drivers/acpi/processor_idle.c | 2 +-
 include/acpi/processor.h      | 3 ++-
 3 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 2e837f5080f..fb7a5f052e2 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -145,6 +145,15 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 		percpu_entry->states[cx->index].eax = cx->address;
 		percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK;
 	}
+
+	/*
+	 * For _CST FFH on Intel, if GAS.access_size bit 1 is cleared,
+	 * then we should skip checking BM_STS for this C-state.
+	 * ref: "Intel Processor Vendor-Specific ACPI Interface Specification"
+	 */
+	if ((c->x86_vendor == X86_VENDOR_INTEL) && !(reg->access_size & 0x2))
+		cx->bm_sts_skip = 1;
+
 	return retval;
 }
 EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index b1b385692f4..b351342f1fa 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -947,7 +947,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
 	if (acpi_idle_suspend)
 		return(acpi_idle_enter_c1(dev, state));
 
-	if (acpi_idle_bm_check()) {
+	if (!cx->bm_sts_skip && acpi_idle_bm_check()) {
 		if (dev->safe_state) {
 			dev->last_state = dev->safe_state;
 			return dev->safe_state->enter(dev, dev->safe_state);
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index da565a48240..a68ca8a11a5 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -48,7 +48,7 @@ struct acpi_power_register {
 	u8 space_id;
 	u8 bit_width;
 	u8 bit_offset;
-	u8 reserved;
+	u8 access_size;
 	u64 address;
 } __attribute__ ((packed));
 
@@ -63,6 +63,7 @@ struct acpi_processor_cx {
 	u32 power;
 	u32 usage;
 	u64 time;
+	u8 bm_sts_skip;
 	char desc[ACPI_CX_DESC_LEN];
 };
 
-- 
cgit v1.2.3-70-g09d2


From 8b8edefa2fffbff97f9eec8b70e78ae23abad1a0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 20 Jul 2010 22:09:01 +0200
Subject: fscache: convert object to use workqueue instead of slow-work

Make fscache object state transition callbacks use workqueue instead
of slow-work.  New dedicated unbound CPU workqueue fscache_object_wq
is created.  get/put callbacks are renamed and modified to take
@object and called directly from the enqueue wrapper and the work
function.  While at it, make all open coded instances of get/put to
use fscache_get/put_object().

* Unbound workqueue is used.

* work_busy() output is printed instead of slow-work flags in object
  debugging outputs.  They mean basically the same thing bit-for-bit.

* sysctl fscache.object_max_active added to control concurrency.  The
  default value is nr_cpus clamped between 4 and
  WQ_UNBOUND_MAX_ACTIVE.

* slow_work_sleep_till_thread_needed() is replaced with fscache
  private implementation fscache_object_sleep_till_congested() which
  waits on fscache_object_wq congestion.

* debugfs support is dropped for now.  Tracing API based debug
  facility is planned to be added.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David Howells <dhowells@redhat.com>
---
 Documentation/filesystems/caching/fscache.txt |  10 +--
 fs/cachefiles/namei.c                         |  13 ++--
 fs/fscache/internal.h                         |   7 ++
 fs/fscache/main.c                             |  76 ++++++++++++++++++
 fs/fscache/object-list.c                      |  11 ++-
 fs/fscache/object.c                           | 106 +++++++++++++-------------
 include/linux/fscache-cache.h                 |   9 ++-
 7 files changed, 158 insertions(+), 74 deletions(-)

(limited to 'include')

diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt
index a91e2e2095b..770267af5b3 100644
--- a/Documentation/filesystems/caching/fscache.txt
+++ b/Documentation/filesystems/caching/fscache.txt
@@ -343,8 +343,8 @@ This will look something like:
 	[root@andromeda ~]# head /proc/fs/fscache/objects
 	OBJECT   PARENT   STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA       OBJECT_KEY, AUX_DATA
 	======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================
-	   17e4b        2 ACTV     0   0   0   0  0     0 7b  4 0 8 | NFS.fh           DT  0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a
-	   1693a        2 ACTV     0   0   0   0  0     0 7b  4 0 8 | NFS.fh           DT  0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a
+	   17e4b        2 ACTV     0   0   0   0  0     0 7b  4 0 0 | NFS.fh           DT  0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a
+	   1693a        2 ACTV     0   0   0   0  0     0 7b  4 0 0 | NFS.fh           DT  0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a
 
 where the first set of columns before the '|' describe the object:
 
@@ -362,7 +362,7 @@ where the first set of columns before the '|' describe the object:
 	EM	Object's event mask
 	EV	Events raised on this object
 	F	Object flags
-	S	Object slow-work work item flags
+	S	Object work item busy state mask (1:pending 2:running)
 
 and the second set of columns describe the object's cookie, if present:
 
@@ -395,8 +395,8 @@ and the following paired letters:
 	w	Show objects that don't have pending writes
 	R	Show objects that have outstanding reads
 	r	Show objects that don't have outstanding reads
-	S	Show objects that have slow work queued
-	s	Show objects that don't have slow work queued
+	S	Show objects that have work queued
+	s	Show objects that don't have work queued
 
 If neither side of a letter pair is given, then both are implied.  For example:
 
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index f4a7840bf42..42c7fafc8bf 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -37,9 +37,9 @@ void __cachefiles_printk_object(struct cachefiles_object *object,
 
 	printk(KERN_ERR "%sobject: OBJ%x\n",
 	       prefix, object->fscache.debug_id);
-	printk(KERN_ERR "%sobjstate=%s fl=%lx swfl=%lx ev=%lx[%lx]\n",
+	printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n",
 	       prefix, fscache_object_states[object->fscache.state],
-	       object->fscache.flags, object->fscache.work.flags,
+	       object->fscache.flags, work_busy(&object->fscache.work),
 	       object->fscache.events,
 	       object->fscache.event_mask & FSCACHE_OBJECT_EVENTS_MASK);
 	printk(KERN_ERR "%sops=%u inp=%u exc=%u\n",
@@ -212,7 +212,7 @@ wait_for_old_object:
 
 		/* if the object we're waiting for is queued for processing,
 		 * then just put ourselves on the queue behind it */
-		if (slow_work_is_queued(&xobject->fscache.work)) {
+		if (work_pending(&xobject->fscache.work)) {
 			_debug("queue OBJ%x behind OBJ%x immediately",
 			       object->fscache.debug_id,
 			       xobject->fscache.debug_id);
@@ -220,8 +220,7 @@ wait_for_old_object:
 		}
 
 		/* otherwise we sleep until either the object we're waiting for
-		 * is done, or the slow-work facility wants the thread back to
-		 * do other work */
+		 * is done, or the fscache_object is congested */
 		wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE);
 		init_wait(&wait);
 		requeue = false;
@@ -229,8 +228,8 @@ wait_for_old_object:
 			prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
 			if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags))
 				break;
-			requeue = slow_work_sleep_till_thread_needed(
-				&object->fscache.work, &timeout);
+
+			requeue = fscache_object_sleep_till_congested(&timeout);
 		} while (timeout > 0 && !requeue);
 		finish_wait(wq, &wait);
 
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index edd7434ab6e..6e0b5fb2523 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -82,6 +82,13 @@ extern unsigned fscache_defer_lookup;
 extern unsigned fscache_defer_create;
 extern unsigned fscache_debug;
 extern struct kobject *fscache_root;
+extern struct workqueue_struct *fscache_object_wq;
+DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
+
+static inline bool fscache_object_congested(void)
+{
+	return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
+}
 
 extern int fscache_wait_bit(void *);
 extern int fscache_wait_bit_interruptible(void *);
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index add6bdb53f0..bb8d4c35c7a 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -15,6 +15,7 @@
 #include <linux/sched.h>
 #include <linux/completion.h>
 #include <linux/slab.h>
+#include <linux/seq_file.h>
 #include "internal.h"
 
 MODULE_DESCRIPTION("FS Cache Manager");
@@ -40,22 +41,89 @@ MODULE_PARM_DESC(fscache_debug,
 		 "FS-Cache debugging mask");
 
 struct kobject *fscache_root;
+struct workqueue_struct *fscache_object_wq;
+
+DEFINE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
+
+/* these values serve as lower bounds, will be adjusted in fscache_init() */
+static unsigned fscache_object_max_active = 4;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *fscache_sysctl_header;
+
+static int fscache_max_active_sysctl(struct ctl_table *table, int write,
+				     void __user *buffer,
+				     size_t *lenp, loff_t *ppos)
+{
+	struct workqueue_struct **wqp = table->extra1;
+	unsigned int *datap = table->data;
+	int ret;
+
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (ret == 0)
+		workqueue_set_max_active(*wqp, *datap);
+	return ret;
+}
+
+ctl_table fscache_sysctls[] = {
+	{
+		.procname	= "object_max_active",
+		.data		= &fscache_object_max_active,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= fscache_max_active_sysctl,
+		.extra1		= &fscache_object_wq,
+	},
+	{}
+};
+
+ctl_table fscache_sysctls_root[] = {
+	{
+		.procname	= "fscache",
+		.mode		= 0555,
+		.child		= fscache_sysctls,
+	},
+	{}
+};
+#endif
 
 /*
  * initialise the fs caching module
  */
 static int __init fscache_init(void)
 {
+	unsigned int nr_cpus = num_possible_cpus();
+	unsigned int cpu;
 	int ret;
 
 	ret = slow_work_register_user(THIS_MODULE);
 	if (ret < 0)
 		goto error_slow_work;
 
+	fscache_object_max_active =
+		clamp_val(nr_cpus,
+			  fscache_object_max_active, WQ_UNBOUND_MAX_ACTIVE);
+
+	ret = -ENOMEM;
+	fscache_object_wq = alloc_workqueue("fscache_object", WQ_UNBOUND,
+					    fscache_object_max_active);
+	if (!fscache_object_wq)
+		goto error_object_wq;
+
+	for_each_possible_cpu(cpu)
+		init_waitqueue_head(&per_cpu(fscache_object_cong_wait, cpu));
+
 	ret = fscache_proc_init();
 	if (ret < 0)
 		goto error_proc;
 
+#ifdef CONFIG_SYSCTL
+	ret = -ENOMEM;
+	fscache_sysctl_header = register_sysctl_table(fscache_sysctls_root);
+	if (!fscache_sysctl_header)
+		goto error_sysctl;
+#endif
+
 	fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar",
 					       sizeof(struct fscache_cookie),
 					       0,
@@ -78,8 +146,14 @@ static int __init fscache_init(void)
 error_kobj:
 	kmem_cache_destroy(fscache_cookie_jar);
 error_cookie_jar:
+#ifdef CONFIG_SYSCTL
+	unregister_sysctl_table(fscache_sysctl_header);
+error_sysctl:
+#endif
 	fscache_proc_cleanup();
 error_proc:
+	destroy_workqueue(fscache_object_wq);
+error_object_wq:
 	slow_work_unregister_user(THIS_MODULE);
 error_slow_work:
 	return ret;
@@ -96,7 +170,9 @@ static void __exit fscache_exit(void)
 
 	kobject_put(fscache_root);
 	kmem_cache_destroy(fscache_cookie_jar);
+	unregister_sysctl_table(fscache_sysctl_header);
 	fscache_proc_cleanup();
+	destroy_workqueue(fscache_object_wq);
 	slow_work_unregister_user(THIS_MODULE);
 	printk(KERN_NOTICE "FS-Cache: Unloaded\n");
 }
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index 4a8eb31c533..ebe29c58138 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -34,8 +34,8 @@ struct fscache_objlist_data {
 #define FSCACHE_OBJLIST_CONFIG_NOREADS	0x00000200	/* show objects without active reads */
 #define FSCACHE_OBJLIST_CONFIG_EVENTS	0x00000400	/* show objects with events */
 #define FSCACHE_OBJLIST_CONFIG_NOEVENTS	0x00000800	/* show objects without no events */
-#define FSCACHE_OBJLIST_CONFIG_WORK	0x00001000	/* show objects with slow work */
-#define FSCACHE_OBJLIST_CONFIG_NOWORK	0x00002000	/* show objects without slow work */
+#define FSCACHE_OBJLIST_CONFIG_WORK	0x00001000	/* show objects with work */
+#define FSCACHE_OBJLIST_CONFIG_NOWORK	0x00002000	/* show objects without work */
 
 	u8		buf[512];	/* key and aux data buffer */
 };
@@ -231,12 +231,11 @@ static int fscache_objlist_show(struct seq_file *m, void *v)
 		       READS, NOREADS);
 		FILTER(obj->events & obj->event_mask,
 		       EVENTS, NOEVENTS);
-		FILTER(obj->work.flags & ~(1UL << SLOW_WORK_VERY_SLOW),
-		       WORK, NOWORK);
+		FILTER(work_busy(&obj->work), WORK, NOWORK);
 	}
 
 	seq_printf(m,
-		   "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1lx | ",
+		   "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1x | ",
 		   obj->debug_id,
 		   obj->parent ? obj->parent->debug_id : -1,
 		   fscache_object_states_short[obj->state],
@@ -249,7 +248,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v)
 		   obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK,
 		   obj->events,
 		   obj->flags,
-		   obj->work.flags);
+		   work_busy(&obj->work));
 
 	no_cookie = true;
 	keylen = auxlen = 0;
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 0b589a9b4ff..b6b897c550a 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -14,7 +14,6 @@
 
 #define FSCACHE_DEBUG_LEVEL COOKIE
 #include <linux/module.h>
-#include <linux/seq_file.h>
 #include "internal.h"
 
 const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = {
@@ -50,12 +49,8 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = {
 	[FSCACHE_OBJECT_DEAD]		= "DEAD",
 };
 
-static void fscache_object_slow_work_put_ref(struct slow_work *);
-static int  fscache_object_slow_work_get_ref(struct slow_work *);
-static void fscache_object_slow_work_execute(struct slow_work *);
-#ifdef CONFIG_SLOW_WORK_DEBUG
-static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *);
-#endif
+static int  fscache_get_object(struct fscache_object *);
+static void fscache_put_object(struct fscache_object *);
 static void fscache_initialise_object(struct fscache_object *);
 static void fscache_lookup_object(struct fscache_object *);
 static void fscache_object_available(struct fscache_object *);
@@ -64,17 +59,6 @@ static void fscache_withdraw_object(struct fscache_object *);
 static void fscache_enqueue_dependents(struct fscache_object *);
 static void fscache_dequeue_object(struct fscache_object *);
 
-const struct slow_work_ops fscache_object_slow_work_ops = {
-	.owner		= THIS_MODULE,
-	.get_ref	= fscache_object_slow_work_get_ref,
-	.put_ref	= fscache_object_slow_work_put_ref,
-	.execute	= fscache_object_slow_work_execute,
-#ifdef CONFIG_SLOW_WORK_DEBUG
-	.desc		= fscache_object_slow_work_desc,
-#endif
-};
-EXPORT_SYMBOL(fscache_object_slow_work_ops);
-
 /*
  * we need to notify the parent when an op completes that we had outstanding
  * upon it
@@ -345,7 +329,7 @@ unsupported_event:
 /*
  * execute an object
  */
-static void fscache_object_slow_work_execute(struct slow_work *work)
+void fscache_object_work_func(struct work_struct *work)
 {
 	struct fscache_object *object =
 		container_of(work, struct fscache_object, work);
@@ -359,23 +343,9 @@ static void fscache_object_slow_work_execute(struct slow_work *work)
 	if (object->events & object->event_mask)
 		fscache_enqueue_object(object);
 	clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
+	fscache_put_object(object);
 }
-
-/*
- * describe an object for slow-work debugging
- */
-#ifdef CONFIG_SLOW_WORK_DEBUG
-static void fscache_object_slow_work_desc(struct slow_work *work,
-					  struct seq_file *m)
-{
-	struct fscache_object *object =
-		container_of(work, struct fscache_object, work);
-
-	seq_printf(m, "FSC: OBJ%x: %s",
-		   object->debug_id,
-		   fscache_object_states_short[object->state]);
-}
-#endif
+EXPORT_SYMBOL(fscache_object_work_func);
 
 /*
  * initialise an object
@@ -393,7 +363,6 @@ static void fscache_initialise_object(struct fscache_object *object)
 	_enter("");
 	ASSERT(object->cookie != NULL);
 	ASSERT(object->cookie->parent != NULL);
-	ASSERT(list_empty(&object->work.link));
 
 	if (object->events & ((1 << FSCACHE_OBJECT_EV_ERROR) |
 			      (1 << FSCACHE_OBJECT_EV_RELEASE) |
@@ -671,10 +640,8 @@ static void fscache_drop_object(struct fscache_object *object)
 		object->parent = NULL;
 	}
 
-	/* this just shifts the object release to the slow work processor */
-	fscache_stat(&fscache_n_cop_put_object);
-	object->cache->ops->put_object(object);
-	fscache_stat_d(&fscache_n_cop_put_object);
+	/* this just shifts the object release to the work processor */
+	fscache_put_object(object);
 
 	_leave("");
 }
@@ -758,12 +725,10 @@ void fscache_withdrawing_object(struct fscache_cache *cache,
 }
 
 /*
- * allow the slow work item processor to get a ref on an object
+ * get a ref on an object
  */
-static int fscache_object_slow_work_get_ref(struct slow_work *work)
+static int fscache_get_object(struct fscache_object *object)
 {
-	struct fscache_object *object =
-		container_of(work, struct fscache_object, work);
 	int ret;
 
 	fscache_stat(&fscache_n_cop_grab_object);
@@ -773,13 +738,10 @@ static int fscache_object_slow_work_get_ref(struct slow_work *work)
 }
 
 /*
- * allow the slow work item processor to discard a ref on a work item
+ * discard a ref on a work item
  */
-static void fscache_object_slow_work_put_ref(struct slow_work *work)
+static void fscache_put_object(struct fscache_object *object)
 {
-	struct fscache_object *object =
-		container_of(work, struct fscache_object, work);
-
 	fscache_stat(&fscache_n_cop_put_object);
 	object->cache->ops->put_object(object);
 	fscache_stat_d(&fscache_n_cop_put_object);
@@ -792,8 +754,48 @@ void fscache_enqueue_object(struct fscache_object *object)
 {
 	_enter("{OBJ%x}", object->debug_id);
 
-	slow_work_enqueue(&object->work);
+	if (fscache_get_object(object) >= 0) {
+		wait_queue_head_t *cong_wq =
+			&get_cpu_var(fscache_object_cong_wait);
+
+		if (queue_work(fscache_object_wq, &object->work)) {
+			if (fscache_object_congested())
+				wake_up(cong_wq);
+		} else
+			fscache_put_object(object);
+
+		put_cpu_var(fscache_object_cong_wait);
+	}
+}
+
+/**
+ * fscache_object_sleep_till_congested - Sleep until object wq is congested
+ * @timeoutp: Scheduler sleep timeout
+ *
+ * Allow an object handler to sleep until the object workqueue is congested.
+ *
+ * The caller must set up a wake up event before calling this and must have set
+ * the appropriate sleep mode (such as TASK_UNINTERRUPTIBLE) and tested its own
+ * condition before calling this function as no test is made here.
+ *
+ * %true is returned if the object wq is congested, %false otherwise.
+ */
+bool fscache_object_sleep_till_congested(signed long *timeoutp)
+{
+	wait_queue_head_t *cong_wq = &__get_cpu_var(fscache_object_cong_wait);
+	DEFINE_WAIT(wait);
+
+	if (fscache_object_congested())
+		return true;
+
+	add_wait_queue_exclusive(cong_wq, &wait);
+	if (!fscache_object_congested())
+		*timeoutp = schedule_timeout(*timeoutp);
+	finish_wait(cong_wq, &wait);
+
+	return fscache_object_congested();
 }
+EXPORT_SYMBOL_GPL(fscache_object_sleep_till_congested);
 
 /*
  * enqueue the dependents of an object for metadata-type processing
@@ -819,9 +821,7 @@ static void fscache_enqueue_dependents(struct fscache_object *object)
 
 		/* sort onto appropriate lists */
 		fscache_enqueue_object(dep);
-		fscache_stat(&fscache_n_cop_put_object);
-		dep->cache->ops->put_object(dep);
-		fscache_stat_d(&fscache_n_cop_put_object);
+		fscache_put_object(dep);
 
 		if (!list_empty(&object->dependents))
 			cond_resched_lock(&object->lock);
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index c57db27ac86..27c8df50315 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -21,6 +21,7 @@
 #include <linux/fscache.h>
 #include <linux/sched.h>
 #include <linux/slow-work.h>
+#include <linux/workqueue.h>
 
 #define NR_MAXCACHES BITS_PER_LONG
 
@@ -389,7 +390,7 @@ struct fscache_object {
 	struct fscache_cache	*cache;		/* cache that supplied this object */
 	struct fscache_cookie	*cookie;	/* netfs's file/index object */
 	struct fscache_object	*parent;	/* parent object */
-	struct slow_work	work;		/* attention scheduling record */
+	struct work_struct	work;		/* attention scheduling record */
 	struct list_head	dependents;	/* FIFO of dependent objects */
 	struct list_head	dep_link;	/* link in parent's dependents list */
 	struct list_head	pending_ops;	/* unstarted operations on this object */
@@ -411,7 +412,7 @@ extern const char *fscache_object_states[];
 	(test_bit(FSCACHE_IOERROR, &(obj)->cache->flags) &&	\
 	 (obj)->state >= FSCACHE_OBJECT_DYING)
 
-extern const struct slow_work_ops fscache_object_slow_work_ops;
+extern void fscache_object_work_func(struct work_struct *work);
 
 /**
  * fscache_object_init - Initialise a cache object description
@@ -433,7 +434,7 @@ void fscache_object_init(struct fscache_object *object,
 	spin_lock_init(&object->lock);
 	INIT_LIST_HEAD(&object->cache_link);
 	INIT_HLIST_NODE(&object->cookie_link);
-	vslow_work_init(&object->work, &fscache_object_slow_work_ops);
+	INIT_WORK(&object->work, fscache_object_work_func);
 	INIT_LIST_HEAD(&object->dependents);
 	INIT_LIST_HEAD(&object->dep_link);
 	INIT_LIST_HEAD(&object->pending_ops);
@@ -534,6 +535,8 @@ extern void fscache_io_error(struct fscache_cache *cache);
 extern void fscache_mark_pages_cached(struct fscache_retrieval *op,
 				      struct pagevec *pagevec);
 
+extern bool fscache_object_sleep_till_congested(signed long *timeoutp);
+
 extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
 					       const void *data,
 					       uint16_t datalen);
-- 
cgit v1.2.3-70-g09d2


From 8af7c12436803291c90295259db23d371a7ad9cc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 20 Jul 2010 22:09:01 +0200
Subject: fscache: convert operation to use workqueue instead of slow-work

Make fscache operations use only workqueue instead of a combination of
workqueue and slow-work.  FSCACHE_OP_SLOW is dropped and
FSCACHE_OP_FAST is renamed to FSCACHE_OP_ASYNC and uses newly added
fscache_op_wq workqueue to execute op->processor().
fscache_operation_init_slow() is dropped and fscache_operation_init()
now takes @processor argument directly.

* Unbound workqueue is used.

* fscache_retrieval_work() is no longer necessary as OP_ASYNC now does
  the equivalent thing.

* sysctl fscache.operation_max_active added to control concurrency.
  The default value is nr_cpus clamped between 2 and
  WQ_UNBOUND_MAX_ACTIVE.

* debugfs support is dropped for now.  Tracing API based debug
  facility is planned to be added.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David Howells <dhowells@redhat.com>
---
 fs/cachefiles/rdwr.c          |  4 +--
 fs/fscache/internal.h         |  1 +
 fs/fscache/main.c             | 23 +++++++++++++++
 fs/fscache/operation.c        | 67 ++++++-------------------------------------
 fs/fscache/page.c             | 36 ++++++-----------------
 include/linux/fscache-cache.h | 37 +++++++-----------------
 6 files changed, 53 insertions(+), 115 deletions(-)

(limited to 'include')

diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 0f0d41fbb03..0e3c0924cc3 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -422,7 +422,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
 
 	op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
-	op->op.flags |= FSCACHE_OP_FAST;
+	op->op.flags |= FSCACHE_OP_ASYNC;
 	op->op.processor = cachefiles_read_copier;
 
 	pagevec_init(&pagevec, 0);
@@ -729,7 +729,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 	pagevec_init(&pagevec, 0);
 
 	op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
-	op->op.flags |= FSCACHE_OP_FAST;
+	op->op.flags |= FSCACHE_OP_ASYNC;
 	op->op.processor = cachefiles_read_copier;
 
 	INIT_LIST_HEAD(&backpages);
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 6e0b5fb2523..6a026441c5a 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -83,6 +83,7 @@ extern unsigned fscache_defer_create;
 extern unsigned fscache_debug;
 extern struct kobject *fscache_root;
 extern struct workqueue_struct *fscache_object_wq;
+extern struct workqueue_struct *fscache_op_wq;
 DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
 
 static inline bool fscache_object_congested(void)
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index bb8d4c35c7a..44d13ddab2c 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -42,11 +42,13 @@ MODULE_PARM_DESC(fscache_debug,
 
 struct kobject *fscache_root;
 struct workqueue_struct *fscache_object_wq;
+struct workqueue_struct *fscache_op_wq;
 
 DEFINE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
 
 /* these values serve as lower bounds, will be adjusted in fscache_init() */
 static unsigned fscache_object_max_active = 4;
+static unsigned fscache_op_max_active = 2;
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table_header *fscache_sysctl_header;
@@ -74,6 +76,14 @@ ctl_table fscache_sysctls[] = {
 		.proc_handler	= fscache_max_active_sysctl,
 		.extra1		= &fscache_object_wq,
 	},
+	{
+		.procname	= "operation_max_active",
+		.data		= &fscache_op_max_active,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= fscache_max_active_sysctl,
+		.extra1		= &fscache_op_wq,
+	},
 	{}
 };
 
@@ -110,6 +120,16 @@ static int __init fscache_init(void)
 	if (!fscache_object_wq)
 		goto error_object_wq;
 
+	fscache_op_max_active =
+		clamp_val(fscache_object_max_active / 2,
+			  fscache_op_max_active, WQ_UNBOUND_MAX_ACTIVE);
+
+	ret = -ENOMEM;
+	fscache_op_wq = alloc_workqueue("fscache_operation", WQ_UNBOUND,
+					fscache_op_max_active);
+	if (!fscache_op_wq)
+		goto error_op_wq;
+
 	for_each_possible_cpu(cpu)
 		init_waitqueue_head(&per_cpu(fscache_object_cong_wait, cpu));
 
@@ -152,6 +172,8 @@ error_sysctl:
 #endif
 	fscache_proc_cleanup();
 error_proc:
+	destroy_workqueue(fscache_op_wq);
+error_op_wq:
 	destroy_workqueue(fscache_object_wq);
 error_object_wq:
 	slow_work_unregister_user(THIS_MODULE);
@@ -172,6 +194,7 @@ static void __exit fscache_exit(void)
 	kmem_cache_destroy(fscache_cookie_jar);
 	unregister_sysctl_table(fscache_sysctl_header);
 	fscache_proc_cleanup();
+	destroy_workqueue(fscache_op_wq);
 	destroy_workqueue(fscache_object_wq);
 	slow_work_unregister_user(THIS_MODULE);
 	printk(KERN_NOTICE "FS-Cache: Unloaded\n");
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index f17cecafae4..b9f34eaede0 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -42,16 +42,12 @@ void fscache_enqueue_operation(struct fscache_operation *op)
 
 	fscache_stat(&fscache_n_op_enqueue);
 	switch (op->flags & FSCACHE_OP_TYPE) {
-	case FSCACHE_OP_FAST:
-		_debug("queue fast");
+	case FSCACHE_OP_ASYNC:
+		_debug("queue async");
 		atomic_inc(&op->usage);
-		if (!schedule_work(&op->fast_work))
+		if (!queue_work(fscache_op_wq, &op->work))
 			fscache_put_operation(op);
 		break;
-	case FSCACHE_OP_SLOW:
-		_debug("queue slow");
-		slow_work_enqueue(&op->slow_work);
-		break;
 	case FSCACHE_OP_MYTHREAD:
 		_debug("queue for caller's attention");
 		break;
@@ -455,36 +451,13 @@ void fscache_operation_gc(struct work_struct *work)
 }
 
 /*
- * allow the slow work item processor to get a ref on an operation
- */
-static int fscache_op_get_ref(struct slow_work *work)
-{
-	struct fscache_operation *op =
-		container_of(work, struct fscache_operation, slow_work);
-
-	atomic_inc(&op->usage);
-	return 0;
-}
-
-/*
- * allow the slow work item processor to discard a ref on an operation
- */
-static void fscache_op_put_ref(struct slow_work *work)
-{
-	struct fscache_operation *op =
-		container_of(work, struct fscache_operation, slow_work);
-
-	fscache_put_operation(op);
-}
-
-/*
- * execute an operation using the slow thread pool to provide processing context
- * - the caller holds a ref to this object, so we don't need to hold one
+ * execute an operation using fscache_op_wq to provide processing context -
+ * the caller holds a ref to this object, so we don't need to hold one
  */
-static void fscache_op_execute(struct slow_work *work)
+void fscache_op_work_func(struct work_struct *work)
 {
 	struct fscache_operation *op =
-		container_of(work, struct fscache_operation, slow_work);
+		container_of(work, struct fscache_operation, work);
 	unsigned long start;
 
 	_enter("{OBJ%x OP%x,%d}",
@@ -494,31 +467,7 @@ static void fscache_op_execute(struct slow_work *work)
 	start = jiffies;
 	op->processor(op);
 	fscache_hist(fscache_ops_histogram, start);
+	fscache_put_operation(op);
 
 	_leave("");
 }
-
-/*
- * describe an operation for slow-work debugging
- */
-#ifdef CONFIG_SLOW_WORK_DEBUG
-static void fscache_op_desc(struct slow_work *work, struct seq_file *m)
-{
-	struct fscache_operation *op =
-		container_of(work, struct fscache_operation, slow_work);
-
-	seq_printf(m, "FSC: OBJ%x OP%x: %s/%s fl=%lx",
-		   op->object->debug_id, op->debug_id,
-		   op->name, op->state, op->flags);
-}
-#endif
-
-const struct slow_work_ops fscache_op_slow_work_ops = {
-	.owner		= THIS_MODULE,
-	.get_ref	= fscache_op_get_ref,
-	.put_ref	= fscache_op_put_ref,
-	.execute	= fscache_op_execute,
-#ifdef CONFIG_SLOW_WORK_DEBUG
-	.desc		= fscache_op_desc,
-#endif
-};
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 723b889fd21..41c441c2058 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -105,7 +105,7 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
 
 page_busy:
 	/* we might want to wait here, but that could deadlock the allocator as
-	 * the slow-work threads writing to the cache may all end up sleeping
+	 * the work threads writing to the cache may all end up sleeping
 	 * on memory allocation */
 	fscache_stat(&fscache_n_store_vmscan_busy);
 	return false;
@@ -188,9 +188,8 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
 		return -ENOMEM;
 	}
 
-	fscache_operation_init(op, NULL);
-	fscache_operation_init_slow(op, fscache_attr_changed_op);
-	op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE);
+	fscache_operation_init(op, fscache_attr_changed_op, NULL);
+	op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE);
 	fscache_set_op_name(op, "Attr");
 
 	spin_lock(&cookie->lock);
@@ -217,24 +216,6 @@ nobufs:
 }
 EXPORT_SYMBOL(__fscache_attr_changed);
 
-/*
- * handle secondary execution given to a retrieval op on behalf of the
- * cache
- */
-static void fscache_retrieval_work(struct work_struct *work)
-{
-	struct fscache_retrieval *op =
-		container_of(work, struct fscache_retrieval, op.fast_work);
-	unsigned long start;
-
-	_enter("{OP%x}", op->op.debug_id);
-
-	start = jiffies;
-	op->op.processor(&op->op);
-	fscache_hist(fscache_ops_histogram, start);
-	fscache_put_operation(&op->op);
-}
-
 /*
  * release a retrieval op reference
  */
@@ -269,13 +250,12 @@ static struct fscache_retrieval *fscache_alloc_retrieval(
 		return NULL;
 	}
 
-	fscache_operation_init(&op->op, fscache_release_retrieval_op);
+	fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op);
 	op->op.flags	= FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING);
 	op->mapping	= mapping;
 	op->end_io_func	= end_io_func;
 	op->context	= context;
 	op->start_time	= jiffies;
-	INIT_WORK(&op->op.fast_work, fscache_retrieval_work);
 	INIT_LIST_HEAD(&op->to_do);
 	fscache_set_op_name(&op->op, "Retr");
 	return op;
@@ -795,9 +775,9 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 	if (!op)
 		goto nomem;
 
-	fscache_operation_init(&op->op, fscache_release_write_op);
-	fscache_operation_init_slow(&op->op, fscache_write_op);
-	op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING);
+	fscache_operation_init(&op->op, fscache_write_op,
+			       fscache_release_write_op);
+	op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING);
 	fscache_set_op_name(&op->op, "Write1");
 
 	ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
@@ -852,7 +832,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 	fscache_stat(&fscache_n_store_ops);
 	fscache_stat(&fscache_n_stores_ok);
 
-	/* the slow work queue now carries its own ref on the object */
+	/* the work queue now carries its own ref on the object */
 	fscache_put_operation(&op->op);
 	_leave(" = 0");
 	return 0;
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 27c8df50315..17ed9c1dbfb 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -77,18 +77,14 @@ typedef void (*fscache_operation_release_t)(struct fscache_operation *op);
 typedef void (*fscache_operation_processor_t)(struct fscache_operation *op);
 
 struct fscache_operation {
-	union {
-		struct work_struct fast_work;	/* record for fast ops */
-		struct slow_work slow_work;	/* record for (very) slow ops */
-	};
+	struct work_struct	work;		/* record for async ops */
 	struct list_head	pend_link;	/* link in object->pending_ops */
 	struct fscache_object	*object;	/* object to be operated upon */
 
 	unsigned long		flags;
 #define FSCACHE_OP_TYPE		0x000f	/* operation type */
-#define FSCACHE_OP_FAST		0x0001	/* - fast op, processor may not sleep for disk */
-#define FSCACHE_OP_SLOW		0x0002	/* - (very) slow op, processor may sleep for disk */
-#define FSCACHE_OP_MYTHREAD	0x0003	/* - processing is done be issuing thread, not pool */
+#define FSCACHE_OP_ASYNC	0x0001	/* - async op, processor may sleep for disk */
+#define FSCACHE_OP_MYTHREAD	0x0002	/* - processing is done by issuing thread, not pool */
 #define FSCACHE_OP_WAITING	4	/* cleared when op is woken */
 #define FSCACHE_OP_EXCLUSIVE	5	/* exclusive op, other ops must wait */
 #define FSCACHE_OP_DEAD		6	/* op is now dead */
@@ -106,7 +102,8 @@ struct fscache_operation {
 	/* operation releaser */
 	fscache_operation_release_t release;
 
-#ifdef CONFIG_SLOW_WORK_DEBUG
+#ifdef CONFIG_WORKQUEUE_DEBUGFS
+	struct work_struct put_work;	/* work to delay operation put */
 	const char *name;		/* operation name */
 	const char *state;		/* operation state */
 #define fscache_set_op_name(OP, N)	do { (OP)->name  = (N); } while(0)
@@ -118,7 +115,7 @@ struct fscache_operation {
 };
 
 extern atomic_t fscache_op_debug_id;
-extern const struct slow_work_ops fscache_op_slow_work_ops;
+extern void fscache_op_work_func(struct work_struct *work);
 
 extern void fscache_enqueue_operation(struct fscache_operation *);
 extern void fscache_put_operation(struct fscache_operation *);
@@ -129,33 +126,21 @@ extern void fscache_put_operation(struct fscache_operation *);
  * @release: The release function to assign
  *
  * Do basic initialisation of an operation.  The caller must still set flags,
- * object, either fast_work or slow_work if necessary, and processor if needed.
+ * object and processor if needed.
  */
 static inline void fscache_operation_init(struct fscache_operation *op,
-					  fscache_operation_release_t release)
+					fscache_operation_processor_t processor,
+					fscache_operation_release_t release)
 {
+	INIT_WORK(&op->work, fscache_op_work_func);
 	atomic_set(&op->usage, 1);
 	op->debug_id = atomic_inc_return(&fscache_op_debug_id);
+	op->processor = processor;
 	op->release = release;
 	INIT_LIST_HEAD(&op->pend_link);
 	fscache_set_op_state(op, "Init");
 }
 
-/**
- * fscache_operation_init_slow - Do additional initialisation of a slow op
- * @op: The operation to initialise
- * @processor: The processor function to assign
- *
- * Do additional initialisation of an operation as required for slow work.
- */
-static inline
-void fscache_operation_init_slow(struct fscache_operation *op,
-				 fscache_operation_processor_t processor)
-{
-	op->processor = processor;
-	slow_work_init(&op->slow_work, &fscache_op_slow_work_ops);
-}
-
 /*
  * data read operation
  */
-- 
cgit v1.2.3-70-g09d2


From d098adfb7d281258173a43151483e52e21761021 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 20 Jul 2010 22:09:01 +0200
Subject: fscache: drop references to slow-work

fscache no longer uses slow-work.  Drop references to it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David Howells <dhowells@redhat.com>
---
 fs/fscache/Kconfig            | 1 -
 fs/fscache/main.c             | 7 -------
 include/linux/fscache-cache.h | 1 -
 3 files changed, 9 deletions(-)

(limited to 'include')

diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig
index cc94bb9563f..3f6dfa98988 100644
--- a/fs/fscache/Kconfig
+++ b/fs/fscache/Kconfig
@@ -1,7 +1,6 @@
 
 config FSCACHE
 	tristate "General filesystem local caching manager"
-	select SLOW_WORK
 	help
 	  This option enables a generic filesystem caching manager that can be
 	  used by various network and other filesystems to cache data locally.
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 44d13ddab2c..500936d9fff 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -106,10 +106,6 @@ static int __init fscache_init(void)
 	unsigned int cpu;
 	int ret;
 
-	ret = slow_work_register_user(THIS_MODULE);
-	if (ret < 0)
-		goto error_slow_work;
-
 	fscache_object_max_active =
 		clamp_val(nr_cpus,
 			  fscache_object_max_active, WQ_UNBOUND_MAX_ACTIVE);
@@ -176,8 +172,6 @@ error_proc:
 error_op_wq:
 	destroy_workqueue(fscache_object_wq);
 error_object_wq:
-	slow_work_unregister_user(THIS_MODULE);
-error_slow_work:
 	return ret;
 }
 
@@ -196,7 +190,6 @@ static void __exit fscache_exit(void)
 	fscache_proc_cleanup();
 	destroy_workqueue(fscache_op_wq);
 	destroy_workqueue(fscache_object_wq);
-	slow_work_unregister_user(THIS_MODULE);
 	printk(KERN_NOTICE "FS-Cache: Unloaded\n");
 }
 
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 17ed9c1dbfb..b8581c09d19 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -20,7 +20,6 @@
 
 #include <linux/fscache.h>
 #include <linux/sched.h>
-#include <linux/slow-work.h>
 #include <linux/workqueue.h>
 
 #define NR_MAXCACHES BITS_PER_LONG
-- 
cgit v1.2.3-70-g09d2


From 991ea75cb1df7188d209274b3d51c105b4f18ffe Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 20 Jul 2010 22:09:02 +0200
Subject: drm: use workqueue instead of slow-work

Workqueue can now handle high concurrency.  Convert drm_crtc_helper to
use system_nrt_wq instead of slow-work.  The conversion is mostly
straightforward.  One difference is that drm_helper_hpd_irq_event()
no longer blocks and can be called from any context.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David Airlie <airlied@linux.ie>
Cc: dri-devel@lists.freedesktop.org
---
 drivers/gpu/drm/drm_crtc_helper.c | 29 ++++++++++-------------------
 include/drm/drm_crtc.h            |  3 +--
 2 files changed, 11 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 9b2a54117c9..7fa33805f39 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -808,13 +808,11 @@ int drm_helper_resume_force_mode(struct drm_device *dev)
 }
 EXPORT_SYMBOL(drm_helper_resume_force_mode);
 
-static struct slow_work_ops output_poll_ops;
-
 #define DRM_OUTPUT_POLL_PERIOD (10*HZ)
-static void output_poll_execute(struct slow_work *work)
+static void output_poll_execute(struct work_struct *work)
 {
-	struct delayed_slow_work *delayed_work = container_of(work, struct delayed_slow_work, work);
-	struct drm_device *dev = container_of(delayed_work, struct drm_device, mode_config.output_poll_slow_work);
+	struct delayed_work *delayed_work = to_delayed_work(work);
+	struct drm_device *dev = container_of(delayed_work, struct drm_device, mode_config.output_poll_work);
 	struct drm_connector *connector;
 	enum drm_connector_status old_status, status;
 	bool repoll = false, changed = false;
@@ -854,7 +852,7 @@ static void output_poll_execute(struct slow_work *work)
 	}
 
 	if (repoll) {
-		ret = delayed_slow_work_enqueue(delayed_work, DRM_OUTPUT_POLL_PERIOD);
+		ret = queue_delayed_work(system_nrt_wq, delayed_work, DRM_OUTPUT_POLL_PERIOD);
 		if (ret)
 			DRM_ERROR("delayed enqueue failed %d\n", ret);
 	}
@@ -864,7 +862,7 @@ void drm_kms_helper_poll_disable(struct drm_device *dev)
 {
 	if (!dev->mode_config.poll_enabled)
 		return;
-	delayed_slow_work_cancel(&dev->mode_config.output_poll_slow_work);
+	cancel_delayed_work_sync(&dev->mode_config.output_poll_work);
 }
 EXPORT_SYMBOL(drm_kms_helper_poll_disable);
 
@@ -880,7 +878,7 @@ void drm_kms_helper_poll_enable(struct drm_device *dev)
 	}
 
 	if (poll) {
-		ret = delayed_slow_work_enqueue(&dev->mode_config.output_poll_slow_work, DRM_OUTPUT_POLL_PERIOD);
+		ret = queue_delayed_work(system_nrt_wq, &dev->mode_config.output_poll_work, DRM_OUTPUT_POLL_PERIOD);
 		if (ret)
 			DRM_ERROR("delayed enqueue failed %d\n", ret);
 	}
@@ -889,9 +887,7 @@ EXPORT_SYMBOL(drm_kms_helper_poll_enable);
 
 void drm_kms_helper_poll_init(struct drm_device *dev)
 {
-	slow_work_register_user(THIS_MODULE);
-	delayed_slow_work_init(&dev->mode_config.output_poll_slow_work,
-			       &output_poll_ops);
+	INIT_DELAYED_WORK(&dev->mode_config.output_poll_work, output_poll_execute);
 	dev->mode_config.poll_enabled = true;
 
 	drm_kms_helper_poll_enable(dev);
@@ -901,7 +897,6 @@ EXPORT_SYMBOL(drm_kms_helper_poll_init);
 void drm_kms_helper_poll_fini(struct drm_device *dev)
 {
 	drm_kms_helper_poll_disable(dev);
-	slow_work_unregister_user(THIS_MODULE);
 }
 EXPORT_SYMBOL(drm_kms_helper_poll_fini);
 
@@ -909,12 +904,8 @@ void drm_helper_hpd_irq_event(struct drm_device *dev)
 {
 	if (!dev->mode_config.poll_enabled)
 		return;
-	delayed_slow_work_cancel(&dev->mode_config.output_poll_slow_work);
-	/* schedule a slow work asap */
-	delayed_slow_work_enqueue(&dev->mode_config.output_poll_slow_work, 0);
+	/* kill timer and schedule immediate execution, this doesn't block */
+	cancel_delayed_work(&dev->mode_config.output_poll_work);
+	queue_delayed_work(system_nrt_wq, &dev->mode_config.output_poll_work, 0);
 }
 EXPORT_SYMBOL(drm_helper_hpd_irq_event);
-
-static struct slow_work_ops output_poll_ops = {
-	.execute = output_poll_execute,
-};
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 93a1a31b9c2..c707270bff5 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -31,7 +31,6 @@
 #include <linux/idr.h>
 
 #include <linux/fb.h>
-#include <linux/slow-work.h>
 
 struct drm_device;
 struct drm_mode_set;
@@ -595,7 +594,7 @@ struct drm_mode_config {
 
 	/* output poll support */
 	bool poll_enabled;
-	struct delayed_slow_work output_poll_slow_work;
+	struct delayed_work output_poll_work;
 
 	/* pointers to standard properties */
 	struct list_head property_blob_list;
-- 
cgit v1.2.3-70-g09d2


From 18f19aa62a267f2f759e278018f1032adf4c3774 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 14 May 2010 12:38:24 +0100
Subject: xen: Add support for HVM hypercalls.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 arch/x86/include/asm/xen/hypercall.h |  6 +++
 include/xen/hvm.h                    | 24 +++++++++
 include/xen/interface/hvm/hvm_op.h   | 35 +++++++++++++
 include/xen/interface/hvm/params.h   | 95 ++++++++++++++++++++++++++++++++++++
 4 files changed, 160 insertions(+)
 create mode 100644 include/xen/hvm.h
 create mode 100644 include/xen/interface/hvm/hvm_op.h
 create mode 100644 include/xen/interface/hvm/params.h

(limited to 'include')

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 9c371e4a9fa..7fda040a76c 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -417,6 +417,12 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg)
 	return _hypercall2(int, nmi_op, op, arg);
 }
 
+static inline unsigned long __must_check
+HYPERVISOR_hvm_op(int op, void *arg)
+{
+       return _hypercall2(unsigned long, hvm_op, op, arg);
+}
+
 static inline void
 MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
 {
diff --git a/include/xen/hvm.h b/include/xen/hvm.h
new file mode 100644
index 00000000000..5dfe8fb86e6
--- /dev/null
+++ b/include/xen/hvm.h
@@ -0,0 +1,24 @@
+/* Simple wrappers around HVM functions */
+#ifndef XEN_HVM_H__
+#define XEN_HVM_H__
+
+#include <xen/interface/hvm/params.h>
+
+static inline int hvm_get_parameter(int idx, uint64_t *value)
+{
+	struct xen_hvm_param xhv;
+	int r;
+
+	xhv.domid = DOMID_SELF;
+	xhv.index = idx;
+	r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
+	if (r < 0) {
+		printk(KERN_ERR "Cannot get hvm parameter %d: %d!\n",
+			idx, r);
+		return r;
+	}
+	*value = xhv.value;
+	return r;
+}
+
+#endif /* XEN_HVM_H__ */
diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
new file mode 100644
index 00000000000..73c8c7eba48
--- /dev/null
+++ b/include/xen/interface/hvm/hvm_op.h
@@ -0,0 +1,35 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
+#define __XEN_PUBLIC_HVM_HVM_OP_H__
+
+/* Get/set subcommands: the second argument of the hypercall is a
+ * pointer to a xen_hvm_param struct. */
+#define HVMOP_set_param           0
+#define HVMOP_get_param           1
+struct xen_hvm_param {
+    domid_t  domid;    /* IN */
+    uint32_t index;    /* IN */
+    uint64_t value;    /* IN/OUT */
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
+
+#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h
new file mode 100644
index 00000000000..1888d8c157e
--- /dev/null
+++ b/include/xen/interface/hvm/params.h
@@ -0,0 +1,95 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_PARAMS_H__
+#define __XEN_PUBLIC_HVM_PARAMS_H__
+
+#include "hvm_op.h"
+
+/*
+ * Parameter space for HVMOP_{set,get}_param.
+ */
+
+/*
+ * How should CPU0 event-channel notifications be delivered?
+ * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt).
+ * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows:
+ *                  Domain = val[47:32], Bus  = val[31:16],
+ *                  DevFn  = val[15: 8], IntX = val[ 1: 0]
+ * val[63:56] == 2: val[7:0] is a vector number.
+ * If val == 0 then CPU0 event-channel notifications are not delivered.
+ */
+#define HVM_PARAM_CALLBACK_IRQ 0
+
+#define HVM_PARAM_STORE_PFN    1
+#define HVM_PARAM_STORE_EVTCHN 2
+
+#define HVM_PARAM_PAE_ENABLED  4
+
+#define HVM_PARAM_IOREQ_PFN    5
+
+#define HVM_PARAM_BUFIOREQ_PFN 6
+
+/*
+ * Set mode for virtual timers (currently x86 only):
+ *  delay_for_missed_ticks (default):
+ *   Do not advance a vcpu's time beyond the correct delivery time for
+ *   interrupts that have been missed due to preemption. Deliver missed
+ *   interrupts when the vcpu is rescheduled and advance the vcpu's virtual
+ *   time stepwise for each one.
+ *  no_delay_for_missed_ticks:
+ *   As above, missed interrupts are delivered, but guest time always tracks
+ *   wallclock (i.e., real) time while doing so.
+ *  no_missed_ticks_pending:
+ *   No missed interrupts are held pending. Instead, to ensure ticks are
+ *   delivered at some non-zero rate, if we detect missed ticks then the
+ *   internal tick alarm is not disabled if the VCPU is preempted during the
+ *   next tick period.
+ *  one_missed_tick_pending:
+ *   Missed interrupts are collapsed together and delivered as one 'late tick'.
+ *   Guest time always tracks wallclock (i.e., real) time.
+ */
+#define HVM_PARAM_TIMER_MODE   10
+#define HVMPTM_delay_for_missed_ticks    0
+#define HVMPTM_no_delay_for_missed_ticks 1
+#define HVMPTM_no_missed_ticks_pending   2
+#define HVMPTM_one_missed_tick_pending   3
+
+/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
+#define HVM_PARAM_HPET_ENABLED 11
+
+/* Identity-map page directory used by Intel EPT when CR0.PG=0. */
+#define HVM_PARAM_IDENT_PT     12
+
+/* Device Model domain, defaults to 0. */
+#define HVM_PARAM_DM_DOMAIN    13
+
+/* ACPI S state: currently support S0 and S3 on x86. */
+#define HVM_PARAM_ACPI_S_STATE 14
+
+/* TSS used on Intel when CR0.PE=0. */
+#define HVM_PARAM_VM86_TSS     15
+
+/* Boolean: Enable aligning all periodic vpts to reduce interrupts */
+#define HVM_PARAM_VPT_ALIGN    16
+
+#define HVM_NR_PARAMS          17
+
+#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
-- 
cgit v1.2.3-70-g09d2


From 38e20b07efd541a959de367dc90a17f92ce2e8a6 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Fri, 14 May 2010 12:40:51 +0100
Subject: x86/xen: event channels delivery on HVM.

Set the callback to receive evtchns from Xen, using the
callback vector delivery mechanism.

The traditional way for receiving event channel notifications from Xen
is via the interrupts from the platform PCI device.
The callback vector is a newer alternative that allows us to receive
notifications on any vcpu and doesn't need any PCI support: we allocate
a vector exclusively to receive events, in the vector handler we don't
need to interact with the vlapic, therefore we avoid a VMEXIT.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/include/asm/irq_vectors.h |  3 ++
 arch/x86/kernel/entry_32.S         |  3 ++
 arch/x86/kernel/entry_64.S         |  3 ++
 arch/x86/xen/enlighten.c           | 28 +++++++++++++++
 arch/x86/xen/xen-ops.h             |  2 ++
 drivers/xen/events.c               | 70 ++++++++++++++++++++++++++++++++++----
 include/xen/events.h               |  7 ++++
 include/xen/hvm.h                  |  6 ++++
 include/xen/interface/features.h   |  3 ++
 9 files changed, 118 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 8767d99c4f6..e2ca3009255 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -125,6 +125,9 @@
  */
 #define MCE_SELF_VECTOR			0xeb
 
+/* Xen vector callback to receive events in a HVM domain */
+#define XEN_HVM_EVTCHN_CALLBACK		0xe9
+
 #define NR_VECTORS			 256
 
 #define FPU_IRQ				  13
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index cd49141cf15..6b196834a0d 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1166,6 +1166,9 @@ ENTRY(xen_failsafe_callback)
 .previous
 ENDPROC(xen_failsafe_callback)
 
+BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK,
+		xen_evtchn_do_upcall)
+
 #endif	/* CONFIG_XEN */
 
 #ifdef CONFIG_FUNCTION_TRACER
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 0697ff13983..490ae2bb18a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1329,6 +1329,9 @@ ENTRY(xen_failsafe_callback)
 	CFI_ENDPROC
 END(xen_failsafe_callback)
 
+apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
+	xen_hvm_callback_vector xen_evtchn_do_upcall
+
 #endif /* CONFIG_XEN */
 
 /*
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 09b36e9d507..b211a04c4b2 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -11,6 +11,7 @@
  * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
  */
 
+#include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/smp.h>
@@ -38,6 +39,7 @@
 #include <xen/interface/memory.h>
 #include <xen/features.h>
 #include <xen/page.h>
+#include <xen/hvm.h>
 #include <xen/hvc-console.h>
 
 #include <asm/paravirt.h>
@@ -80,6 +82,8 @@ struct shared_info xen_dummy_shared_info;
 void *xen_initial_gdt;
 
 RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
+__read_mostly int xen_have_vector_callback;
+EXPORT_SYMBOL_GPL(xen_have_vector_callback);
 
 /*
  * Point at some empty memory to start with. We map the real shared_info
@@ -1277,6 +1281,24 @@ static void __init init_shared_info(void)
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
 }
 
+static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
+				    unsigned long action, void *hcpu)
+{
+	int cpu = (long)hcpu;
+	switch (action) {
+	case CPU_UP_PREPARE:
+		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = {
+	.notifier_call	= xen_hvm_cpu_notify,
+};
+
 static void __init xen_hvm_guest_init(void)
 {
 	int r;
@@ -1287,6 +1309,12 @@ static void __init xen_hvm_guest_init(void)
 		return;
 
 	init_shared_info();
+
+	if (xen_feature(XENFEAT_hvm_callback_vector))
+		xen_have_vector_callback = 1;
+	register_cpu_notifier(&xen_hvm_cpu_notifier);
+	have_vcpu_info_placement = 0;
+	x86_init.irqs.intr_init = xen_init_IRQ;
 }
 
 static bool __init xen_hvm_platform(void)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f9153a300bc..0d0e0e6a747 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -38,6 +38,8 @@ void xen_enable_sysenter(void);
 void xen_enable_syscall(void);
 void xen_vcpu_restore(void);
 
+void xen_callback_vector(void);
+
 void __init xen_build_dynamic_phys_to_machine(void);
 
 void xen_init_irq_ops(void);
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index db8f506817f..d659480125f 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -29,6 +29,7 @@
 #include <linux/bootmem.h>
 #include <linux/slab.h>
 
+#include <asm/desc.h>
 #include <asm/ptrace.h>
 #include <asm/irq.h>
 #include <asm/idle.h>
@@ -36,10 +37,14 @@
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
 
+#include <xen/xen.h>
+#include <xen/hvm.h>
 #include <xen/xen-ops.h>
 #include <xen/events.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/event_channel.h>
+#include <xen/interface/hvm/hvm_op.h>
+#include <xen/interface/hvm/params.h>
 
 /*
  * This lock protects updates to the following mapping and reference-count
@@ -617,17 +622,13 @@ static DEFINE_PER_CPU(unsigned, xed_nesting_count);
  * a bitset of words which contain pending event bits.  The second
  * level is a bitset of pending events themselves.
  */
-void xen_evtchn_do_upcall(struct pt_regs *regs)
+static void __xen_evtchn_do_upcall(void)
 {
 	int cpu = get_cpu();
-	struct pt_regs *old_regs = set_irq_regs(regs);
 	struct shared_info *s = HYPERVISOR_shared_info;
 	struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
  	unsigned count;
 
-	exit_idle();
-	irq_enter();
-
 	do {
 		unsigned long pending_words;
 
@@ -667,10 +668,26 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
 	} while(count != 1);
 
 out:
+
+	put_cpu();
+}
+
+void xen_evtchn_do_upcall(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	exit_idle();
+	irq_enter();
+
+	__xen_evtchn_do_upcall();
+
 	irq_exit();
 	set_irq_regs(old_regs);
+}
 
-	put_cpu();
+void xen_hvm_evtchn_do_upcall(void)
+{
+	__xen_evtchn_do_upcall();
 }
 
 /* Rebind a new event channel to an existing irq. */
@@ -933,6 +950,40 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
 	.retrigger	= retrigger_dynirq,
 };
 
+int xen_set_callback_via(uint64_t via)
+{
+	struct xen_hvm_param a;
+	a.domid = DOMID_SELF;
+	a.index = HVM_PARAM_CALLBACK_IRQ;
+	a.value = via;
+	return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
+}
+EXPORT_SYMBOL_GPL(xen_set_callback_via);
+
+/* Vector callbacks are better than PCI interrupts to receive event
+ * channel notifications because we can receive vector callbacks on any
+ * vcpu and we don't need PCI support or APIC interactions. */
+void xen_callback_vector(void)
+{
+	int rc;
+	uint64_t callback_via;
+	if (xen_have_vector_callback) {
+		callback_via = HVM_CALLBACK_VECTOR(XEN_HVM_EVTCHN_CALLBACK);
+		rc = xen_set_callback_via(callback_via);
+		if (rc) {
+			printk(KERN_ERR "Request for Xen HVM callback vector"
+					" failed.\n");
+			xen_have_vector_callback = 0;
+			return;
+		}
+		printk(KERN_INFO "Xen HVM callback vector for event delivery is "
+				"enabled\n");
+		/* in the restore case the vector has already been allocated */
+		if (!test_bit(XEN_HVM_EVTCHN_CALLBACK, used_vectors))
+			alloc_intr_gate(XEN_HVM_EVTCHN_CALLBACK, xen_hvm_callback_vector);
+	}
+}
+
 void __init xen_init_IRQ(void)
 {
 	int i;
@@ -947,5 +998,10 @@ void __init xen_init_IRQ(void)
 	for (i = 0; i < NR_EVENT_CHANNELS; i++)
 		mask_evtchn(i);
 
-	irq_ctx_init(smp_processor_id());
+	if (xen_hvm_domain()) {
+		xen_callback_vector();
+		native_init_IRQ();
+	} else {
+		irq_ctx_init(smp_processor_id());
+	}
 }
diff --git a/include/xen/events.h b/include/xen/events.h
index e68d59a90ca..a15d93262e3 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -56,4 +56,11 @@ void xen_poll_irq(int irq);
 /* Determine the IRQ which is bound to an event channel */
 unsigned irq_from_evtchn(unsigned int evtchn);
 
+/* Xen HVM evtchn vector callback */
+extern void xen_hvm_callback_vector(void);
+extern int xen_have_vector_callback;
+int xen_set_callback_via(uint64_t via);
+void xen_evtchn_do_upcall(struct pt_regs *regs);
+void xen_hvm_evtchn_do_upcall(void);
+
 #endif	/* _XEN_EVENTS_H */
diff --git a/include/xen/hvm.h b/include/xen/hvm.h
index 5dfe8fb86e6..b193fa2f9fd 100644
--- a/include/xen/hvm.h
+++ b/include/xen/hvm.h
@@ -3,6 +3,7 @@
 #define XEN_HVM_H__
 
 #include <xen/interface/hvm/params.h>
+#include <asm/xen/hypercall.h>
 
 static inline int hvm_get_parameter(int idx, uint64_t *value)
 {
@@ -21,4 +22,9 @@ static inline int hvm_get_parameter(int idx, uint64_t *value)
 	return r;
 }
 
+#define HVM_CALLBACK_VIA_TYPE_VECTOR 0x2
+#define HVM_CALLBACK_VIA_TYPE_SHIFT 56
+#define HVM_CALLBACK_VECTOR(x) (((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR)<<\
+		HVM_CALLBACK_VIA_TYPE_SHIFT | (x))
+
 #endif /* XEN_HVM_H__ */
diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h
index f51b6413b05..8ab08b91bf6 100644
--- a/include/xen/interface/features.h
+++ b/include/xen/interface/features.h
@@ -41,6 +41,9 @@
 /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
 #define XENFEAT_mmu_pt_update_preserve_ad  5
 
+/* x86: Does this Xen host support the HVM callback vector type? */
+#define XENFEAT_hvm_callback_vector        8
+
 #define XENFEAT_NR_SUBMAPS 1
 
 #endif /* __XEN_PUBLIC_FEATURES_H__ */
-- 
cgit v1.2.3-70-g09d2


From 183d03cc4ff39e0f0d952c09aa96d0abfd6e0c3c Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Mon, 17 May 2010 17:08:21 +0100
Subject: xen: Xen PCI platform device driver.

Add the xen pci platform device driver that is responsible
for initializing the grant table and xenbus in PV on HVM mode.
A few changes to xenbus and grant table are necessary to allow the delayed
initialization in HVM mode.
The grant table needs a few additional modifications to work in HVM mode.

The Xen PCI platform device raises an irq every time an event has been
delivered to us. However these interrupts are only delivered to vcpu 0.
The Xen PCI platform interrupt handler calls xen_hvm_evtchn_do_upcall
that is a little wrapper around __xen_evtchn_do_upcall, the traditional
Xen upcall handler, the very same used with traditional PV guests.

When running on HVM the event channel upcall is never called while in
progress because it is a normal Linux irq handler (and we cannot switch
the irq chip wholesale to the Xen PV ones as we are running QEMU and
might have passed in PCI devices), therefore we cannot be sure that
evtchn_upcall_pending is 0 when returning.
For this reason if evtchn_upcall_pending is set by Xen we need to loop
again on the event channels set pending otherwise we might lose some
event channel deliveries.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/Kconfig                 |   9 ++
 drivers/xen/Makefile                |   3 +-
 drivers/xen/events.c                |   8 +-
 drivers/xen/grant-table.c           |  77 +++++++++++++--
 drivers/xen/manage.c                |   1 +
 drivers/xen/platform-pci.c          | 181 ++++++++++++++++++++++++++++++++++++
 drivers/xen/xenbus/xenbus_probe.c   |  22 ++++-
 include/linux/pci_ids.h             |   3 +
 include/xen/grant_table.h           |   4 +
 include/xen/interface/grant_table.h |   1 +
 10 files changed, 291 insertions(+), 18 deletions(-)
 create mode 100644 drivers/xen/platform-pci.c

(limited to 'include')

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index fad3df2c127..8f84b108b49 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -62,4 +62,13 @@ config XEN_SYS_HYPERVISOR
 	 virtual environment, /sys/hypervisor will still be present,
 	 but will have no xen contents.
 
+config XEN_PLATFORM_PCI
+	tristate "xen platform pci device driver"
+	depends on XEN
+	default m
+	help
+	  Driver for the Xen PCI Platform device: it is responsible for
+	  initializing xenbus and grant_table when running in a Xen HVM
+	  domain. As a consequence this driver is required to run any Xen PV
+	  frontend on Xen HVM.
 endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 7c284342f30..e392fb776af 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -9,4 +9,5 @@ obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
 obj-$(CONFIG_XEN_DEV_EVTCHN)	+= evtchn.o
 obj-$(CONFIG_XENFS)		+= xenfs/
-obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
\ No newline at end of file
+obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
+obj-$(CONFIG_XEN_PLATFORM_PCI)	+= platform-pci.o
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index d659480125f..7c64473c9f3 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -665,7 +665,7 @@ static void __xen_evtchn_do_upcall(void)
 
 		count = __get_cpu_var(xed_nesting_count);
 		__get_cpu_var(xed_nesting_count) = 0;
-	} while(count != 1);
+	} while (count != 1 || vcpu_info->evtchn_upcall_pending);
 
 out:
 
@@ -689,6 +689,7 @@ void xen_hvm_evtchn_do_upcall(void)
 {
 	__xen_evtchn_do_upcall();
 }
+EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
 
 /* Rebind a new event channel to an existing irq. */
 void rebind_evtchn_irq(int evtchn, int irq)
@@ -725,7 +726,10 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
 	struct evtchn_bind_vcpu bind_vcpu;
 	int evtchn = evtchn_from_irq(irq);
 
-	if (!VALID_EVTCHN(evtchn))
+	/* events delivered via platform PCI interrupts are always
+	 * routed to vcpu 0 */
+	if (!VALID_EVTCHN(evtchn) ||
+		(xen_hvm_domain() && !xen_have_vector_callback))
 		return -1;
 
 	/* Send future instances of this interrupt to other vcpu. */
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index f66db3b91d6..6c453181649 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -37,11 +37,13 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
+#include <linux/io.h>
 
 #include <xen/xen.h>
 #include <xen/interface/xen.h>
 #include <xen/page.h>
 #include <xen/grant_table.h>
+#include <xen/interface/memory.h>
 #include <asm/xen/hypercall.h>
 
 #include <asm/pgtable.h>
@@ -59,6 +61,8 @@ static unsigned int boot_max_nr_grant_frames;
 static int gnttab_free_count;
 static grant_ref_t gnttab_free_head;
 static DEFINE_SPINLOCK(gnttab_list_lock);
+unsigned long xen_hvm_resume_frames;
+EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
 
 static struct grant_entry *shared;
 
@@ -433,7 +437,7 @@ static unsigned int __max_nr_grant_frames(void)
 	return query.max_nr_frames;
 }
 
-static inline unsigned int max_nr_grant_frames(void)
+unsigned int gnttab_max_grant_frames(void)
 {
 	unsigned int xen_max = __max_nr_grant_frames();
 
@@ -441,6 +445,7 @@ static inline unsigned int max_nr_grant_frames(void)
 		return boot_max_nr_grant_frames;
 	return xen_max;
 }
+EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
 
 static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 {
@@ -449,6 +454,30 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 	unsigned int nr_gframes = end_idx + 1;
 	int rc;
 
+	if (xen_hvm_domain()) {
+		struct xen_add_to_physmap xatp;
+		unsigned int i = end_idx;
+		rc = 0;
+		/*
+		 * Loop backwards, so that the first hypercall has the largest
+		 * index, ensuring that the table will grow only once.
+		 */
+		do {
+			xatp.domid = DOMID_SELF;
+			xatp.idx = i;
+			xatp.space = XENMAPSPACE_grant_table;
+			xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
+			rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
+			if (rc != 0) {
+				printk(KERN_WARNING
+						"grant table add_to_physmap failed, err=%d\n", rc);
+				break;
+			}
+		} while (i-- > start_idx);
+
+		return rc;
+	}
+
 	frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
 	if (!frames)
 		return -ENOMEM;
@@ -465,7 +494,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 
 	BUG_ON(rc || setup.status);
 
-	rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(),
+	rc = arch_gnttab_map_shared(frames, nr_gframes, gnttab_max_grant_frames(),
 				    &shared);
 	BUG_ON(rc);
 
@@ -476,9 +505,27 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 
 int gnttab_resume(void)
 {
-	if (max_nr_grant_frames() < nr_grant_frames)
+	unsigned int max_nr_gframes;
+
+	max_nr_gframes = gnttab_max_grant_frames();
+	if (max_nr_gframes < nr_grant_frames)
 		return -ENOSYS;
-	return gnttab_map(0, nr_grant_frames - 1);
+
+	if (xen_pv_domain())
+		return gnttab_map(0, nr_grant_frames - 1);
+
+	if (!shared) {
+		shared = ioremap(xen_hvm_resume_frames, PAGE_SIZE * max_nr_gframes);
+		if (shared == NULL) {
+			printk(KERN_WARNING
+					"Failed to ioremap gnttab share frames!");
+			return -ENOMEM;
+		}
+	}
+
+	gnttab_map(0, nr_grant_frames - 1);
+
+	return 0;
 }
 
 int gnttab_suspend(void)
@@ -495,7 +542,7 @@ static int gnttab_expand(unsigned int req_entries)
 	cur = nr_grant_frames;
 	extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
 		 GREFS_PER_GRANT_FRAME);
-	if (cur + extra > max_nr_grant_frames())
+	if (cur + extra > gnttab_max_grant_frames())
 		return -ENOSPC;
 
 	rc = gnttab_map(cur, cur + extra - 1);
@@ -505,15 +552,12 @@ static int gnttab_expand(unsigned int req_entries)
 	return rc;
 }
 
-static int __devinit gnttab_init(void)
+int gnttab_init(void)
 {
 	int i;
 	unsigned int max_nr_glist_frames, nr_glist_frames;
 	unsigned int nr_init_grefs;
 
-	if (!xen_domain())
-		return -ENODEV;
-
 	nr_grant_frames = 1;
 	boot_max_nr_grant_frames = __max_nr_grant_frames();
 
@@ -556,5 +600,18 @@ static int __devinit gnttab_init(void)
 	kfree(gnttab_list);
 	return -ENOMEM;
 }
+EXPORT_SYMBOL_GPL(gnttab_init);
+
+static int __devinit __gnttab_init(void)
+{
+	/* Delay grant-table initialization in the PV on HVM case */
+	if (xen_hvm_domain())
+		return 0;
+
+	if (!xen_pv_domain())
+		return -ENODEV;
+
+	return gnttab_init();
+}
 
-core_initcall(gnttab_init);
+core_initcall(__gnttab_init);
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 07e857b0de1..af9c5594d31 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -264,5 +264,6 @@ static int __init setup_shutdown_event(void)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(xen_setup_shutdown_event);
 
 subsys_initcall(setup_shutdown_event);
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
new file mode 100644
index 00000000000..a0ee5d06f71
--- /dev/null
+++ b/drivers/xen/platform-pci.c
@@ -0,0 +1,181 @@
+/******************************************************************************
+ * platform-pci.c
+ *
+ * Xen platform PCI device driver
+ * Copyright (c) 2005, Intel Corporation.
+ * Copyright (c) 2007, XenSource Inc.
+ * Copyright (c) 2010, Citrix
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include <xen/grant_table.h>
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/hvm.h>
+
+#define DRV_NAME    "xen-platform-pci"
+
+MODULE_AUTHOR("ssmith@xensource.com and stefano.stabellini@eu.citrix.com");
+MODULE_DESCRIPTION("Xen platform PCI device");
+MODULE_LICENSE("GPL");
+
+static unsigned long platform_mmio;
+static unsigned long platform_mmio_alloc;
+static unsigned long platform_mmiolen;
+
+unsigned long alloc_xen_mmio(unsigned long len)
+{
+	unsigned long addr;
+
+	addr = platform_mmio + platform_mmio_alloc;
+	platform_mmio_alloc += len;
+	BUG_ON(platform_mmio_alloc > platform_mmiolen);
+
+	return addr;
+}
+
+static uint64_t get_callback_via(struct pci_dev *pdev)
+{
+	u8 pin;
+	int irq;
+
+	irq = pdev->irq;
+	if (irq < 16)
+		return irq; /* ISA IRQ */
+
+	pin = pdev->pin;
+
+	/* We don't know the GSI. Specify the PCI INTx line instead. */
+	return ((uint64_t)0x01 << 56) | /* PCI INTx identifier */
+		((uint64_t)pci_domain_nr(pdev->bus) << 32) |
+		((uint64_t)pdev->bus->number << 16) |
+		((uint64_t)(pdev->devfn & 0xff) << 8) |
+		((uint64_t)(pin - 1) & 3);
+}
+
+static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id)
+{
+	xen_hvm_evtchn_do_upcall();
+	return IRQ_HANDLED;
+}
+
+static int xen_allocate_irq(struct pci_dev *pdev)
+{
+	return request_irq(pdev->irq, do_hvm_evtchn_intr,
+			IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TRIGGER_RISING,
+			"xen-platform-pci", pdev);
+}
+
+static int __devinit platform_pci_init(struct pci_dev *pdev,
+				       const struct pci_device_id *ent)
+{
+	int i, ret;
+	long ioaddr, iolen;
+	long mmio_addr, mmio_len;
+	uint64_t callback_via;
+	unsigned int max_nr_gframes;
+
+	i = pci_enable_device(pdev);
+	if (i)
+		return i;
+
+	ioaddr = pci_resource_start(pdev, 0);
+	iolen = pci_resource_len(pdev, 0);
+
+	mmio_addr = pci_resource_start(pdev, 1);
+	mmio_len = pci_resource_len(pdev, 1);
+
+	if (mmio_addr == 0 || ioaddr == 0) {
+		dev_err(&pdev->dev, "no resources found\n");
+		ret = -ENOENT;
+		goto pci_out;
+	}
+
+	if (request_mem_region(mmio_addr, mmio_len, DRV_NAME) == NULL) {
+		dev_err(&pdev->dev, "MEM I/O resource 0x%lx @ 0x%lx busy\n",
+		       mmio_addr, mmio_len);
+		ret = -EBUSY;
+		goto pci_out;
+	}
+
+	if (request_region(ioaddr, iolen, DRV_NAME) == NULL) {
+		dev_err(&pdev->dev, "I/O resource 0x%lx @ 0x%lx busy\n",
+		       iolen, ioaddr);
+		ret = -EBUSY;
+		goto mem_out;
+	}
+
+	platform_mmio = mmio_addr;
+	platform_mmiolen = mmio_len;
+
+	if (!xen_have_vector_callback) {
+		ret = xen_allocate_irq(pdev);
+		if (ret) {
+			dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret);
+			goto out;
+		}
+		callback_via = get_callback_via(pdev);
+		ret = xen_set_callback_via(callback_via);
+		if (ret) {
+			dev_warn(&pdev->dev, "Unable to set the evtchn callback "
+					 "err=%d\n", ret);
+			goto out;
+		}
+	}
+
+	max_nr_gframes = gnttab_max_grant_frames();
+	xen_hvm_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
+	ret = gnttab_init();
+	if (ret)
+		goto out;
+	xenbus_probe(NULL);
+	return 0;
+
+out:
+	release_region(ioaddr, iolen);
+mem_out:
+	release_mem_region(mmio_addr, mmio_len);
+pci_out:
+	pci_disable_device(pdev);
+	return ret;
+}
+
+static struct pci_device_id platform_pci_tbl[] __devinitdata = {
+	{PCI_VENDOR_ID_XEN, PCI_DEVICE_ID_XEN_PLATFORM,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0,}
+};
+
+MODULE_DEVICE_TABLE(pci, platform_pci_tbl);
+
+static struct pci_driver platform_driver = {
+	.name =           DRV_NAME,
+	.probe =          platform_pci_init,
+	.id_table =       platform_pci_tbl,
+};
+
+static int __init platform_pci_module_init(void)
+{
+	return pci_register_driver(&platform_driver);
+}
+
+module_init(platform_pci_module_init);
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index d96fa75b45e..a9e83c438cb 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -781,8 +781,23 @@ void xenbus_probe(struct work_struct *unused)
 	/* Notify others that xenstore is up */
 	blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
 }
+EXPORT_SYMBOL_GPL(xenbus_probe);
 
-static int __init xenbus_probe_init(void)
+static int __init xenbus_probe_initcall(void)
+{
+	if (!xen_domain())
+		return -ENODEV;
+
+	if (xen_initial_domain() || xen_hvm_domain())
+		return 0;
+
+	xenbus_probe(NULL);
+	return 0;
+}
+
+device_initcall(xenbus_probe_initcall);
+
+static int __init xenbus_init(void)
 {
 	int err = 0;
 
@@ -834,9 +849,6 @@ static int __init xenbus_probe_init(void)
 		goto out_unreg_back;
 	}
 
-	if (!xen_initial_domain())
-		xenbus_probe(NULL);
-
 #ifdef CONFIG_XEN_COMPAT_XENFS
 	/*
 	 * Create xenfs mountpoint in /proc for compatibility with
@@ -857,7 +869,7 @@ static int __init xenbus_probe_init(void)
 	return err;
 }
 
-postcore_initcall(xenbus_probe_init);
+postcore_initcall(xenbus_init);
 
 MODULE_LICENSE("GPL");
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 3bedcc149c8..cca2526f28d 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2772,3 +2772,6 @@
 #define PCI_DEVICE_ID_RME_DIGI32	0x9896
 #define PCI_DEVICE_ID_RME_DIGI32_PRO	0x9897
 #define PCI_DEVICE_ID_RME_DIGI32_8	0x9898
+
+#define PCI_VENDOR_ID_XEN		0x5853
+#define PCI_DEVICE_ID_XEN_PLATFORM	0x0001
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index a40f1cd91be..9a731706a01 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -51,6 +51,7 @@ struct gnttab_free_callback {
 	u16 count;
 };
 
+int gnttab_init(void);
 int gnttab_suspend(void);
 int gnttab_resume(void);
 
@@ -112,6 +113,9 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
 void arch_gnttab_unmap_shared(struct grant_entry *shared,
 			      unsigned long nr_gframes);
 
+extern unsigned long xen_hvm_resume_frames;
+unsigned int gnttab_max_grant_frames(void);
+
 #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
 
 #endif /* __ASM_GNTTAB_H__ */
diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
index 39da93c21de..39e571796e3 100644
--- a/include/xen/interface/grant_table.h
+++ b/include/xen/interface/grant_table.h
@@ -28,6 +28,7 @@
 #ifndef __XEN_PUBLIC_GRANT_TABLE_H__
 #define __XEN_PUBLIC_GRANT_TABLE_H__
 
+#include <xen/interface/xen.h>
 
 /***********************************
  * GRANT TABLE REPRESENTATION
-- 
cgit v1.2.3-70-g09d2


From 016b6f5fe8398b0291cece60b749d7c930a2e09c Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Fri, 14 May 2010 12:45:07 +0100
Subject: xen: Add suspend/resume support for PV on HVM guests.

Suspend/resume requires a few different things on HVM: the suspend
hypercall is different, and we don't need to save/restore memory-related
settings, except for the shared info page and the callback mechanism.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c   | 24 ++++++++++++++++++------
 arch/x86/xen/suspend.c     |  6 ++++++
 arch/x86/xen/xen-ops.h     |  1 +
 drivers/xen/manage.c       | 45 +++++++++++++++++++++++++++++++++++++++++----
 drivers/xen/platform-pci.c | 22 +++++++++++++++++++++-
 include/xen/xen-ops.h      |  3 +++
 6 files changed, 90 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b211a04c4b2..127c95c8d15 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1262,13 +1262,15 @@ static int init_hvm_pv_info(int *major, int *minor)
 	return 0;
 }
 
-static void __init init_shared_info(void)
+void xen_hvm_init_shared_info(void)
 {
+	int cpu;
 	struct xen_add_to_physmap xatp;
-	struct shared_info *shared_info_page;
+	static struct shared_info *shared_info_page = 0;
 
-	shared_info_page = (struct shared_info *)
-		extend_brk(PAGE_SIZE, PAGE_SIZE);
+	if (!shared_info_page)
+		shared_info_page = (struct shared_info *)
+			extend_brk(PAGE_SIZE, PAGE_SIZE);
 	xatp.domid = DOMID_SELF;
 	xatp.idx = 0;
 	xatp.space = XENMAPSPACE_shared_info;
@@ -1278,7 +1280,17 @@ static void __init init_shared_info(void)
 
 	HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
 
-	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
+	/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
+	 * page, we use it in the event channel upcall and in some pvclock
+	 * related functions. We don't need the vcpu_info placement
+	 * optimizations because we don't use any pv_mmu or pv_irq op on
+	 * HVM.
+	 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
+	 * online but xen_hvm_init_shared_info is run at resume time too and
+	 * in that case multiple vcpus might be online. */
+	for_each_online_cpu(cpu) {
+		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+	}
 }
 
 static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
@@ -1308,7 +1320,7 @@ static void __init xen_hvm_guest_init(void)
 	if (r < 0)
 		return;
 
-	init_shared_info();
+	xen_hvm_init_shared_info();
 
 	if (xen_feature(XENFEAT_hvm_callback_vector))
 		xen_have_vector_callback = 1;
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index a9c66110803..d07479c340f 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -26,6 +26,12 @@ void xen_pre_suspend(void)
 		BUG();
 }
 
+void xen_hvm_post_suspend(int suspend_cancelled)
+{
+	xen_hvm_init_shared_info();
+	xen_callback_vector();
+}
+
 void xen_post_suspend(int suspend_cancelled)
 {
 	xen_build_mfn_list_list();
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 0d0e0e6a747..01c9dd38652 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -39,6 +39,7 @@ void xen_enable_syscall(void);
 void xen_vcpu_restore(void);
 
 void xen_callback_vector(void);
+void xen_hvm_init_shared_info(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
 
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index af9c5594d31..1799bd89031 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -9,6 +9,7 @@
 #include <linux/stop_machine.h>
 #include <linux/freezer.h>
 
+#include <xen/xen.h>
 #include <xen/xenbus.h>
 #include <xen/grant_table.h>
 #include <xen/events.h>
@@ -17,6 +18,7 @@
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
+#include <asm/xen/hypervisor.h>
 
 enum shutdown_state {
 	SHUTDOWN_INVALID = -1,
@@ -33,10 +35,30 @@ enum shutdown_state {
 static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
 
 #ifdef CONFIG_PM_SLEEP
-static int xen_suspend(void *data)
+static int xen_hvm_suspend(void *data)
 {
+	struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
 	int *cancelled = data;
+
+	BUG_ON(!irqs_disabled());
+
+	*cancelled = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
+
+	xen_hvm_post_suspend(*cancelled);
+	gnttab_resume();
+
+	if (!*cancelled) {
+		xen_irq_resume();
+		xen_timer_resume();
+	}
+
+	return 0;
+}
+
+static int xen_suspend(void *data)
+{
 	int err;
+	int *cancelled = data;
 
 	BUG_ON(!irqs_disabled());
 
@@ -106,7 +128,10 @@ static void do_suspend(void)
 		goto out_resume;
 	}
 
-	err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
+	if (xen_hvm_domain())
+		err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0));
+	else
+		err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
 
 	dpm_resume_noirq(PMSG_RESUME);
 
@@ -255,7 +280,19 @@ static int shutdown_event(struct notifier_block *notifier,
 	return NOTIFY_DONE;
 }
 
-static int __init setup_shutdown_event(void)
+static int __init __setup_shutdown_event(void)
+{
+	/* Delay initialization in the PV on HVM case */
+	if (xen_hvm_domain())
+		return 0;
+
+	if (!xen_pv_domain())
+		return -ENODEV;
+
+	return xen_setup_shutdown_event();
+}
+
+int xen_setup_shutdown_event(void)
 {
 	static struct notifier_block xenstore_notifier = {
 		.notifier_call = shutdown_event
@@ -266,4 +303,4 @@ static int __init setup_shutdown_event(void)
 }
 EXPORT_SYMBOL_GPL(xen_setup_shutdown_event);
 
-subsys_initcall(setup_shutdown_event);
+subsys_initcall(__setup_shutdown_event);
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index a0ee5d06f71..bdb44f2473e 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -31,6 +31,7 @@
 #include <xen/xenbus.h>
 #include <xen/events.h>
 #include <xen/hvm.h>
+#include <xen/xen-ops.h>
 
 #define DRV_NAME    "xen-platform-pci"
 
@@ -41,6 +42,7 @@ MODULE_LICENSE("GPL");
 static unsigned long platform_mmio;
 static unsigned long platform_mmio_alloc;
 static unsigned long platform_mmiolen;
+static uint64_t callback_via;
 
 unsigned long alloc_xen_mmio(unsigned long len)
 {
@@ -85,13 +87,25 @@ static int xen_allocate_irq(struct pci_dev *pdev)
 			"xen-platform-pci", pdev);
 }
 
+static int platform_pci_resume(struct pci_dev *pdev)
+{
+	int err;
+	if (xen_have_vector_callback)
+		return 0;
+	err = xen_set_callback_via(callback_via);
+	if (err) {
+		dev_err(&pdev->dev, "platform_pci_resume failure!\n");
+		return err;
+	}
+	return 0;
+}
+
 static int __devinit platform_pci_init(struct pci_dev *pdev,
 				       const struct pci_device_id *ent)
 {
 	int i, ret;
 	long ioaddr, iolen;
 	long mmio_addr, mmio_len;
-	uint64_t callback_via;
 	unsigned int max_nr_gframes;
 
 	i = pci_enable_device(pdev);
@@ -148,6 +162,9 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
 	if (ret)
 		goto out;
 	xenbus_probe(NULL);
+	ret = xen_setup_shutdown_event();
+	if (ret)
+		goto out;
 	return 0;
 
 out:
@@ -171,6 +188,9 @@ static struct pci_driver platform_driver = {
 	.name =           DRV_NAME,
 	.probe =          platform_pci_init,
 	.id_table =       platform_pci_tbl,
+#ifdef CONFIG_PM
+	.resume_early =   platform_pci_resume,
+#endif
 };
 
 static int __init platform_pci_module_init(void)
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 883a21bba24..46bc81ef74c 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -7,6 +7,7 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
 
 void xen_pre_suspend(void);
 void xen_post_suspend(int suspend_cancelled);
+void xen_hvm_post_suspend(int suspend_cancelled);
 
 void xen_mm_pin_all(void);
 void xen_mm_unpin_all(void);
@@ -14,4 +15,6 @@ void xen_mm_unpin_all(void);
 void xen_timer_resume(void);
 void xen_arch_resume(void);
 
+int xen_setup_shutdown_event(void);
+
 #endif /* INCLUDE_XEN_OPS_H */
-- 
cgit v1.2.3-70-g09d2


From 2f1b7cd29fa4917f19d2624afc773d941684c5df Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Thu, 22 Jul 2010 03:22:18 +0900
Subject: nilfs2: clarify byte offset in super block format

This inserts comments indicating the hexadecimal offsets in the
declaration of the nilfs_super_block structure so that people can know
the offsets of its fields without counting from the head.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 include/linux/nilfs2_fs.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index 8c2c6116e78..cc3465e5be3 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -160,7 +160,7 @@ struct nilfs_super_root {
  * struct nilfs_super_block - structure of super block on disk
  */
 struct nilfs_super_block {
-	__le32	s_rev_level;		/* Revision level */
+/*00*/	__le32	s_rev_level;		/* Revision level */
 	__le16	s_minor_rev_level;	/* minor revision level */
 	__le16	s_magic;		/* Magic signature */
 
@@ -169,47 +169,47 @@ struct nilfs_super_block {
 					   is excluded. */
 	__le16  s_flags;		/* flags */
 	__le32  s_crc_seed;		/* Seed value of CRC calculation */
-	__le32	s_sum;			/* Check sum of super block */
+/*10*/	__le32	s_sum;			/* Check sum of super block */
 
 	__le32	s_log_block_size;	/* Block size represented as follows
 					   blocksize =
 					       1 << (s_log_block_size + 10) */
 	__le64  s_nsegments;		/* Number of segments in filesystem */
-	__le64  s_dev_size;		/* block device size in bytes */
+/*20*/	__le64  s_dev_size;		/* block device size in bytes */
 	__le64	s_first_data_block;	/* 1st seg disk block number */
-	__le32  s_blocks_per_segment;   /* number of blocks per full segment */
+/*30*/	__le32  s_blocks_per_segment;   /* number of blocks per full segment */
 	__le32	s_r_segments_percentage; /* Reserved segments percentage */
 
 	__le64  s_last_cno;		/* Last checkpoint number */
-	__le64  s_last_pseg;		/* disk block addr pseg written last */
+/*40*/	__le64  s_last_pseg;		/* disk block addr pseg written last */
 	__le64  s_last_seq;             /* seq. number of seg written last */
-	__le64	s_free_blocks_count;	/* Free blocks count */
+/*50*/	__le64	s_free_blocks_count;	/* Free blocks count */
 
 	__le64	s_ctime;		/* Creation time (execution time of
 					   newfs) */
-	__le64	s_mtime;		/* Mount time */
+/*60*/	__le64	s_mtime;		/* Mount time */
 	__le64	s_wtime;		/* Write time */
-	__le16	s_mnt_count;		/* Mount count */
+/*70*/	__le16	s_mnt_count;		/* Mount count */
 	__le16	s_max_mnt_count;	/* Maximal mount count */
 	__le16	s_state;		/* File system state */
 	__le16	s_errors;		/* Behaviour when detecting errors */
 	__le64	s_lastcheck;		/* time of last check */
 
-	__le32	s_checkinterval;	/* max. time between checks */
+/*80*/	__le32	s_checkinterval;	/* max. time between checks */
 	__le32	s_creator_os;		/* OS */
 	__le16	s_def_resuid;		/* Default uid for reserved blocks */
 	__le16	s_def_resgid;		/* Default gid for reserved blocks */
 	__le32	s_first_ino;		/* First non-reserved inode */
 
-	__le16  s_inode_size;		/* Size of an inode */
+/*90*/	__le16  s_inode_size;		/* Size of an inode */
 	__le16  s_dat_entry_size;       /* Size of a dat entry */
 	__le16  s_checkpoint_size;      /* Size of a checkpoint */
 	__le16	s_segment_usage_size;	/* Size of a segment usage */
 
-	__u8	s_uuid[16];		/* 128-bit uuid for volume */
-	char	s_volume_name[80];	/* volume name */
+/*98*/	__u8	s_uuid[16];		/* 128-bit uuid for volume */
+/*A8*/	char	s_volume_name[80];	/* volume name */
 
-	__le32  s_c_interval;           /* Commit interval of segment */
+/*F8*/	__le32  s_c_interval;           /* Commit interval of segment */
 	__le32  s_c_block_max;          /* Threshold of data amount for
 					   the segment construction */
 	__u32	s_reserved[192];	/* padding to the end of the block */
-- 
cgit v1.2.3-70-g09d2


From 1a80a1763fb760b3a84a28df87515f7cdc07a4f4 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Thu, 22 Jul 2010 03:22:19 +0900
Subject: nilfs2: add feature set fields to super block

This adds three new fields to nilfs_super_block structure, compatible
feature set, readonly-compatible feature set, and incompatible feature
set in order to prepare for future disk format modifications.

The role of these fields conforms to that of ext3 or other
filesystems.  The most important field is the incompatible feature set;
it is used to refuse to mount a filesystem which sets an
incompatible feature the kernel doesn't know about.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 include/linux/nilfs2_fs.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index cc3465e5be3..7dd4cd49449 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -212,7 +212,10 @@ struct nilfs_super_block {
 /*F8*/	__le32  s_c_interval;           /* Commit interval of segment */
 	__le32  s_c_block_max;          /* Threshold of data amount for
 					   the segment construction */
-	__u32	s_reserved[192];	/* padding to the end of the block */
+/*100*/	__le64  s_feature_compat;	/* Compatible feature set */
+	__le64  s_feature_compat_ro;	/* Read-only compatible feature set */
+	__le64  s_feature_incompat;	/* Incompatible feature set */
+	__u32	s_reserved[186];	/* padding to the end of the block */
 };
 
 /*
@@ -227,6 +230,16 @@ struct nilfs_super_block {
 #define NILFS_CURRENT_REV	2	/* current major revision */
 #define NILFS_MINOR_REV		0	/* minor revision */
 
+/*
+ * Feature set definitions
+ *
+ * If there is a bit set in the incompatible feature set that the kernel
+ * doesn't know about, it should refuse to mount the filesystem.
+ */
+#define NILFS_FEATURE_COMPAT_SUPP	0ULL
+#define NILFS_FEATURE_COMPAT_RO_SUPP	0ULL
+#define NILFS_FEATURE_INCOMPAT_SUPP	0ULL
+
 /*
  * Bytes count of super_block for CRC-calculation
  */
-- 
cgit v1.2.3-70-g09d2


From da5e37efe8704fc2b354626467f80f73c5e3c020 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Tue, 13 Jul 2010 11:39:42 +0200
Subject: vmlinux.lds: fix .data..init_task output section (fix powerpc boot)

The .data..init_task output section was missing
a load offset, causing a powerpc target to fail to boot.

Sean MacLennan tracked it down to the definition of
INIT_TASK_DATA_SECTION().

There are only two users of INIT_TASK_DATA_SECTION()
in the kernel today: cris and powerpc.
cris does not support relocatable kernels and is thus not
impacted by this change.

Fix INIT_TASK_DATA_SECTION() to specify load offset like
all other output sections.

Reported-by: Sean MacLennan <smaclennan@pikatech.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/asm-generic/vmlinux.lds.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 48c5299cbf2..cdfff74e973 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -435,7 +435,7 @@
  */
 #define INIT_TASK_DATA_SECTION(align)					\
 	. = ALIGN(align);						\
-	.data..init_task : {						\
+	.data..init_task :  AT(ADDR(.data..init_task) - LOAD_OFFSET) {	\
 		INIT_TASK_DATA(align)					\
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 9c3e1c39679144c250dda95098333ecb5f1f407a Mon Sep 17 00:00:00 2001
From: Hannes Eder <heder@google.com>
Date: Fri, 23 Jul 2010 12:42:58 +0200
Subject: netfilter: xt_ipvs (netfilter matcher for IPVS)

This implements the kernel-space side of the netfilter matcher xt_ipvs.

[ minor fixes by Simon Horman <horms@verge.net.au> ]
Signed-off-by: Hannes Eder <heder@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
[ Patrick: added xt_ipvs.h to Kbuild ]
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/Kbuild    |   1 +
 include/linux/netfilter/xt_ipvs.h |  27 ++++++
 net/netfilter/Kconfig             |  10 ++
 net/netfilter/Makefile            |   1 +
 net/netfilter/ipvs/ip_vs_proto.c  |   1 +
 net/netfilter/xt_ipvs.c           | 189 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 229 insertions(+)
 create mode 100644 include/linux/netfilter/xt_ipvs.h
 create mode 100644 net/netfilter/xt_ipvs.c

(limited to 'include')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index b93b64dc9fa..0cb62c85718 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -31,6 +31,7 @@ header-y += xt_dscp.h
 header-y += xt_esp.h
 header-y += xt_hashlimit.h
 header-y += xt_iprange.h
+header-y += xt_ipvs.h
 header-y += xt_helper.h
 header-y += xt_length.h
 header-y += xt_limit.h
diff --git a/include/linux/netfilter/xt_ipvs.h b/include/linux/netfilter/xt_ipvs.h
new file mode 100644
index 00000000000..1167aeb7a34
--- /dev/null
+++ b/include/linux/netfilter/xt_ipvs.h
@@ -0,0 +1,27 @@
+#ifndef _XT_IPVS_H
+#define _XT_IPVS_H
+
+enum {
+	XT_IPVS_IPVS_PROPERTY =	1 << 0, /* all other options imply this one */
+	XT_IPVS_PROTO =		1 << 1,
+	XT_IPVS_VADDR =		1 << 2,
+	XT_IPVS_VPORT =		1 << 3,
+	XT_IPVS_DIR =		1 << 4,
+	XT_IPVS_METHOD =	1 << 5,
+	XT_IPVS_VPORTCTL =	1 << 6,
+	XT_IPVS_MASK =		(1 << 7) - 1,
+	XT_IPVS_ONCE_MASK =	XT_IPVS_MASK & ~XT_IPVS_IPVS_PROPERTY
+};
+
+struct xt_ipvs_mtinfo {
+	union nf_inet_addr	vaddr, vmask;
+	__be16			vport;
+	__u8			l4proto;
+	__u8			fwd_method;
+	__be16			vportctl;
+
+	__u8			invert;
+	__u8			bitmask;
+};
+
+#endif /* _XT_IPVS_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 5fb8efa84df..551b58419df 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -742,6 +742,16 @@ config NETFILTER_XT_MATCH_IPRANGE
 
 	If unsure, say M.
 
+config NETFILTER_XT_MATCH_IPVS
+	tristate '"ipvs" match support'
+	depends on IP_VS
+	depends on NETFILTER_ADVANCED
+	depends on NF_CONNTRACK
+	help
+	  This option allows you to match against IPVS properties of a packet.
+
+	  If unsure, say N.
+
 config NETFILTER_XT_MATCH_LENGTH
 	tristate '"length" match support'
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 36ef8e63be1..4366c79a668 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 2d3d5e4b35f..027f654799f 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -98,6 +98,7 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
 
 	return NULL;
 }
+EXPORT_SYMBOL(ip_vs_proto_get);
 
 
 /*
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
new file mode 100644
index 00000000000..7a4d66db95a
--- /dev/null
+++ b/net/netfilter/xt_ipvs.c
@@ -0,0 +1,189 @@
+/*
+ *	xt_ipvs - kernel module to match IPVS connection properties
+ *
+ *	Author: Hannes Eder <heder@google.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#endif
+#include <linux/ip_vs.h>
+#include <linux/types.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_ipvs.h>
+#include <net/netfilter/nf_conntrack.h>
+
+#include <net/ip_vs.h>
+
+MODULE_AUTHOR("Hannes Eder <heder@google.com>");
+MODULE_DESCRIPTION("Xtables: match IPVS connection properties");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_ipvs");
+MODULE_ALIAS("ip6t_ipvs");
+
+/* borrowed from xt_conntrack */
+static bool ipvs_mt_addrcmp(const union nf_inet_addr *kaddr,
+			    const union nf_inet_addr *uaddr,
+			    const union nf_inet_addr *umask,
+			    unsigned int l3proto)
+{
+	if (l3proto == NFPROTO_IPV4)
+		return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0;
+#ifdef CONFIG_IP_VS_IPV6
+	else if (l3proto == NFPROTO_IPV6)
+		return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6,
+		       &uaddr->in6) == 0;
+#endif
+	else
+		return false;
+}
+
+static bool
+ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_ipvs_mtinfo *data = par->matchinfo;
+	/* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */
+	const u_int8_t family = par->family;
+	struct ip_vs_iphdr iph;
+	struct ip_vs_protocol *pp;
+	struct ip_vs_conn *cp;
+	bool match = true;
+
+	if (data->bitmask == XT_IPVS_IPVS_PROPERTY) {
+		match = skb->ipvs_property ^
+			!!(data->invert & XT_IPVS_IPVS_PROPERTY);
+		goto out;
+	}
+
+	/* other flags than XT_IPVS_IPVS_PROPERTY are set */
+	if (!skb->ipvs_property) {
+		match = false;
+		goto out;
+	}
+
+	ip_vs_fill_iphdr(family, skb_network_header(skb), &iph);
+
+	if (data->bitmask & XT_IPVS_PROTO)
+		if ((iph.protocol == data->l4proto) ^
+		    !(data->invert & XT_IPVS_PROTO)) {
+			match = false;
+			goto out;
+		}
+
+	pp = ip_vs_proto_get(iph.protocol);
+	if (unlikely(!pp)) {
+		match = false;
+		goto out;
+	}
+
+	/*
+	 * Check if the packet belongs to an existing entry
+	 */
+	cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */);
+	if (unlikely(cp == NULL)) {
+		match = false;
+		goto out;
+	}
+
+	/*
+	 * We found a connection, i.e. cp != NULL, make sure to call
+	 * __ip_vs_conn_put before returning.  In our case jump to out_put_cp.
+	 */
+
+	if (data->bitmask & XT_IPVS_VPORT)
+		if ((cp->vport == data->vport) ^
+		    !(data->invert & XT_IPVS_VPORT)) {
+			match = false;
+			goto out_put_cp;
+		}
+
+	if (data->bitmask & XT_IPVS_VPORTCTL)
+		if ((cp->control != NULL &&
+		     cp->control->vport == data->vportctl) ^
+		    !(data->invert & XT_IPVS_VPORTCTL)) {
+			match = false;
+			goto out_put_cp;
+		}
+
+	if (data->bitmask & XT_IPVS_DIR) {
+		enum ip_conntrack_info ctinfo;
+		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+		if (ct == NULL || nf_ct_is_untracked(ct)) {
+			match = false;
+			goto out_put_cp;
+		}
+
+		if ((ctinfo >= IP_CT_IS_REPLY) ^
+		    !!(data->invert & XT_IPVS_DIR)) {
+			match = false;
+			goto out_put_cp;
+		}
+	}
+
+	if (data->bitmask & XT_IPVS_METHOD)
+		if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method) ^
+		    !(data->invert & XT_IPVS_METHOD)) {
+			match = false;
+			goto out_put_cp;
+		}
+
+	if (data->bitmask & XT_IPVS_VADDR) {
+		if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr,
+				    &data->vmask, family) ^
+		    !(data->invert & XT_IPVS_VADDR)) {
+			match = false;
+			goto out_put_cp;
+		}
+	}
+
+out_put_cp:
+	__ip_vs_conn_put(cp);
+out:
+	pr_debug("match=%d\n", match);
+	return match;
+}
+
+static int ipvs_mt_check(const struct xt_mtchk_param *par)
+{
+	if (par->family != NFPROTO_IPV4
+#ifdef CONFIG_IP_VS_IPV6
+	    && par->family != NFPROTO_IPV6
+#endif
+		) {
+		pr_info("protocol family %u not supported\n", par->family);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct xt_match xt_ipvs_mt_reg __read_mostly = {
+	.name       = "ipvs",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.match      = ipvs_mt,
+	.checkentry = ipvs_mt_check,
+	.matchsize  = XT_ALIGN(sizeof(struct xt_ipvs_mtinfo)),
+	.me         = THIS_MODULE,
+};
+
+static int __init ipvs_mt_init(void)
+{
+	return xt_register_match(&xt_ipvs_mt_reg);
+}
+
+static void __exit ipvs_mt_exit(void)
+{
+	xt_unregister_match(&xt_ipvs_mt_reg);
+}
+
+module_init(ipvs_mt_init);
+module_exit(ipvs_mt_exit);
-- 
cgit v1.2.3-70-g09d2


From 7f1c407579519e71a0dcadc05614fd98acec585e Mon Sep 17 00:00:00 2001
From: Hannes Eder <heder@google.com>
Date: Fri, 23 Jul 2010 12:48:52 +0200
Subject: IPVS: make FTP work with full NAT support

Use nf_conntrack/nf_nat code to do the packet mangling and the TCP
sequence adjusting.  The function 'ip_vs_skb_replace' is now dead
code, so it is removed.

To SNAT FTP, use something like:

% iptables -t nat -A POSTROUTING -m ipvs --vaddr 192.168.100.30/32 \
    --vport 21 -j SNAT --to-source 192.168.10.10
and for the data connections in passive mode:

% iptables -t nat -A POSTROUTING -m ipvs --vaddr 192.168.100.30/32 \
    --vportctl 21 -j SNAT --to-source 192.168.10.10
Using '-m state --state RELATED' would also work.

Make sure the kernel modules ip_vs_ftp, nf_conntrack_ftp, and
nf_nat_ftp are loaded.

[ up-port and minor fixes by Simon Horman <horms@verge.net.au> ]
Signed-off-by: Hannes Eder <heder@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/ip_vs.h             |   2 -
 net/netfilter/ipvs/Kconfig      |   2 +-
 net/netfilter/ipvs/ip_vs_app.c  |  43 ----------
 net/netfilter/ipvs/ip_vs_core.c |   1 -
 net/netfilter/ipvs/ip_vs_ftp.c  | 176 +++++++++++++++++++++++++++++++++++++---
 5 files changed, 165 insertions(+), 59 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index fe82b1e10a2..1f9e51180bd 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -736,8 +736,6 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
 
 extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
 extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);
-extern int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
-			     char *o_buf, int o_len, char *n_buf, int n_len);
 extern int ip_vs_app_init(void);
 extern void ip_vs_app_cleanup(void);
 
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 366244492ac..be10f652604 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -235,7 +235,7 @@ comment 'IPVS application helper'
 
 config	IP_VS_FTP
   	tristate "FTP protocol helper"
-        depends on IP_VS_PROTO_TCP
+        depends on IP_VS_PROTO_TCP && NF_NAT
 	---help---
 	  FTP is a protocol that transfers IP address and/or port number in
 	  the payload. In the virtual server via Network Address Translation,
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 1cb0e834f8f..e76f87f4aca 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -569,49 +569,6 @@ static const struct file_operations ip_vs_app_fops = {
 };
 #endif
 
-
-/*
- *	Replace a segment of data with a new segment
- */
-int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
-		      char *o_buf, int o_len, char *n_buf, int n_len)
-{
-	int diff;
-	int o_offset;
-	int o_left;
-
-	EnterFunction(9);
-
-	diff = n_len - o_len;
-	o_offset = o_buf - (char *)skb->data;
-	/* The length of left data after o_buf+o_len in the skb data */
-	o_left = skb->len - (o_offset + o_len);
-
-	if (diff <= 0) {
-		memmove(o_buf + n_len, o_buf + o_len, o_left);
-		memcpy(o_buf, n_buf, n_len);
-		skb_trim(skb, skb->len + diff);
-	} else if (diff <= skb_tailroom(skb)) {
-		skb_put(skb, diff);
-		memmove(o_buf + n_len, o_buf + o_len, o_left);
-		memcpy(o_buf, n_buf, n_len);
-	} else {
-		if (pskb_expand_head(skb, skb_headroom(skb), diff, pri))
-			return -ENOMEM;
-		skb_put(skb, diff);
-		memmove(skb->data + o_offset + n_len,
-			skb->data + o_offset + o_len, o_left);
-		skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
-	}
-
-	/* must update the iph total length here */
-	ip_hdr(skb)->tot_len = htons(skb->len);
-
-	LeaveFunction(9);
-	return 0;
-}
-
-
 int __init ip_vs_app_init(void)
 {
 	/* we will replace it with proc_net_ipvs_create() soon */
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 58f82dfc950..4f8ddba4801 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -54,7 +54,6 @@
 
 EXPORT_SYMBOL(register_ip_vs_scheduler);
 EXPORT_SYMBOL(unregister_ip_vs_scheduler);
-EXPORT_SYMBOL(ip_vs_skb_replace);
 EXPORT_SYMBOL(ip_vs_proto_name);
 EXPORT_SYMBOL(ip_vs_conn_new);
 EXPORT_SYMBOL(ip_vs_conn_in_get);
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 2ae747a376a..f228a17ec64 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -20,6 +20,17 @@
  *
  * Author:	Wouter Gadeyne
  *
+ *
+ * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from
+ * http://www.ssi.bg/~ja/nfct/:
+ *
+ * ip_vs_nfct.c:	Netfilter connection tracking support for IPVS
+ *
+ * Portions Copyright (C) 2001-2002
+ * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
+ *
+ * Portions Copyright (C) 2003-2008
+ * Julian Anastasov
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -32,6 +43,9 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_nat_helper.h>
 #include <linux/gfp.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
@@ -43,6 +57,16 @@
 #define SERVER_STRING "227 Entering Passive Mode ("
 #define CLIENT_STRING "PORT "
 
+#define FMT_TUPLE	"%pI4:%u->%pI4:%u/%u"
+#define ARG_TUPLE(T)	&(T)->src.u3.ip, ntohs((T)->src.u.all), \
+			&(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
+			(T)->dst.protonum
+
+#define FMT_CONN	"%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
+#define ARG_CONN(C)	&((C)->caddr.ip), ntohs((C)->cport), \
+			&((C)->vaddr.ip), ntohs((C)->vport), \
+			&((C)->daddr.ip), ntohs((C)->dport), \
+			(C)->protocol, (C)->state
 
 /*
  * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
@@ -123,6 +147,119 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
 	return 1;
 }
 
+/*
+ * Called from init_conntrack() as expectfn handler.
+ */
+static void
+ip_vs_expect_callback(struct nf_conn *ct,
+		      struct nf_conntrack_expect *exp)
+{
+	struct nf_conntrack_tuple *orig, new_reply;
+	struct ip_vs_conn *cp;
+
+	if (exp->tuple.src.l3num != PF_INET)
+		return;
+
+	/*
+	 * We assume that no NF locks are held before this callback.
+	 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
+	 * expectations even if they use wildcard values, now we provide the
+	 * actual values from the newly created original conntrack direction.
+	 * The conntrack is confirmed when packet reaches IPVS hooks.
+	 */
+
+	/* RS->CLIENT */
+	orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum,
+				&orig->src.u3, orig->src.u.tcp.port,
+				&orig->dst.u3, orig->dst.u.tcp.port);
+	if (cp) {
+		/* Change reply CLIENT->RS to CLIENT->VS */
+		new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+		IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
+			  FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
+			  __func__, ct, ct->status,
+			  ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
+			  ARG_CONN(cp));
+		new_reply.dst.u3 = cp->vaddr;
+		new_reply.dst.u.tcp.port = cp->vport;
+		IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
+			  ", inout cp=" FMT_CONN "\n",
+			  __func__, ct,
+			  ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
+			  ARG_CONN(cp));
+		goto alter;
+	}
+
+	/* CLIENT->VS */
+	cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum,
+			       &orig->src.u3, orig->src.u.tcp.port,
+			       &orig->dst.u3, orig->dst.u.tcp.port);
+	if (cp) {
+		/* Change reply VS->CLIENT to RS->CLIENT */
+		new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+		IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
+			  FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
+			  __func__, ct, ct->status,
+			  ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
+			  ARG_CONN(cp));
+		new_reply.src.u3 = cp->daddr;
+		new_reply.src.u.tcp.port = cp->dport;
+		IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", "
+			  FMT_TUPLE ", outin cp=" FMT_CONN "\n",
+			  __func__, ct,
+			  ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
+			  ARG_CONN(cp));
+		goto alter;
+	}
+
+	IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE
+		  " - unknown expect\n",
+		  __func__, ct, ct->status, ARG_TUPLE(orig));
+	return;
+
+alter:
+	/* Never alter conntrack for non-NAT conns */
+	if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
+		nf_conntrack_alter_reply(ct, &new_reply);
+	ip_vs_conn_put(cp);
+	return;
+}
+
+/*
+ * Create NF conntrack expectation with wildcard (optional) source port.
+ * Then the default callback function will alter the reply and will confirm
+ * the conntrack entry when the first packet comes.
+ */
+static void
+ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct,
+		     struct ip_vs_conn *cp, u_int8_t proto,
+		     const __be16 *port, int from_rs)
+{
+	struct nf_conntrack_expect *exp;
+
+	BUG_ON(!ct || ct == &nf_conntrack_untracked);
+
+	exp = nf_ct_expect_alloc(ct);
+	if (!exp)
+		return;
+
+	if (from_rs)
+		nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
+				  nf_ct_l3num(ct), &cp->daddr, &cp->caddr,
+				  proto, port, &cp->cport);
+	else
+		nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
+				  nf_ct_l3num(ct), &cp->caddr, &cp->vaddr,
+				  proto, port, &cp->vport);
+
+	exp->expectfn = ip_vs_expect_callback;
+
+	IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n",
+		  __func__, ct, ARG_TUPLE(&exp->tuple));
+	nf_ct_expect_related(exp);
+	nf_ct_expect_put(exp);
+}
 
 /*
  * Look at outgoing ftp packets to catch the response to a PASV command
@@ -149,7 +286,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	struct ip_vs_conn *n_cp;
 	char buf[24];		/* xxx.xxx.xxx.xxx,ppp,ppp\000 */
 	unsigned buf_len;
-	int ret;
+	int ret = 0;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
 
 #ifdef CONFIG_IP_VS_IPV6
 	/* This application helper doesn't work with IPv6 yet,
@@ -219,19 +358,26 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 
 		buf_len = strlen(buf);
 
+		ct = nf_ct_get(skb, &ctinfo);
+		if (ct && !nf_ct_is_untracked(ct)) {
+			/* If mangling fails this function will return 0
+			 * which will cause the packet to be dropped.
+			 * Mangling can only fail under memory pressure,
+			 * hopefully it will succeed on the retransmitted
+			 * packet.
+			 */
+			ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+						       start-data, end-start,
+						       buf, buf_len);
+			if (ret)
+				ip_vs_expect_related(skb, ct, n_cp,
+						     IPPROTO_TCP, NULL, 0);
+		}
+
 		/*
-		 * Calculate required delta-offset to keep TCP happy
+		 * Not setting 'diff' is intentional, otherwise the sequence
+		 * would be adjusted twice.
 		 */
-		*diff = buf_len - (end-start);
-
-		if (*diff == 0) {
-			/* simply replace it with new passive address */
-			memcpy(start, buf, buf_len);
-			ret = 1;
-		} else {
-			ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start,
-					  end-start, buf, buf_len);
-		}
 
 		cp->app_data = NULL;
 		ip_vs_tcp_conn_listen(n_cp);
@@ -263,6 +409,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	union nf_inet_addr to;
 	__be16 port;
 	struct ip_vs_conn *n_cp;
+	struct nf_conn *ct;
 
 #ifdef CONFIG_IP_VS_IPV6
 	/* This application helper doesn't work with IPv6 yet,
@@ -349,6 +496,11 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		ip_vs_control_add(n_cp, cp);
 	}
 
+	ct = (struct nf_conn *)skb->nfct;
+	if (ct && ct != &nf_conntrack_untracked)
+		ip_vs_expect_related(skb, ct, n_cp,
+				     IPPROTO_TCP, &n_cp->dport, 1);
+
 	/*
 	 *	Move tunnel to listen state
 	 */
-- 
cgit v1.2.3-70-g09d2


From 43d2932d88e4ab776dd388c20b003ebd5e1d1f1f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 21 Jul 2010 14:22:21 +0200
Subject: quota: Use mark_inode_dirty_sync instead of mark_inode_dirty

Quota code never touches file data. It just modifies i_blocks + i_bytes
of inodes and inode flags of quota files. So use mark_inode_dirty_sync
instead of mark_inode_dirty.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c         |  2 +-
 include/linux/quotaops.h | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index b171221000f..a7023bcfae4 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1992,7 +1992,7 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
 				truncate_inode_pages(&toputinode[cnt]->i_data,
 						     0);
 				mutex_unlock(&toputinode[cnt]->i_mutex);
-				mark_inode_dirty(toputinode[cnt]);
+				mark_inode_dirty_sync(toputinode[cnt]);
 			}
 			mutex_unlock(&dqopt->dqonoff_mutex);
 		}
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 4881b49b1a9..d50ba858cfe 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -266,7 +266,7 @@ static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr)
 static inline void dquot_alloc_space_nofail(struct inode *inode, qsize_t nr)
 {
 	__dquot_alloc_space(inode, nr, DQUOT_SPACE_WARN|DQUOT_SPACE_NOFAIL);
-	mark_inode_dirty(inode);
+	mark_inode_dirty_sync(inode);
 }
 
 static inline int dquot_alloc_space(struct inode *inode, qsize_t nr)
@@ -275,7 +275,7 @@ static inline int dquot_alloc_space(struct inode *inode, qsize_t nr)
 
 	ret = dquot_alloc_space_nodirty(inode, nr);
 	if (!ret)
-		mark_inode_dirty(inode);
+		mark_inode_dirty_sync(inode);
 	return ret;
 }
 
@@ -305,7 +305,7 @@ static inline int dquot_prealloc_block(struct inode *inode, qsize_t nr)
 
 	ret = dquot_prealloc_block_nodirty(inode, nr);
 	if (!ret)
-		mark_inode_dirty(inode);
+		mark_inode_dirty_sync(inode);
 	return ret;
 }
 
@@ -321,7 +321,7 @@ static inline int dquot_claim_block(struct inode *inode, qsize_t nr)
 
 	ret = dquot_claim_space_nodirty(inode, nr << inode->i_blkbits);
 	if (!ret)
-		mark_inode_dirty(inode);
+		mark_inode_dirty_sync(inode);
 	return ret;
 }
 
@@ -333,7 +333,7 @@ static inline void dquot_free_space_nodirty(struct inode *inode, qsize_t nr)
 static inline void dquot_free_space(struct inode *inode, qsize_t nr)
 {
 	dquot_free_space_nodirty(inode, nr);
-	mark_inode_dirty(inode);
+	mark_inode_dirty_sync(inode);
 }
 
 static inline void dquot_free_block_nodirty(struct inode *inode, qsize_t nr)
-- 
cgit v1.2.3-70-g09d2


From e8648a1fdb54da1f683784b36a17aa65ea56e931 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 23 Jul 2010 12:59:36 +0200
Subject: netfilter: add xt_cpu match

In some situations a CPU match permits a better spreading of
connections, or selecting targets only for a given cpu.

With Receive Packet Steering or multiqueue NIC and appropriate IRQ
affinities, we can distribute traffic on available cpus, per session.
(all RX packets for a given flow are handled by a given cpu)

Some legacy applications being not SMP friendly, one way to scale a
server is to run multiple copies of them.

Instead of randomly choosing an instance, we can use the cpu number as a
key so that softirq handler for a whole instance is running on a single
cpu, maximizing cache effects in TCP/UDP stacks.

Using NAT for example, a four-way machine might run four copies of a
server application, using a separate listening port for each instance,
but still presenting a unique external port:

iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 0 \
        -j REDIRECT --to-port 8080

iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 1 \
        -j REDIRECT --to-port 8081

iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 2 \
        -j REDIRECT --to-port 8082

iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 3 \
        -j REDIRECT --to-port 8083

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/Kbuild   |  3 +-
 include/linux/netfilter/xt_cpu.h | 11 +++++++
 net/netfilter/Kconfig            |  9 ++++++
 net/netfilter/Makefile           |  1 +
 net/netfilter/xt_cpu.c           | 63 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 86 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/netfilter/xt_cpu.h
 create mode 100644 net/netfilter/xt_cpu.c

(limited to 'include')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 0cb62c85718..edeeabdc150 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -20,12 +20,13 @@ header-y += xt_TCPMSS.h
 header-y += xt_TCPOPTSTRIP.h
 header-y += xt_TEE.h
 header-y += xt_TPROXY.h
+header-y += xt_cluster.h
 header-y += xt_comment.h
 header-y += xt_connbytes.h
 header-y += xt_connlimit.h
 header-y += xt_connmark.h
 header-y += xt_conntrack.h
-header-y += xt_cluster.h
+header-y += xt_cpu.h
 header-y += xt_dccp.h
 header-y += xt_dscp.h
 header-y += xt_esp.h
diff --git a/include/linux/netfilter/xt_cpu.h b/include/linux/netfilter/xt_cpu.h
new file mode 100644
index 00000000000..93c7f11d8f4
--- /dev/null
+++ b/include/linux/netfilter/xt_cpu.h
@@ -0,0 +1,11 @@
+#ifndef _XT_CPU_H
+#define _XT_CPU_H
+
+#include <linux/types.h>
+
+struct xt_cpu_info {
+	__u32	cpu;
+	__u32	invert;
+};
+
+#endif /*_XT_CPU_H*/
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 551b58419df..43288259f4a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -663,6 +663,15 @@ config NETFILTER_XT_MATCH_CONNTRACK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_MATCH_CPU
+	tristate '"cpu" match support'
+	depends on NETFILTER_ADVANCED
+	help
+	  CPU matching allows you to match packets based on the CPU
+	  currently handling the packet.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_DCCP
 	tristate '"dccp" protocol match support'
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 4366c79a668..441050f3111 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -70,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c
new file mode 100644
index 00000000000..b39db8a5cba
--- /dev/null
+++ b/net/netfilter/xt_cpu.c
@@ -0,0 +1,63 @@
+/* Kernel module to match running CPU */
+
+/*
+ * Might be used to distribute connections on several daemons, if
+ * RPS (Remote Packet Steering) is enabled or NIC is multiqueue capable,
+ * each RX queue IRQ affined to one CPU (1:1 mapping)
+ *
+ */
+
+/* (C) 2010 Eric Dumazet
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/xt_cpu.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");
+MODULE_DESCRIPTION("Xtables: CPU match");
+
+static int cpu_mt_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_cpu_info *info = par->matchinfo;
+
+	if (info->invert & ~1)
+		return -EINVAL;
+	return 0;
+}
+
+static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_cpu_info *info = par->matchinfo;
+
+	return (info->cpu == smp_processor_id()) ^ info->invert;
+}
+
+static struct xt_match cpu_mt_reg __read_mostly = {
+	.name       = "cpu",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = cpu_mt_check,
+	.match      = cpu_mt,
+	.matchsize  = sizeof(struct xt_cpu_info),
+	.me         = THIS_MODULE,
+};
+
+static int __init cpu_mt_init(void)
+{
+	return xt_register_match(&cpu_mt_reg);
+}
+
+static void __exit cpu_mt_exit(void)
+{
+	xt_unregister_match(&cpu_mt_reg);
+}
+
+module_init(cpu_mt_init);
+module_exit(cpu_mt_exit);
-- 
cgit v1.2.3-70-g09d2


From 181a51f6e040d0ac006d6adaf4a031ffa440f41c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 20 Jul 2010 22:09:02 +0200
Subject: slow-work: kill it

slow-work doesn't have any user left.  Kill it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David Howells <dhowells@redhat.com>
---
 Documentation/slow-work.txt |  322 -------------
 include/linux/slow-work.h   |  163 -------
 init/Kconfig                |   24 -
 kernel/Makefile             |    2 -
 kernel/slow-work-debugfs.c  |  227 ---------
 kernel/slow-work.c          | 1068 -------------------------------------------
 kernel/slow-work.h          |   72 ---
 kernel/sysctl.c             |    8 -
 8 files changed, 1886 deletions(-)
 delete mode 100644 Documentation/slow-work.txt
 delete mode 100644 include/linux/slow-work.h
 delete mode 100644 kernel/slow-work-debugfs.c
 delete mode 100644 kernel/slow-work.c
 delete mode 100644 kernel/slow-work.h

(limited to 'include')

diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt
deleted file mode 100644
index 9dbf4470c7e..00000000000
--- a/Documentation/slow-work.txt
+++ /dev/null
@@ -1,322 +0,0 @@
-		     ====================================
-		     SLOW WORK ITEM EXECUTION THREAD POOL
-		     ====================================
-
-By: David Howells <dhowells@redhat.com>
-
-The slow work item execution thread pool is a pool of threads for performing
-things that take a relatively long time, such as making mkdir calls.
-Typically, when processing something, these items will spend a lot of time
-blocking a thread on I/O, thus making that thread unavailable for doing other
-work.
-
-The standard workqueue model is unsuitable for this class of work item as that
-limits the owner to a single thread or a single thread per CPU.  For some
-tasks, however, more threads - or fewer - are required.
-
-There is just one pool per system.  It contains no threads unless something
-wants to use it - and that something must register its interest first.  When
-the pool is active, the number of threads it contains is dynamic, varying
-between a maximum and minimum setting, depending on the load.
-
-
-====================
-CLASSES OF WORK ITEM
-====================
-
-This pool support two classes of work items:
-
- (*) Slow work items.
-
- (*) Very slow work items.
-
-The former are expected to finish much quicker than the latter.
-
-An operation of the very slow class may do a batch combination of several
-lookups, mkdirs, and a create for instance.
-
-An operation of the ordinarily slow class may, for example, write stuff or
-expand files, provided the time taken to do so isn't too long.
-
-Operations of both types may sleep during execution, thus tying up the thread
-loaned to it.
-
-A further class of work item is available, based on the slow work item class:
-
- (*) Delayed slow work items.
-
-These are slow work items that have a timer to defer queueing of the item for
-a while.
-
-
-THREAD-TO-CLASS ALLOCATION
---------------------------
-
-Not all the threads in the pool are available to work on very slow work items.
-The number will be between one and one fewer than the number of active threads.
-This is configurable (see the "Pool Configuration" section).
-
-All the threads are available to work on ordinarily slow work items, but a
-percentage of the threads will prefer to work on very slow work items.
-
-The configuration ensures that at least one thread will be available to work on
-very slow work items, and at least one thread will be available that won't work
-on very slow work items at all.
-
-
-=====================
-USING SLOW WORK ITEMS
-=====================
-
-Firstly, a module or subsystem wanting to make use of slow work items must
-register its interest:
-
-	 int ret = slow_work_register_user(struct module *module);
-
-This will return 0 if successful, or a -ve error upon failure.  The module
-pointer should be the module interested in using this facility (almost
-certainly THIS_MODULE).
-
-
-Slow work items may then be set up by:
-
- (1) Declaring a slow_work struct type variable:
-
-	#include <linux/slow-work.h>
-
-	struct slow_work myitem;
-
- (2) Declaring the operations to be used for this item:
-
-	struct slow_work_ops myitem_ops = {
-		.get_ref = myitem_get_ref,
-		.put_ref = myitem_put_ref,
-		.execute = myitem_execute,
-	};
-
-     [*] For a description of the ops, see section "Item Operations".
-
- (3) Initialising the item:
-
-	slow_work_init(&myitem, &myitem_ops);
-
-     or:
-
-	delayed_slow_work_init(&myitem, &myitem_ops);
-
-     or:
-
-	vslow_work_init(&myitem, &myitem_ops);
-
-     depending on its class.
-
-A suitably set up work item can then be enqueued for processing:
-
-	int ret = slow_work_enqueue(&myitem);
-
-This will return a -ve error if the thread pool is unable to gain a reference
-on the item, 0 otherwise, or (for delayed work):
-
-	int ret = delayed_slow_work_enqueue(&myitem, my_jiffy_delay);
-
-
-The items are reference counted, so there ought to be no need for a flush
-operation.  But as the reference counting is optional, means to cancel
-existing work items are also included:
-
-	cancel_slow_work(&myitem);
-	cancel_delayed_slow_work(&myitem);
-
-can be used to cancel pending work.  The above cancel function waits for
-existing work to have been executed (or prevent execution of them, depending
-on timing).
-
-
-When all a module's slow work items have been processed, and the
-module has no further interest in the facility, it should unregister its
-interest:
-
-	slow_work_unregister_user(struct module *module);
-
-The module pointer is used to wait for all outstanding work items for that
-module before completing the unregistration.  This prevents the put_ref() code
-from being taken away before it completes.  module should almost certainly be
-THIS_MODULE.
-
-
-================
-HELPER FUNCTIONS
-================
-
-The slow-work facility provides a function by which it can be determined
-whether or not an item is queued for later execution:
-
-	bool queued = slow_work_is_queued(struct slow_work *work);
-
-If it returns false, then the item is not on the queue (it may be executing
-with a requeue pending).  This can be used to work out whether an item on which
-another depends is on the queue, thus allowing a dependent item to be queued
-after it.
-
-If the above shows an item on which another depends not to be queued, then the
-owner of the dependent item might need to wait.  However, to avoid locking up
-the threads unnecessarily be sleeping in them, it can make sense under some
-circumstances to return the work item to the queue, thus deferring it until
-some other items have had a chance to make use of the yielded thread.
-
-To yield a thread and defer an item, the work function should simply enqueue
-the work item again and return.  However, this doesn't work if there's nothing
-actually on the queue, as the thread just vacated will jump straight back into
-the item's work function, thus busy waiting on a CPU.
-
-Instead, the item should use the thread to wait for the dependency to go away,
-but rather than using schedule() or schedule_timeout() to sleep, it should use
-the following function:
-
-	bool requeue = slow_work_sleep_till_thread_needed(
-			struct slow_work *work,
-			signed long *_timeout);
-
-This will add a second wait and then sleep, such that it will be woken up if
-either something appears on the queue that could usefully make use of the
-thread - and behind which this item can be queued, or if the event the caller
-set up to wait for happens.  True will be returned if something else appeared
-on the queue and this work function should perhaps return, of false if
-something else woke it up.  The timeout is as for schedule_timeout().
-
-For example:
-
-	wq = bit_waitqueue(&my_flags, MY_BIT);
-	init_wait(&wait);
-	requeue = false;
-	do {
-		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
-		if (!test_bit(MY_BIT, &my_flags))
-			break;
-		requeue = slow_work_sleep_till_thread_needed(&my_work,
-							     &timeout);
-	} while (timeout > 0 && !requeue);
-	finish_wait(wq, &wait);
-	if (!test_bit(MY_BIT, &my_flags)
-		goto do_my_thing;
-	if (requeue)
-		return; // to slow_work
-
-
-===============
-ITEM OPERATIONS
-===============
-
-Each work item requires a table of operations of type struct slow_work_ops.
-Only ->execute() is required; the getting and putting of a reference and the
-describing of an item are all optional.
-
- (*) Get a reference on an item:
-
-	int (*get_ref)(struct slow_work *work);
-
-     This allows the thread pool to attempt to pin an item by getting a
-     reference on it.  This function should return 0 if the reference was
-     granted, or a -ve error otherwise.  If an error is returned,
-     slow_work_enqueue() will fail.
-
-     The reference is held whilst the item is queued and whilst it is being
-     executed.  The item may then be requeued with the same reference held, or
-     the reference will be released.
-
- (*) Release a reference on an item:
-
-	void (*put_ref)(struct slow_work *work);
-
-     This allows the thread pool to unpin an item by releasing the reference on
-     it.  The thread pool will not touch the item again once this has been
-     called.
-
- (*) Execute an item:
-
-	void (*execute)(struct slow_work *work);
-
-     This should perform the work required of the item.  It may sleep, it may
-     perform disk I/O and it may wait for locks.
-
- (*) View an item through /proc:
-
-	void (*desc)(struct slow_work *work, struct seq_file *m);
-
-     If supplied, this should print to 'm' a small string describing the work
-     the item is to do.  This should be no more than about 40 characters, and
-     shouldn't include a newline character.
-
-     See the 'Viewing executing and queued items' section below.
-
-
-==================
-POOL CONFIGURATION
-==================
-
-The slow-work thread pool has a number of configurables:
-
- (*) /proc/sys/kernel/slow-work/min-threads
-
-     The minimum number of threads that should be in the pool whilst it is in
-     use.  This may be anywhere between 2 and max-threads.
-
- (*) /proc/sys/kernel/slow-work/max-threads
-
-     The maximum number of threads that should in the pool.  This may be
-     anywhere between min-threads and 255 or NR_CPUS * 2, whichever is greater.
-
- (*) /proc/sys/kernel/slow-work/vslow-percentage
-
-     The percentage of active threads in the pool that may be used to execute
-     very slow work items.  This may be between 1 and 99.  The resultant number
-     is bounded to between 1 and one fewer than the number of active threads.
-     This ensures there is always at least one thread that can process very
-     slow work items, and always at least one thread that won't.
-
-
-==================================
-VIEWING EXECUTING AND QUEUED ITEMS
-==================================
-
-If CONFIG_SLOW_WORK_DEBUG is enabled, a debugfs file is made available:
-
-	/sys/kernel/debug/slow_work/runqueue
-
-through which the list of work items being executed and the queues of items to
-be executed may be viewed.  The owner of a work item is given the chance to
-add some information of its own.
-
-The contents look something like the following:
-
-    THR PID   ITEM ADDR        FL MARK  DESC
-    === ===== ================ == ===== ==========
-      0  3005 ffff880023f52348  a 952ms FSC: OBJ17d3: LOOK
-      1  3006 ffff880024e33668  2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2
-      2  3165 ffff8800296dd180  a 424ms FSC: OBJ17e4: LOOK
-      3  4089 ffff8800262c8d78  a 212ms FSC: OBJ17ea: CRTN
-      4  4090 ffff88002792bed8  2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2
-      5  4092 ffff88002a0ef308  2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2
-      6  4094 ffff88002abaf4b8  2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2
-      7  4095 ffff88002bb188e0  a 388ms FSC: OBJ17e9: CRTN
-    vsq     - ffff880023d99668  1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2
-    vsq     - ffff8800295d1740  1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2
-    vsq     - ffff880025ba3308  1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2
-    vsq     - ffff880024ec83e0  1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2
-    vsq     - ffff880026618e00  1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2
-    vsq     - ffff880025a2a4b8  1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2
-    vsq     - ffff880023cbe6d8  9 212ms FSC: OBJ17eb: LOOK
-    vsq     - ffff880024d37590  9 212ms FSC: OBJ17ec: LOOK
-    vsq     - ffff880027746cb0  9 212ms FSC: OBJ17ed: LOOK
-    vsq     - ffff880024d37ae8  9 212ms FSC: OBJ17ee: LOOK
-    vsq     - ffff880024d37cb0  9 212ms FSC: OBJ17ef: LOOK
-    vsq     - ffff880025036550  9 212ms FSC: OBJ17f0: LOOK
-    vsq     - ffff8800250368e0  9 212ms FSC: OBJ17f1: LOOK
-    vsq     - ffff880025036aa8  9 212ms FSC: OBJ17f2: LOOK
-
-In the 'THR' column, executing items show the thread they're occupying and
-queued threads indicate which queue they're on.  'PID' shows the process ID of
-a slow-work thread that's executing something.  'FL' shows the work item flags.
-'MARK' indicates how long since an item was queued or began executing.  Lastly,
-the 'DESC' column permits the owner of an item to give some information.
-
diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h
deleted file mode 100644
index 13337bf6c3f..00000000000
--- a/include/linux/slow-work.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/* Worker thread pool for slow items, such as filesystem lookups or mkdirs
- *
- * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- *
- * See Documentation/slow-work.txt
- */
-
-#ifndef _LINUX_SLOW_WORK_H
-#define _LINUX_SLOW_WORK_H
-
-#ifdef CONFIG_SLOW_WORK
-
-#include <linux/sysctl.h>
-#include <linux/timer.h>
-
-struct slow_work;
-#ifdef CONFIG_SLOW_WORK_DEBUG
-struct seq_file;
-#endif
-
-/*
- * The operations used to support slow work items
- */
-struct slow_work_ops {
-	/* owner */
-	struct module *owner;
-
-	/* get a ref on a work item
-	 * - return 0 if successful, -ve if not
-	 */
-	int (*get_ref)(struct slow_work *work);
-
-	/* discard a ref to a work item */
-	void (*put_ref)(struct slow_work *work);
-
-	/* execute a work item */
-	void (*execute)(struct slow_work *work);
-
-#ifdef CONFIG_SLOW_WORK_DEBUG
-	/* describe a work item for debugfs */
-	void (*desc)(struct slow_work *work, struct seq_file *m);
-#endif
-};
-
-/*
- * A slow work item
- * - A reference is held on the parent object by the thread pool when it is
- *   queued
- */
-struct slow_work {
-	struct module		*owner;	/* the owning module */
-	unsigned long		flags;
-#define SLOW_WORK_PENDING	0	/* item pending (further) execution */
-#define SLOW_WORK_EXECUTING	1	/* item currently executing */
-#define SLOW_WORK_ENQ_DEFERRED	2	/* item enqueue deferred */
-#define SLOW_WORK_VERY_SLOW	3	/* item is very slow */
-#define SLOW_WORK_CANCELLING	4	/* item is being cancelled, don't enqueue */
-#define SLOW_WORK_DELAYED	5	/* item is struct delayed_slow_work with active timer */
-	const struct slow_work_ops *ops; /* operations table for this item */
-	struct list_head	link;	/* link in queue */
-#ifdef CONFIG_SLOW_WORK_DEBUG
-	struct timespec		mark;	/* jiffies at which queued or exec begun */
-#endif
-};
-
-struct delayed_slow_work {
-	struct slow_work	work;
-	struct timer_list	timer;
-};
-
-/**
- * slow_work_init - Initialise a slow work item
- * @work: The work item to initialise
- * @ops: The operations to use to handle the slow work item
- *
- * Initialise a slow work item.
- */
-static inline void slow_work_init(struct slow_work *work,
-				  const struct slow_work_ops *ops)
-{
-	work->flags = 0;
-	work->ops = ops;
-	INIT_LIST_HEAD(&work->link);
-}
-
-/**
- * slow_work_init - Initialise a delayed slow work item
- * @work: The work item to initialise
- * @ops: The operations to use to handle the slow work item
- *
- * Initialise a delayed slow work item.
- */
-static inline void delayed_slow_work_init(struct delayed_slow_work *dwork,
-					  const struct slow_work_ops *ops)
-{
-	init_timer(&dwork->timer);
-	slow_work_init(&dwork->work, ops);
-}
-
-/**
- * vslow_work_init - Initialise a very slow work item
- * @work: The work item to initialise
- * @ops: The operations to use to handle the slow work item
- *
- * Initialise a very slow work item.  This item will be restricted such that
- * only a certain number of the pool threads will be able to execute items of
- * this type.
- */
-static inline void vslow_work_init(struct slow_work *work,
-				   const struct slow_work_ops *ops)
-{
-	work->flags = 1 << SLOW_WORK_VERY_SLOW;
-	work->ops = ops;
-	INIT_LIST_HEAD(&work->link);
-}
-
-/**
- * slow_work_is_queued - Determine if a slow work item is on the work queue
- * work: The work item to test
- *
- * Determine if the specified slow-work item is on the work queue.  This
- * returns true if it is actually on the queue.
- *
- * If the item is executing and has been marked for requeue when execution
- * finishes, then false will be returned.
- *
- * Anyone wishing to wait for completion of execution can wait on the
- * SLOW_WORK_EXECUTING bit.
- */
-static inline bool slow_work_is_queued(struct slow_work *work)
-{
-	unsigned long flags = work->flags;
-	return flags & SLOW_WORK_PENDING && !(flags & SLOW_WORK_EXECUTING);
-}
-
-extern int slow_work_enqueue(struct slow_work *work);
-extern void slow_work_cancel(struct slow_work *work);
-extern int slow_work_register_user(struct module *owner);
-extern void slow_work_unregister_user(struct module *owner);
-
-extern int delayed_slow_work_enqueue(struct delayed_slow_work *dwork,
-				     unsigned long delay);
-
-static inline void delayed_slow_work_cancel(struct delayed_slow_work *dwork)
-{
-	slow_work_cancel(&dwork->work);
-}
-
-extern bool slow_work_sleep_till_thread_needed(struct slow_work *work,
-					       signed long *_timeout);
-
-#ifdef CONFIG_SYSCTL
-extern ctl_table slow_work_sysctls[];
-#endif
-
-#endif /* CONFIG_SLOW_WORK */
-#endif /* _LINUX_SLOW_WORK_H */
diff --git a/init/Kconfig b/init/Kconfig
index 5cff9a980c3..cb64c5889e0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1143,30 +1143,6 @@ config TRACEPOINTS
 
 source "arch/Kconfig"
 
-config SLOW_WORK
-	default n
-	bool
-	help
-	  The slow work thread pool provides a number of dynamically allocated
-	  threads that can be used by the kernel to perform operations that
-	  take a relatively long time.
-
-	  An example of this would be CacheFiles doing a path lookup followed
-	  by a series of mkdirs and a create call, all of which have to touch
-	  disk.
-
-	  See Documentation/slow-work.txt.
-
-config SLOW_WORK_DEBUG
-	bool "Slow work debugging through debugfs"
-	default n
-	depends on SLOW_WORK && DEBUG_FS
-	help
-	  Display the contents of the slow work run queue through debugfs,
-	  including items currently executing.
-
-	  See Documentation/slow-work.txt.
-
 endmenu		# General setup
 
 config HAVE_GENERIC_DMA_COHERENT
diff --git a/kernel/Makefile b/kernel/Makefile
index 057472fbc27..2484ac39b2e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -99,8 +99,6 @@ obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
-obj-$(CONFIG_SLOW_WORK) += slow-work.o
-obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
diff --git a/kernel/slow-work-debugfs.c b/kernel/slow-work-debugfs.c
deleted file mode 100644
index e45c4364529..00000000000
--- a/kernel/slow-work-debugfs.c
+++ /dev/null
@@ -1,227 +0,0 @@
-/* Slow work debugging
- *
- * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/slow-work.h>
-#include <linux/fs.h>
-#include <linux/time.h>
-#include <linux/seq_file.h>
-#include "slow-work.h"
-
-#define ITERATOR_SHIFT		(BITS_PER_LONG - 4)
-#define ITERATOR_SELECTOR	(0xfUL << ITERATOR_SHIFT)
-#define ITERATOR_COUNTER	(~ITERATOR_SELECTOR)
-
-void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m)
-{
-	seq_puts(m, "Slow-work: New thread");
-}
-
-/*
- * Render the time mark field on a work item into a 5-char time with units plus
- * a space
- */
-static void slow_work_print_mark(struct seq_file *m, struct slow_work *work)
-{
-	struct timespec now, diff;
-
-	now = CURRENT_TIME;
-	diff = timespec_sub(now, work->mark);
-
-	if (diff.tv_sec < 0)
-		seq_puts(m, "  -ve ");
-	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000)
-		seq_printf(m, "%3luns ", diff.tv_nsec);
-	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000)
-		seq_printf(m, "%3luus ", diff.tv_nsec / 1000);
-	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000)
-		seq_printf(m, "%3lums ", diff.tv_nsec / 1000000);
-	else if (diff.tv_sec <= 1)
-		seq_puts(m, "   1s ");
-	else if (diff.tv_sec < 60)
-		seq_printf(m, "%4lus ", diff.tv_sec);
-	else if (diff.tv_sec < 60 * 60)
-		seq_printf(m, "%4lum ", diff.tv_sec / 60);
-	else if (diff.tv_sec < 60 * 60 * 24)
-		seq_printf(m, "%4luh ", diff.tv_sec / 3600);
-	else
-		seq_puts(m, "exces ");
-}
-
-/*
- * Describe a slow work item for debugfs
- */
-static int slow_work_runqueue_show(struct seq_file *m, void *v)
-{
-	struct slow_work *work;
-	struct list_head *p = v;
-	unsigned long id;
-
-	switch ((unsigned long) v) {
-	case 1:
-		seq_puts(m, "THR PID   ITEM ADDR        FL MARK  DESC\n");
-		return 0;
-	case 2:
-		seq_puts(m, "=== ===== ================ == ===== ==========\n");
-		return 0;
-
-	case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1:
-		id = (unsigned long) v - 3;
-
-		read_lock(&slow_work_execs_lock);
-		work = slow_work_execs[id];
-		if (work) {
-			smp_read_barrier_depends();
-
-			seq_printf(m, "%3lu %5d %16p %2lx ",
-				   id, slow_work_pids[id], work, work->flags);
-			slow_work_print_mark(m, work);
-
-			if (work->ops->desc)
-				work->ops->desc(work, m);
-			seq_putc(m, '\n');
-		}
-		read_unlock(&slow_work_execs_lock);
-		return 0;
-
-	default:
-		work = list_entry(p, struct slow_work, link);
-		seq_printf(m, "%3s     - %16p %2lx ",
-			   work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq",
-			   work, work->flags);
-		slow_work_print_mark(m, work);
-
-		if (work->ops->desc)
-			work->ops->desc(work, m);
-		seq_putc(m, '\n');
-		return 0;
-	}
-}
-
-/*
- * map the iterator to a work item
- */
-static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos)
-{
-	struct list_head *p;
-	unsigned long count, id;
-
-	switch (*_pos >> ITERATOR_SHIFT) {
-	case 0x0:
-		if (*_pos == 0)
-			*_pos = 1;
-		if (*_pos < 3)
-			return (void *)(unsigned long) *_pos;
-		if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT)
-			for (id = *_pos - 3;
-			     id < SLOW_WORK_THREAD_LIMIT;
-			     id++, (*_pos)++)
-				if (slow_work_execs[id])
-					return (void *)(unsigned long) *_pos;
-		*_pos = 0x1UL << ITERATOR_SHIFT;
-
-	case 0x1:
-		count = *_pos & ITERATOR_COUNTER;
-		list_for_each(p, &slow_work_queue) {
-			if (count == 0)
-				return p;
-			count--;
-		}
-		*_pos = 0x2UL << ITERATOR_SHIFT;
-
-	case 0x2:
-		count = *_pos & ITERATOR_COUNTER;
-		list_for_each(p, &vslow_work_queue) {
-			if (count == 0)
-				return p;
-			count--;
-		}
-		*_pos = 0x3UL << ITERATOR_SHIFT;
-
-	default:
-		return NULL;
-	}
-}
-
-/*
- * set up the iterator to start reading from the first line
- */
-static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos)
-{
-	spin_lock_irq(&slow_work_queue_lock);
-	return slow_work_runqueue_index(m, _pos);
-}
-
-/*
- * move to the next line
- */
-static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos)
-{
-	struct list_head *p = v;
-	unsigned long selector = *_pos >> ITERATOR_SHIFT;
-
-	(*_pos)++;
-	switch (selector) {
-	case 0x0:
-		return slow_work_runqueue_index(m, _pos);
-
-	case 0x1:
-		if (*_pos >> ITERATOR_SHIFT == 0x1) {
-			p = p->next;
-			if (p != &slow_work_queue)
-				return p;
-		}
-		*_pos = 0x2UL << ITERATOR_SHIFT;
-		p = &vslow_work_queue;
-
-	case 0x2:
-		if (*_pos >> ITERATOR_SHIFT == 0x2) {
-			p = p->next;
-			if (p != &vslow_work_queue)
-				return p;
-		}
-		*_pos = 0x3UL << ITERATOR_SHIFT;
-
-	default:
-		return NULL;
-	}
-}
-
-/*
- * clean up after reading
- */
-static void slow_work_runqueue_stop(struct seq_file *m, void *v)
-{
-	spin_unlock_irq(&slow_work_queue_lock);
-}
-
-static const struct seq_operations slow_work_runqueue_ops = {
-	.start		= slow_work_runqueue_start,
-	.stop		= slow_work_runqueue_stop,
-	.next		= slow_work_runqueue_next,
-	.show		= slow_work_runqueue_show,
-};
-
-/*
- * open "/sys/kernel/debug/slow_work/runqueue" to list queue contents
- */
-static int slow_work_runqueue_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &slow_work_runqueue_ops);
-}
-
-const struct file_operations slow_work_runqueue_fops = {
-	.owner		= THIS_MODULE,
-	.open		= slow_work_runqueue_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
deleted file mode 100644
index 7d3f4fa9ef4..00000000000
--- a/kernel/slow-work.c
+++ /dev/null
@@ -1,1068 +0,0 @@
-/* Worker thread pool for slow items, such as filesystem lookups or mkdirs
- *
- * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- *
- * See Documentation/slow-work.txt
- */
-
-#include <linux/module.h>
-#include <linux/slow-work.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-#include <linux/wait.h>
-#include <linux/debugfs.h>
-#include "slow-work.h"
-
-static void slow_work_cull_timeout(unsigned long);
-static void slow_work_oom_timeout(unsigned long);
-
-#ifdef CONFIG_SYSCTL
-static int slow_work_min_threads_sysctl(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
-
-static int slow_work_max_threads_sysctl(struct ctl_table *, int ,
-					void __user *, size_t *, loff_t *);
-#endif
-
-/*
- * The pool of threads has at least min threads in it as long as someone is
- * using the facility, and may have as many as max.
- *
- * A portion of the pool may be processing very slow operations.
- */
-static unsigned slow_work_min_threads = 2;
-static unsigned slow_work_max_threads = 4;
-static unsigned vslow_work_proportion = 50; /* % of threads that may process
-					     * very slow work */
-
-#ifdef CONFIG_SYSCTL
-static const int slow_work_min_min_threads = 2;
-static int slow_work_max_max_threads = SLOW_WORK_THREAD_LIMIT;
-static const int slow_work_min_vslow = 1;
-static const int slow_work_max_vslow = 99;
-
-ctl_table slow_work_sysctls[] = {
-	{
-		.procname	= "min-threads",
-		.data		= &slow_work_min_threads,
-		.maxlen		= sizeof(unsigned),
-		.mode		= 0644,
-		.proc_handler	= slow_work_min_threads_sysctl,
-		.extra1		= (void *) &slow_work_min_min_threads,
-		.extra2		= &slow_work_max_threads,
-	},
-	{
-		.procname	= "max-threads",
-		.data		= &slow_work_max_threads,
-		.maxlen		= sizeof(unsigned),
-		.mode		= 0644,
-		.proc_handler	= slow_work_max_threads_sysctl,
-		.extra1		= &slow_work_min_threads,
-		.extra2		= (void *) &slow_work_max_max_threads,
-	},
-	{
-		.procname	= "vslow-percentage",
-		.data		= &vslow_work_proportion,
-		.maxlen		= sizeof(unsigned),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= (void *) &slow_work_min_vslow,
-		.extra2		= (void *) &slow_work_max_vslow,
-	},
-	{}
-};
-#endif
-
-/*
- * The active state of the thread pool
- */
-static atomic_t slow_work_thread_count;
-static atomic_t vslow_work_executing_count;
-
-static bool slow_work_may_not_start_new_thread;
-static bool slow_work_cull; /* cull a thread due to lack of activity */
-static DEFINE_TIMER(slow_work_cull_timer, slow_work_cull_timeout, 0, 0);
-static DEFINE_TIMER(slow_work_oom_timer, slow_work_oom_timeout, 0, 0);
-static struct slow_work slow_work_new_thread; /* new thread starter */
-
-/*
- * slow work ID allocation (use slow_work_queue_lock)
- */
-static DECLARE_BITMAP(slow_work_ids, SLOW_WORK_THREAD_LIMIT);
-
-/*
- * Unregistration tracking to prevent put_ref() from disappearing during module
- * unload
- */
-#ifdef CONFIG_MODULES
-static struct module *slow_work_thread_processing[SLOW_WORK_THREAD_LIMIT];
-static struct module *slow_work_unreg_module;
-static struct slow_work *slow_work_unreg_work_item;
-static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq);
-static DEFINE_MUTEX(slow_work_unreg_sync_lock);
-
-static void slow_work_set_thread_processing(int id, struct slow_work *work)
-{
-	if (work)
-		slow_work_thread_processing[id] = work->owner;
-}
-static void slow_work_done_thread_processing(int id, struct slow_work *work)
-{
-	struct module *module = slow_work_thread_processing[id];
-
-	slow_work_thread_processing[id] = NULL;
-	smp_mb();
-	if (slow_work_unreg_work_item == work ||
-	    slow_work_unreg_module == module)
-		wake_up_all(&slow_work_unreg_wq);
-}
-static void slow_work_clear_thread_processing(int id)
-{
-	slow_work_thread_processing[id] = NULL;
-}
-#else
-static void slow_work_set_thread_processing(int id, struct slow_work *work) {}
-static void slow_work_done_thread_processing(int id, struct slow_work *work) {}
-static void slow_work_clear_thread_processing(int id) {}
-#endif
-
-/*
- * Data for tracking currently executing items for indication through /proc
- */
-#ifdef CONFIG_SLOW_WORK_DEBUG
-struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT];
-pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT];
-DEFINE_RWLOCK(slow_work_execs_lock);
-#endif
-
-/*
- * The queues of work items and the lock governing access to them.  These are
- * shared between all the CPUs.  It doesn't make sense to have per-CPU queues
- * as the number of threads bears no relation to the number of CPUs.
- *
- * There are two queues of work items: one for slow work items, and one for
- * very slow work items.
- */
-LIST_HEAD(slow_work_queue);
-LIST_HEAD(vslow_work_queue);
-DEFINE_SPINLOCK(slow_work_queue_lock);
-
-/*
- * The following are two wait queues that get pinged when a work item is placed
- * on an empty queue.  These allow work items that are hogging a thread by
- * sleeping in a way that could be deferred to yield their thread and enqueue
- * themselves.
- */
-static DECLARE_WAIT_QUEUE_HEAD(slow_work_queue_waits_for_occupation);
-static DECLARE_WAIT_QUEUE_HEAD(vslow_work_queue_waits_for_occupation);
-
-/*
- * The thread controls.  A variable used to signal to the threads that they
- * should exit when the queue is empty, a waitqueue used by the threads to wait
- * for signals, and a completion set by the last thread to exit.
- */
-static bool slow_work_threads_should_exit;
-static DECLARE_WAIT_QUEUE_HEAD(slow_work_thread_wq);
-static DECLARE_COMPLETION(slow_work_last_thread_exited);
-
-/*
- * The number of users of the thread pool and its lock.  Whilst this is zero we
- * have no threads hanging around, and when this reaches zero, we wait for all
- * active or queued work items to complete and kill all the threads we do have.
- */
-static int slow_work_user_count;
-static DEFINE_MUTEX(slow_work_user_lock);
-
-static inline int slow_work_get_ref(struct slow_work *work)
-{
-	if (work->ops->get_ref)
-		return work->ops->get_ref(work);
-
-	return 0;
-}
-
-static inline void slow_work_put_ref(struct slow_work *work)
-{
-	if (work->ops->put_ref)
-		work->ops->put_ref(work);
-}
-
-/*
- * Calculate the maximum number of active threads in the pool that are
- * permitted to process very slow work items.
- *
- * The answer is rounded up to at least 1, but may not equal or exceed the
- * maximum number of the threads in the pool.  This means we always have at
- * least one thread that can process slow work items, and we always have at
- * least one thread that won't get tied up doing so.
- */
-static unsigned slow_work_calc_vsmax(void)
-{
-	unsigned vsmax;
-
-	vsmax = atomic_read(&slow_work_thread_count) * vslow_work_proportion;
-	vsmax /= 100;
-	vsmax = max(vsmax, 1U);
-	return min(vsmax, slow_work_max_threads - 1);
-}
-
-/*
- * Attempt to execute stuff queued on a slow thread.  Return true if we managed
- * it, false if there was nothing to do.
- */
-static noinline bool slow_work_execute(int id)
-{
-	struct slow_work *work = NULL;
-	unsigned vsmax;
-	bool very_slow;
-
-	vsmax = slow_work_calc_vsmax();
-
-	/* see if we can schedule a new thread to be started if we're not
-	 * keeping up with the work */
-	if (!waitqueue_active(&slow_work_thread_wq) &&
-	    (!list_empty(&slow_work_queue) || !list_empty(&vslow_work_queue)) &&
-	    atomic_read(&slow_work_thread_count) < slow_work_max_threads &&
-	    !slow_work_may_not_start_new_thread)
-		slow_work_enqueue(&slow_work_new_thread);
-
-	/* find something to execute */
-	spin_lock_irq(&slow_work_queue_lock);
-	if (!list_empty(&vslow_work_queue) &&
-	    atomic_read(&vslow_work_executing_count) < vsmax) {
-		work = list_entry(vslow_work_queue.next,
-				  struct slow_work, link);
-		if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags))
-			BUG();
-		list_del_init(&work->link);
-		atomic_inc(&vslow_work_executing_count);
-		very_slow = true;
-	} else if (!list_empty(&slow_work_queue)) {
-		work = list_entry(slow_work_queue.next,
-				  struct slow_work, link);
-		if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags))
-			BUG();
-		list_del_init(&work->link);
-		very_slow = false;
-	} else {
-		very_slow = false; /* avoid the compiler warning */
-	}
-
-	slow_work_set_thread_processing(id, work);
-	if (work) {
-		slow_work_mark_time(work);
-		slow_work_begin_exec(id, work);
-	}
-
-	spin_unlock_irq(&slow_work_queue_lock);
-
-	if (!work)
-		return false;
-
-	if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags))
-		BUG();
-
-	/* don't execute if the work is in the process of being cancelled */
-	if (!test_bit(SLOW_WORK_CANCELLING, &work->flags))
-		work->ops->execute(work);
-
-	if (very_slow)
-		atomic_dec(&vslow_work_executing_count);
-	clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags);
-
-	/* wake up anyone waiting for this work to be complete */
-	wake_up_bit(&work->flags, SLOW_WORK_EXECUTING);
-
-	slow_work_end_exec(id, work);
-
-	/* if someone tried to enqueue the item whilst we were executing it,
-	 * then it'll be left unenqueued to avoid multiple threads trying to
-	 * execute it simultaneously
-	 *
-	 * there is, however, a race between us testing the pending flag and
-	 * getting the spinlock, and between the enqueuer setting the pending
-	 * flag and getting the spinlock, so we use a deferral bit to tell us
-	 * if the enqueuer got there first
-	 */
-	if (test_bit(SLOW_WORK_PENDING, &work->flags)) {
-		spin_lock_irq(&slow_work_queue_lock);
-
-		if (!test_bit(SLOW_WORK_EXECUTING, &work->flags) &&
-		    test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags))
-			goto auto_requeue;
-
-		spin_unlock_irq(&slow_work_queue_lock);
-	}
-
-	/* sort out the race between module unloading and put_ref() */
-	slow_work_put_ref(work);
-	slow_work_done_thread_processing(id, work);
-
-	return true;
-
-auto_requeue:
-	/* we must complete the enqueue operation
-	 * - we transfer our ref on the item back to the appropriate queue
-	 * - don't wake another thread up as we're awake already
-	 */
-	slow_work_mark_time(work);
-	if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
-		list_add_tail(&work->link, &vslow_work_queue);
-	else
-		list_add_tail(&work->link, &slow_work_queue);
-	spin_unlock_irq(&slow_work_queue_lock);
-	slow_work_clear_thread_processing(id);
-	return true;
-}
-
-/**
- * slow_work_sleep_till_thread_needed - Sleep till thread needed by other work
- * work: The work item under execution that wants to sleep
- * _timeout: Scheduler sleep timeout
- *
- * Allow a requeueable work item to sleep on a slow-work processor thread until
- * that thread is needed to do some other work or the sleep is interrupted by
- * some other event.
- *
- * The caller must set up a wake up event before calling this and must have set
- * the appropriate sleep mode (such as TASK_UNINTERRUPTIBLE) and tested its own
- * condition before calling this function as no test is made here.
- *
- * False is returned if there is nothing on the queue; true is returned if the
- * work item should be requeued
- */
-bool slow_work_sleep_till_thread_needed(struct slow_work *work,
-					signed long *_timeout)
-{
-	wait_queue_head_t *wfo_wq;
-	struct list_head *queue;
-
-	DEFINE_WAIT(wait);
-
-	if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) {
-		wfo_wq = &vslow_work_queue_waits_for_occupation;
-		queue = &vslow_work_queue;
-	} else {
-		wfo_wq = &slow_work_queue_waits_for_occupation;
-		queue = &slow_work_queue;
-	}
-
-	if (!list_empty(queue))
-		return true;
-
-	add_wait_queue_exclusive(wfo_wq, &wait);
-	if (list_empty(queue))
-		*_timeout = schedule_timeout(*_timeout);
-	finish_wait(wfo_wq, &wait);
-
-	return !list_empty(queue);
-}
-EXPORT_SYMBOL(slow_work_sleep_till_thread_needed);
-
-/**
- * slow_work_enqueue - Schedule a slow work item for processing
- * @work: The work item to queue
- *
- * Schedule a slow work item for processing.  If the item is already undergoing
- * execution, this guarantees not to re-enter the execution routine until the
- * first execution finishes.
- *
- * The item is pinned by this function as it retains a reference to it, managed
- * through the item operations.  The item is unpinned once it has been
- * executed.
- *
- * An item may hog the thread that is running it for a relatively large amount
- * of time, sufficient, for example, to perform several lookup, mkdir, create
- * and setxattr operations.  It may sleep on I/O and may sleep to obtain locks.
- *
- * Conversely, if a number of items are awaiting processing, it may take some
- * time before any given item is given attention.  The number of threads in the
- * pool may be increased to deal with demand, but only up to a limit.
- *
- * If SLOW_WORK_VERY_SLOW is set on the work item, then it will be placed in
- * the very slow queue, from which only a portion of the threads will be
- * allowed to pick items to execute.  This ensures that very slow items won't
- * overly block ones that are just ordinarily slow.
- *
- * Returns 0 if successful, -EAGAIN if not (or -ECANCELED if cancelled work is
- * attempted queued)
- */
-int slow_work_enqueue(struct slow_work *work)
-{
-	wait_queue_head_t *wfo_wq;
-	struct list_head *queue;
-	unsigned long flags;
-	int ret;
-
-	if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
-		return -ECANCELED;
-
-	BUG_ON(slow_work_user_count <= 0);
-	BUG_ON(!work);
-	BUG_ON(!work->ops);
-
-	/* when honouring an enqueue request, we only promise that we will run
-	 * the work function in the future; we do not promise to run it once
-	 * per enqueue request
-	 *
-	 * we use the PENDING bit to merge together repeat requests without
-	 * having to disable IRQs and take the spinlock, whilst still
-	 * maintaining our promise
-	 */
-	if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) {
-		if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) {
-			wfo_wq = &vslow_work_queue_waits_for_occupation;
-			queue = &vslow_work_queue;
-		} else {
-			wfo_wq = &slow_work_queue_waits_for_occupation;
-			queue = &slow_work_queue;
-		}
-
-		spin_lock_irqsave(&slow_work_queue_lock, flags);
-
-		if (unlikely(test_bit(SLOW_WORK_CANCELLING, &work->flags)))
-			goto cancelled;
-
-		/* we promise that we will not attempt to execute the work
-		 * function in more than one thread simultaneously
-		 *
-		 * this, however, leaves us with a problem if we're asked to
-		 * enqueue the work whilst someone is executing the work
-		 * function as simply queueing the work immediately means that
-		 * another thread may try executing it whilst it is already
-		 * under execution
-		 *
-		 * to deal with this, we set the ENQ_DEFERRED bit instead of
-		 * enqueueing, and the thread currently executing the work
-		 * function will enqueue the work item when the work function
-		 * returns and it has cleared the EXECUTING bit
-		 */
-		if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {
-			set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
-		} else {
-			ret = slow_work_get_ref(work);
-			if (ret < 0)
-				goto failed;
-			slow_work_mark_time(work);
-			list_add_tail(&work->link, queue);
-			wake_up(&slow_work_thread_wq);
-
-			/* if someone who could be requeued is sleeping on a
-			 * thread, then ask them to yield their thread */
-			if (work->link.prev == queue)
-				wake_up(wfo_wq);
-		}
-
-		spin_unlock_irqrestore(&slow_work_queue_lock, flags);
-	}
-	return 0;
-
-cancelled:
-	ret = -ECANCELED;
-failed:
-	spin_unlock_irqrestore(&slow_work_queue_lock, flags);
-	return ret;
-}
-EXPORT_SYMBOL(slow_work_enqueue);
-
-static int slow_work_wait(void *word)
-{
-	schedule();
-	return 0;
-}
-
-/**
- * slow_work_cancel - Cancel a slow work item
- * @work: The work item to cancel
- *
- * This function will cancel a previously enqueued work item. If we cannot
- * cancel the work item, it is guarenteed to have run when this function
- * returns.
- */
-void slow_work_cancel(struct slow_work *work)
-{
-	bool wait = true, put = false;
-
-	set_bit(SLOW_WORK_CANCELLING, &work->flags);
-	smp_mb();
-
-	/* if the work item is a delayed work item with an active timer, we
-	 * need to wait for the timer to finish _before_ getting the spinlock,
-	 * lest we deadlock against the timer routine
-	 *
-	 * the timer routine will leave DELAYED set if it notices the
-	 * CANCELLING flag in time
-	 */
-	if (test_bit(SLOW_WORK_DELAYED, &work->flags)) {
-		struct delayed_slow_work *dwork =
-			container_of(work, struct delayed_slow_work, work);
-		del_timer_sync(&dwork->timer);
-	}
-
-	spin_lock_irq(&slow_work_queue_lock);
-
-	if (test_bit(SLOW_WORK_DELAYED, &work->flags)) {
-		/* the timer routine aborted or never happened, so we are left
-		 * holding the timer's reference on the item and should just
-		 * drop the pending flag and wait for any ongoing execution to
-		 * finish */
-		struct delayed_slow_work *dwork =
-			container_of(work, struct delayed_slow_work, work);
-
-		BUG_ON(timer_pending(&dwork->timer));
-		BUG_ON(!list_empty(&work->link));
-
-		clear_bit(SLOW_WORK_DELAYED, &work->flags);
-		put = true;
-		clear_bit(SLOW_WORK_PENDING, &work->flags);
-
-	} else if (test_bit(SLOW_WORK_PENDING, &work->flags) &&
-		   !list_empty(&work->link)) {
-		/* the link in the pending queue holds a reference on the item
-		 * that we will need to release */
-		list_del_init(&work->link);
-		wait = false;
-		put = true;
-		clear_bit(SLOW_WORK_PENDING, &work->flags);
-
-	} else if (test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags)) {
-		/* the executor is holding our only reference on the item, so
-		 * we merely need to wait for it to finish executing */
-		clear_bit(SLOW_WORK_PENDING, &work->flags);
-	}
-
-	spin_unlock_irq(&slow_work_queue_lock);
-
-	/* the EXECUTING flag is set by the executor whilst the spinlock is set
-	 * and before the item is dequeued - so assuming the above doesn't
-	 * actually dequeue it, simply waiting for the EXECUTING flag to be
-	 * released here should be sufficient */
-	if (wait)
-		wait_on_bit(&work->flags, SLOW_WORK_EXECUTING, slow_work_wait,
-			    TASK_UNINTERRUPTIBLE);
-
-	clear_bit(SLOW_WORK_CANCELLING, &work->flags);
-	if (put)
-		slow_work_put_ref(work);
-}
-EXPORT_SYMBOL(slow_work_cancel);
-
-/*
- * Handle expiry of the delay timer, indicating that a delayed slow work item
- * should now be queued if not cancelled
- */
-static void delayed_slow_work_timer(unsigned long data)
-{
-	wait_queue_head_t *wfo_wq;
-	struct list_head *queue;
-	struct slow_work *work = (struct slow_work *) data;
-	unsigned long flags;
-	bool queued = false, put = false, first = false;
-
-	if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) {
-		wfo_wq = &vslow_work_queue_waits_for_occupation;
-		queue = &vslow_work_queue;
-	} else {
-		wfo_wq = &slow_work_queue_waits_for_occupation;
-		queue = &slow_work_queue;
-	}
-
-	spin_lock_irqsave(&slow_work_queue_lock, flags);
-	if (likely(!test_bit(SLOW_WORK_CANCELLING, &work->flags))) {
-		clear_bit(SLOW_WORK_DELAYED, &work->flags);
-
-		if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {
-			/* we discard the reference the timer was holding in
-			 * favour of the one the executor holds */
-			set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
-			put = true;
-		} else {
-			slow_work_mark_time(work);
-			list_add_tail(&work->link, queue);
-			queued = true;
-			if (work->link.prev == queue)
-				first = true;
-		}
-	}
-
-	spin_unlock_irqrestore(&slow_work_queue_lock, flags);
-	if (put)
-		slow_work_put_ref(work);
-	if (first)
-		wake_up(wfo_wq);
-	if (queued)
-		wake_up(&slow_work_thread_wq);
-}
-
-/**
- * delayed_slow_work_enqueue - Schedule a delayed slow work item for processing
- * @dwork: The delayed work item to queue
- * @delay: When to start executing the work, in jiffies from now
- *
- * This is similar to slow_work_enqueue(), but it adds a delay before the work
- * is actually queued for processing.
- *
- * The item can have delayed processing requested on it whilst it is being
- * executed.  The delay will begin immediately, and if it expires before the
- * item finishes executing, the item will be placed back on the queue when it
- * has done executing.
- */
-int delayed_slow_work_enqueue(struct delayed_slow_work *dwork,
-			      unsigned long delay)
-{
-	struct slow_work *work = &dwork->work;
-	unsigned long flags;
-	int ret;
-
-	if (delay == 0)
-		return slow_work_enqueue(&dwork->work);
-
-	BUG_ON(slow_work_user_count <= 0);
-	BUG_ON(!work);
-	BUG_ON(!work->ops);
-
-	if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
-		return -ECANCELED;
-
-	if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) {
-		spin_lock_irqsave(&slow_work_queue_lock, flags);
-
-		if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
-			goto cancelled;
-
-		/* the timer holds a reference whilst it is pending */
-		ret = slow_work_get_ref(work);
-		if (ret < 0)
-			goto cant_get_ref;
-
-		if (test_and_set_bit(SLOW_WORK_DELAYED, &work->flags))
-			BUG();
-		dwork->timer.expires = jiffies + delay;
-		dwork->timer.data = (unsigned long) work;
-		dwork->timer.function = delayed_slow_work_timer;
-		add_timer(&dwork->timer);
-
-		spin_unlock_irqrestore(&slow_work_queue_lock, flags);
-	}
-
-	return 0;
-
-cancelled:
-	ret = -ECANCELED;
-cant_get_ref:
-	spin_unlock_irqrestore(&slow_work_queue_lock, flags);
-	return ret;
-}
-EXPORT_SYMBOL(delayed_slow_work_enqueue);
-
-/*
- * Schedule a cull of the thread pool at some time in the near future
- */
-static void slow_work_schedule_cull(void)
-{
-	mod_timer(&slow_work_cull_timer,
-		  round_jiffies(jiffies + SLOW_WORK_CULL_TIMEOUT));
-}
-
-/*
- * Worker thread culling algorithm
- */
-static bool slow_work_cull_thread(void)
-{
-	unsigned long flags;
-	bool do_cull = false;
-
-	spin_lock_irqsave(&slow_work_queue_lock, flags);
-
-	if (slow_work_cull) {
-		slow_work_cull = false;
-
-		if (list_empty(&slow_work_queue) &&
-		    list_empty(&vslow_work_queue) &&
-		    atomic_read(&slow_work_thread_count) >
-		    slow_work_min_threads) {
-			slow_work_schedule_cull();
-			do_cull = true;
-		}
-	}
-
-	spin_unlock_irqrestore(&slow_work_queue_lock, flags);
-	return do_cull;
-}
-
-/*
- * Determine if there is slow work available for dispatch
- */
-static inline bool slow_work_available(int vsmax)
-{
-	return !list_empty(&slow_work_queue) ||
-		(!list_empty(&vslow_work_queue) &&
-		 atomic_read(&vslow_work_executing_count) < vsmax);
-}
-
-/*
- * Worker thread dispatcher
- */
-static int slow_work_thread(void *_data)
-{
-	int vsmax, id;
-
-	DEFINE_WAIT(wait);
-
-	set_freezable();
-	set_user_nice(current, -5);
-
-	/* allocate ourselves an ID */
-	spin_lock_irq(&slow_work_queue_lock);
-	id = find_first_zero_bit(slow_work_ids, SLOW_WORK_THREAD_LIMIT);
-	BUG_ON(id < 0 || id >= SLOW_WORK_THREAD_LIMIT);
-	__set_bit(id, slow_work_ids);
-	slow_work_set_thread_pid(id, current->pid);
-	spin_unlock_irq(&slow_work_queue_lock);
-
-	sprintf(current->comm, "kslowd%03u", id);
-
-	for (;;) {
-		vsmax = vslow_work_proportion;
-		vsmax *= atomic_read(&slow_work_thread_count);
-		vsmax /= 100;
-
-		prepare_to_wait_exclusive(&slow_work_thread_wq, &wait,
-					  TASK_INTERRUPTIBLE);
-		if (!freezing(current) &&
-		    !slow_work_threads_should_exit &&
-		    !slow_work_available(vsmax) &&
-		    !slow_work_cull)
-			schedule();
-		finish_wait(&slow_work_thread_wq, &wait);
-
-		try_to_freeze();
-
-		vsmax = vslow_work_proportion;
-		vsmax *= atomic_read(&slow_work_thread_count);
-		vsmax /= 100;
-
-		if (slow_work_available(vsmax) && slow_work_execute(id)) {
-			cond_resched();
-			if (list_empty(&slow_work_queue) &&
-			    list_empty(&vslow_work_queue) &&
-			    atomic_read(&slow_work_thread_count) >
-			    slow_work_min_threads)
-				slow_work_schedule_cull();
-			continue;
-		}
-
-		if (slow_work_threads_should_exit)
-			break;
-
-		if (slow_work_cull && slow_work_cull_thread())
-			break;
-	}
-
-	spin_lock_irq(&slow_work_queue_lock);
-	slow_work_set_thread_pid(id, 0);
-	__clear_bit(id, slow_work_ids);
-	spin_unlock_irq(&slow_work_queue_lock);
-
-	if (atomic_dec_and_test(&slow_work_thread_count))
-		complete_and_exit(&slow_work_last_thread_exited, 0);
-	return 0;
-}
-
-/*
- * Handle thread cull timer expiration
- */
-static void slow_work_cull_timeout(unsigned long data)
-{
-	slow_work_cull = true;
-	wake_up(&slow_work_thread_wq);
-}
-
-/*
- * Start a new slow work thread
- */
-static void slow_work_new_thread_execute(struct slow_work *work)
-{
-	struct task_struct *p;
-
-	if (slow_work_threads_should_exit)
-		return;
-
-	if (atomic_read(&slow_work_thread_count) >= slow_work_max_threads)
-		return;
-
-	if (!mutex_trylock(&slow_work_user_lock))
-		return;
-
-	slow_work_may_not_start_new_thread = true;
-	atomic_inc(&slow_work_thread_count);
-	p = kthread_run(slow_work_thread, NULL, "kslowd");
-	if (IS_ERR(p)) {
-		printk(KERN_DEBUG "Slow work thread pool: OOM\n");
-		if (atomic_dec_and_test(&slow_work_thread_count))
-			BUG(); /* we're running on a slow work thread... */
-		mod_timer(&slow_work_oom_timer,
-			  round_jiffies(jiffies + SLOW_WORK_OOM_TIMEOUT));
-	} else {
-		/* ratelimit the starting of new threads */
-		mod_timer(&slow_work_oom_timer, jiffies + 1);
-	}
-
-	mutex_unlock(&slow_work_user_lock);
-}
-
-static const struct slow_work_ops slow_work_new_thread_ops = {
-	.owner		= THIS_MODULE,
-	.execute	= slow_work_new_thread_execute,
-#ifdef CONFIG_SLOW_WORK_DEBUG
-	.desc		= slow_work_new_thread_desc,
-#endif
-};
-
-/*
- * post-OOM new thread start suppression expiration
- */
-static void slow_work_oom_timeout(unsigned long data)
-{
-	slow_work_may_not_start_new_thread = false;
-}
-
-#ifdef CONFIG_SYSCTL
-/*
- * Handle adjustment of the minimum number of threads
- */
-static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
-					void __user *buffer,
-					size_t *lenp, loff_t *ppos)
-{
-	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-	int n;
-
-	if (ret == 0) {
-		mutex_lock(&slow_work_user_lock);
-		if (slow_work_user_count > 0) {
-			/* see if we need to start or stop threads */
-			n = atomic_read(&slow_work_thread_count) -
-				slow_work_min_threads;
-
-			if (n < 0 && !slow_work_may_not_start_new_thread)
-				slow_work_enqueue(&slow_work_new_thread);
-			else if (n > 0)
-				slow_work_schedule_cull();
-		}
-		mutex_unlock(&slow_work_user_lock);
-	}
-
-	return ret;
-}
-
-/*
- * Handle adjustment of the maximum number of threads
- */
-static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
-					void __user *buffer,
-					size_t *lenp, loff_t *ppos)
-{
-	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-	int n;
-
-	if (ret == 0) {
-		mutex_lock(&slow_work_user_lock);
-		if (slow_work_user_count > 0) {
-			/* see if we need to stop threads */
-			n = slow_work_max_threads -
-				atomic_read(&slow_work_thread_count);
-
-			if (n < 0)
-				slow_work_schedule_cull();
-		}
-		mutex_unlock(&slow_work_user_lock);
-	}
-
-	return ret;
-}
-#endif /* CONFIG_SYSCTL */
-
-/**
- * slow_work_register_user - Register a user of the facility
- * @module: The module about to make use of the facility
- *
- * Register a user of the facility, starting up the initial threads if there
- * aren't any other users at this point.  This will return 0 if successful, or
- * an error if not.
- */
-int slow_work_register_user(struct module *module)
-{
-	struct task_struct *p;
-	int loop;
-
-	mutex_lock(&slow_work_user_lock);
-
-	if (slow_work_user_count == 0) {
-		printk(KERN_NOTICE "Slow work thread pool: Starting up\n");
-		init_completion(&slow_work_last_thread_exited);
-
-		slow_work_threads_should_exit = false;
-		slow_work_init(&slow_work_new_thread,
-			       &slow_work_new_thread_ops);
-		slow_work_may_not_start_new_thread = false;
-		slow_work_cull = false;
-
-		/* start the minimum number of threads */
-		for (loop = 0; loop < slow_work_min_threads; loop++) {
-			atomic_inc(&slow_work_thread_count);
-			p = kthread_run(slow_work_thread, NULL, "kslowd");
-			if (IS_ERR(p))
-				goto error;
-		}
-		printk(KERN_NOTICE "Slow work thread pool: Ready\n");
-	}
-
-	slow_work_user_count++;
-	mutex_unlock(&slow_work_user_lock);
-	return 0;
-
-error:
-	if (atomic_dec_and_test(&slow_work_thread_count))
-		complete(&slow_work_last_thread_exited);
-	if (loop > 0) {
-		printk(KERN_ERR "Slow work thread pool:"
-		       " Aborting startup on ENOMEM\n");
-		slow_work_threads_should_exit = true;
-		wake_up_all(&slow_work_thread_wq);
-		wait_for_completion(&slow_work_last_thread_exited);
-		printk(KERN_ERR "Slow work thread pool: Aborted\n");
-	}
-	mutex_unlock(&slow_work_user_lock);
-	return PTR_ERR(p);
-}
-EXPORT_SYMBOL(slow_work_register_user);
-
-/*
- * wait for all outstanding items from the calling module to complete
- * - note that more items may be queued whilst we're waiting
- */
-static void slow_work_wait_for_items(struct module *module)
-{
-#ifdef CONFIG_MODULES
-	DECLARE_WAITQUEUE(myself, current);
-	struct slow_work *work;
-	int loop;
-
-	mutex_lock(&slow_work_unreg_sync_lock);
-	add_wait_queue(&slow_work_unreg_wq, &myself);
-
-	for (;;) {
-		spin_lock_irq(&slow_work_queue_lock);
-
-		/* first of all, we wait for the last queued item in each list
-		 * to be processed */
-		list_for_each_entry_reverse(work, &vslow_work_queue, link) {
-			if (work->owner == module) {
-				set_current_state(TASK_UNINTERRUPTIBLE);
-				slow_work_unreg_work_item = work;
-				goto do_wait;
-			}
-		}
-		list_for_each_entry_reverse(work, &slow_work_queue, link) {
-			if (work->owner == module) {
-				set_current_state(TASK_UNINTERRUPTIBLE);
-				slow_work_unreg_work_item = work;
-				goto do_wait;
-			}
-		}
-
-		/* then we wait for the items being processed to finish */
-		slow_work_unreg_module = module;
-		smp_mb();
-		for (loop = 0; loop < SLOW_WORK_THREAD_LIMIT; loop++) {
-			if (slow_work_thread_processing[loop] == module)
-				goto do_wait;
-		}
-		spin_unlock_irq(&slow_work_queue_lock);
-		break; /* okay, we're done */
-
-	do_wait:
-		spin_unlock_irq(&slow_work_queue_lock);
-		schedule();
-		slow_work_unreg_work_item = NULL;
-		slow_work_unreg_module = NULL;
-	}
-
-	remove_wait_queue(&slow_work_unreg_wq, &myself);
-	mutex_unlock(&slow_work_unreg_sync_lock);
-#endif /* CONFIG_MODULES */
-}
-
-/**
- * slow_work_unregister_user - Unregister a user of the facility
- * @module: The module whose items should be cleared
- *
- * Unregister a user of the facility, killing all the threads if this was the
- * last one.
- *
- * This waits for all the work items belonging to the nominated module to go
- * away before proceeding.
- */
-void slow_work_unregister_user(struct module *module)
-{
-	/* first of all, wait for all outstanding items from the calling module
-	 * to complete */
-	if (module)
-		slow_work_wait_for_items(module);
-
-	/* then we can actually go about shutting down the facility if need
-	 * be */
-	mutex_lock(&slow_work_user_lock);
-
-	BUG_ON(slow_work_user_count <= 0);
-
-	slow_work_user_count--;
-	if (slow_work_user_count == 0) {
-		printk(KERN_NOTICE "Slow work thread pool: Shutting down\n");
-		slow_work_threads_should_exit = true;
-		del_timer_sync(&slow_work_cull_timer);
-		del_timer_sync(&slow_work_oom_timer);
-		wake_up_all(&slow_work_thread_wq);
-		wait_for_completion(&slow_work_last_thread_exited);
-		printk(KERN_NOTICE "Slow work thread pool:"
-		       " Shut down complete\n");
-	}
-
-	mutex_unlock(&slow_work_user_lock);
-}
-EXPORT_SYMBOL(slow_work_unregister_user);
-
-/*
- * Initialise the slow work facility
- */
-static int __init init_slow_work(void)
-{
-	unsigned nr_cpus = num_possible_cpus();
-
-	if (slow_work_max_threads < nr_cpus)
-		slow_work_max_threads = nr_cpus;
-#ifdef CONFIG_SYSCTL
-	if (slow_work_max_max_threads < nr_cpus * 2)
-		slow_work_max_max_threads = nr_cpus * 2;
-#endif
-#ifdef CONFIG_SLOW_WORK_DEBUG
-	{
-		struct dentry *dbdir;
-
-		dbdir = debugfs_create_dir("slow_work", NULL);
-		if (dbdir && !IS_ERR(dbdir))
-			debugfs_create_file("runqueue", S_IFREG | 0400, dbdir,
-					    NULL, &slow_work_runqueue_fops);
-	}
-#endif
-	return 0;
-}
-
-subsys_initcall(init_slow_work);
diff --git a/kernel/slow-work.h b/kernel/slow-work.h
deleted file mode 100644
index a29ebd1ef41..00000000000
--- a/kernel/slow-work.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/* Slow work private definitions
- *
- * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#define SLOW_WORK_CULL_TIMEOUT (5 * HZ)	/* cull threads 5s after running out of
-					 * things to do */
-#define SLOW_WORK_OOM_TIMEOUT (5 * HZ)	/* can't start new threads for 5s after
-					 * OOM */
-
-#define SLOW_WORK_THREAD_LIMIT	255	/* abs maximum number of slow-work threads */
-
-/*
- * slow-work.c
- */
-#ifdef CONFIG_SLOW_WORK_DEBUG
-extern struct slow_work *slow_work_execs[];
-extern pid_t slow_work_pids[];
-extern rwlock_t slow_work_execs_lock;
-#endif
-
-extern struct list_head slow_work_queue;
-extern struct list_head vslow_work_queue;
-extern spinlock_t slow_work_queue_lock;
-
-/*
- * slow-work-debugfs.c
- */
-#ifdef CONFIG_SLOW_WORK_DEBUG
-extern const struct file_operations slow_work_runqueue_fops;
-
-extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *);
-#endif
-
-/*
- * Helper functions
- */
-static inline void slow_work_set_thread_pid(int id, pid_t pid)
-{
-#ifdef CONFIG_SLOW_WORK_DEBUG
-	slow_work_pids[id] = pid;
-#endif
-}
-
-static inline void slow_work_mark_time(struct slow_work *work)
-{
-#ifdef CONFIG_SLOW_WORK_DEBUG
-	work->mark = CURRENT_TIME;
-#endif
-}
-
-static inline void slow_work_begin_exec(int id, struct slow_work *work)
-{
-#ifdef CONFIG_SLOW_WORK_DEBUG
-	slow_work_execs[id] = work;
-#endif
-}
-
-static inline void slow_work_end_exec(int id, struct slow_work *work)
-{
-#ifdef CONFIG_SLOW_WORK_DEBUG
-	write_lock(&slow_work_execs_lock);
-	slow_work_execs[id] = NULL;
-	write_unlock(&slow_work_execs_lock);
-#endif
-}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d24f761f487..5821365b960 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -50,7 +50,6 @@
 #include <linux/acpi.h>
 #include <linux/reboot.h>
 #include <linux/ftrace.h>
-#include <linux/slow-work.h>
 #include <linux/perf_event.h>
 #include <linux/kprobes.h>
 #include <linux/pipe_fs_i.h>
@@ -906,13 +905,6 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dointvec,
 	},
 #endif
-#ifdef CONFIG_SLOW_WORK
-	{
-		.procname	= "slow-work",
-		.mode		= 0555,
-		.child		= slow_work_sysctls,
-	},
-#endif
 #ifdef CONFIG_PERF_EVENTS
 	{
 		.procname	= "perf_event_paranoid",
-- 
cgit v1.2.3-70-g09d2


From 18d0cdfd1a4cc9028c0ef80f94538b31541f8fe5 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 18 Jul 2010 12:44:01 +0200
Subject: firewire: normalize status values in packet callbacks

core-transaction.c transmit_complete_callback() and close_transaction()
expect packet callback status to be an ACK or RCODE, and ACKs get
translated to RCODEs for transaction callbacks.

An old comment on the packet callback API (present since the initial
submission of the stack) and the dummy_driver implementation of
send_request/send_response deviated from this, as they also included
-ERRNO in the range of status values.

Let's narrow status values down to ACK and RCODE to prevent surprises.
RCODE_CANCELLED is chosen as the dummy_driver's RCODE as its meaning of
"transaction timed out" comes closest to what happens when a transaction
coincides with card removal.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-card.c        | 4 ++--
 drivers/firewire/core-transaction.c | 5 ++++-
 include/linux/firewire.h            | 8 ++++----
 3 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index 2bb5c036e80..0c312c4bb4b 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -570,12 +570,12 @@ static int dummy_set_config_rom(struct fw_card *card,
 
 static void dummy_send_request(struct fw_card *card, struct fw_packet *packet)
 {
-	packet->callback(packet, card, -ENODEV);
+	packet->callback(packet, card, RCODE_CANCELLED);
 }
 
 static void dummy_send_response(struct fw_card *card, struct fw_packet *packet)
 {
-	packet->callback(packet, card, -ENODEV);
+	packet->callback(packet, card, RCODE_CANCELLED);
 }
 
 static int dummy_cancel_packet(struct fw_card *card, struct fw_packet *packet)
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index 5f5a7852f7a..e2e4dc624fb 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -310,7 +310,10 @@ static int allocate_tlabel(struct fw_card *card)
  * After the transaction is completed successfully or unsuccessfully, the
  * @callback will be called.  Among its parameters is the response code which
  * is either one of the rcodes per IEEE 1394 or, in case of internal errors,
- * the firewire-core specific %RCODE_SEND_ERROR.
+ * the firewire-core specific %RCODE_SEND_ERROR.  The other firewire-core
+ * specific rcodes (%RCODE_CANCELLED, %RCODE_BUSY, %RCODE_GENERATION,
+ * %RCODE_NO_ACK) denote transaction timeout, busy responder, stale request
+ * generation, or missing ACK respectively.
  *
  * Note some timing corner cases:  fw_send_request() may complete much earlier
  * than when the request packet actually hits the wire.  On the other hand,
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index adc5b55e6e5..0c38b8e9772 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -285,10 +285,10 @@ struct fw_packet {
 	u32 timestamp;
 
 	/*
-	 * This callback is called when the packet transmission has
-	 * completed; for successful transmission, the status code is
-	 * the ack received from the destination, otherwise it's a
-	 * negative errno: ENOMEM, ESTALE, ETIMEDOUT, ENODEV, EIO.
+	 * This callback is called when the packet transmission has completed.
+	 * For successful transmission, the status code is the ack received
+	 * from the destination.  Otherwise it is one of the juju-specific
+	 * rcodes:  RCODE_SEND_ERROR, _CANCELLED, _BUSY, _GENERATION, _NO_ACK.
 	 * The callback can be called from tasklet context and thus
 	 * must never block.
 	 */
-- 
cgit v1.2.3-70-g09d2


From d505e6e87127d4dbdaa5d91561eed810c180ca23 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sat, 17 Jul 2010 21:36:02 +0200
Subject: firewire: cdev: some clarifications to the API documentation

Response events:
  - are generated on more occasions than their documentation claimed.

CSR allocation:
  - An already occupied CSR can be determined from errno==EBUSY.

Bus resets:
  - Note that FW_CDEV_IOC_INITIATE_BUS_RESET is nonblocking and that the
    client is not required to observe a grace period since kernels
    2.6.36+ will enforce it now (commit 02d37bed).

  - The possible values of fw_cdev_initiate_bus_reset.type are listed in
    the kerneldoc comment already.

  - Clarify that an application that uses FW_CDEV_IOC_ADD_DESCRIPTOR and
    FW_CDEV_IOC_REMOVE_DESCRIPTOR does not have to issue a bus reset.

Isochronous I/O contexts:
  - At most one can be created per open file descriptor.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 include/linux/firewire-cdev.h | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index d31022b05bd..fde9568151d 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -84,8 +84,9 @@ struct fw_cdev_event_bus_reset {
 
 /**
  * struct fw_cdev_event_response - Sent when a response packet was received
- * @closure:	See &fw_cdev_event_common;
- *		set by %FW_CDEV_IOC_SEND_REQUEST ioctl
+ * @closure:	See &fw_cdev_event_common; set by %FW_CDEV_IOC_SEND_REQUEST
+ *		or %FW_CDEV_IOC_SEND_BROADCAST_REQUEST
+ *		or %FW_CDEV_IOC_SEND_STREAM_PACKET ioctl
  * @type:	See &fw_cdev_event_common; always %FW_CDEV_EVENT_RESPONSE
  * @rcode:	Response code returned by the remote node
  * @length:	Data length, i.e. the response's payload size in bytes
@@ -95,6 +96,11 @@ struct fw_cdev_event_bus_reset {
  * sent by %FW_CDEV_IOC_SEND_REQUEST ioctl.  The payload data for responses
  * carrying data (read and lock responses) follows immediately and can be
  * accessed through the @data field.
+ *
+ * The event is also generated after conclusions of transactions that do not
+ * involve response packets.  This includes unified write transactions,
+ * broadcast write transactions, and transmission of asynchronous stream
+ * packets.  @rcode indicates success or failure of such transmissions.
  */
 struct fw_cdev_event_response {
 	__u64 closure;
@@ -447,7 +453,9 @@ struct fw_cdev_send_response {
  * range to be used for later deallocation of the range.
  *
  * The address range is allocated on all local nodes.  The address allocation
- * is exclusive except for the FCP command and response registers.
+ * is exclusive except for the FCP command and response registers.  If an
+ * exclusive address region is already in use, the ioctl fails with errno set
+ * to %EBUSY.
  */
 struct fw_cdev_allocate {
 	__u64 offset;
@@ -475,9 +483,14 @@ struct fw_cdev_deallocate {
  * Initiate a bus reset for the bus this device is on.  The bus reset can be
  * either the original (long) bus reset or the arbitrated (short) bus reset
  * introduced in 1394a-2000.
+ *
+ * The ioctl returns immediately.  A subsequent &fw_cdev_event_bus_reset
+ * indicates when the reset actually happened.  Since ABI v4, this may be
+ * considerably later than the ioctl because the kernel ensures a grace period
+ * between subsequent bus resets as per IEEE 1394 bus management specification.
  */
 struct fw_cdev_initiate_bus_reset {
-	__u32 type;	/* FW_CDEV_SHORT_RESET or FW_CDEV_LONG_RESET */
+	__u32 type;
 };
 
 /**
@@ -501,9 +514,10 @@ struct fw_cdev_initiate_bus_reset {
  *
  * @immediate, @key, and @data array elements are CPU-endian quadlets.
  *
- * If successful, the kernel adds the descriptor and writes back a handle to the
- * kernel-side object to be used for later removal of the descriptor block and
- * immediate key.
+ * If successful, the kernel adds the descriptor and writes back a @handle to
+ * the kernel-side object to be used for later removal of the descriptor block
+ * and immediate key.  The kernel will also generate a bus reset to signal the
+ * change of the configuration ROM to other nodes.
  *
  * This ioctl affects the configuration ROMs of all local nodes.
  * The ioctl only succeeds on device files which represent a local node.
@@ -522,7 +536,8 @@ struct fw_cdev_add_descriptor {
  *		descriptor was added
  *
  * Remove a descriptor block and accompanying immediate key from the local
- * nodes' configuration ROMs.
+ * nodes' configuration ROMs.  The kernel will also generate a bus reset to
+ * signal the change of the configuration ROM to other nodes.
  */
 struct fw_cdev_remove_descriptor {
 	__u32 handle;
@@ -554,6 +569,8 @@ struct fw_cdev_remove_descriptor {
  *
  * Note that the effect of a @header_size > 4 depends on
  * &fw_cdev_get_info.version, as documented at &fw_cdev_event_iso_interrupt.
+ *
+ * No more than one iso context can be created per fd.
  */
 struct fw_cdev_create_iso_context {
 	__u32 type;
-- 
cgit v1.2.3-70-g09d2


From 850bb6f23b93c04ce1e4509a87fa607dc17d97c1 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Fri, 16 Jul 2010 22:25:14 +0200
Subject: firewire: cdev: add PHY packet transmission

Add an FW_CDEV_IOC_SEND_PHY_PACKET ioctl() for /dev/fw* which can be
used to implement bus management related functionality in userspace.

This is also half of the functionality (the transmit part) that is
needed to support a userspace implementation of a VersaPHY transaction
layer.

Safety considerations:

  - PHY packets are generally broadcasts and may have interesting
    effects on PHYs and the bus, e.g. make asynchronous arbitration
    impossible due to too low gap count.  Hence some kind of elevated
    privileges should be required of a process to be able to send
    PHY packets.  This implementation assumes that a process that is
    allowed to open the /dev/fw* of a local node does have this
    privilege.

    There was an inconclusive discussion about introducing POSIX
    capabilities as a means to check for user privileges for these
    kinds of operations.

  - The kernel does not check integrity of the supplied packet data.
    That would be far too much code, considering the many kinds of
    PHY packets.  A process which got the privilege to send these
    packets is trusted to do it correctly.

Just like with the other "send packet" ioctls, a non-blocking API is
chosen; i.e. the ioctl may return even before AT DMA has started.  After
transmission, an event for poll()/read() is enqueued.  Most users are
going to need a blocking API, but a blocking userspace wrapper is easy
to implement, and the second of the two existing libraw1394 calls
raw1394_phy_packet_write() and raw1394_start_phy_packet_write() can be
better supported that way.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-cdev.c  | 64 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/firewire-cdev.h | 44 ++++++++++++++++++++++++++++-
 2 files changed, 107 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index acf4fa1f3f8..f9571992648 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -194,6 +194,13 @@ struct iso_resource_event {
 	struct fw_cdev_event_iso_resource iso_resource;
 };
 
+struct outbound_phy_packet_event {
+	struct event event;
+	struct client *client;
+	struct fw_packet p;
+	struct fw_cdev_event_phy_packet phy_packet;
+};
+
 static inline void __user *u64_to_uptr(__u64 value)
 {
 	return (void __user *)(unsigned long)value;
@@ -396,6 +403,7 @@ union ioctl_arg {
 	struct fw_cdev_allocate_iso_resource	allocate_iso_resource;
 	struct fw_cdev_send_stream_packet	send_stream_packet;
 	struct fw_cdev_get_cycle_timer2		get_cycle_timer2;
+	struct fw_cdev_send_phy_packet		send_phy_packet;
 };
 
 static int ioctl_get_info(struct client *client, union ioctl_arg *arg)
@@ -1384,6 +1392,61 @@ static int ioctl_send_stream_packet(struct client *client, union ioctl_arg *arg)
 	return init_request(client, &request, dest, a->speed);
 }
 
+static void outbound_phy_packet_callback(struct fw_packet *packet,
+					 struct fw_card *card, int status)
+{
+	struct outbound_phy_packet_event *e =
+		container_of(packet, struct outbound_phy_packet_event, p);
+
+	switch (status) {
+	/* expected: */
+	case ACK_COMPLETE:	e->phy_packet.rcode = RCODE_COMPLETE;	break;
+	/* should never happen with PHY packets: */
+	case ACK_PENDING:	e->phy_packet.rcode = RCODE_COMPLETE;	break;
+	case ACK_BUSY_X:
+	case ACK_BUSY_A:
+	case ACK_BUSY_B:	e->phy_packet.rcode = RCODE_BUSY;	break;
+	case ACK_DATA_ERROR:	e->phy_packet.rcode = RCODE_DATA_ERROR;	break;
+	case ACK_TYPE_ERROR:	e->phy_packet.rcode = RCODE_TYPE_ERROR;	break;
+	/* stale generation; cancelled; on certain controllers: no ack */
+	default:		e->phy_packet.rcode = status;		break;
+	}
+
+	queue_event(e->client, &e->event,
+		    &e->phy_packet, sizeof(e->phy_packet), NULL, 0);
+	client_put(e->client);
+}
+
+static int ioctl_send_phy_packet(struct client *client, union ioctl_arg *arg)
+{
+	struct fw_cdev_send_phy_packet *a = &arg->send_phy_packet;
+	struct fw_card *card = client->device->card;
+	struct outbound_phy_packet_event *e;
+
+	/* Access policy: Allow this ioctl only on local nodes' device files. */
+	if (!client->device->is_local)
+		return -ENOSYS;
+
+	e = kzalloc(sizeof(*e), GFP_KERNEL);
+	if (e == NULL)
+		return -ENOMEM;
+
+	client_get(client);
+	e->client		= client;
+	e->p.speed		= SCODE_100;
+	e->p.generation		= a->generation;
+	e->p.header[0]		= a->data[0];
+	e->p.header[1]		= a->data[1];
+	e->p.header_length	= 8;
+	e->p.callback		= outbound_phy_packet_callback;
+	e->phy_packet.closure	= a->closure;
+	e->phy_packet.type	= FW_CDEV_EVENT_PHY_PACKET_SENT;
+
+	card->driver->send_request(card, &e->p);
+
+	return 0;
+}
+
 static int (* const ioctl_handlers[])(struct client *, union ioctl_arg *) = {
 	[0x00] = ioctl_get_info,
 	[0x01] = ioctl_send_request,
@@ -1406,6 +1469,7 @@ static int (* const ioctl_handlers[])(struct client *, union ioctl_arg *) = {
 	[0x12] = ioctl_send_broadcast_request,
 	[0x13] = ioctl_send_stream_packet,
 	[0x14] = ioctl_get_cycle_timer2,
+	[0x15] = ioctl_send_phy_packet,
 };
 
 static int dispatch_ioctl(struct client *client,
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index fde9568151d..5bc051b9a01 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -34,6 +34,7 @@
 
 /* available since kernel version 2.6.36 */
 #define FW_CDEV_EVENT_REQUEST2			0x06
+#define FW_CDEV_EVENT_PHY_PACKET_SENT		0x07
 
 /**
  * struct fw_cdev_event_common - Common part of all fw_cdev_event_ types
@@ -283,6 +284,19 @@ struct fw_cdev_event_iso_resource {
 	__s32 bandwidth;
 };
 
+/**
+ * struct fw_cdev_event_phy_packet - A PHY packet was transmitted
+ * @closure:	See &fw_cdev_event_common;
+ *		set by %FW_CDEV_IOC_SEND_PHY_PACKET ioctl
+ * @type:	%FW_CDEV_EVENT_PHY_PACKET_SENT
+ * @rcode:	%RCODE_..., indicates success or failure of transmission
+ */
+struct fw_cdev_event_phy_packet {
+	__u64 closure;
+	__u32 type;
+	__u32 rcode;
+};
+
 /**
  * union fw_cdev_event - Convenience union of fw_cdev_event_ types
  * @common:        Valid for all types
@@ -294,6 +308,7 @@ struct fw_cdev_event_iso_resource {
  * @iso_resource:  Valid if @common.type ==
  *				%FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED or
  *				%FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED
+ * @phy_packet:    Valid if @common.type == %FW_CDEV_EVENT_PHY_PACKET_SENT
  *
  * Convenience union for userspace use.  Events could be read(2) into an
  * appropriately aligned char buffer and then cast to this union for further
@@ -311,6 +326,7 @@ union fw_cdev_event {
 	struct fw_cdev_event_request2		request2;     /* added in 2.6.36 */
 	struct fw_cdev_event_iso_interrupt	iso_interrupt;
 	struct fw_cdev_event_iso_resource	iso_resource; /* added in 2.6.30 */
+	struct fw_cdev_event_phy_packet		phy_packet;   /* added in 2.6.36 */
 };
 
 /* available since kernel version 2.6.22 */
@@ -342,6 +358,9 @@ union fw_cdev_event {
 /* available since kernel version 2.6.34 */
 #define FW_CDEV_IOC_GET_CYCLE_TIMER2   _IOWR('#', 0x14, struct fw_cdev_get_cycle_timer2)
 
+/* available since kernel version 2.6.36 */
+#define FW_CDEV_IOC_SEND_PHY_PACKET    _IOWR('#', 0x15, struct fw_cdev_send_phy_packet)
+
 /*
  * ABI version history
  *  1  (2.6.22)  - initial version
@@ -357,8 +376,9 @@ union fw_cdev_event {
  *               - shared use and auto-response for FCP registers
  *  3  (2.6.34)  - made &fw_cdev_get_cycle_timer reliable
  *               - added %FW_CDEV_IOC_GET_CYCLE_TIMER2
- *  4  (2.6.36)  - added %FW_CDEV_EVENT_REQUEST2
+ *  4  (2.6.36)  - added %FW_CDEV_EVENT_REQUEST2, %FW_CDEV_EVENT_PHY_PACKET_SENT
  *               - implemented &fw_cdev_event_bus_reset.bm_node_id
+ *               - added %FW_CDEV_IOC_SEND_PHY_PACKET
  */
 #define FW_CDEV_VERSION 3 /* Meaningless; don't use this macro. */
 
@@ -808,4 +828,26 @@ struct fw_cdev_send_stream_packet {
 	__u32 speed;
 };
 
+/**
+ * struct fw_cdev_send_phy_packet - send a PHY packet
+ * @closure:	Passed back to userspace in the PHY-packet-sent event
+ * @data:	First and second quadlet of the PHY packet
+ * @generation:	The bus generation where packet is valid
+ *
+ * The %FW_CDEV_IOC_SEND_PHY_PACKET ioctl sends a PHY packet to all nodes
+ * on the same card as this device.  After transmission, an
+ * %FW_CDEV_EVENT_PHY_PACKET_SENT event is generated.
+ *
+ * The payload @data[] shall be specified in host byte order.  Usually,
+ * @data[1] needs to be the bitwise inverse of @data[0].  VersaPHY packets
+ * are an exception to this rule.
+ *
+ * The ioctl is only permitted on device files which represent a local node.
+ */
+struct fw_cdev_send_phy_packet {
+	__u64 closure;
+	__u32 data[2];
+	__u32 generation;
+};
+
 #endif /* _LINUX_FIREWIRE_CDEV_H */
-- 
cgit v1.2.3-70-g09d2


From bf54e1462b9192fdef7ea9e2bc44fdc16a4b87bc Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Fri, 16 Jul 2010 22:25:51 +0200
Subject: firewire: cdev: add PHY packet reception

Add an FW_CDEV_IOC_RECEIVE_PHY_PACKETS ioctl() and
FW_CDEV_EVENT_PHY_PACKET_RECEIVED poll()/read() event for /dev/fw*.
This can be used to get information from remote PHYs by remote access
PHY packets.

This is also the 2nd half of the functionality (the receive part) to
support a userspace implementation of a VersaPHY transaction layer.

Safety considerations:

  - PHY packets are generally broadcasts, hence some kind of elevated
    privileges should be required of a process to be able to listen in
    on PHY packets.  This implementation assumes that a process that is
    allowed to open the /dev/fw* of a local node does have this
    privilege.

    There was an inconclusive discussion about introducing POSIX
    capabilities as a means to check for user privileges for these
    kinds of operations.

Other limitations:

  - PHY packet reception may be switched on by ioctl() but cannot be
    switched off again.  It would be trivial to provide an off switch,
    but this is not worth the code.  The client should simply close()
    the fd then, or just ignore further events.

  - For the sake of simplicity of the API and the kernel-side
    implementation, no filter per packet content is provided.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-card.c        |  1 +
 drivers/firewire/core-cdev.c        | 73 ++++++++++++++++++++++++++++++++++---
 drivers/firewire/core-transaction.c |  5 +++
 drivers/firewire/core.h             |  2 +
 drivers/firewire/ohci.c             |  3 +-
 include/linux/firewire-cdev.h       | 39 ++++++++++++++++----
 include/linux/firewire.h            |  3 +-
 7 files changed, 111 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index 0c312c4bb4b..6d1cfae6aad 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -500,6 +500,7 @@ void fw_card_initialize(struct fw_card *card,
 	kref_init(&card->kref);
 	init_completion(&card->done);
 	INIT_LIST_HEAD(&card->transaction_list);
+	INIT_LIST_HEAD(&card->phy_receiver_list);
 	spin_lock_init(&card->lock);
 
 	card->local_node = NULL;
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index f9571992648..0425dd5dfcd 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -69,6 +69,9 @@ struct client {
 	struct fw_iso_buffer buffer;
 	unsigned long vm_start;
 
+	struct list_head phy_receiver_link;
+	u64 phy_receiver_closure;
+
 	struct list_head link;
 	struct kref kref;
 };
@@ -201,6 +204,11 @@ struct outbound_phy_packet_event {
 	struct fw_cdev_event_phy_packet phy_packet;
 };
 
+struct inbound_phy_packet_event {
+	struct event event;
+	struct fw_cdev_event_phy_packet phy_packet;
+};
+
 static inline void __user *u64_to_uptr(__u64 value)
 {
 	return (void __user *)(unsigned long)value;
@@ -236,6 +244,7 @@ static int fw_device_op_open(struct inode *inode, struct file *file)
 	idr_init(&client->resource_idr);
 	INIT_LIST_HEAD(&client->event_list);
 	init_waitqueue_head(&client->wait);
+	INIT_LIST_HEAD(&client->phy_receiver_link);
 	kref_init(&client->kref);
 
 	file->private_data = client;
@@ -357,7 +366,7 @@ static void queue_bus_reset_event(struct client *client)
 
 	e = kzalloc(sizeof(*e), GFP_KERNEL);
 	if (e == NULL) {
-		fw_notify("Out of memory when allocating bus reset event\n");
+		fw_notify("Out of memory when allocating event\n");
 		return;
 	}
 
@@ -404,6 +413,7 @@ union ioctl_arg {
 	struct fw_cdev_send_stream_packet	send_stream_packet;
 	struct fw_cdev_get_cycle_timer2		get_cycle_timer2;
 	struct fw_cdev_send_phy_packet		send_phy_packet;
+	struct fw_cdev_receive_phy_packets	receive_phy_packets;
 };
 
 static int ioctl_get_info(struct client *client, union ioctl_arg *arg)
@@ -671,9 +681,10 @@ static void handle_request(struct fw_card *card, struct fw_request *request,
 
 	r = kmalloc(sizeof(*r), GFP_ATOMIC);
 	e = kmalloc(sizeof(*e), GFP_ATOMIC);
-	if (r == NULL || e == NULL)
+	if (r == NULL || e == NULL) {
+		fw_notify("Out of memory when allocating event\n");
 		goto failed;
-
+	}
 	r->card    = card;
 	r->request = request;
 	r->data    = payload;
@@ -902,9 +913,10 @@ static void iso_callback(struct fw_iso_context *context, u32 cycle,
 	struct iso_interrupt_event *e;
 
 	e = kmalloc(sizeof(*e) + header_length, GFP_ATOMIC);
-	if (e == NULL)
+	if (e == NULL) {
+		fw_notify("Out of memory when allocating event\n");
 		return;
-
+	}
 	e->interrupt.type      = FW_CDEV_EVENT_ISO_INTERRUPT;
 	e->interrupt.closure   = client->iso_closure;
 	e->interrupt.cycle     = cycle;
@@ -1447,6 +1459,52 @@ static int ioctl_send_phy_packet(struct client *client, union ioctl_arg *arg)
 	return 0;
 }
 
+static int ioctl_receive_phy_packets(struct client *client, union ioctl_arg *arg)
+{
+	struct fw_cdev_receive_phy_packets *a = &arg->receive_phy_packets;
+	struct fw_card *card = client->device->card;
+
+	/* Access policy: Allow this ioctl only on local nodes' device files. */
+	if (!client->device->is_local)
+		return -ENOSYS;
+
+	spin_lock_irq(&card->lock);
+
+	list_move_tail(&client->phy_receiver_link, &card->phy_receiver_list);
+	client->phy_receiver_closure = a->closure;
+
+	spin_unlock_irq(&card->lock);
+
+	return 0;
+}
+
+void fw_cdev_handle_phy_packet(struct fw_card *card, struct fw_packet *p)
+{
+	struct client *client;
+	struct inbound_phy_packet_event *e;
+	unsigned long flags;
+
+	spin_lock_irqsave(&card->lock, flags);
+
+	list_for_each_entry(client, &card->phy_receiver_list, phy_receiver_link) {
+		e = kmalloc(sizeof(*e) + 8, GFP_ATOMIC);
+		if (e == NULL) {
+			fw_notify("Out of memory when allocating event\n");
+			break;
+		}
+		e->phy_packet.closure	= client->phy_receiver_closure;
+		e->phy_packet.type	= FW_CDEV_EVENT_PHY_PACKET_RECEIVED;
+		e->phy_packet.rcode	= RCODE_COMPLETE;
+		e->phy_packet.length	= 8;
+		e->phy_packet.data[0]	= p->header[1];
+		e->phy_packet.data[1]	= p->header[2];
+		queue_event(client, &e->event,
+			    &e->phy_packet, sizeof(e->phy_packet) + 8, NULL, 0);
+	}
+
+	spin_unlock_irqrestore(&card->lock, flags);
+}
+
 static int (* const ioctl_handlers[])(struct client *, union ioctl_arg *) = {
 	[0x00] = ioctl_get_info,
 	[0x01] = ioctl_send_request,
@@ -1470,6 +1528,7 @@ static int (* const ioctl_handlers[])(struct client *, union ioctl_arg *) = {
 	[0x13] = ioctl_send_stream_packet,
 	[0x14] = ioctl_get_cycle_timer2,
 	[0x15] = ioctl_send_phy_packet,
+	[0x16] = ioctl_receive_phy_packets,
 };
 
 static int dispatch_ioctl(struct client *client,
@@ -1577,6 +1636,10 @@ static int fw_device_op_release(struct inode *inode, struct file *file)
 	struct client *client = file->private_data;
 	struct event *event, *next_event;
 
+	spin_lock_irq(&client->device->card->lock);
+	list_del(&client->phy_receiver_link);
+	spin_unlock_irq(&client->device->card->lock);
+
 	mutex_lock(&client->device->client_list_mutex);
 	list_del(&client->link);
 	mutex_unlock(&client->device->client_list_mutex);
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index e2e4dc624fb..6f225cacbc3 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -883,6 +883,11 @@ void fw_core_handle_request(struct fw_card *card, struct fw_packet *p)
 	if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE)
 		return;
 
+	if (TCODE_IS_LINK_INTERNAL(HEADER_GET_TCODE(p->header[0]))) {
+		fw_cdev_handle_phy_packet(card, p);
+		return;
+	}
+
 	request = allocate_request(card, p);
 	if (request == NULL) {
 		/* FIXME: send statically allocated busy packet. */
diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
index ff6c9092200..3102b6b6343 100644
--- a/drivers/firewire/core.h
+++ b/drivers/firewire/core.h
@@ -128,6 +128,7 @@ extern const struct file_operations fw_device_ops;
 
 void fw_device_cdev_update(struct fw_device *device);
 void fw_device_cdev_remove(struct fw_device *device);
+void fw_cdev_handle_phy_packet(struct fw_card *card, struct fw_packet *p);
 
 
 /* -device */
@@ -214,6 +215,7 @@ static inline bool is_next_generation(int new_generation, int old_generation)
 
 #define TCODE_IS_READ_REQUEST(tcode)	(((tcode) & ~1) == 4)
 #define TCODE_IS_BLOCK_PACKET(tcode)	(((tcode) &  1) != 0)
+#define TCODE_IS_LINK_INTERNAL(tcode)	((tcode) == 0xe)
 #define TCODE_IS_REQUEST(tcode)		(((tcode) &  2) == 0)
 #define TCODE_IS_RESPONSE(tcode)	(((tcode) &  2) != 0)
 #define TCODE_HAS_REQUEST_DATA(tcode)	(((tcode) & 12) != 4)
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index bb6a92bc9e6..08afccc6633 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -1759,10 +1759,9 @@ static int ohci_enable(struct fw_card *card,
 		  OHCI1394_HCControl_noByteSwapData);
 
 	reg_write(ohci, OHCI1394_SelfIDBuffer, ohci->self_id_bus);
-	reg_write(ohci, OHCI1394_LinkControlClear,
-		  OHCI1394_LinkControl_rcvPhyPkt);
 	reg_write(ohci, OHCI1394_LinkControlSet,
 		  OHCI1394_LinkControl_rcvSelfID |
+		  OHCI1394_LinkControl_rcvPhyPkt |
 		  OHCI1394_LinkControl_cycleTimerEnable |
 		  OHCI1394_LinkControl_cycleMaster);
 
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 5bc051b9a01..b8740916079 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -35,6 +35,7 @@
 /* available since kernel version 2.6.36 */
 #define FW_CDEV_EVENT_REQUEST2			0x06
 #define FW_CDEV_EVENT_PHY_PACKET_SENT		0x07
+#define FW_CDEV_EVENT_PHY_PACKET_RECEIVED	0x08
 
 /**
  * struct fw_cdev_event_common - Common part of all fw_cdev_event_ types
@@ -285,16 +286,24 @@ struct fw_cdev_event_iso_resource {
 };
 
 /**
- * struct fw_cdev_event_phy_packet - A PHY packet was transmitted
- * @closure:	See &fw_cdev_event_common;
- *		set by %FW_CDEV_IOC_SEND_PHY_PACKET ioctl
- * @type:	%FW_CDEV_EVENT_PHY_PACKET_SENT
+ * struct fw_cdev_event_phy_packet - A PHY packet was transmitted or received
+ * @closure:	See &fw_cdev_event_common; set by %FW_CDEV_IOC_SEND_PHY_PACKET
+ *		or %FW_CDEV_IOC_RECEIVE_PHY_PACKETS ioctl
+ * @type:	%FW_CDEV_EVENT_PHY_PACKET_SENT or %..._RECEIVED
  * @rcode:	%RCODE_..., indicates success or failure of transmission
+ * @length:	Data length in bytes
+ * @data:	Incoming data
+ *
+ * If @type is %FW_CDEV_EVENT_PHY_PACKET_SENT, @length is 0 and @data empty.
+ * If @type is %FW_CDEV_EVENT_PHY_PACKET_RECEIVED, @length is 8 and @data
+ * consists of the two PHY packet quadlets, in host byte order.
  */
 struct fw_cdev_event_phy_packet {
 	__u64 closure;
 	__u32 type;
 	__u32 rcode;
+	__u32 length;
+	__u32 data[0];
 };
 
 /**
@@ -308,7 +317,9 @@ struct fw_cdev_event_phy_packet {
  * @iso_resource:  Valid if @common.type ==
  *				%FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED or
  *				%FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED
- * @phy_packet:    Valid if @common.type == %FW_CDEV_EVENT_PHY_PACKET_SENT
+ * @phy_packet:    Valid if @common.type ==
+ *				%FW_CDEV_EVENT_PHY_PACKET_SENT or
+ *				%FW_CDEV_EVENT_PHY_PACKET_RECEIVED
  *
  * Convenience union for userspace use.  Events could be read(2) into an
  * appropriately aligned char buffer and then cast to this union for further
@@ -360,6 +371,7 @@ union fw_cdev_event {
 
 /* available since kernel version 2.6.36 */
 #define FW_CDEV_IOC_SEND_PHY_PACKET    _IOWR('#', 0x15, struct fw_cdev_send_phy_packet)
+#define FW_CDEV_IOC_RECEIVE_PHY_PACKETS _IOW('#', 0x16, struct fw_cdev_receive_phy_packets)
 
 /*
  * ABI version history
@@ -376,9 +388,9 @@ union fw_cdev_event {
  *               - shared use and auto-response for FCP registers
  *  3  (2.6.34)  - made &fw_cdev_get_cycle_timer reliable
  *               - added %FW_CDEV_IOC_GET_CYCLE_TIMER2
- *  4  (2.6.36)  - added %FW_CDEV_EVENT_REQUEST2, %FW_CDEV_EVENT_PHY_PACKET_SENT
+ *  4  (2.6.36)  - added %FW_CDEV_EVENT_REQUEST2, %FW_CDEV_EVENT_PHY_PACKET_*
  *               - implemented &fw_cdev_event_bus_reset.bm_node_id
- *               - added %FW_CDEV_IOC_SEND_PHY_PACKET
+ *               - added %FW_CDEV_IOC_SEND_PHY_PACKET, _RECEIVE_PHY_PACKETS
  */
 #define FW_CDEV_VERSION 3 /* Meaningless; don't use this macro. */
 
@@ -850,4 +862,17 @@ struct fw_cdev_send_phy_packet {
 	__u32 generation;
 };
 
+/**
+ * struct fw_cdev_receive_phy_packets - start reception of PHY packets
+ * @closure: Passed back to userspace in phy packet events
+ *
+ * This ioctl activates issuing of %FW_CDEV_EVENT_PHY_PACKET_RECEIVED due to
+ * incoming PHY packets from any node on the same bus as the device.
+ *
+ * The ioctl is only permitted on device files which represent a local node.
+ */
+struct fw_cdev_receive_phy_packets {
+	__u64 closure;
+};
+
 #endif /* _LINUX_FIREWIRE_CDEV_H */
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 0c38b8e9772..d974aa4a24c 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -111,9 +111,10 @@ struct fw_card {
 	bool beta_repeaters_present;
 
 	int index;
-
 	struct list_head link;
 
+	struct list_head phy_receiver_list;
+
 	struct delayed_work br_work; /* bus reset job */
 	bool br_short;
 
-- 
cgit v1.2.3-70-g09d2


From cc550216ae9a2993ef3973464714dc1a39ab1f86 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 18 Jul 2010 13:00:50 +0200
Subject: firewire: cdev: add PHY pinging

This extends the FW_CDEV_IOC_SEND_PHY_PACKET ioctl() for /dev/fw* to be
useful for ping time measurements.  One application for it would be gap
count optimization in userspace that is based on ping times rather than
hop count.  (The latter is implemented in firewire-core itself but is
not applicable to beta PHYs that act as repeaters.)

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-cdev.c  | 9 ++++++---
 drivers/firewire/core.h       | 5 +++++
 drivers/firewire/ohci.c       | 3 +++
 include/linux/firewire-cdev.h | 5 ++++-
 4 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 0425dd5dfcd..31863cf8b6c 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -1423,9 +1423,10 @@ static void outbound_phy_packet_callback(struct fw_packet *packet,
 	/* stale generation; cancelled; on certain controllers: no ack */
 	default:		e->phy_packet.rcode = status;		break;
 	}
+	e->phy_packet.data[0] = packet->timestamp;
 
-	queue_event(e->client, &e->event,
-		    &e->phy_packet, sizeof(e->phy_packet), NULL, 0);
+	queue_event(e->client, &e->event, &e->phy_packet,
+		    sizeof(e->phy_packet) + e->phy_packet.length, NULL, 0);
 	client_put(e->client);
 }
 
@@ -1439,7 +1440,7 @@ static int ioctl_send_phy_packet(struct client *client, union ioctl_arg *arg)
 	if (!client->device->is_local)
 		return -ENOSYS;
 
-	e = kzalloc(sizeof(*e), GFP_KERNEL);
+	e = kzalloc(sizeof(*e) + 4, GFP_KERNEL);
 	if (e == NULL)
 		return -ENOMEM;
 
@@ -1453,6 +1454,8 @@ static int ioctl_send_phy_packet(struct client *client, union ioctl_arg *arg)
 	e->p.callback		= outbound_phy_packet_callback;
 	e->phy_packet.closure	= a->closure;
 	e->phy_packet.type	= FW_CDEV_EVENT_PHY_PACKET_SENT;
+	if (is_ping_packet(a->data))
+			e->phy_packet.length = 4;
 
 	card->driver->send_request(card, &e->p);
 
diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
index 3102b6b6343..28621e44b11 100644
--- a/drivers/firewire/core.h
+++ b/drivers/firewire/core.h
@@ -234,4 +234,9 @@ void fw_fill_response(struct fw_packet *response, u32 *request_header,
 void fw_send_phy_config(struct fw_card *card,
 			int node_id, int generation, int gap_count);
 
+static inline bool is_ping_packet(u32 *data)
+{
+	return (data[0] & 0xc0ffffff) == 0 && ~data[0] == data[1];
+}
+
 #endif /* _FIREWIRE_CORE_H */
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 08afccc6633..5f6bb2c5380 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -1068,6 +1068,9 @@ static int at_context_queue_packet(struct context *ctx,
 		header[1] = cpu_to_le32(packet->header[0]);
 		header[2] = cpu_to_le32(packet->header[1]);
 		d[0].req_count = cpu_to_le16(12);
+
+		if (is_ping_packet(packet->header))
+			d[0].control |= cpu_to_le16(DESCRIPTOR_PING);
 		break;
 
 	case 4:
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index b8740916079..da0fec7e8dc 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -294,7 +294,10 @@ struct fw_cdev_event_iso_resource {
  * @length:	Data length in bytes
  * @data:	Incoming data
  *
- * If @type is %FW_CDEV_EVENT_PHY_PACKET_SENT, @length is 0 and @data empty.
+ * If @type is %FW_CDEV_EVENT_PHY_PACKET_SENT, @length is 0 and @data empty,
+ * except in case of a ping packet:  Then, @length is 4, and @data[0] is the
+ * ping time in 49.152MHz clocks if @rcode is %RCODE_COMPLETE.
+ *
  * If @type is %FW_CDEV_EVENT_PHY_PACKET_RECEIVED, @length is 8 and @data
  * consists of the two PHY packet quadlets, in host byte order.
  */
-- 
cgit v1.2.3-70-g09d2


From 8e2b2b46ea4ca5ef790dddf78b360ed736a62d7c Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Fri, 23 Jul 2010 13:05:39 +0200
Subject: firewire: cdev: improve FW_CDEV_IOC_ALLOCATE

In both the ieee1394 stack and the firewire stack, the core treats
kernelspace drivers better than userspace drivers when it comes to
CSR address range allocation:  The former may request a register to be
placed automatically at a free spot anywhere inside a specified address
range.  The latter may only request a register at a fixed offset.

Hence, userspace drivers which do not require a fixed offset potentially
need to implement a retry loop with incremented offset in each retry
until the kernel does not fail allocation with EBUSY.  This awkward
procedure is not fundamentally necessary as the core already provides a
superior allocation API to kernelspace drivers.

Therefore change the ioctl() ABI by addition of a region_end member in
the existing struct fw_cdev_allocate.  Userspace and kernelspace APIs
work the same way now.

There is a small cost to pay by clients though:  If client source code
is required to compile with older kernel headers too, then any use of
the new member fw_cdev_allocate.region_end needs to be enclosed by
#ifdef/#endif directives.  However, any client program that seriously
wants to use address range allocations will require a kernel of cdev ABI
version >= 4 at runtime and a linux/firewire-cdev.h header of >= 4
anyway.  This is because v4 brings FW_CDEV_EVENT_REQUEST2.  The only
client program in which build-time compatibility with struct
fw_cdev_allocate as found in older kernel headers makes sense is
libraw1394.

(libraw1394 uses the older broken FW_CDEV_EVENT_REQUEST to implement a
makeshift, incorrect transaction responder that does at least work
somewhat in many simple scenarios, relying on guesswork by libraw1394
and by libraw1394 based applications.  Plus, address range allocation
and transaction responder is only one of many features that libraw1394
needs to provide, and these other features need to work with kernel and
kernel-headers as old as possible.  Any new linux/firewire-cdev.h based
client that implements a transaction responder should never attempt to
do it like libraw1394;  instead it should make a header and kernel of v4
or later a hard requirement.)

While we are at it, update the struct fw_cdev_allocate documentation to
better reflect the recent fw_cdev_event_request2 ABI addition.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-cdev.c  | 12 +++++++++---
 include/linux/firewire-cdev.h | 29 ++++++++++++++++++++++++-----
 2 files changed, 33 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 31863cf8b6c..f40098dec14 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -50,8 +50,9 @@
 /*
  * ABI version history is documented in linux/firewire-cdev.h.
  */
-#define FW_CDEV_KERNEL_VERSION		4
-#define FW_CDEV_VERSION_EVENT_REQUEST2	4
+#define FW_CDEV_KERNEL_VERSION			4
+#define FW_CDEV_VERSION_EVENT_REQUEST2		4
+#define FW_CDEV_VERSION_ALLOCATE_REGION_END	4
 
 struct client {
 	u32 version;
@@ -773,7 +774,11 @@ static int ioctl_allocate(struct client *client, union ioctl_arg *arg)
 		return -ENOMEM;
 
 	region.start = a->offset;
-	region.end   = a->offset + a->length;
+	if (client->version < FW_CDEV_VERSION_ALLOCATE_REGION_END)
+		region.end = a->offset + a->length;
+	else
+		region.end = a->region_end;
+
 	r->handler.length           = a->length;
 	r->handler.address_callback = handle_request;
 	r->handler.callback_data    = r;
@@ -785,6 +790,7 @@ static int ioctl_allocate(struct client *client, union ioctl_arg *arg)
 		kfree(r);
 		return ret;
 	}
+	a->offset = r->handler.offset;
 
 	r->resource.release = release_address_handler;
 	ret = add_client_resource(client, &r->resource, GFP_KERNEL);
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index da0fec7e8dc..14831119ff7 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -394,6 +394,7 @@ union fw_cdev_event {
  *  4  (2.6.36)  - added %FW_CDEV_EVENT_REQUEST2, %FW_CDEV_EVENT_PHY_PACKET_*
  *               - implemented &fw_cdev_event_bus_reset.bm_node_id
  *               - added %FW_CDEV_IOC_SEND_PHY_PACKET, _RECEIVE_PHY_PACKETS
+ *               - added &fw_cdev_allocate.region_end
  */
 #define FW_CDEV_VERSION 3 /* Meaningless; don't use this macro. */
 
@@ -473,17 +474,21 @@ struct fw_cdev_send_response {
 };
 
 /**
- * struct fw_cdev_allocate - Allocate a CSR address range
+ * struct fw_cdev_allocate - Allocate a CSR in an address range
  * @offset:	Start offset of the address range
  * @closure:	To be passed back to userspace in request events
- * @length:	Length of the address range, in bytes
+ * @length:	Length of the CSR, in bytes
  * @handle:	Handle to the allocation, written by the kernel
+ * @region_end:	First address above the address range (added in ABI v4, 2.6.36)
  *
  * Allocate an address range in the 48-bit address space on the local node
  * (the controller).  This allows userspace to listen for requests with an
- * offset within that address range.  When the kernel receives a request
- * within the range, an &fw_cdev_event_request event will be written back.
- * The @closure field is passed back to userspace in the response event.
+ * offset within that address range.  Every time when the kernel receives a
+ * request within the range, an &fw_cdev_event_request2 event will be emitted.
+ * (If the kernel or the client implements ABI version <= 3, an
+ * &fw_cdev_event_request will be generated instead.)
+ *
+ * The @closure field is passed back to userspace in these request events.
  * The @handle field is an out parameter, returning a handle to the allocated
  * range to be used for later deallocation of the range.
  *
@@ -491,12 +496,26 @@ struct fw_cdev_send_response {
  * is exclusive except for the FCP command and response registers.  If an
  * exclusive address region is already in use, the ioctl fails with errno set
  * to %EBUSY.
+ *
+ * If kernel and client implement ABI version >= 4, the kernel looks up a free
+ * spot of size @length inside [@offset..@region_end) and, if found, writes
+ * the start address of the new CSR back in @offset.  I.e. @offset is an
+ * in and out parameter.  If this automatic placement of a CSR in a bigger
+ * address range is not desired, the client simply needs to set @region_end
+ * = @offset + @length.
+ *
+ * If the kernel or the client implements ABI version <= 3, @region_end is
+ * ignored and effectively assumed to be @offset + @length.
+ *
+ * @region_end is only present in a kernel header >= 2.6.36.  If necessary,
+ * this can for example be tested by #ifdef FW_CDEV_EVENT_REQUEST2.
  */
 struct fw_cdev_allocate {
 	__u64 offset;
 	__u64 closure;
 	__u32 length;
 	__u32 handle;
+	__u64 region_end;	/* available since kernel version 2.6.36 */
 };
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 49daf6a22622d4e1619aeaad5f9f0472bf89daff Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Fri, 23 Jul 2010 14:07:47 +0200
Subject: xt_quota: report initial quota value instead of current value to
 userspace

We should copy the initial value to userspace for iptables-save and
to allow removal of specific quota rules.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/xt_quota.h | 2 +-
 net/netfilter/xt_quota.c           | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/xt_quota.h b/include/linux/netfilter/xt_quota.h
index 8dc89dfc136..b0d28c659ab 100644
--- a/include/linux/netfilter/xt_quota.h
+++ b/include/linux/netfilter/xt_quota.h
@@ -11,9 +11,9 @@ struct xt_quota_priv;
 struct xt_quota_info {
 	u_int32_t		flags;
 	u_int32_t		pad;
+	aligned_u64		quota;
 
 	/* Used internally by the kernel */
-	aligned_u64		quota;
 	struct xt_quota_priv	*master;
 };
 
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 304b1fda1a0..70eb2b4984d 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -36,8 +36,6 @@ quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		/* we do not allow even small packets from now on */
 		priv->quota = 0;
 	}
-	/* Copy quota back to matchinfo so that iptables can display it */
-	q->quota = priv->quota;
 	spin_unlock_bh(&priv->lock);
 
 	return ret;
-- 
cgit v1.2.3-70-g09d2


From 22b8f15c2f7130bb0386f548428df2ffd4e81903 Mon Sep 17 00:00:00 2001
From: Patrick Pannuto <ppannuto@codeaurora.org>
Date: Mon, 19 Jul 2010 15:09:26 -0700
Subject: timer: Added usleep[_range] timer

usleep[_range] are finer precision implementations of msleep
and are designed to be drop-in replacements for udelay where
a precise sleep / busy-wait is unnecessary. They also allow
an easy interface to specify slack when a precise (ish)
wakeup is unnecessary, to help minimize wakeups.

Signed-off-by: Patrick Pannuto <ppannuto@codeaurora.org>
Cc: akinobu.mita@gmail.com
Cc: sboyd@codeaurora.org
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
LKML-Reference: <4C44CDD2.1070708@codeaurora.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/delay.h |  6 ++++++
 kernel/timer.c        | 22 ++++++++++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/linux/delay.h b/include/linux/delay.h
index fd832c6d419..0e303d1aacd 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -45,6 +45,12 @@ extern unsigned long lpj_fine;
 void calibrate_delay(void);
 void msleep(unsigned int msecs);
 unsigned long msleep_interruptible(unsigned int msecs);
+void usleep_range(unsigned long min, unsigned long max);
+
+static inline void usleep(unsigned long usecs)
+{
+	usleep_range(usecs, usecs);
+}
 
 static inline void ssleep(unsigned int seconds)
 {
diff --git a/kernel/timer.c b/kernel/timer.c
index ce98685cd1c..f110f241ab6 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1755,3 +1755,25 @@ unsigned long msleep_interruptible(unsigned int msecs)
 }
 
 EXPORT_SYMBOL(msleep_interruptible);
+
+static int __sched do_usleep_range(unsigned long min, unsigned long max)
+{
+	ktime_t kmin;
+	unsigned long delta;
+
+	kmin = ktime_set(0, min * NSEC_PER_USEC);
+	delta = max - min;
+	return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
+}
+
+/**
+ * usleep_range - Drop in replacement for udelay where wakeup is flexible
+ * @min: Minimum time in usecs to sleep
+ * @max: Maximum time in usecs to sleep
+ */
+void usleep_range(unsigned long min, unsigned long max)
+{
+	__set_current_state(TASK_UNINTERRUPTIBLE);
+	do_usleep_range(min, max);
+}
+EXPORT_SYMBOL(usleep_range);
-- 
cgit v1.2.3-70-g09d2


From eca3930163ba8884060ce9d9ff5ef0d9b7c7b00f Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 8 Jun 2010 07:48:21 -0600
Subject: of: Merge of_platform_bus_type with platform_bus_type

of_platform_bus was being used in the same manner as the platform_bus.
The only difference being that of_platform_bus devices are generated
from data in the device tree, and platform_bus devices are usually
statically allocated in platform code.  Having them separate causes
the problem of device drivers having to be registered twice if it
was possible for the same device to appear on either bus.

This patch removes of_platform_bus_type and registers all of_platform
bus devices and drivers on the platform bus instead.  A previous patch
made the of_device structure an alias for the platform_device structure,
and a shim is used to adapt of_platform_drivers to the platform bus.

After all of the of_platform_bus drivers are converted to be normal platform
drivers, the shim code can be removed.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
---
 arch/microblaze/kernel/of_platform.c     | 11 ------
 arch/microblaze/kernel/setup.c           |  6 ---
 arch/powerpc/kernel/dma-swiotlb.c        |  8 ----
 arch/powerpc/kernel/of_platform.c        | 12 ------
 arch/powerpc/kernel/setup-common.c       |  7 ----
 arch/powerpc/platforms/cell/beat_iommu.c |  2 +-
 arch/powerpc/platforms/cell/iommu.c      |  2 +-
 arch/powerpc/sysdev/mv64x60_dev.c        |  7 +---
 arch/sparc/kernel/of_device_32.c         | 21 +++-------
 arch/sparc/kernel/of_device_64.c         | 21 +++-------
 arch/sparc/kernel/of_device_common.c     |  3 --
 drivers/base/platform.c                  |  6 +++
 drivers/of/device.c                      |  5 +++
 drivers/of/platform.c                    | 67 ++++++++++++++++++++++++++++++--
 include/linux/of_device.h                |  6 +++
 include/linux/of_platform.h              | 21 ++++------
 16 files changed, 102 insertions(+), 103 deletions(-)

(limited to 'include')

diff --git a/arch/microblaze/kernel/of_platform.c b/arch/microblaze/kernel/of_platform.c
index da79edf4542..fb286610433 100644
--- a/arch/microblaze/kernel/of_platform.c
+++ b/arch/microblaze/kernel/of_platform.c
@@ -26,17 +26,6 @@
 #include <linux/topology.h>
 #include <asm/atomic.h>
 
-struct bus_type of_platform_bus_type = {
-       .uevent	= of_device_uevent,
-};
-EXPORT_SYMBOL(of_platform_bus_type);
-
-static int __init of_bus_driver_init(void)
-{
-	return of_bus_type_init(&of_platform_bus_type, "of_platform");
-}
-postcore_initcall(of_bus_driver_init);
-
 /*
  * The list of OF IDs below is used for matching bus types in the
  * system whose devices are to be exposed as of_platform_devices.
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index 17c98dbcec8..f5f76884235 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -213,15 +213,9 @@ static struct notifier_block dflt_plat_bus_notifier = {
 	.priority = INT_MAX,
 };
 
-static struct notifier_block dflt_of_bus_notifier = {
-	.notifier_call = dflt_bus_notify,
-	.priority = INT_MAX,
-};
-
 static int __init setup_bus_notifier(void)
 {
 	bus_register_notifier(&platform_bus_type, &dflt_plat_bus_notifier);
-	bus_register_notifier(&of_platform_bus_type, &dflt_of_bus_notifier);
 
 	return 0;
 }
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 02f724f3675..4295e0b94b2 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -82,17 +82,9 @@ static struct notifier_block ppc_swiotlb_plat_bus_notifier = {
 	.priority = 0,
 };
 
-static struct notifier_block ppc_swiotlb_of_bus_notifier = {
-	.notifier_call = ppc_swiotlb_bus_notify,
-	.priority = 0,
-};
-
 int __init swiotlb_setup_bus_notifier(void)
 {
 	bus_register_notifier(&platform_bus_type,
 			      &ppc_swiotlb_plat_bus_notifier);
-	bus_register_notifier(&of_platform_bus_type,
-			      &ppc_swiotlb_of_bus_notifier);
-
 	return 0;
 }
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 4e0a2f7c1dd..d3497cd81e8 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -52,18 +52,6 @@ const struct of_device_id of_default_bus_ids[] = {
 	{},
 };
 
-struct bus_type of_platform_bus_type = {
-       .uevent	= of_device_uevent,
-};
-EXPORT_SYMBOL(of_platform_bus_type);
-
-static int __init of_bus_driver_init(void)
-{
-	return of_bus_type_init(&of_platform_bus_type, "of_platform");
-}
-
-postcore_initcall(of_bus_driver_init);
-
 static int of_dev_node_match(struct device *dev, void *data)
 {
 	return to_of_device(dev)->dev.of_node == data;
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index b7e6c7e193a..d1a5304b3dd 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -701,16 +701,9 @@ static struct notifier_block ppc_dflt_plat_bus_notifier = {
 	.priority = INT_MAX,
 };
 
-static struct notifier_block ppc_dflt_of_bus_notifier = {
-	.notifier_call = ppc_dflt_bus_notify,
-	.priority = INT_MAX,
-};
-
 static int __init setup_bus_notifier(void)
 {
 	bus_register_notifier(&platform_bus_type, &ppc_dflt_plat_bus_notifier);
-	bus_register_notifier(&of_platform_bus_type, &ppc_dflt_of_bus_notifier);
-
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/cell/beat_iommu.c b/arch/powerpc/platforms/cell/beat_iommu.c
index 39d361c5c6d..beec405eb6f 100644
--- a/arch/powerpc/platforms/cell/beat_iommu.c
+++ b/arch/powerpc/platforms/cell/beat_iommu.c
@@ -108,7 +108,7 @@ static int __init celleb_init_iommu(void)
 	celleb_init_direct_mapping();
 	set_pci_dma_ops(&dma_direct_ops);
 	ppc_md.pci_dma_dev_setup = celleb_pci_dma_dev_setup;
-	bus_register_notifier(&of_platform_bus_type, &celleb_of_bus_notifier);
+	bus_register_notifier(&platform_bus_type, &celleb_of_bus_notifier);
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 3712900471b..58b13ce3847 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -1204,7 +1204,7 @@ static int __init cell_iommu_init(void)
 	/* Register callbacks on OF platform device addition/removal
 	 * to handle linking them to the right DMA operations
 	 */
-	bus_register_notifier(&of_platform_bus_type, &cell_of_bus_notifier);
+	bus_register_notifier(&platform_bus_type, &cell_of_bus_notifier);
 
 	return 0;
 }
diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c
index 31acd3b1718..1398bc45499 100644
--- a/arch/powerpc/sysdev/mv64x60_dev.c
+++ b/arch/powerpc/sysdev/mv64x60_dev.c
@@ -20,12 +20,7 @@
 
 #include <asm/prom.h>
 
-/*
- * These functions provide the necessary setup for the mv64x60 drivers.
- * These drivers are unusual in that they work on both the MIPS and PowerPC
- * architectures.  Because of that, the drivers do not support the normal
- * PowerPC of_platform_bus_type.  They support platform_bus_type instead.
- */
+/* These functions provide the necessary setup for the mv64x60 drivers. */
 
 static struct of_device_id __initdata of_mv64x60_devices[] = {
 	{ .compatible = "marvell,mv64306-devctrl", },
diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c
index 331de91ad2b..75fc9d5cd7e 100644
--- a/arch/sparc/kernel/of_device_32.c
+++ b/arch/sparc/kernel/of_device_32.c
@@ -424,7 +424,7 @@ build_resources:
 	build_device_resources(op, parent);
 
 	op->dev.parent = parent;
-	op->dev.bus = &of_platform_bus_type;
+	op->dev.bus = &platform_bus_type;
 	if (!parent)
 		dev_set_name(&op->dev, "root");
 	else
@@ -452,30 +452,19 @@ static void __init scan_tree(struct device_node *dp, struct device *parent)
 	}
 }
 
-static void __init scan_of_devices(void)
+static int __init scan_of_devices(void)
 {
 	struct device_node *root = of_find_node_by_path("/");
 	struct of_device *parent;
 
 	parent = scan_one_device(root, NULL);
 	if (!parent)
-		return;
+		return 0;
 
 	scan_tree(root->child, &parent->dev);
+	return 0;
 }
-
-static int __init of_bus_driver_init(void)
-{
-	int err;
-
-	err = of_bus_type_init(&of_platform_bus_type, "of");
-	if (!err)
-		scan_of_devices();
-
-	return err;
-}
-
-postcore_initcall(of_bus_driver_init);
+postcore_initcall(scan_of_devices);
 
 static int __init of_debug(char *str)
 {
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index 5e8cbb942d3..9743d1d9fa0 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -667,7 +667,7 @@ static struct of_device * __init scan_one_device(struct device_node *dp,
 		op->archdata.irqs[i] = build_one_device_irq(op, parent, op->archdata.irqs[i]);
 
 	op->dev.parent = parent;
-	op->dev.bus = &of_platform_bus_type;
+	op->dev.bus = &platform_bus_type;
 	if (!parent)
 		dev_set_name(&op->dev, "root");
 	else
@@ -695,30 +695,19 @@ static void __init scan_tree(struct device_node *dp, struct device *parent)
 	}
 }
 
-static void __init scan_of_devices(void)
+static int __init scan_of_devices(void)
 {
 	struct device_node *root = of_find_node_by_path("/");
 	struct of_device *parent;
 
 	parent = scan_one_device(root, NULL);
 	if (!parent)
-		return;
+		return 0;
 
 	scan_tree(root->child, &parent->dev);
+	return 0;
 }
-
-static int __init of_bus_driver_init(void)
-{
-	int err;
-
-	err = of_bus_type_init(&of_platform_bus_type, "of");
-	if (!err)
-		scan_of_devices();
-
-	return err;
-}
-
-postcore_initcall(of_bus_driver_init);
+postcore_initcall(scan_of_devices);
 
 static int __init of_debug(char *str)
 {
diff --git a/arch/sparc/kernel/of_device_common.c b/arch/sparc/kernel/of_device_common.c
index 016c947d4ca..01f380c7995 100644
--- a/arch/sparc/kernel/of_device_common.c
+++ b/arch/sparc/kernel/of_device_common.c
@@ -64,9 +64,6 @@ void of_propagate_archdata(struct of_device *bus)
 	}
 }
 
-struct bus_type of_platform_bus_type;
-EXPORT_SYMBOL(of_platform_bus_type);
-
 static void get_cells(struct device_node *dp, int *addrc, int *sizec)
 {
 	if (addrc)
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index fac3633c722..f699fabf403 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -636,6 +636,12 @@ static struct device_attribute platform_dev_attrs[] = {
 static int platform_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	struct platform_device	*pdev = to_platform_device(dev);
+	int rc;
+
+	/* Some devices have extra OF data and an OF-style MODALIAS */
+	rc = of_device_uevent(dev,env);
+	if (rc != -ENODEV)
+		return rc;
 
 	add_uevent_var(env, "MODALIAS=%s%s", PLATFORM_MODULE_PREFIX,
 		(pdev->id_entry) ? pdev->id_entry->name : pdev->name);
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 5282a202f5a..12a44b49351 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -104,6 +104,11 @@ int of_device_register(struct of_device *ofdev)
 
 	device_initialize(&ofdev->dev);
 
+	/* name and id have to be set so that the platform bus doesn't get
+	 * confused on matching */
+	ofdev->name = dev_name(&ofdev->dev);
+	ofdev->id = -1;
+
 	/* device_add will assume that this device is on the same node as
 	 * the parent. If there is no parent defined, set the node
 	 * explicitly */
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 9d3d932bcb6..712dfd866df 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -20,6 +20,54 @@
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
+
+static int platform_driver_probe_shim(struct platform_device *pdev)
+{
+	struct platform_driver *pdrv;
+	struct of_platform_driver *ofpdrv;
+	const struct of_device_id *match;
+
+	pdrv = container_of(pdev->dev.driver, struct platform_driver, driver);
+	ofpdrv = container_of(pdrv, struct of_platform_driver, platform_driver);
+	match = of_match_device(pdev->dev.driver->of_match_table, &pdev->dev);
+	return ofpdrv->probe(pdev, match);
+}
+
+static void platform_driver_shutdown_shim(struct platform_device *pdev)
+{
+	struct platform_driver *pdrv;
+	struct of_platform_driver *ofpdrv;
+
+	pdrv = container_of(pdev->dev.driver, struct platform_driver, driver);
+	ofpdrv = container_of(pdrv, struct of_platform_driver, platform_driver);
+	ofpdrv->shutdown(pdev);
+}
+
+/**
+ * of_register_platform_driver
+ */
+int of_register_platform_driver(struct of_platform_driver *drv)
+{
+	/* setup of_platform_driver to platform_driver adaptors */
+	drv->platform_driver.driver = drv->driver;
+	if (drv->probe)
+		drv->platform_driver.probe = platform_driver_probe_shim;
+	drv->platform_driver.remove = drv->remove;
+	if (drv->shutdown)
+		drv->platform_driver.shutdown = platform_driver_shutdown_shim;
+	drv->platform_driver.suspend = drv->suspend;
+	drv->platform_driver.resume = drv->resume;
+
+	return platform_driver_register(&drv->platform_driver);
+}
+EXPORT_SYMBOL(of_register_platform_driver);
+
+void of_unregister_platform_driver(struct of_platform_driver *drv)
+{
+	platform_driver_unregister(&drv->platform_driver);
+}
+EXPORT_SYMBOL(of_unregister_platform_driver);
 
 #if defined(CONFIG_PPC_DCR)
 #include <asm/dcr.h>
@@ -392,16 +440,29 @@ int of_bus_type_init(struct bus_type *bus, const char *name)
 
 int of_register_driver(struct of_platform_driver *drv, struct bus_type *bus)
 {
-	drv->driver.bus = bus;
+	/*
+	 * Temporary: of_platform_bus used to be distinct from the platform
+	 * bus.  It isn't anymore, and so drivers on the platform bus need
+	 * to be registered in a special way.
+	 *
+	 * After all of_platform_bus_type drivers are converted to
+	 * platform_drivers, this exception can be removed.
+	 */
+	if (bus == &platform_bus_type)
+		return of_register_platform_driver(drv);
 
 	/* register with core */
+	drv->driver.bus = bus;
 	return driver_register(&drv->driver);
 }
 EXPORT_SYMBOL(of_register_driver);
 
 void of_unregister_driver(struct of_platform_driver *drv)
 {
-	driver_unregister(&drv->driver);
+	if (drv->driver.bus == &platform_bus_type)
+		of_unregister_platform_driver(drv);
+	else
+		driver_unregister(&drv->driver);
 }
 EXPORT_SYMBOL(of_unregister_driver);
 
@@ -548,7 +609,7 @@ struct of_device *of_platform_device_create(struct device_node *np,
 	dev->archdata.dma_mask = 0xffffffffUL;
 #endif
 	dev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
-	dev->dev.bus = &of_platform_bus_type;
+	dev->dev.bus = &platform_bus_type;
 
 	/* We do not fill the DMA ops for platform devices by default.
 	 * This is currently the responsibility of the platform code
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index 7d27f5a878f..8cd1fe7864e 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -65,6 +65,12 @@ static inline int of_driver_match_device(struct device *dev,
 	return 0;
 }
 
+static inline int of_device_uevent(struct device *dev,
+				   struct kobj_uevent_env *env)
+{
+	return -ENODEV;
+}
+
 #endif /* CONFIG_OF_DEVICE */
 
 #endif /* _LINUX_OF_DEVICE_H */
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index a51fd30176a..133ecf31a60 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -17,19 +17,19 @@
 #include <linux/mod_devicetable.h>
 #include <linux/pm.h>
 #include <linux/of_device.h>
+#include <linux/platform_device.h>
 
 /*
- * The of_platform_bus_type is a bus type used by drivers that do not
- * attach to a macio or similar bus but still use OF probing
- * mechanism
+ * of_platform_bus_type isn't it's own bus anymore.  It's now just an alias
+ * for the platform bus.
  */
-extern struct bus_type of_platform_bus_type;
+#define of_platform_bus_type platform_bus_type
 
 extern const struct of_device_id of_default_bus_ids[];
 
 /*
  * An of_platform_driver driver is attached to a basic of_device on
- * the "platform bus" (of_platform_bus_type).
+ * the "platform bus" (platform_bus_type).
  */
 struct of_platform_driver
 {
@@ -42,6 +42,7 @@ struct of_platform_driver
 	int	(*shutdown)(struct of_device* dev);
 
 	struct device_driver	driver;
+	struct platform_driver	platform_driver;
 };
 #define	to_of_platform_driver(drv) \
 	container_of(drv,struct of_platform_driver, driver)
@@ -51,14 +52,8 @@ extern int of_register_driver(struct of_platform_driver *drv,
 extern void of_unregister_driver(struct of_platform_driver *drv);
 
 /* Platform drivers register/unregister */
-static inline int of_register_platform_driver(struct of_platform_driver *drv)
-{
-	return of_register_driver(drv, &of_platform_bus_type);
-}
-static inline void of_unregister_platform_driver(struct of_platform_driver *drv)
-{
-	of_unregister_driver(drv);
-}
+extern int of_register_platform_driver(struct of_platform_driver *drv);
+extern void of_unregister_platform_driver(struct of_platform_driver *drv);
 
 extern struct of_device *of_device_alloc(struct device_node *np,
 					 const char *bus_id,
-- 
cgit v1.2.3-70-g09d2


From 1ab1d63a85cee2545272f63a7644e9f855cb65d0 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 24 Jun 2010 15:14:37 -0600
Subject: of/platform: remove all of_bus_type and of_platform_bus_type
 references

Both of_bus_type and of_platform_bus_type are just #define aliases
for the platform bus.  This patch removes all references to them and
switches to the of_register_platform_driver()/of_unregister_platform_driver()
API for registering.

Subsequent patches will convert each user of of_register_platform_driver()
into plain platform_drivers without the of_platform_driver shim, at which
point the of_register_platform_driver()/of_unregister_platform_driver()
functions can be removed.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
---
 arch/microblaze/kernel/of_platform.c |  3 +--
 arch/powerpc/kernel/of_platform.c    |  3 +--
 arch/sparc/include/asm/of_platform.h |  2 --
 arch/sparc/include/asm/parport.h     |  4 +---
 arch/sparc/kernel/apc.c              |  2 +-
 arch/sparc/kernel/auxio_64.c         |  2 +-
 arch/sparc/kernel/central.c          |  4 ++--
 arch/sparc/kernel/chmc.c             |  4 ++--
 arch/sparc/kernel/of_device_common.c |  2 +-
 arch/sparc/kernel/pci_fire.c         |  2 +-
 arch/sparc/kernel/pci_psycho.c       |  2 +-
 arch/sparc/kernel/pci_sabre.c        |  2 +-
 arch/sparc/kernel/pci_schizo.c       |  2 +-
 arch/sparc/kernel/pci_sun4v.c        |  2 +-
 arch/sparc/kernel/pmc.c              |  2 +-
 arch/sparc/kernel/power.c            |  2 +-
 arch/sparc/kernel/time_32.c          |  2 +-
 arch/sparc/kernel/time_64.c          |  6 +++---
 drivers/atm/fore200e.c               |  6 +++---
 drivers/char/hw_random/n2-drv.c      |  4 ++--
 drivers/crypto/n2_core.c             | 10 +++++-----
 drivers/hwmon/ultra45_env.c          |  4 ++--
 drivers/input/misc/sparcspkr.c       | 12 +++++-------
 drivers/input/serio/i8042-sparcio.h  |  5 ++---
 drivers/mtd/maps/sun_uflash.c        |  4 ++--
 drivers/net/ibm_newemac/core.c       |  4 ++--
 drivers/net/myri_sbus.c              |  4 ++--
 drivers/net/niu.c                    |  6 +++---
 drivers/net/sunbmac.c                |  4 ++--
 drivers/net/sunhme.c                 |  4 ++--
 drivers/net/sunlance.c               |  4 ++--
 drivers/net/sunqe.c                  |  4 ++--
 drivers/parport/parport_sunbpp.c     |  4 ++--
 drivers/sbus/char/bbc_i2c.c          |  4 ++--
 drivers/sbus/char/display7seg.c      |  4 ++--
 drivers/sbus/char/envctrl.c          |  4 ++--
 drivers/sbus/char/flash.c            |  4 ++--
 drivers/sbus/char/uctrl.c            |  4 ++--
 drivers/scsi/qlogicpti.c             |  4 ++--
 drivers/scsi/sun_esp.c               |  4 ++--
 drivers/serial/sunhv.c               |  4 ++--
 drivers/serial/sunsab.c              |  4 ++--
 drivers/serial/sunsu.c               |  2 +-
 drivers/serial/sunzilog.c            |  6 +++---
 drivers/video/bw2.c                  |  4 ++--
 drivers/video/cg14.c                 |  4 ++--
 drivers/video/cg3.c                  |  4 ++--
 drivers/video/cg6.c                  |  4 ++--
 drivers/video/ffb.c                  |  4 ++--
 drivers/video/leo.c                  |  4 ++--
 drivers/video/p9100.c                |  4 ++--
 drivers/video/sunxvr1000.c           |  4 ++--
 drivers/video/tcx.c                  |  4 ++--
 drivers/watchdog/cpwd.c              |  4 ++--
 drivers/watchdog/riowd.c             |  4 ++--
 include/linux/of_platform.h          |  6 ------
 sound/sparc/amd7930.c                |  4 ++--
 sound/sparc/cs4231.c                 |  4 ++--
 sound/sparc/dbri.c                   |  4 ++--
 59 files changed, 109 insertions(+), 124 deletions(-)

(limited to 'include')

diff --git a/arch/microblaze/kernel/of_platform.c b/arch/microblaze/kernel/of_platform.c
index fb286610433..80c9c493cb2 100644
--- a/arch/microblaze/kernel/of_platform.c
+++ b/arch/microblaze/kernel/of_platform.c
@@ -57,8 +57,7 @@ struct of_device *of_find_device_by_node(struct device_node *np)
 {
 	struct device *dev;
 
-	dev = bus_find_device(&of_platform_bus_type,
-			      NULL, np, of_dev_node_match);
+	dev = bus_find_device(&platform_bus_type, NULL, np, of_dev_node_match);
 	if (dev)
 		return to_of_device(dev);
 	return NULL;
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index d3497cd81e8..b093d4b1f09 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -61,8 +61,7 @@ struct of_device *of_find_device_by_node(struct device_node *np)
 {
 	struct device *dev;
 
-	dev = bus_find_device(&of_platform_bus_type,
-			      NULL, np, of_dev_node_match);
+	dev = bus_find_device(&platform_bus_type, NULL, np, of_dev_node_match);
 	if (dev)
 		return to_of_device(dev);
 	return NULL;
diff --git a/arch/sparc/include/asm/of_platform.h b/arch/sparc/include/asm/of_platform.h
index 90da99059f8..26540ddfc51 100644
--- a/arch/sparc/include/asm/of_platform.h
+++ b/arch/sparc/include/asm/of_platform.h
@@ -13,6 +13,4 @@
  *
  */
 
-#define of_bus_type	of_platform_bus_type	/* for compatibility */
-
 #endif
diff --git a/arch/sparc/include/asm/parport.h b/arch/sparc/include/asm/parport.h
index 0c34a8792fc..4891fbce111 100644
--- a/arch/sparc/include/asm/parport.h
+++ b/arch/sparc/include/asm/parport.h
@@ -243,9 +243,7 @@ static struct of_platform_driver ecpp_driver = {
 
 static int parport_pc_find_nonpci_ports(int autoirq, int autodma)
 {
-	of_register_driver(&ecpp_driver, &of_bus_type);
-
-	return 0;
+	return of_register_platform_driver(&ecpp_driver);
 }
 
 #endif /* !(_ASM_SPARC64_PARPORT_H */
diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c
index b27476caa13..c471251cd3f 100644
--- a/arch/sparc/kernel/apc.c
+++ b/arch/sparc/kernel/apc.c
@@ -184,7 +184,7 @@ static struct of_platform_driver apc_driver = {
 
 static int __init apc_init(void)
 {
-	return of_register_driver(&apc_driver, &of_bus_type);
+	return of_register_platform_driver(&apc_driver);
 }
 
 /* This driver is not critical to the boot process
diff --git a/arch/sparc/kernel/auxio_64.c b/arch/sparc/kernel/auxio_64.c
index ddc84128b3c..46ba58a8510 100644
--- a/arch/sparc/kernel/auxio_64.c
+++ b/arch/sparc/kernel/auxio_64.c
@@ -142,7 +142,7 @@ static struct of_platform_driver auxio_driver = {
 
 static int __init auxio_init(void)
 {
-	return of_register_driver(&auxio_driver, &of_platform_bus_type);
+	return of_register_platform_driver(&auxio_driver);
 }
 
 /* Must be after subsys_initcall() so that busses are probed.  Must
diff --git a/arch/sparc/kernel/central.c b/arch/sparc/kernel/central.c
index 434335f6582..b6080c39ed4 100644
--- a/arch/sparc/kernel/central.c
+++ b/arch/sparc/kernel/central.c
@@ -265,8 +265,8 @@ static struct of_platform_driver fhc_driver = {
 
 static int __init sunfire_init(void)
 {
-	(void) of_register_driver(&fhc_driver, &of_platform_bus_type);
-	(void) of_register_driver(&clock_board_driver, &of_platform_bus_type);
+	(void) of_register_platform_driver(&fhc_driver);
+	(void) of_register_platform_driver(&clock_board_driver);
 	return 0;
 }
 
diff --git a/arch/sparc/kernel/chmc.c b/arch/sparc/kernel/chmc.c
index 870cb65b3f2..04bb7df9f71 100644
--- a/arch/sparc/kernel/chmc.c
+++ b/arch/sparc/kernel/chmc.c
@@ -848,7 +848,7 @@ static int __init us3mc_init(void)
 	ret = register_dimm_printer(us3mc_dimm_printer);
 
 	if (!ret) {
-		ret = of_register_driver(&us3mc_driver, &of_bus_type);
+		ret = of_register_platform_driver(&us3mc_driver);
 		if (ret)
 			unregister_dimm_printer(us3mc_dimm_printer);
 	}
@@ -859,7 +859,7 @@ static void __exit us3mc_cleanup(void)
 {
 	if (us3mc_platform()) {
 		unregister_dimm_printer(us3mc_dimm_printer);
-		of_unregister_driver(&us3mc_driver);
+		of_unregister_platform_driver(&us3mc_driver);
 	}
 }
 
diff --git a/arch/sparc/kernel/of_device_common.c b/arch/sparc/kernel/of_device_common.c
index 01f380c7995..2a5c639e4c3 100644
--- a/arch/sparc/kernel/of_device_common.c
+++ b/arch/sparc/kernel/of_device_common.c
@@ -21,7 +21,7 @@ static int node_match(struct device *dev, void *data)
 
 struct of_device *of_find_device_by_node(struct device_node *dp)
 {
-	struct device *dev = bus_find_device(&of_platform_bus_type, NULL,
+	struct device *dev = bus_find_device(&platform_bus_type, NULL,
 					     dp, node_match);
 
 	if (dev)
diff --git a/arch/sparc/kernel/pci_fire.c b/arch/sparc/kernel/pci_fire.c
index 51cfa09e392..885f10b742e 100644
--- a/arch/sparc/kernel/pci_fire.c
+++ b/arch/sparc/kernel/pci_fire.c
@@ -518,7 +518,7 @@ static struct of_platform_driver fire_driver = {
 
 static int __init fire_init(void)
 {
-	return of_register_driver(&fire_driver, &of_bus_type);
+	return of_register_platform_driver(&fire_driver);
 }
 
 subsys_initcall(fire_init);
diff --git a/arch/sparc/kernel/pci_psycho.c b/arch/sparc/kernel/pci_psycho.c
index 93011e6e7dd..71550a7aacd 100644
--- a/arch/sparc/kernel/pci_psycho.c
+++ b/arch/sparc/kernel/pci_psycho.c
@@ -612,7 +612,7 @@ static struct of_platform_driver psycho_driver = {
 
 static int __init psycho_init(void)
 {
-	return of_register_driver(&psycho_driver, &of_bus_type);
+	return of_register_platform_driver(&psycho_driver);
 }
 
 subsys_initcall(psycho_init);
diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c
index 99c6dba7d4f..2d7bf30552d 100644
--- a/arch/sparc/kernel/pci_sabre.c
+++ b/arch/sparc/kernel/pci_sabre.c
@@ -606,7 +606,7 @@ static struct of_platform_driver sabre_driver = {
 
 static int __init sabre_init(void)
 {
-	return of_register_driver(&sabre_driver, &of_bus_type);
+	return of_register_platform_driver(&sabre_driver);
 }
 
 subsys_initcall(sabre_init);
diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c
index 9041dae7aac..04f29c46bfa 100644
--- a/arch/sparc/kernel/pci_schizo.c
+++ b/arch/sparc/kernel/pci_schizo.c
@@ -1501,7 +1501,7 @@ static struct of_platform_driver schizo_driver = {
 
 static int __init schizo_init(void)
 {
-	return of_register_driver(&schizo_driver, &of_bus_type);
+	return of_register_platform_driver(&schizo_driver);
 }
 
 subsys_initcall(schizo_init);
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index a24af6f7e17..18ee8b6f403 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -1019,7 +1019,7 @@ static struct of_platform_driver pci_sun4v_driver = {
 
 static int __init pci_sun4v_init(void)
 {
-	return of_register_driver(&pci_sun4v_driver, &of_bus_type);
+	return of_register_platform_driver(&pci_sun4v_driver);
 }
 
 subsys_initcall(pci_sun4v_init);
diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c
index 9589d8b9b0c..a4c73edc897 100644
--- a/arch/sparc/kernel/pmc.c
+++ b/arch/sparc/kernel/pmc.c
@@ -89,7 +89,7 @@ static struct of_platform_driver pmc_driver = {
 
 static int __init pmc_init(void)
 {
-	return of_register_driver(&pmc_driver, &of_bus_type);
+	return of_register_platform_driver(&pmc_driver);
 }
 
 /* This driver is not critical to the boot process
diff --git a/arch/sparc/kernel/power.c b/arch/sparc/kernel/power.c
index 1cfee577f6b..abc194ed5a7 100644
--- a/arch/sparc/kernel/power.c
+++ b/arch/sparc/kernel/power.c
@@ -70,7 +70,7 @@ static struct of_platform_driver power_driver = {
 
 static int __init power_init(void)
 {
-	return of_register_driver(&power_driver, &of_platform_bus_type);
+	return of_register_platform_driver(&power_driver);
 }
 
 device_initcall(power_init);
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index e404b063be2..5dc20216952 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -189,7 +189,7 @@ static struct of_platform_driver clock_driver = {
 /* Probe for the mostek real time clock chip. */
 static int __init clock_init(void)
 {
-	return of_register_driver(&clock_driver, &of_platform_bus_type);
+	return of_register_platform_driver(&clock_driver);
 }
 /* Must be after subsys_initcall() so that busses are probed.  Must
  * be before device_initcall() because things like the RTC driver
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index 21e9fcae066..2423b336a71 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -586,9 +586,9 @@ static int __init clock_init(void)
 	if (tlb_type == hypervisor)
 		return platform_device_register(&rtc_sun4v_device);
 
-	(void) of_register_driver(&rtc_driver, &of_platform_bus_type);
-	(void) of_register_driver(&mostek_driver, &of_platform_bus_type);
-	(void) of_register_driver(&bq4802_driver, &of_platform_bus_type);
+	(void) of_register_platform_driver(&rtc_driver);
+	(void) of_register_platform_driver(&mostek_driver);
+	(void) of_register_platform_driver(&bq4802_driver);
 
 	return 0;
 }
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index 38df87b198d..b7385e07771 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -2795,7 +2795,7 @@ static int __init fore200e_module_init(void)
 	printk(FORE200E "FORE Systems 200E-series ATM driver - version " FORE200E_VERSION "\n");
 
 #ifdef CONFIG_SBUS
-	err = of_register_driver(&fore200e_sba_driver, &of_bus_type);
+	err = of_register_platform_driver(&fore200e_sba_driver);
 	if (err)
 		return err;
 #endif
@@ -2806,7 +2806,7 @@ static int __init fore200e_module_init(void)
 
 #ifdef CONFIG_SBUS
 	if (err)
-		of_unregister_driver(&fore200e_sba_driver);
+		of_unregister_platform_driver(&fore200e_sba_driver);
 #endif
 
 	return err;
@@ -2818,7 +2818,7 @@ static void __exit fore200e_module_cleanup(void)
 	pci_unregister_driver(&fore200e_pca_driver);
 #endif
 #ifdef CONFIG_SBUS
-	of_unregister_driver(&fore200e_sba_driver);
+	of_unregister_platform_driver(&fore200e_sba_driver);
 #endif
 }
 
diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c
index 0f9cbf1aaf1..d8a4ca87987 100644
--- a/drivers/char/hw_random/n2-drv.c
+++ b/drivers/char/hw_random/n2-drv.c
@@ -762,12 +762,12 @@ static struct of_platform_driver n2rng_driver = {
 
 static int __init n2rng_init(void)
 {
-	return of_register_driver(&n2rng_driver, &of_bus_type);
+	return of_register_platform_driver(&n2rng_driver);
 }
 
 static void __exit n2rng_exit(void)
 {
-	of_unregister_driver(&n2rng_driver);
+	of_unregister_platform_driver(&n2rng_driver);
 }
 
 module_init(n2rng_init);
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 23163fda503..34ac8ac8aae 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -2070,20 +2070,20 @@ static struct of_platform_driver n2_mau_driver = {
 
 static int __init n2_init(void)
 {
-	int err = of_register_driver(&n2_crypto_driver, &of_bus_type);
+	int err = of_register_platform_driver(&n2_crypto_driver);
 
 	if (!err) {
-		err = of_register_driver(&n2_mau_driver, &of_bus_type);
+		err = of_register_platform_driver(&n2_mau_driver);
 		if (err)
-			of_unregister_driver(&n2_crypto_driver);
+			of_unregister_platform_driver(&n2_crypto_driver);
 	}
 	return err;
 }
 
 static void __exit n2_exit(void)
 {
-	of_unregister_driver(&n2_mau_driver);
-	of_unregister_driver(&n2_crypto_driver);
+	of_unregister_platform_driver(&n2_mau_driver);
+	of_unregister_platform_driver(&n2_crypto_driver);
 }
 
 module_init(n2_init);
diff --git a/drivers/hwmon/ultra45_env.c b/drivers/hwmon/ultra45_env.c
index 5da5942cf97..89643261ccd 100644
--- a/drivers/hwmon/ultra45_env.c
+++ b/drivers/hwmon/ultra45_env.c
@@ -311,12 +311,12 @@ static struct of_platform_driver env_driver = {
 
 static int __init env_init(void)
 {
-	return of_register_driver(&env_driver, &of_bus_type);
+	return of_register_platform_driver(&env_driver);
 }
 
 static void __exit env_exit(void)
 {
-	of_unregister_driver(&env_driver);
+	of_unregister_platform_driver(&env_driver);
 }
 
 module_init(env_init);
diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c
index 1dacae4b43f..f3bb92e9755 100644
--- a/drivers/input/misc/sparcspkr.c
+++ b/drivers/input/misc/sparcspkr.c
@@ -353,14 +353,12 @@ static struct of_platform_driver grover_beep_driver = {
 
 static int __init sparcspkr_init(void)
 {
-	int err = of_register_driver(&bbc_beep_driver,
-				     &of_platform_bus_type);
+	int err = of_register_platform_driver(&bbc_beep_driver);
 
 	if (!err) {
-		err = of_register_driver(&grover_beep_driver,
-					 &of_platform_bus_type);
+		err = of_register_platform_driver(&grover_beep_driver);
 		if (err)
-			of_unregister_driver(&bbc_beep_driver);
+			of_unregister_platform_driver(&bbc_beep_driver);
 	}
 
 	return err;
@@ -368,8 +366,8 @@ static int __init sparcspkr_init(void)
 
 static void __exit sparcspkr_exit(void)
 {
-	of_unregister_driver(&bbc_beep_driver);
-	of_unregister_driver(&grover_beep_driver);
+	of_unregister_platform_driver(&bbc_beep_driver);
+	of_unregister_platform_driver(&grover_beep_driver);
 }
 
 module_init(sparcspkr_init);
diff --git a/drivers/input/serio/i8042-sparcio.h b/drivers/input/serio/i8042-sparcio.h
index c7d50ff43fc..cb2a24b9474 100644
--- a/drivers/input/serio/i8042-sparcio.h
+++ b/drivers/input/serio/i8042-sparcio.h
@@ -116,8 +116,7 @@ static int __init i8042_platform_init(void)
 		if (!kbd_iobase)
 			return -ENODEV;
 	} else {
-		int err = of_register_driver(&sparc_i8042_driver,
-					     &of_bus_type);
+		int err = of_register_platform_driver(&sparc_i8042_driver);
 		if (err)
 			return err;
 
@@ -141,7 +140,7 @@ static inline void i8042_platform_exit(void)
 	struct device_node *root = of_find_node_by_path("/");
 
 	if (strcmp(root->name, "SUNW,JavaStation-1"))
-		of_unregister_driver(&sparc_i8042_driver);
+		of_unregister_platform_driver(&sparc_i8042_driver);
 }
 
 #else /* !CONFIG_PCI */
diff --git a/drivers/mtd/maps/sun_uflash.c b/drivers/mtd/maps/sun_uflash.c
index 0391c2527bd..8984236a8d0 100644
--- a/drivers/mtd/maps/sun_uflash.c
+++ b/drivers/mtd/maps/sun_uflash.c
@@ -160,12 +160,12 @@ static struct of_platform_driver uflash_driver = {
 
 static int __init uflash_init(void)
 {
-	return of_register_driver(&uflash_driver, &of_bus_type);
+	return of_register_platform_driver(&uflash_driver);
 }
 
 static void __exit uflash_exit(void)
 {
-	of_unregister_driver(&uflash_driver);
+	of_unregister_platform_driver(&uflash_driver);
 }
 
 module_init(uflash_init);
diff --git a/drivers/net/ibm_newemac/core.c b/drivers/net/ibm_newemac/core.c
index b150c102ca5..f10476fb262 100644
--- a/drivers/net/ibm_newemac/core.c
+++ b/drivers/net/ibm_newemac/core.c
@@ -2339,11 +2339,11 @@ static int __devinit emac_wait_deps(struct emac_instance *dev)
 		deps[EMAC_DEP_MDIO_IDX].phandle = dev->mdio_ph;
 	if (dev->blist && dev->blist > emac_boot_list)
 		deps[EMAC_DEP_PREV_IDX].phandle = 0xffffffffu;
-	bus_register_notifier(&of_platform_bus_type, &emac_of_bus_notifier);
+	bus_register_notifier(&platform_bus_type, &emac_of_bus_notifier);
 	wait_event_timeout(emac_probe_wait,
 			   emac_check_deps(dev, deps),
 			   EMAC_PROBE_DEP_TIMEOUT);
-	bus_unregister_notifier(&of_platform_bus_type, &emac_of_bus_notifier);
+	bus_unregister_notifier(&platform_bus_type, &emac_of_bus_notifier);
 	err = emac_check_deps(dev, deps) ? 0 : -ENODEV;
 	for (i = 0; i < EMAC_DEP_COUNT; i++) {
 		if (deps[i].node)
diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c
index 370d3c17f24..04e552aa14e 100644
--- a/drivers/net/myri_sbus.c
+++ b/drivers/net/myri_sbus.c
@@ -1172,12 +1172,12 @@ static struct of_platform_driver myri_sbus_driver = {
 
 static int __init myri_sbus_init(void)
 {
-	return of_register_driver(&myri_sbus_driver, &of_bus_type);
+	return of_register_platform_driver(&myri_sbus_driver);
 }
 
 static void __exit myri_sbus_exit(void)
 {
-	of_unregister_driver(&myri_sbus_driver);
+	of_unregister_platform_driver(&myri_sbus_driver);
 }
 
 module_init(myri_sbus_init);
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index f6ecf6180f7..8cb2b30ecca 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -10251,14 +10251,14 @@ static int __init niu_init(void)
 	niu_debug = netif_msg_init(debug, NIU_MSG_DEFAULT);
 
 #ifdef CONFIG_SPARC64
-	err = of_register_driver(&niu_of_driver, &of_bus_type);
+	err = of_register_platform_driver(&niu_of_driver);
 #endif
 
 	if (!err) {
 		err = pci_register_driver(&niu_pci_driver);
 #ifdef CONFIG_SPARC64
 		if (err)
-			of_unregister_driver(&niu_of_driver);
+			of_unregister_platform_driver(&niu_of_driver);
 #endif
 	}
 
@@ -10269,7 +10269,7 @@ static void __exit niu_exit(void)
 {
 	pci_unregister_driver(&niu_pci_driver);
 #ifdef CONFIG_SPARC64
-	of_unregister_driver(&niu_of_driver);
+	of_unregister_platform_driver(&niu_of_driver);
 #endif
 }
 
diff --git a/drivers/net/sunbmac.c b/drivers/net/sunbmac.c
index 0b10d24de05..09c071bd6ad 100644
--- a/drivers/net/sunbmac.c
+++ b/drivers/net/sunbmac.c
@@ -1301,12 +1301,12 @@ static struct of_platform_driver bigmac_sbus_driver = {
 
 static int __init bigmac_init(void)
 {
-	return of_register_driver(&bigmac_sbus_driver, &of_bus_type);
+	return of_register_platform_driver(&bigmac_sbus_driver);
 }
 
 static void __exit bigmac_exit(void)
 {
-	of_unregister_driver(&bigmac_sbus_driver);
+	of_unregister_platform_driver(&bigmac_sbus_driver);
 }
 
 module_init(bigmac_init);
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index 0a63ebef86a..eec443f6407 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -3304,7 +3304,7 @@ static int __init happy_meal_sbus_init(void)
 {
 	int err;
 
-	err = of_register_driver(&hme_sbus_driver, &of_bus_type);
+	err = of_register_platform_driver(&hme_sbus_driver);
 	if (!err)
 		err = quattro_sbus_register_irqs();
 
@@ -3313,7 +3313,7 @@ static int __init happy_meal_sbus_init(void)
 
 static void happy_meal_sbus_exit(void)
 {
-	of_unregister_driver(&hme_sbus_driver);
+	of_unregister_platform_driver(&hme_sbus_driver);
 	quattro_sbus_free_irqs();
 
 	while (qfe_sbus_list) {
diff --git a/drivers/net/sunlance.c b/drivers/net/sunlance.c
index c6bfdad6c0c..ee364fa7563 100644
--- a/drivers/net/sunlance.c
+++ b/drivers/net/sunlance.c
@@ -1558,12 +1558,12 @@ static struct of_platform_driver sunlance_sbus_driver = {
 /* Find all the lance cards on the system and initialize them */
 static int __init sparc_lance_init(void)
 {
-	return of_register_driver(&sunlance_sbus_driver, &of_bus_type);
+	return of_register_platform_driver(&sunlance_sbus_driver);
 }
 
 static void __exit sparc_lance_exit(void)
 {
-	of_unregister_driver(&sunlance_sbus_driver);
+	of_unregister_platform_driver(&sunlance_sbus_driver);
 }
 
 module_init(sparc_lance_init);
diff --git a/drivers/net/sunqe.c b/drivers/net/sunqe.c
index 44651748708..5f84a5daded 100644
--- a/drivers/net/sunqe.c
+++ b/drivers/net/sunqe.c
@@ -988,12 +988,12 @@ static struct of_platform_driver qec_sbus_driver = {
 
 static int __init qec_init(void)
 {
-	return of_register_driver(&qec_sbus_driver, &of_bus_type);
+	return of_register_platform_driver(&qec_sbus_driver);
 }
 
 static void __exit qec_exit(void)
 {
-	of_unregister_driver(&qec_sbus_driver);
+	of_unregister_platform_driver(&qec_sbus_driver);
 
 	while (root_qec_dev) {
 		struct sunqec *next = root_qec_dev->next_module;
diff --git a/drivers/parport/parport_sunbpp.c b/drivers/parport/parport_sunbpp.c
index 3cdfe96e899..210a6441a06 100644
--- a/drivers/parport/parport_sunbpp.c
+++ b/drivers/parport/parport_sunbpp.c
@@ -393,12 +393,12 @@ static struct of_platform_driver bpp_sbus_driver = {
 
 static int __init parport_sunbpp_init(void)
 {
-	return of_register_driver(&bpp_sbus_driver, &of_bus_type);
+	return of_register_platform_driver(&bpp_sbus_driver);
 }
 
 static void __exit parport_sunbpp_exit(void)
 {
-	of_unregister_driver(&bpp_sbus_driver);
+	of_unregister_platform_driver(&bpp_sbus_driver);
 }
 
 MODULE_AUTHOR("Derrick J Brashear");
diff --git a/drivers/sbus/char/bbc_i2c.c b/drivers/sbus/char/bbc_i2c.c
index 40d7a1fc69a..3e89c313e98 100644
--- a/drivers/sbus/char/bbc_i2c.c
+++ b/drivers/sbus/char/bbc_i2c.c
@@ -425,12 +425,12 @@ static struct of_platform_driver bbc_i2c_driver = {
 
 static int __init bbc_i2c_init(void)
 {
-	return of_register_driver(&bbc_i2c_driver, &of_bus_type);
+	return of_register_platform_driver(&bbc_i2c_driver);
 }
 
 static void __exit bbc_i2c_exit(void)
 {
-	of_unregister_driver(&bbc_i2c_driver);
+	of_unregister_platform_driver(&bbc_i2c_driver);
 }
 
 module_init(bbc_i2c_init);
diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c
index 7baf1b64403..8fd362e7fa6 100644
--- a/drivers/sbus/char/display7seg.c
+++ b/drivers/sbus/char/display7seg.c
@@ -277,12 +277,12 @@ static struct of_platform_driver d7s_driver = {
 
 static int __init d7s_init(void)
 {
-	return of_register_driver(&d7s_driver, &of_bus_type);
+	return of_register_platform_driver(&d7s_driver);
 }
 
 static void __exit d7s_exit(void)
 {
-	of_unregister_driver(&d7s_driver);
+	of_unregister_platform_driver(&d7s_driver);
 }
 
 module_init(d7s_init);
diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c
index c8166ecf527..2c76f700265 100644
--- a/drivers/sbus/char/envctrl.c
+++ b/drivers/sbus/char/envctrl.c
@@ -1142,12 +1142,12 @@ static struct of_platform_driver envctrl_driver = {
 
 static int __init envctrl_init(void)
 {
-	return of_register_driver(&envctrl_driver, &of_bus_type);
+	return of_register_platform_driver(&envctrl_driver);
 }
 
 static void __exit envctrl_exit(void)
 {
-	of_unregister_driver(&envctrl_driver);
+	of_unregister_platform_driver(&envctrl_driver);
 }
 
 module_init(envctrl_init);
diff --git a/drivers/sbus/char/flash.c b/drivers/sbus/char/flash.c
index 368d66294d8..d79f386c348 100644
--- a/drivers/sbus/char/flash.c
+++ b/drivers/sbus/char/flash.c
@@ -218,12 +218,12 @@ static struct of_platform_driver flash_driver = {
 
 static int __init flash_init(void)
 {
-	return of_register_driver(&flash_driver, &of_bus_type);
+	return of_register_platform_driver(&flash_driver);
 }
 
 static void __exit flash_cleanup(void)
 {
-	of_unregister_driver(&flash_driver);
+	of_unregister_platform_driver(&flash_driver);
 }
 
 module_init(flash_init);
diff --git a/drivers/sbus/char/uctrl.c b/drivers/sbus/char/uctrl.c
index b8b40e9eca7..57f0612bb01 100644
--- a/drivers/sbus/char/uctrl.c
+++ b/drivers/sbus/char/uctrl.c
@@ -437,12 +437,12 @@ static struct of_platform_driver uctrl_driver = {
 
 static int __init uctrl_init(void)
 {
-	return of_register_driver(&uctrl_driver, &of_bus_type);
+	return of_register_platform_driver(&uctrl_driver);
 }
 
 static void __exit uctrl_exit(void)
 {
-	of_unregister_driver(&uctrl_driver);
+	of_unregister_platform_driver(&uctrl_driver);
 }
 
 module_init(uctrl_init);
diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c
index 3f5b5411e6b..53d7ed0dc16 100644
--- a/drivers/scsi/qlogicpti.c
+++ b/drivers/scsi/qlogicpti.c
@@ -1467,12 +1467,12 @@ static struct of_platform_driver qpti_sbus_driver = {
 
 static int __init qpti_init(void)
 {
-	return of_register_driver(&qpti_sbus_driver, &of_bus_type);
+	return of_register_platform_driver(&qpti_sbus_driver);
 }
 
 static void __exit qpti_exit(void)
 {
-	of_unregister_driver(&qpti_sbus_driver);
+	of_unregister_platform_driver(&qpti_sbus_driver);
 }
 
 MODULE_DESCRIPTION("QlogicISP SBUS driver");
diff --git a/drivers/scsi/sun_esp.c b/drivers/scsi/sun_esp.c
index ddc221acd14..89ba6fe02f8 100644
--- a/drivers/scsi/sun_esp.c
+++ b/drivers/scsi/sun_esp.c
@@ -644,12 +644,12 @@ static struct of_platform_driver esp_sbus_driver = {
 
 static int __init sunesp_init(void)
 {
-	return of_register_driver(&esp_sbus_driver, &of_bus_type);
+	return of_register_platform_driver(&esp_sbus_driver);
 }
 
 static void __exit sunesp_exit(void)
 {
-	of_unregister_driver(&esp_sbus_driver);
+	of_unregister_platform_driver(&esp_sbus_driver);
 }
 
 MODULE_DESCRIPTION("Sun ESP SCSI driver");
diff --git a/drivers/serial/sunhv.c b/drivers/serial/sunhv.c
index 36e244867dd..a779e22d213 100644
--- a/drivers/serial/sunhv.c
+++ b/drivers/serial/sunhv.c
@@ -644,12 +644,12 @@ static int __init sunhv_init(void)
 	if (tlb_type != hypervisor)
 		return -ENODEV;
 
-	return of_register_driver(&hv_driver, &of_bus_type);
+	return of_register_platform_driver(&hv_driver);
 }
 
 static void __exit sunhv_exit(void)
 {
-	of_unregister_driver(&hv_driver);
+	of_unregister_platform_driver(&hv_driver);
 }
 
 module_init(sunhv_init);
diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c
index 0a7dd6841ff..9845fb1cfb1 100644
--- a/drivers/serial/sunsab.c
+++ b/drivers/serial/sunsab.c
@@ -1130,12 +1130,12 @@ static int __init sunsab_init(void)
 		}
 	}
 
-	return of_register_driver(&sab_driver, &of_bus_type);
+	return of_register_platform_driver(&sab_driver);
 }
 
 static void __exit sunsab_exit(void)
 {
-	of_unregister_driver(&sab_driver);
+	of_unregister_platform_driver(&sab_driver);
 	if (sunsab_reg.nr) {
 		sunserial_unregister_minors(&sunsab_reg, sunsab_reg.nr);
 	}
diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c
index 5deafc8180b..3cdf74822db 100644
--- a/drivers/serial/sunsu.c
+++ b/drivers/serial/sunsu.c
@@ -1586,7 +1586,7 @@ static int __init sunsu_init(void)
 			return err;
 	}
 
-	err = of_register_driver(&su_driver, &of_bus_type);
+	err = of_register_platform_driver(&su_driver);
 	if (err && num_uart)
 		sunserial_unregister_minors(&sunsu_reg, num_uart);
 
diff --git a/drivers/serial/sunzilog.c b/drivers/serial/sunzilog.c
index fcbe20d4803..d1e6bcb5954 100644
--- a/drivers/serial/sunzilog.c
+++ b/drivers/serial/sunzilog.c
@@ -1576,7 +1576,7 @@ static int __init sunzilog_init(void)
 			goto out_free_tables;
 	}
 
-	err = of_register_driver(&zs_driver, &of_bus_type);
+	err = of_register_platform_driver(&zs_driver);
 	if (err)
 		goto out_unregister_uart;
 
@@ -1604,7 +1604,7 @@ out:
 	return err;
 
 out_unregister_driver:
-	of_unregister_driver(&zs_driver);
+	of_unregister_platform_driver(&zs_driver);
 
 out_unregister_uart:
 	if (num_sunzilog) {
@@ -1619,7 +1619,7 @@ out_free_tables:
 
 static void __exit sunzilog_exit(void)
 {
-	of_unregister_driver(&zs_driver);
+	of_unregister_platform_driver(&zs_driver);
 
 	if (zilog_irq != -1) {
 		struct uart_sunzilog_port *up = sunzilog_irq_chain;
diff --git a/drivers/video/bw2.c b/drivers/video/bw2.c
index 09f1b9b462f..c7796637baf 100644
--- a/drivers/video/bw2.c
+++ b/drivers/video/bw2.c
@@ -390,12 +390,12 @@ static int __init bw2_init(void)
 	if (fb_get_options("bw2fb", NULL))
 		return -ENODEV;
 
-	return of_register_driver(&bw2_driver, &of_bus_type);
+	return of_register_platform_driver(&bw2_driver);
 }
 
 static void __exit bw2_exit(void)
 {
-	of_unregister_driver(&bw2_driver);
+	of_unregister_platform_driver(&bw2_driver);
 }
 
 module_init(bw2_init);
diff --git a/drivers/video/cg14.c b/drivers/video/cg14.c
index e5dc2241194..d09fde8beb6 100644
--- a/drivers/video/cg14.c
+++ b/drivers/video/cg14.c
@@ -610,12 +610,12 @@ static int __init cg14_init(void)
 	if (fb_get_options("cg14fb", NULL))
 		return -ENODEV;
 
-	return of_register_driver(&cg14_driver, &of_bus_type);
+	return of_register_platform_driver(&cg14_driver);
 }
 
 static void __exit cg14_exit(void)
 {
-	of_unregister_driver(&cg14_driver);
+	of_unregister_platform_driver(&cg14_driver);
 }
 
 module_init(cg14_init);
diff --git a/drivers/video/cg3.c b/drivers/video/cg3.c
index 558d73a948a..64aa29809fb 100644
--- a/drivers/video/cg3.c
+++ b/drivers/video/cg3.c
@@ -477,12 +477,12 @@ static int __init cg3_init(void)
 	if (fb_get_options("cg3fb", NULL))
 		return -ENODEV;
 
-	return of_register_driver(&cg3_driver, &of_bus_type);
+	return of_register_platform_driver(&cg3_driver);
 }
 
 static void __exit cg3_exit(void)
 {
-	of_unregister_driver(&cg3_driver);
+	of_unregister_platform_driver(&cg3_driver);
 }
 
 module_init(cg3_init);
diff --git a/drivers/video/cg6.c b/drivers/video/cg6.c
index 480d761a27a..2389a719dcc 100644
--- a/drivers/video/cg6.c
+++ b/drivers/video/cg6.c
@@ -870,12 +870,12 @@ static int __init cg6_init(void)
 	if (fb_get_options("cg6fb", NULL))
 		return -ENODEV;
 
-	return of_register_driver(&cg6_driver, &of_bus_type);
+	return of_register_platform_driver(&cg6_driver);
 }
 
 static void __exit cg6_exit(void)
 {
-	of_unregister_driver(&cg6_driver);
+	of_unregister_platform_driver(&cg6_driver);
 }
 
 module_init(cg6_init);
diff --git a/drivers/video/ffb.c b/drivers/video/ffb.c
index 95c0227f47f..f6ecfab296d 100644
--- a/drivers/video/ffb.c
+++ b/drivers/video/ffb.c
@@ -1067,12 +1067,12 @@ static int __init ffb_init(void)
 	if (fb_get_options("ffb", NULL))
 		return -ENODEV;
 
-	return of_register_driver(&ffb_driver, &of_bus_type);
+	return of_register_platform_driver(&ffb_driver);
 }
 
 static void __exit ffb_exit(void)
 {
-	of_unregister_driver(&ffb_driver);
+	of_unregister_platform_driver(&ffb_driver);
 }
 
 module_init(ffb_init);
diff --git a/drivers/video/leo.c b/drivers/video/leo.c
index 9e8bf7d5e24..ad677637ffb 100644
--- a/drivers/video/leo.c
+++ b/drivers/video/leo.c
@@ -677,12 +677,12 @@ static int __init leo_init(void)
 	if (fb_get_options("leofb", NULL))
 		return -ENODEV;
 
-	return of_register_driver(&leo_driver, &of_bus_type);
+	return of_register_platform_driver(&leo_driver);
 }
 
 static void __exit leo_exit(void)
 {
-	of_unregister_driver(&leo_driver);
+	of_unregister_platform_driver(&leo_driver);
 }
 
 module_init(leo_init);
diff --git a/drivers/video/p9100.c b/drivers/video/p9100.c
index 6552751e81a..688b055abab 100644
--- a/drivers/video/p9100.c
+++ b/drivers/video/p9100.c
@@ -367,12 +367,12 @@ static int __init p9100_init(void)
 	if (fb_get_options("p9100fb", NULL))
 		return -ENODEV;
 
-	return of_register_driver(&p9100_driver, &of_bus_type);
+	return of_register_platform_driver(&p9100_driver);
 }
 
 static void __exit p9100_exit(void)
 {
-	of_unregister_driver(&p9100_driver);
+	of_unregister_platform_driver(&p9100_driver);
 }
 
 module_init(p9100_init);
diff --git a/drivers/video/sunxvr1000.c b/drivers/video/sunxvr1000.c
index 489b44e8db8..7288934c0d4 100644
--- a/drivers/video/sunxvr1000.c
+++ b/drivers/video/sunxvr1000.c
@@ -213,12 +213,12 @@ static int __init gfb_init(void)
 	if (fb_get_options("gfb", NULL))
 		return -ENODEV;
 
-	return of_register_driver(&gfb_driver, &of_bus_type);
+	return of_register_platform_driver(&gfb_driver);
 }
 
 static void __exit gfb_exit(void)
 {
-	of_unregister_driver(&gfb_driver);
+	of_unregister_platform_driver(&gfb_driver);
 }
 
 module_init(gfb_init);
diff --git a/drivers/video/tcx.c b/drivers/video/tcx.c
index cc039b33d2d..f375e0db677 100644
--- a/drivers/video/tcx.c
+++ b/drivers/video/tcx.c
@@ -526,12 +526,12 @@ static int __init tcx_init(void)
 	if (fb_get_options("tcxfb", NULL))
 		return -ENODEV;
 
-	return of_register_driver(&tcx_driver, &of_bus_type);
+	return of_register_platform_driver(&tcx_driver);
 }
 
 static void __exit tcx_exit(void)
 {
-	of_unregister_driver(&tcx_driver);
+	of_unregister_platform_driver(&tcx_driver);
 }
 
 module_init(tcx_init);
diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c
index 8c03fd71693..30a2512fd52 100644
--- a/drivers/watchdog/cpwd.c
+++ b/drivers/watchdog/cpwd.c
@@ -688,12 +688,12 @@ static struct of_platform_driver cpwd_driver = {
 
 static int __init cpwd_init(void)
 {
-	return of_register_driver(&cpwd_driver, &of_bus_type);
+	return of_register_platform_driver(&cpwd_driver);
 }
 
 static void __exit cpwd_exit(void)
 {
-	of_unregister_driver(&cpwd_driver);
+	of_unregister_platform_driver(&cpwd_driver);
 }
 
 module_init(cpwd_init);
diff --git a/drivers/watchdog/riowd.c b/drivers/watchdog/riowd.c
index 5dceeddc885..4082b4ace1f 100644
--- a/drivers/watchdog/riowd.c
+++ b/drivers/watchdog/riowd.c
@@ -250,12 +250,12 @@ static struct of_platform_driver riowd_driver = {
 
 static int __init riowd_init(void)
 {
-	return of_register_driver(&riowd_driver, &of_bus_type);
+	return of_register_platform_driver(&riowd_driver);
 }
 
 static void __exit riowd_exit(void)
 {
-	of_unregister_driver(&riowd_driver);
+	of_unregister_platform_driver(&riowd_driver);
 }
 
 module_init(riowd_init);
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index 133ecf31a60..429513ae8f6 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -19,12 +19,6 @@
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 
-/*
- * of_platform_bus_type isn't it's own bus anymore.  It's now just an alias
- * for the platform bus.
- */
-#define of_platform_bus_type platform_bus_type
-
 extern const struct of_device_id of_default_bus_ids[];
 
 /*
diff --git a/sound/sparc/amd7930.c b/sound/sparc/amd7930.c
index 43c63d44108..9eb1a4e0363 100644
--- a/sound/sparc/amd7930.c
+++ b/sound/sparc/amd7930.c
@@ -1075,7 +1075,7 @@ static struct of_platform_driver amd7930_sbus_driver = {
 
 static int __init amd7930_init(void)
 {
-	return of_register_driver(&amd7930_sbus_driver, &of_bus_type);
+	return of_register_platform_driver(&amd7930_sbus_driver);
 }
 
 static void __exit amd7930_exit(void)
@@ -1092,7 +1092,7 @@ static void __exit amd7930_exit(void)
 
 	amd7930_list = NULL;
 
-	of_unregister_driver(&amd7930_sbus_driver);
+	of_unregister_platform_driver(&amd7930_sbus_driver);
 }
 
 module_init(amd7930_init);
diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c
index f7f05c24630..68570ee2c9b 100644
--- a/sound/sparc/cs4231.c
+++ b/sound/sparc/cs4231.c
@@ -2120,12 +2120,12 @@ static struct of_platform_driver cs4231_driver = {
 
 static int __init cs4231_init(void)
 {
-	return of_register_driver(&cs4231_driver, &of_bus_type);
+	return of_register_platform_driver(&cs4231_driver);
 }
 
 static void __exit cs4231_exit(void)
 {
-	of_unregister_driver(&cs4231_driver);
+	of_unregister_platform_driver(&cs4231_driver);
 }
 
 module_init(cs4231_init);
diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 491ce71c84b..c421901c48d 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -2699,12 +2699,12 @@ static struct of_platform_driver dbri_sbus_driver = {
 /* Probe for the dbri chip and then attach the driver. */
 static int __init dbri_init(void)
 {
-	return of_register_driver(&dbri_sbus_driver, &of_bus_type);
+	return of_register_platform_driver(&dbri_sbus_driver);
 }
 
 static void __exit dbri_exit(void)
 {
-	of_unregister_driver(&dbri_sbus_driver);
+	of_unregister_platform_driver(&dbri_sbus_driver);
 }
 
 module_init(dbri_init);
-- 
cgit v1.2.3-70-g09d2


From 129ac799ad627b1e08382739f9e8cd75d7477fa3 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 29 Jun 2010 09:26:53 -0700
Subject: of: remove asm/of_platform.h

The only thing left in it is of_instantiate_rtc(), which can be moved
to asm/prom.h on PowerPC and is unused on microblaze.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
---
 arch/microblaze/include/asm/of_platform.h | 19 -------------------
 arch/powerpc/include/asm/of_platform.h    | 16 ----------------
 arch/powerpc/include/asm/prom.h           |  2 ++
 arch/sparc/include/asm/of_platform.h      | 16 ----------------
 include/linux/of_platform.h               |  2 --
 5 files changed, 2 insertions(+), 53 deletions(-)
 delete mode 100644 arch/microblaze/include/asm/of_platform.h
 delete mode 100644 arch/powerpc/include/asm/of_platform.h
 delete mode 100644 arch/sparc/include/asm/of_platform.h

(limited to 'include')

diff --git a/arch/microblaze/include/asm/of_platform.h b/arch/microblaze/include/asm/of_platform.h
deleted file mode 100644
index 353d8f651e3..00000000000
--- a/arch/microblaze/include/asm/of_platform.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp.
- *			<benh@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _ASM_MICROBLAZE_OF_PLATFORM_H
-#define _ASM_MICROBLAZE_OF_PLATFORM_H
-
-/* This is just here during the transition */
-#include <linux/of_platform.h>
-
-extern void of_instantiate_rtc(void);
-
-#endif /* _ASM_MICROBLAZE_OF_PLATFORM_H */
diff --git a/arch/powerpc/include/asm/of_platform.h b/arch/powerpc/include/asm/of_platform.h
deleted file mode 100644
index d506aa61db8..00000000000
--- a/arch/powerpc/include/asm/of_platform.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _ASM_POWERPC_OF_PLATFORM_H
-#define _ASM_POWERPC_OF_PLATFORM_H
-/*
- *    Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp.
- *			 <benh@kernel.crashing.org>
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- */
-
-extern void of_instantiate_rtc(void);
-
-#endif	/* _ASM_POWERPC_OF_PLATFORM_H */
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index f864722679e..da7dd634e7c 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -117,5 +117,7 @@ extern const void *of_get_mac_address(struct device_node *np);
 struct pci_dev;
 extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
 
+extern void of_instantiate_rtc(void);
+
 #endif /* __KERNEL__ */
 #endif /* _POWERPC_PROM_H */
diff --git a/arch/sparc/include/asm/of_platform.h b/arch/sparc/include/asm/of_platform.h
deleted file mode 100644
index 26540ddfc51..00000000000
--- a/arch/sparc/include/asm/of_platform.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef ___ASM_SPARC_OF_PLATFORM_H
-#define ___ASM_SPARC_OF_PLATFORM_H
-/*
- *    Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp.
- *			 <benh@kernel.crashing.org>
- *    Modified for Sparc by merging parts of asm/of_device.h
- *		by Stephen Rothwell
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- */
-
-#endif
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index 429513ae8f6..a79f59bf61a 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -52,8 +52,6 @@ extern void of_unregister_platform_driver(struct of_platform_driver *drv);
 extern struct of_device *of_device_alloc(struct device_node *np,
 					 const char *bus_id,
 					 struct device *parent);
-#include <asm/of_platform.h>
-
 extern struct of_device *of_find_device_by_node(struct device_node *np);
 
 extern int of_bus_type_init(struct bus_type *bus, const char *name);
-- 
cgit v1.2.3-70-g09d2


From 295960429675e17ec658320ebb24385727032bed Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 29 Jun 2010 11:15:54 -0600
Subject: of: remove asm/of_device.h

It is mostly unused now.  Sparc has a few defines left in it, but they
can be moved to other headers.  Removing this header means that new
architectures adding CONFIG_OF support don't need to also add this
header file.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
---
 arch/microblaze/include/asm/of_device.h | 13 -------------
 arch/powerpc/include/asm/of_device.h    |  3 ---
 arch/sparc/include/asm/device.h         |  2 ++
 arch/sparc/include/asm/of_device.h      | 19 -------------------
 arch/sparc/include/asm/prom.h           |  4 ++++
 include/linux/of_device.h               |  2 +-
 6 files changed, 7 insertions(+), 36 deletions(-)
 delete mode 100644 arch/microblaze/include/asm/of_device.h
 delete mode 100644 arch/powerpc/include/asm/of_device.h
 delete mode 100644 arch/sparc/include/asm/of_device.h

(limited to 'include')

diff --git a/arch/microblaze/include/asm/of_device.h b/arch/microblaze/include/asm/of_device.h
deleted file mode 100644
index 47e8d42aee8..00000000000
--- a/arch/microblaze/include/asm/of_device.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * Copyright (C) 2007-2008 Michal Simek <monstr@monstr.eu>
- *
- * based on PowerPC of_device.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_MICROBLAZE_OF_DEVICE_H
-#define _ASM_MICROBLAZE_OF_DEVICE_H
-#endif /* _ASM_MICROBLAZE_OF_DEVICE_H */
diff --git a/arch/powerpc/include/asm/of_device.h b/arch/powerpc/include/asm/of_device.h
deleted file mode 100644
index 04f76717f82..00000000000
--- a/arch/powerpc/include/asm/of_device.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#ifndef _ASM_POWERPC_OF_DEVICE_H
-#define _ASM_POWERPC_OF_DEVICE_H
-#endif /* _ASM_POWERPC_OF_DEVICE_H */
diff --git a/arch/sparc/include/asm/device.h b/arch/sparc/include/asm/device.h
index fb220e48203..daa6a8a5e9c 100644
--- a/arch/sparc/include/asm/device.h
+++ b/arch/sparc/include/asm/device.h
@@ -19,6 +19,8 @@ struct dev_archdata {
 	int			numa_node;
 };
 
+extern void of_propagate_archdata(struct platform_device *bus);
+
 struct pdev_archdata {
 	struct resource		resource[PROMREG_MAX];
 	unsigned int		irqs[PROMINTR_MAX];
diff --git a/arch/sparc/include/asm/of_device.h b/arch/sparc/include/asm/of_device.h
deleted file mode 100644
index 22b9828fe69..00000000000
--- a/arch/sparc/include/asm/of_device.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef _ASM_SPARC_OF_DEVICE_H
-#define _ASM_SPARC_OF_DEVICE_H
-#ifdef __KERNEL__
-
-#include <linux/device.h>
-#include <linux/of.h>
-#include <linux/mod_devicetable.h>
-#include <asm/openprom.h>
-
-extern void __iomem *of_ioremap(struct resource *res, unsigned long offset, unsigned long size, char *name);
-extern void of_iounmap(struct resource *res, void __iomem *base, unsigned long size);
-
-extern void of_propagate_archdata(struct of_device *bus);
-
-/* This is just here during the transition */
-#include <linux/of_platform.h>
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_SPARC_OF_DEVICE_H */
diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h
index ac695742df8..d35df5ace18 100644
--- a/arch/sparc/include/asm/prom.h
+++ b/arch/sparc/include/asm/prom.h
@@ -51,6 +51,10 @@ extern void prom_build_devicetree(void);
 extern void of_populate_present_mask(void);
 extern void of_fill_in_cpu_data(void);
 
+struct resource;
+extern void __iomem *of_ioremap(struct resource *res, unsigned long offset, unsigned long size, char *name);
+extern void of_iounmap(struct resource *res, void __iomem *base, unsigned long size);
+
 /* These routines are here to provide compatibility with how powerpc
  * handles IRQ mapping for OF device nodes.  We precompute and permanently
  * register them in the of_device objects, whereas powerpc computes them
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index 8cd1fe7864e..0f191199455 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -16,12 +16,12 @@
  */
 #define of_device platform_device
 #include <linux/platform_device.h>
+#include <linux/of_platform.h> /* temporary until merge */
 
 #ifdef CONFIG_OF_DEVICE
 #include <linux/device.h>
 #include <linux/of.h>
 #include <linux/mod_devicetable.h>
-#include <asm/of_device.h>
 
 #define	to_of_device(d) container_of(d, struct of_device, dev)
 
-- 
cgit v1.2.3-70-g09d2


From 94a0cb1fc61ab7a0d47d268a7764374efeb2160b Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 22 Jul 2010 13:59:23 -0600
Subject: of/device: Replace of_device with platform_device in includes and
 core code

of_device is currently just a #define alias for platform_device until it
gets removed entirely.  This patch removes references to it from the
include directories and the core drivers/of code.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
---
 arch/powerpc/include/asm/macio.h   |  2 +-
 arch/sparc/include/asm/floppy_64.h |  6 +++---
 arch/sparc/include/asm/parport.h   |  4 ++--
 drivers/of/device.c                | 22 +++++++++++-----------
 drivers/of/platform.c              | 24 ++++++++++++------------
 include/linux/of_device.h          | 10 +++++-----
 include/linux/of_platform.h        | 16 ++++++++--------
 7 files changed, 42 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/include/asm/macio.h b/arch/powerpc/include/asm/macio.h
index 675e159b5ef..7ab82c825a0 100644
--- a/arch/powerpc/include/asm/macio.h
+++ b/arch/powerpc/include/asm/macio.h
@@ -38,7 +38,7 @@ struct macio_dev
 {
 	struct macio_bus	*bus;		/* macio bus this device is on */
 	struct macio_dev	*media_bay;	/* Device is part of a media bay */
-	struct of_device	ofdev;
+	struct platform_device	ofdev;
 	struct device_dma_parameters dma_parms; /* ide needs that */
 	int			n_resources;
 	struct resource		resource[MACIO_DEV_COUNT_RESOURCES];
diff --git a/arch/sparc/include/asm/floppy_64.h b/arch/sparc/include/asm/floppy_64.h
index 4f5bde638f7..6597ce874d7 100644
--- a/arch/sparc/include/asm/floppy_64.h
+++ b/arch/sparc/include/asm/floppy_64.h
@@ -43,7 +43,7 @@ struct sun_flpy_controller {
 /* You'll only ever find one controller on an Ultra anyways. */
 static struct sun_flpy_controller *sun_fdc = (struct sun_flpy_controller *)-1;
 unsigned long fdc_status;
-static struct of_device *floppy_op = NULL;
+static struct platform_device *floppy_op = NULL;
 
 struct sun_floppy_ops {
 	unsigned char	(*fd_inb) (unsigned long port);
@@ -548,7 +548,7 @@ static unsigned long __init sun_floppy_init(void)
 {
 	static int initialized = 0;
 	struct device_node *dp;
-	struct of_device *op;
+	struct platform_device *op;
 	const char *prop;
 	char state[128];
 
@@ -661,7 +661,7 @@ static unsigned long __init sun_floppy_init(void)
 		config = 0;
 		for (dp = ebus_dp->child; dp; dp = dp->sibling) {
 			if (!strcmp(dp->name, "ecpp")) {
-				struct of_device *ecpp_op;
+				struct platform_device *ecpp_op;
 
 				ecpp_op = of_find_device_by_node(dp);
 				if (ecpp_op)
diff --git a/arch/sparc/include/asm/parport.h b/arch/sparc/include/asm/parport.h
index 4891fbce111..4f7afa01b2a 100644
--- a/arch/sparc/include/asm/parport.h
+++ b/arch/sparc/include/asm/parport.h
@@ -103,7 +103,7 @@ static inline unsigned int get_dma_residue(unsigned int dmanr)
 	return ebus_dma_residue(&sparc_ebus_dmas[dmanr].info);
 }
 
-static int __devinit ecpp_probe(struct of_device *op, const struct of_device_id *match)
+static int __devinit ecpp_probe(struct platform_device *op, const struct of_device_id *match)
 {
 	unsigned long base = op->resource[0].start;
 	unsigned long config = op->resource[1].start;
@@ -192,7 +192,7 @@ out_err:
 	return err;
 }
 
-static int __devexit ecpp_remove(struct of_device *op)
+static int __devexit ecpp_remove(struct platform_device *op)
 {
 	struct parport *p = dev_get_drvdata(&op->dev);
 	int slot = p->dma;
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 12a44b49351..0d8a0644f54 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -26,7 +26,7 @@ const struct of_device_id *of_match_device(const struct of_device_id *matches,
 }
 EXPORT_SYMBOL(of_match_device);
 
-struct of_device *of_dev_get(struct of_device *dev)
+struct platform_device *of_dev_get(struct platform_device *dev)
 {
 	struct device *tmp;
 
@@ -34,13 +34,13 @@ struct of_device *of_dev_get(struct of_device *dev)
 		return NULL;
 	tmp = get_device(&dev->dev);
 	if (tmp)
-		return to_of_device(tmp);
+		return to_platform_device(tmp);
 	else
 		return NULL;
 }
 EXPORT_SYMBOL(of_dev_get);
 
-void of_dev_put(struct of_device *dev)
+void of_dev_put(struct platform_device *dev)
 {
 	if (dev)
 		put_device(&dev->dev);
@@ -50,18 +50,18 @@ EXPORT_SYMBOL(of_dev_put);
 static ssize_t devspec_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
-	struct of_device *ofdev;
+	struct platform_device *ofdev;
 
-	ofdev = to_of_device(dev);
+	ofdev = to_platform_device(dev);
 	return sprintf(buf, "%s\n", ofdev->dev.of_node->full_name);
 }
 
 static ssize_t name_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
-	struct of_device *ofdev;
+	struct platform_device *ofdev;
 
-	ofdev = to_of_device(dev);
+	ofdev = to_platform_device(dev);
 	return sprintf(buf, "%s\n", ofdev->dev.of_node->name);
 }
 
@@ -90,15 +90,15 @@ struct device_attribute of_platform_device_attrs[] = {
  */
 void of_release_dev(struct device *dev)
 {
-	struct of_device *ofdev;
+	struct platform_device *ofdev;
 
-	ofdev = to_of_device(dev);
+	ofdev = to_platform_device(dev);
 	of_node_put(ofdev->dev.of_node);
 	kfree(ofdev);
 }
 EXPORT_SYMBOL(of_release_dev);
 
-int of_device_register(struct of_device *ofdev)
+int of_device_register(struct platform_device *ofdev)
 {
 	BUG_ON(ofdev->dev.of_node == NULL);
 
@@ -119,7 +119,7 @@ int of_device_register(struct of_device *ofdev)
 }
 EXPORT_SYMBOL(of_device_register);
 
-void of_device_unregister(struct of_device *ofdev)
+void of_device_unregister(struct platform_device *ofdev)
 {
 	device_unregister(&ofdev->dev);
 }
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index f3f1ec81ef4..9f3840cdcde 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -94,11 +94,11 @@ static int of_platform_device_probe(struct device *dev)
 {
 	int error = -ENODEV;
 	struct of_platform_driver *drv;
-	struct of_device *of_dev;
+	struct platform_device *of_dev;
 	const struct of_device_id *match;
 
 	drv = to_of_platform_driver(dev->driver);
-	of_dev = to_of_device(dev);
+	of_dev = to_platform_device(dev);
 
 	if (!drv->probe)
 		return error;
@@ -116,7 +116,7 @@ static int of_platform_device_probe(struct device *dev)
 
 static int of_platform_device_remove(struct device *dev)
 {
-	struct of_device *of_dev = to_of_device(dev);
+	struct platform_device *of_dev = to_platform_device(dev);
 	struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
 
 	if (dev->driver && drv->remove)
@@ -126,7 +126,7 @@ static int of_platform_device_remove(struct device *dev)
 
 static void of_platform_device_shutdown(struct device *dev)
 {
-	struct of_device *of_dev = to_of_device(dev);
+	struct platform_device *of_dev = to_platform_device(dev);
 	struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
 
 	if (dev->driver && drv->shutdown)
@@ -137,7 +137,7 @@ static void of_platform_device_shutdown(struct device *dev)
 
 static int of_platform_legacy_suspend(struct device *dev, pm_message_t mesg)
 {
-	struct of_device *of_dev = to_of_device(dev);
+	struct platform_device *of_dev = to_platform_device(dev);
 	struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
 	int ret = 0;
 
@@ -148,7 +148,7 @@ static int of_platform_legacy_suspend(struct device *dev, pm_message_t mesg)
 
 static int of_platform_legacy_resume(struct device *dev)
 {
-	struct of_device *of_dev = to_of_device(dev);
+	struct platform_device *of_dev = to_platform_device(dev);
 	struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
 	int ret = 0;
 
@@ -543,11 +543,11 @@ static void of_device_make_bus_id(struct device *dev)
  * @bus_id: Name to assign to the device.  May be null to use default name.
  * @parent: Parent device.
  */
-struct of_device *of_device_alloc(struct device_node *np,
+struct platform_device *of_device_alloc(struct device_node *np,
 				  const char *bus_id,
 				  struct device *parent)
 {
-	struct of_device *dev;
+	struct platform_device *dev;
 	int rc, i, num_reg = 0, num_irq = 0;
 	struct resource *res, temp_res;
 
@@ -600,11 +600,11 @@ EXPORT_SYMBOL(of_device_alloc);
  * @bus_id: name to assign device
  * @parent: Linux device model parent device.
  */
-struct of_device *of_platform_device_create(struct device_node *np,
+struct platform_device *of_platform_device_create(struct device_node *np,
 					    const char *bus_id,
 					    struct device *parent)
 {
-	struct of_device *dev;
+	struct platform_device *dev;
 
 	dev = of_device_alloc(np, bus_id, parent);
 	if (!dev)
@@ -642,7 +642,7 @@ static int of_platform_bus_create(const struct device_node *bus,
 				  struct device *parent)
 {
 	struct device_node *child;
-	struct of_device *dev;
+	struct platform_device *dev;
 	int rc = 0;
 
 	for_each_child_of_node(bus, child) {
@@ -678,7 +678,7 @@ int of_platform_bus_probe(struct device_node *root,
 			  struct device *parent)
 {
 	struct device_node *child;
-	struct of_device *dev;
+	struct platform_device *dev;
 	int rc = 0;
 
 	if (matches == NULL)
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index 0f191199455..e11a0be7893 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -39,14 +39,14 @@ static inline int of_driver_match_device(const struct device *dev,
 	return of_match_device(drv->of_match_table, dev) != NULL;
 }
 
-extern struct of_device *of_dev_get(struct of_device *dev);
-extern void of_dev_put(struct of_device *dev);
+extern struct platform_device *of_dev_get(struct platform_device *dev);
+extern void of_dev_put(struct platform_device *dev);
 
-extern int of_device_register(struct of_device *ofdev);
-extern void of_device_unregister(struct of_device *ofdev);
+extern int of_device_register(struct platform_device *ofdev);
+extern void of_device_unregister(struct platform_device *ofdev);
 extern void of_release_dev(struct device *dev);
 
-static inline void of_device_free(struct of_device *dev)
+static inline void of_device_free(struct platform_device *dev)
 {
 	of_release_dev(&dev->dev);
 }
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index a79f59bf61a..b24c5a5b042 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -27,13 +27,13 @@ extern const struct of_device_id of_default_bus_ids[];
  */
 struct of_platform_driver
 {
-	int	(*probe)(struct of_device* dev,
+	int	(*probe)(struct platform_device* dev,
 			 const struct of_device_id *match);
-	int	(*remove)(struct of_device* dev);
+	int	(*remove)(struct platform_device* dev);
 
-	int	(*suspend)(struct of_device* dev, pm_message_t state);
-	int	(*resume)(struct of_device* dev);
-	int	(*shutdown)(struct of_device* dev);
+	int	(*suspend)(struct platform_device* dev, pm_message_t state);
+	int	(*resume)(struct platform_device* dev);
+	int	(*shutdown)(struct platform_device* dev);
 
 	struct device_driver	driver;
 	struct platform_driver	platform_driver;
@@ -49,16 +49,16 @@ extern void of_unregister_driver(struct of_platform_driver *drv);
 extern int of_register_platform_driver(struct of_platform_driver *drv);
 extern void of_unregister_platform_driver(struct of_platform_driver *drv);
 
-extern struct of_device *of_device_alloc(struct device_node *np,
+extern struct platform_device *of_device_alloc(struct device_node *np,
 					 const char *bus_id,
 					 struct device *parent);
-extern struct of_device *of_find_device_by_node(struct device_node *np);
+extern struct platform_device *of_find_device_by_node(struct device_node *np);
 
 extern int of_bus_type_init(struct bus_type *bus, const char *name);
 
 #if !defined(CONFIG_SPARC) /* SPARC has its own device registration method */
 /* Platform devices and busses creation */
-extern struct of_device *of_platform_device_create(struct device_node *np,
+extern struct platform_device *of_platform_device_create(struct device_node *np,
 						   const char *bus_id,
 						   struct device *parent);
 
-- 
cgit v1.2.3-70-g09d2


From c0dd394ca5e78649b7013c3ce2d6338af9f228f0 Mon Sep 17 00:00:00 2001
From: Jonas Bonn <jonas@southpole.se>
Date: Fri, 23 Jul 2010 20:19:24 +0200
Subject: of: remove of_default_bus_ids

This list was used by only two platforms, with all other platforms defining their
own list of valid bus ids to pass to of_platform_bus_probe.  This patch:

i)   copies the default list to the two platforms that depended on it (powerpc)
ii)  removes the usage of of_default_bus_ids in of_platform_bus_probe
iii) removes the definition of the list from all architectures that defined it

Passing a NULL 'matches' parameter to of_platform_bus_probe is still valid; the
function returns no error in that case as the NULL value is equivalent to an
empty list.

Signed-off-by: Jonas Bonn <jonas@southpole.se>
[grant.likely@secretlab.ca: added __initdata annotations, warn on and return error on missing match table, and fix whitespace errors]
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 arch/microblaze/kernel/Makefile           |  2 +-
 arch/microblaze/kernel/of_platform.c      | 49 -------------------------------
 arch/powerpc/kernel/of_platform.c         | 24 ---------------
 arch/powerpc/platforms/cell/qpace_setup.c | 14 ++++++++-
 arch/powerpc/platforms/cell/setup.c       | 14 ++++++++-
 drivers/of/platform.c                     |  4 +--
 include/linux/of_platform.h               |  2 --
 7 files changed, 28 insertions(+), 81 deletions(-)
 delete mode 100644 arch/microblaze/kernel/of_platform.c

(limited to 'include')

diff --git a/arch/microblaze/kernel/Makefile b/arch/microblaze/kernel/Makefile
index 727e2cbff9c..7fcc5f7b5a4 100644
--- a/arch/microblaze/kernel/Makefile
+++ b/arch/microblaze/kernel/Makefile
@@ -16,7 +16,7 @@ extra-y := head.o vmlinux.lds
 
 obj-y += dma.o exceptions.o \
 	hw_exception_handler.o init_task.o intc.o irq.o \
-	of_platform.o process.o prom.o prom_parse.o ptrace.o \
+	process.o prom.o prom_parse.o ptrace.o \
 	setup.o signal.o sys_microblaze.o timer.o traps.o reset.o
 
 obj-y += cpu/
diff --git a/arch/microblaze/kernel/of_platform.c b/arch/microblaze/kernel/of_platform.c
deleted file mode 100644
index 6cffadbe2fc..00000000000
--- a/arch/microblaze/kernel/of_platform.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- *    Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp.
- *			 <benh@kernel.crashing.org>
- *    and		 Arnd Bergmann, IBM Corp.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- */
-
-#undef DEBUG
-
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mod_devicetable.h>
-#include <linux/pci.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
-
-#include <linux/errno.h>
-#include <linux/topology.h>
-#include <asm/atomic.h>
-
-/*
- * The list of OF IDs below is used for matching bus types in the
- * system whose devices are to be exposed as of_platform_devices.
- *
- * This is the default list valid for most platforms. This file provides
- * functions who can take an explicit list if necessary though
- *
- * The search is always performed recursively looking for children of
- * the provided device_node and recursively if such a children matches
- * a bus type in the list
- */
-
-const struct of_device_id of_default_bus_ids[] = {
-	{ .type = "soc", },
-	{ .compatible = "soc", },
-	{ .type = "plb5", },
-	{ .type = "plb4", },
-	{ .type = "opb", },
-	{ .type = "simple", },
-	{},
-};
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 760a7af7fdb..b2c363ef38a 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -28,30 +28,6 @@
 #include <asm/ppc-pci.h>
 #include <asm/atomic.h>
 
-/*
- * The list of OF IDs below is used for matching bus types in the
- * system whose devices are to be exposed as of_platform_devices.
- *
- * This is the default list valid for most platforms. This file provides
- * functions who can take an explicit list if necessary though
- *
- * The search is always performed recursively looking for children of
- * the provided device_node and recursively if such a children matches
- * a bus type in the list
- */
-
-const struct of_device_id of_default_bus_ids[] = {
-	{ .type = "soc", },
-	{ .compatible = "soc", },
-	{ .type = "spider", },
-	{ .type = "axon", },
-	{ .type = "plb5", },
-	{ .type = "plb4", },
-	{ .type = "opb", },
-	{ .type = "ebc", },
-	{},
-};
-
 #ifdef CONFIG_PPC_OF_PLATFORM_PCI
 
 /* The probing of PCI controllers from of_platform is currently
diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c
index c5ce02e84c8..1b574904275 100644
--- a/arch/powerpc/platforms/cell/qpace_setup.c
+++ b/arch/powerpc/platforms/cell/qpace_setup.c
@@ -61,12 +61,24 @@ static void qpace_progress(char *s, unsigned short hex)
 	printk("*** %04x : %s\n", hex, s ? s : "");
 }
 
+static const struct of_device_id qpace_bus_ids[] __initdata = {
+	{ .type = "soc", },
+	{ .compatible = "soc", },
+	{ .type = "spider", },
+	{ .type = "axon", },
+	{ .type = "plb5", },
+	{ .type = "plb4", },
+	{ .type = "opb", },
+	{ .type = "ebc", },
+	{},
+};
+
 static int __init qpace_publish_devices(void)
 {
 	int node;
 
 	/* Publish OF platform devices for southbridge IOs */
-	of_platform_bus_probe(NULL, NULL, NULL);
+	of_platform_bus_probe(NULL, qpace_bus_ids, NULL);
 
 	/* There is no device for the MIC memory controller, thus we create
 	 * a platform device for it to attach the EDAC driver to.
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index 50385db586b..691995761b3 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -141,6 +141,18 @@ static int __devinit cell_setup_phb(struct pci_controller *phb)
 	return 0;
 }
 
+static const struct of_device_id cell_bus_ids[] __initdata = {
+	{ .type = "soc", },
+	{ .compatible = "soc", },
+	{ .type = "spider", },
+	{ .type = "axon", },
+	{ .type = "plb5", },
+	{ .type = "plb4", },
+	{ .type = "opb", },
+	{ .type = "ebc", },
+	{},
+};
+
 static int __init cell_publish_devices(void)
 {
 	struct device_node *root = of_find_node_by_path("/");
@@ -148,7 +160,7 @@ static int __init cell_publish_devices(void)
 	int node;
 
 	/* Publish OF platform devices for southbridge IOs */
-	of_platform_bus_probe(NULL, NULL, NULL);
+	of_platform_bus_probe(NULL, cell_bus_ids, NULL);
 
 	/* On spider based blades, we need to manually create the OF
 	 * platform devices for the PCI host bridges
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index f79f40b516c..033a224a9fd 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -701,9 +701,7 @@ int of_platform_bus_probe(struct device_node *root,
 	struct platform_device *dev;
 	int rc = 0;
 
-	if (matches == NULL)
-		matches = of_default_bus_ids;
-	if (matches == OF_NO_DEEP_PROBE)
+	if (WARN_ON(!matches || matches == OF_NO_DEEP_PROBE))
 		return -EINVAL;
 	if (root == NULL)
 		root = of_find_node_by_path("/");
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index b24c5a5b042..4e6d989c06d 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -19,8 +19,6 @@
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 
-extern const struct of_device_id of_default_bus_ids[];
-
 /*
  * An of_platform_driver driver is attached to a basic of_device on
  * the "platform bus" (platform_bus_type).
-- 
cgit v1.2.3-70-g09d2


From 72ad5d77fb981963edae15eee8196c80238f5ed0 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 23 Jul 2010 22:59:09 +0200
Subject: ACPI / Sleep: Allow the NVS saving to be skipped during suspend to
 RAM

Commit 2a6b69765ad794389f2fc3e14a0afa1a995221c2
(ACPI: Store NVS state even when entering suspend to RAM) caused the
ACPI suspend code to save the NVS area during suspend and restore it
during resume unconditionally, although it is known that some systems
need to use acpi_sleep=s4_nonvs for hibernation to work.  To allow
the affected systems to avoid saving and restoring the NVS area
during suspend to RAM and resume, introduce kernel command line
option acpi_sleep=nonvs and make acpi_sleep=s4_nonvs work as its
alias temporarily (add acpi_sleep=s4_nonvs to the feature removal
file).

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=16396 .

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Reported-and-tested-by: tomas m <tmezzadra@gmail.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 Documentation/feature-removal-schedule.txt |  7 ++++++
 Documentation/kernel-parameters.txt        |  4 ++--
 arch/x86/kernel/acpi/sleep.c               |  9 ++++++--
 drivers/acpi/sleep.c                       | 35 +++++++++++++++---------------
 include/linux/acpi.h                       |  2 +-
 5 files changed, 34 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index c268783bc4e..1571c0c83db 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -647,3 +647,10 @@ Who:	Stefan Richter <stefanr@s5r6.in-berlin.de>
 
 ----------------------------
 
+What:	The acpi_sleep=s4_nonvs command line option
+When:	2.6.37
+Files:	arch/x86/kernel/acpi/sleep.c
+Why:	superseded by acpi_sleep=nonvs
+Who:	Rafael J. Wysocki <rjw@sisk.pl>
+
+----------------------------
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4ddb58df081..2b2407d9a6d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -254,8 +254,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			control method, with respect to putting devices into
 			low power states, to be enforced (the ACPI 2.0 ordering
 			of _PTS is used by default).
-			s4_nonvs prevents the kernel from saving/restoring the
-			ACPI NVS memory during hibernation.
+			nonvs prevents the kernel from saving/restoring the
+			ACPI NVS memory during suspend/hibernation and resume.
 			sci_force_enable causes the kernel to set SCI_EN directly
 			on resume from S1/S3 (which is against the ACPI spec,
 			but some broken systems don't work without it).
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 82e508677b9..fcc3c61fdec 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -157,9 +157,14 @@ static int __init acpi_sleep_setup(char *str)
 #ifdef CONFIG_HIBERNATION
 		if (strncmp(str, "s4_nohwsig", 10) == 0)
 			acpi_no_s4_hw_signature();
-		if (strncmp(str, "s4_nonvs", 8) == 0)
-			acpi_s4_no_nvs();
+		if (strncmp(str, "s4_nonvs", 8) == 0) {
+			pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, "
+					"please use acpi_sleep=nonvs instead");
+			acpi_nvs_nosave();
+		}
 #endif
+		if (strncmp(str, "nonvs", 5) == 0)
+			acpi_nvs_nosave();
 		if (strncmp(str, "old_ordering", 12) == 0)
 			acpi_old_suspend_ordering();
 		str = strchr(str, ',');
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 5b7c52e4a00..2862c781b37 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -81,6 +81,20 @@ static int acpi_sleep_prepare(u32 acpi_state)
 #ifdef CONFIG_ACPI_SLEEP
 static u32 acpi_target_sleep_state = ACPI_STATE_S0;
 
+/*
+ * The ACPI specification wants us to save NVS memory regions during hibernation
+ * and to restore them during the subsequent resume.  Windows does that also for
+ * suspend to RAM.  However, it is known that this mechanism does not work on
+ * all machines, so we allow the user to disable it with the help of the
+ * 'acpi_sleep=nonvs' kernel command line option.
+ */
+static bool nvs_nosave;
+
+void __init acpi_nvs_nosave(void)
+{
+	nvs_nosave = true;
+}
+
 /*
  * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the
  * user to request that behavior by using the 'acpi_old_suspend_ordering'
@@ -197,8 +211,7 @@ static int acpi_suspend_begin(suspend_state_t pm_state)
 	u32 acpi_state = acpi_suspend_states[pm_state];
 	int error = 0;
 
-	error = suspend_nvs_alloc();
-
+	error = nvs_nosave ? 0 : suspend_nvs_alloc();
 	if (error)
 		return error;
 
@@ -388,20 +401,6 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = {
 #endif /* CONFIG_SUSPEND */
 
 #ifdef CONFIG_HIBERNATION
-/*
- * The ACPI specification wants us to save NVS memory regions during hibernation
- * and to restore them during the subsequent resume.  However, it is not certain
- * if this mechanism is going to work on all machines, so we allow the user to
- * disable this mechanism using the 'acpi_sleep=s4_nonvs' kernel command line
- * option.
- */
-static bool s4_no_nvs;
-
-void __init acpi_s4_no_nvs(void)
-{
-	s4_no_nvs = true;
-}
-
 static unsigned long s4_hardware_signature;
 static struct acpi_table_facs *facs;
 static bool nosigcheck;
@@ -415,7 +414,7 @@ static int acpi_hibernation_begin(void)
 {
 	int error;
 
-	error = s4_no_nvs ? 0 : suspend_nvs_alloc();
+	error = nvs_nosave ? 0 : suspend_nvs_alloc();
 	if (!error) {
 		acpi_target_sleep_state = ACPI_STATE_S4;
 		acpi_sleep_tts_switch(acpi_target_sleep_state);
@@ -510,7 +509,7 @@ static int acpi_hibernation_begin_old(void)
 	error = acpi_sleep_prepare(ACPI_STATE_S4);
 
 	if (!error) {
-		if (!s4_no_nvs)
+		if (!nvs_nosave)
 			error = suspend_nvs_alloc();
 		if (!error)
 			acpi_target_sleep_state = ACPI_STATE_S4;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 224a38c960d..ccf94dc5acd 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -253,7 +253,7 @@ int acpi_resources_are_enforced(void);
 #ifdef CONFIG_PM_SLEEP
 void __init acpi_no_s4_hw_signature(void);
 void __init acpi_old_suspend_ordering(void);
-void __init acpi_s4_no_nvs(void);
+void __init acpi_nvs_nosave(void);
 #endif /* CONFIG_PM_SLEEP */
 
 struct acpi_osc_context {
-- 
cgit v1.2.3-70-g09d2


From c1f79426e2df5ef96fe3e76de6c7606d15bf390b Mon Sep 17 00:00:00 2001
From: Stefan Assmann <sassmann@redhat.com>
Date: Thu, 22 Jul 2010 02:50:21 +0000
Subject: sysfs: add attribute to indicate hw address assignment type

Add addr_assign_type to struct net_device and expose it via sysfs.
This new attribute has the purpose of giving user-space the ability to
distinguish between different assignment types of MAC addresses.

For example user-space can treat NICs with randomly generated MAC
addresses differently than NICs that have permanent (locally assigned)
MAC addresses.
For the former udev could write a persistent net rule by matching the
device path instead of the MAC address.
There's also the case of devices that 'steal' MAC addresses from slave
devices. In which case it is also beneficial for user-space to be aware
of the fact.

This patch also introduces a helper function to assist adoption of
drivers that generate MAC addresses randomly.

Signed-off-by: Stefan Assmann <sassmann@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 14 ++++++++++++++
 include/linux/netdevice.h   |  6 ++++++
 net/core/net-sysfs.c        |  2 ++
 3 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 3d7a6687d24..848480bc2bf 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -126,6 +126,20 @@ static inline void random_ether_addr(u8 *addr)
 	addr [0] |= 0x02;	/* set local assignment bit (IEEE802) */
 }
 
+/**
+ * dev_hw_addr_random - Create random MAC and set device flag
+ * @dev: pointer to net_device structure
+ * @hwaddr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Generate random MAC to be used by a device and set addr_assign_type
+ * so the state can be read by sysfs and be used by udev.
+ */
+static inline void dev_hw_addr_random(struct net_device *dev, u8 *hwaddr)
+{
+	dev->addr_assign_type |= NET_ADDR_RANDOM;
+	random_ether_addr(hwaddr);
+}
+
 /**
  * compare_ether_addr - Compare two Ethernet addresses
  * @addr1: Pointer to a six-byte array containing the Ethernet address
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b6262898ece..1bca6171b1a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -66,6 +66,11 @@ struct wireless_dev;
 #define HAVE_FREE_NETDEV		/* free_netdev() */
 #define HAVE_NETDEV_PRIV		/* netdev_priv() */
 
+/* hardware address assignment types */
+#define NET_ADDR_PERM		0	/* address is permanent (default) */
+#define NET_ADDR_RANDOM		1	/* address is generated randomly */
+#define NET_ADDR_STOLEN		2	/* address is stolen from other device */
+
 /* Backlog congestion levels */
 #define NET_RX_SUCCESS		0	/* keep 'em coming, baby */
 #define NET_RX_DROP		1	/* packet dropped */
@@ -919,6 +924,7 @@ struct net_device {
 
 	/* Interface address info. */
 	unsigned char		perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
+	unsigned char		addr_assign_type; /* hw address assignment type */
 	unsigned char		addr_len;	/* hardware address length	*/
 	unsigned short          dev_id;		/* for shared network cards */
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index d2b596537d4..af4dfbadf2a 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -95,6 +95,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
 }
 
 NETDEVICE_SHOW(dev_id, fmt_hex);
+NETDEVICE_SHOW(addr_assign_type, fmt_dec);
 NETDEVICE_SHOW(addr_len, fmt_dec);
 NETDEVICE_SHOW(iflink, fmt_dec);
 NETDEVICE_SHOW(ifindex, fmt_dec);
@@ -295,6 +296,7 @@ static ssize_t show_ifalias(struct device *dev,
 }
 
 static struct device_attribute net_class_attributes[] = {
+	__ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
 	__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
 	__ATTR(dev_id, S_IRUGO, show_dev_id, NULL),
 	__ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias),
-- 
cgit v1.2.3-70-g09d2


From 3b87956ea645fb4de7e59c7d0aa94de04be72615 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Thu, 22 Jul 2010 18:45:04 +0000
Subject: net sched: fix race in mirred device removal

This fixes a hang when the target device of a mirred packet classifier
action is removed.

If a mirror or redirection action is configured to cause packets
to go to another device, the classifier holds a ref count, but was assuming
the administrator cleaned up all redirections before removing. The fix
is to add a notifier and cleanup during unregister.

The new list is implicitly protected by RTNL mutex because
it is held during filter add/delete as well as notifier.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_mirred.h |  1 +
 net/sched/act_mirred.c         | 43 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 41 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index ceac661cdfd..cfe2943690f 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -9,6 +9,7 @@ struct tcf_mirred {
 	int			tcfm_ifindex;
 	int			tcfm_ok_push;
 	struct net_device	*tcfm_dev;
+	struct list_head	tcfm_list;
 };
 #define to_mirred(pc) \
 	container_of(pc, struct tcf_mirred, common)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index c0b6863e3b8..1980b71c283 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -33,6 +33,7 @@
 static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1];
 static u32 mirred_idx_gen;
 static DEFINE_RWLOCK(mirred_lock);
+static LIST_HEAD(mirred_list);
 
 static struct tcf_hashinfo mirred_hash_info = {
 	.htab	=	tcf_mirred_ht,
@@ -47,7 +48,9 @@ static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
 			m->tcf_bindcnt--;
 		m->tcf_refcnt--;
 		if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
-			dev_put(m->tcfm_dev);
+			list_del(&m->tcfm_list);
+			if (m->tcfm_dev)
+				dev_put(m->tcfm_dev);
 			tcf_hash_destroy(&m->common, &mirred_hash_info);
 			return 1;
 		}
@@ -134,8 +137,10 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est,
 		m->tcfm_ok_push = ok_push;
 	}
 	spin_unlock_bh(&m->tcf_lock);
-	if (ret == ACT_P_CREATED)
+	if (ret == ACT_P_CREATED) {
+		list_add(&m->tcfm_list, &mirred_list);
 		tcf_hash_insert(pc, &mirred_hash_info);
+	}
 
 	return ret;
 }
@@ -162,9 +167,14 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
 	m->tcf_tm.lastuse = jiffies;
 
 	dev = m->tcfm_dev;
+	if (!dev) {
+		printk_once(KERN_NOTICE "tc mirred: target device is gone\n");
+		goto out;
+	}
+
 	if (!(dev->flags & IFF_UP)) {
 		if (net_ratelimit())
-			pr_notice("tc mirred to Houston: device %s is gone!\n",
+			pr_notice("tc mirred to Houston: device %s is down\n",
 				  dev->name);
 		goto out;
 	}
@@ -232,6 +242,28 @@ nla_put_failure:
 	return -1;
 }
 
+static int mirred_device_event(struct notifier_block *unused,
+			       unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct tcf_mirred *m;
+
+	if (event == NETDEV_UNREGISTER)
+		list_for_each_entry(m, &mirred_list, tcfm_list) {
+			if (m->tcfm_dev == dev) {
+				dev_put(dev);
+				m->tcfm_dev = NULL;
+			}
+		}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block mirred_device_notifier = {
+	.notifier_call = mirred_device_event,
+};
+
+
 static struct tc_action_ops act_mirred_ops = {
 	.kind		=	"mirred",
 	.hinfo		=	&mirred_hash_info,
@@ -252,12 +284,17 @@ MODULE_LICENSE("GPL");
 
 static int __init mirred_init_module(void)
 {
+	int err = register_netdevice_notifier(&mirred_device_notifier);
+	if (err)
+		return err;
+
 	pr_info("Mirror/redirect action on\n");
 	return tcf_register_action(&act_mirred_ops);
 }
 
 static void __exit mirred_cleanup_module(void)
 {
+	unregister_netdevice_notifier(&mirred_device_notifier);
 	tcf_unregister_action(&act_mirred_ops);
 }
 
-- 
cgit v1.2.3-70-g09d2


From fed66381d65a35198639f564365e61a7f256bf79 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 22 Jul 2010 19:09:08 +0000
Subject: net: pskb_expand_head() optimization

Move frags[] to the end of struct skb_shared_info, and make
pskb_expand_head() copy only the used part of it instead of whole array.

This should avoid kmemcheck warnings and speed up pskb_expand_head() as
well, avoiding a lot of cache misses.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 3 ++-
 net/core/skbuff.c      | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f5aa87e1e0c..d89876b806a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -202,10 +202,11 @@ struct skb_shared_info {
 	 */
 	atomic_t	dataref;
 
-	skb_frag_t	frags[MAX_SKB_FRAGS];
 	/* Intermediate layers must ensure that destructor_arg
 	 * remains valid until skb destructor */
 	void *		destructor_arg;
+	/* must be last field, see pskb_expand_head() */
+	skb_frag_t	frags[MAX_SKB_FRAGS];
 };
 
 /* We divide dataref into two halves.  The higher 16 bits hold references
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 76d33ca5f03..7da58a25ad9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -817,7 +817,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	memcpy(data + nhead, skb->head, skb->tail - skb->head);
 #endif
 	memcpy(data + size, skb_end_pointer(skb),
-	       sizeof(struct skb_shared_info));
+	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 		get_page(skb_shinfo(skb)->frags[i].page);
-- 
cgit v1.2.3-70-g09d2


From 6cda9fa2575ec0869fe77b0bdf295c0e51868cab Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Sun, 25 Jul 2010 20:39:03 +0900
Subject: nilfs2: avoid rec_len overflow with 64KB block size

With 64KB blocksize, a directory entry can have a size of 64KB, which does
not fit into the 16 bits we have for entry length.  So this patch stores
0xffff instead and converts the value when read from / written to disk.

Nilfs derives its directory implementation from ext2 filesystem, and
this draws upon the corresponding change on ext2.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 fs/nilfs2/dir.c           | 26 ++++++++++++++------------
 include/linux/nilfs2_fs.h | 18 ++++++++++++++++++
 2 files changed, 32 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index d8d183e6d09..b60277b4446 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -141,7 +141,7 @@ static void nilfs_check_page(struct page *page)
 	}
 	for (offs = 0; offs <= limit - NILFS_DIR_REC_LEN(1); offs += rec_len) {
 		p = (struct nilfs_dir_entry *)(kaddr + offs);
-		rec_len = le16_to_cpu(p->rec_len);
+		rec_len = nilfs_rec_len_from_disk(p->rec_len);
 
 		if (rec_len < NILFS_DIR_REC_LEN(1))
 			goto Eshort;
@@ -235,7 +235,8 @@ nilfs_match(int len, const unsigned char *name, struct nilfs_dir_entry *de)
  */
 static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p)
 {
-	return (struct nilfs_dir_entry *)((char *)p + le16_to_cpu(p->rec_len));
+	return (struct nilfs_dir_entry *)((char *)p +
+					  nilfs_rec_len_from_disk(p->rec_len));
 }
 
 static unsigned char
@@ -326,7 +327,7 @@ static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 					goto success;
 				}
 			}
-			filp->f_pos += le16_to_cpu(de->rec_len);
+			filp->f_pos += nilfs_rec_len_from_disk(de->rec_len);
 		}
 		nilfs_put_page(page);
 	}
@@ -441,7 +442,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
 		    struct page *page, struct inode *inode)
 {
 	unsigned from = (char *) de - (char *) page_address(page);
-	unsigned to = from + le16_to_cpu(de->rec_len);
+	unsigned to = from + nilfs_rec_len_from_disk(de->rec_len);
 	struct address_space *mapping = page->mapping;
 	int err;
 
@@ -497,7 +498,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode)
 				/* We hit i_size */
 				name_len = 0;
 				rec_len = chunk_size;
-				de->rec_len = cpu_to_le16(chunk_size);
+				de->rec_len = nilfs_rec_len_to_disk(chunk_size);
 				de->inode = 0;
 				goto got_it;
 			}
@@ -511,7 +512,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode)
 			if (nilfs_match(namelen, name, de))
 				goto out_unlock;
 			name_len = NILFS_DIR_REC_LEN(de->name_len);
-			rec_len = le16_to_cpu(de->rec_len);
+			rec_len = nilfs_rec_len_from_disk(de->rec_len);
 			if (!de->inode && rec_len >= reclen)
 				goto got_it;
 			if (rec_len >= name_len + reclen)
@@ -534,8 +535,8 @@ got_it:
 		struct nilfs_dir_entry *de1;
 
 		de1 = (struct nilfs_dir_entry *)((char *)de + name_len);
-		de1->rec_len = cpu_to_le16(rec_len - name_len);
-		de->rec_len = cpu_to_le16(name_len);
+		de1->rec_len = nilfs_rec_len_to_disk(rec_len - name_len);
+		de->rec_len = nilfs_rec_len_to_disk(name_len);
 		de = de1;
 	}
 	de->name_len = namelen;
@@ -566,7 +567,8 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
 	struct inode *inode = mapping->host;
 	char *kaddr = page_address(page);
 	unsigned from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1);
-	unsigned to = ((char *)dir - kaddr) + le16_to_cpu(dir->rec_len);
+	unsigned to = ((char *)dir - kaddr) +
+		nilfs_rec_len_from_disk(dir->rec_len);
 	struct nilfs_dir_entry *pde = NULL;
 	struct nilfs_dir_entry *de = (struct nilfs_dir_entry *)(kaddr + from);
 	int err;
@@ -587,7 +589,7 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
 	err = nilfs_prepare_chunk(page, mapping, from, to);
 	BUG_ON(err);
 	if (pde)
-		pde->rec_len = cpu_to_le16(to - from);
+		pde->rec_len = nilfs_rec_len_to_disk(to - from);
 	dir->inode = 0;
 	nilfs_commit_chunk(page, mapping, from, to);
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
@@ -621,14 +623,14 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
 	memset(kaddr, 0, chunk_size);
 	de = (struct nilfs_dir_entry *)kaddr;
 	de->name_len = 1;
-	de->rec_len = cpu_to_le16(NILFS_DIR_REC_LEN(1));
+	de->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1));
 	memcpy(de->name, ".\0\0", 4);
 	de->inode = cpu_to_le64(inode->i_ino);
 	nilfs_set_de_type(de, inode);
 
 	de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1));
 	de->name_len = 2;
-	de->rec_len = cpu_to_le16(chunk_size - NILFS_DIR_REC_LEN(1));
+	de->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1));
 	de->inode = cpu_to_le64(parent->i_ino);
 	memcpy(de->name, "..\0", 4);
 	nilfs_set_de_type(de, inode);
diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index 7dd4cd49449..970828a5ffc 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -326,7 +326,25 @@ enum {
 #define NILFS_DIR_ROUND			(NILFS_DIR_PAD - 1)
 #define NILFS_DIR_REC_LEN(name_len)	(((name_len) + 12 + NILFS_DIR_ROUND) & \
 					~NILFS_DIR_ROUND)
+#define NILFS_MAX_REC_LEN		((1<<16)-1)
 
+static inline unsigned nilfs_rec_len_from_disk(__le16 dlen)
+{
+	unsigned len = le16_to_cpu(dlen);
+
+	if (len == NILFS_MAX_REC_LEN)
+		return 1 << 16;
+	return len;
+}
+
+static inline __le16 nilfs_rec_len_to_disk(unsigned len)
+{
+	if (len == (1 << 16))
+		return cpu_to_le16(NILFS_MAX_REC_LEN);
+	else if (len > (1 << 16))
+		BUG();
+	return cpu_to_le16(len);
+}
 
 /**
  * struct nilfs_finfo - file information
-- 
cgit v1.2.3-70-g09d2


From 89c0fd014d34d409a7b196667c2b9a4813b6c968 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Sun, 25 Jul 2010 22:44:53 +0900
Subject: nilfs2: reject filesystem with unsupported block size

This inserts a sanity check that refuses to mount a filesystem with an
unsupported block size.

Previously, the nilfs kernel code only checked the limitations of the
device, even though mkfs.nilfs2 limits the range of block sizes; there
was no check to prevent rec_len overflow with larger block sizes.

With this change, block sizes larger than 64KB or smaller than 1KB
will be rejected explicitly by the kernel.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 fs/nilfs2/the_nilfs.c     | 9 ++++++++-
 include/linux/nilfs2_fs.h | 6 ++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index da67b560f3c..37de1f062d8 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -671,7 +671,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
 		goto out;
 	}
 
-	blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
+	blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE);
 	if (!blocksize) {
 		printk(KERN_ERR "NILFS: unable to set blocksize\n");
 		err = -EINVAL;
@@ -690,6 +690,13 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
 		goto failed_sbh;
 
 	blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size);
+	if (blocksize < NILFS_MIN_BLOCK_SIZE ||
+	    blocksize > NILFS_MAX_BLOCK_SIZE) {
+		printk(KERN_ERR "NILFS: couldn't mount because of unsupported "
+		       "filesystem blocksize %d\n", blocksize);
+		err = -EINVAL;
+		goto failed_sbh;
+	}
 	if (sb->s_blocksize != blocksize) {
 		int hw_blocksize = bdev_logical_block_size(sb->s_bdev);
 
diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index 970828a5ffc..f5487b6f91e 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -286,6 +286,12 @@ struct nilfs_super_block {
 
 #define NILFS_NAME_LEN 255
 
+/*
+ * Block size limitations
+ */
+#define NILFS_MIN_BLOCK_SIZE		1024
+#define NILFS_MAX_BLOCK_SIZE		65536
+
 /*
  * The new version of the directory entry.  Since V0 structures are
  * stored in intel byte order, and the name_len field could never be
-- 
cgit v1.2.3-70-g09d2


From ba9f507a1bea5ca2fc4a19e227c56b60fd5faca3 Mon Sep 17 00:00:00 2001
From: Xiaolong Chen <xiaolong.chen@gmail.com>
Date: Mon, 26 Jul 2010 01:01:11 -0700
Subject: Input: adp5588-keys - export unused GPIO pins

This patch allows GPIO pins not used by the keypad itself to be
exported so that they are accessible from elsewhere.

Signed-off-by: Xiaolong Chen <xiao-long.chen@motorola.com>
Signed-off-by: Yuanbo Ye <yuan-bo.ye@motorola.com>
Signed-off-by: Tao Hu <taohu@motorola.com>
Acked-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/adp5588-keys.c | 209 +++++++++++++++++++++++++++++++++-
 include/linux/i2c/adp5588.h           |   1 +
 2 files changed, 208 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c
index 9096db73c3c..c39ec93c0c5 100644
--- a/drivers/input/keyboard/adp5588-keys.c
+++ b/drivers/input/keyboard/adp5588-keys.c
@@ -19,6 +19,7 @@
 #include <linux/platform_device.h>
 #include <linux/input.h>
 #include <linux/i2c.h>
+#include <linux/gpio.h>
 #include <linux/slab.h>
 
 #include <linux/i2c/adp5588.h>
@@ -54,6 +55,10 @@
 
 #define KEYP_MAX_EVENT		10
 
+#define MAXGPIO			18
+#define ADP_BANK(offs)		((offs) >> 3)
+#define ADP_BIT(offs)		(1u << ((offs) & 0x7))
+
 /*
  * Early pre 4.0 Silicon required to delay readout by at least 25ms,
  * since the Event Counter Register updated 25ms after the interrupt
@@ -69,6 +74,14 @@ struct adp5588_kpad {
 	unsigned short keycode[ADP5588_KEYMAPSIZE];
 	const struct adp5588_gpi_map *gpimap;
 	unsigned short gpimapsize;
+#ifdef CONFIG_GPIOLIB
+	unsigned char gpiomap[MAXGPIO];
+	bool export_gpio;
+	struct gpio_chip gc;
+	struct mutex gpio_lock;	/* Protect cached dir, dat_out */
+	u8 dat_out[3];
+	u8 dir[3];
+#endif
 };
 
 static int adp5588_read(struct i2c_client *client, u8 reg)
@@ -86,6 +99,183 @@ static int adp5588_write(struct i2c_client *client, u8 reg, u8 val)
 	return i2c_smbus_write_byte_data(client, reg, val);
 }
 
+#ifdef CONFIG_GPIOLIB
+static int adp5588_gpio_get_value(struct gpio_chip *chip, unsigned off)
+{
+	struct adp5588_kpad *kpad = container_of(chip, struct adp5588_kpad, gc);
+	unsigned int bank = ADP_BANK(kpad->gpiomap[off]);
+	unsigned int bit = ADP_BIT(kpad->gpiomap[off]);
+
+	return !!(adp5588_read(kpad->client, GPIO_DAT_STAT1 + bank) & bit);
+}
+
+static void adp5588_gpio_set_value(struct gpio_chip *chip,
+				   unsigned off, int val)
+{
+	struct adp5588_kpad *kpad = container_of(chip, struct adp5588_kpad, gc);
+	unsigned int bank = ADP_BANK(kpad->gpiomap[off]);
+	unsigned int bit = ADP_BIT(kpad->gpiomap[off]);
+
+	mutex_lock(&kpad->gpio_lock);
+
+	if (val)
+		kpad->dat_out[bank] |= bit;
+	else
+		kpad->dat_out[bank] &= ~bit;
+
+	adp5588_write(kpad->client, GPIO_DAT_OUT1 + bank,
+			   kpad->dat_out[bank]);
+
+	mutex_unlock(&kpad->gpio_lock);
+}
+
+static int adp5588_gpio_direction_input(struct gpio_chip *chip, unsigned off)
+{
+	struct adp5588_kpad *kpad = container_of(chip, struct adp5588_kpad, gc);
+	unsigned int bank = ADP_BANK(kpad->gpiomap[off]);
+	unsigned int bit = ADP_BIT(kpad->gpiomap[off]);
+	int ret;
+
+	mutex_lock(&kpad->gpio_lock);
+
+	kpad->dir[bank] &= ~bit;
+	ret = adp5588_write(kpad->client, GPIO_DIR1 + bank, kpad->dir[bank]);
+
+	mutex_unlock(&kpad->gpio_lock);
+
+	return ret;
+}
+
+static int adp5588_gpio_direction_output(struct gpio_chip *chip,
+					 unsigned off, int val)
+{
+	struct adp5588_kpad *kpad = container_of(chip, struct adp5588_kpad, gc);
+	unsigned int bank = ADP_BANK(kpad->gpiomap[off]);
+	unsigned int bit = ADP_BIT(kpad->gpiomap[off]);
+	int ret;
+
+	mutex_lock(&kpad->gpio_lock);
+
+	kpad->dir[bank] |= bit;
+
+	if (val)
+		kpad->dat_out[bank] |= bit;
+	else
+		kpad->dat_out[bank] &= ~bit;
+
+	ret = adp5588_write(kpad->client, GPIO_DAT_OUT1 + bank,
+				 kpad->dat_out[bank]);
+	ret |= adp5588_write(kpad->client, GPIO_DIR1 + bank,
+				 kpad->dir[bank]);
+
+	mutex_unlock(&kpad->gpio_lock);
+
+	return ret;
+}
+
+static int __devinit adp5588_gpio_add(struct device *dev)
+{
+	struct adp5588_kpad *kpad = dev_get_drvdata(dev);
+	const struct adp5588_kpad_platform_data *pdata = dev->platform_data;
+	const struct adp5588_gpio_platform_data *gpio_data = pdata->gpio_data;
+	int i, error;
+
+	if (gpio_data) {
+		int j = 0;
+		bool pin_used[MAXGPIO];
+
+		for (i = 0; i < pdata->rows; i++)
+			pin_used[i] = true;
+
+		for (i = 0; i < pdata->cols; i++)
+			pin_used[i + GPI_PIN_COL_BASE - GPI_PIN_BASE] = true;
+
+		for (i = 0; i < kpad->gpimapsize; i++)
+			pin_used[kpad->gpimap[i].pin - GPI_PIN_BASE] = true;
+
+		for (i = 0; i < MAXGPIO; i++) {
+			if (!pin_used[i])
+				kpad->gpiomap[j++] = i;
+		}
+		kpad->gc.ngpio = j;
+
+		if (kpad->gc.ngpio)
+			kpad->export_gpio = true;
+	}
+
+	if (!kpad->export_gpio) {
+		dev_info(dev, "No unused gpios left to export\n");
+		return 0;
+	}
+
+	kpad->gc.direction_input = adp5588_gpio_direction_input;
+	kpad->gc.direction_output = adp5588_gpio_direction_output;
+	kpad->gc.get = adp5588_gpio_get_value;
+	kpad->gc.set = adp5588_gpio_set_value;
+	kpad->gc.can_sleep = 1;
+
+	kpad->gc.base = gpio_data->gpio_start;
+	kpad->gc.label = kpad->client->name;
+	kpad->gc.owner = THIS_MODULE;
+
+	mutex_init(&kpad->gpio_lock);
+
+	error = gpiochip_add(&kpad->gc);
+	if (error) {
+		dev_err(dev, "gpiochip_add failed, err: %d\n", error);
+		return error;
+	}
+
+	for (i = 0; i <= ADP_BANK(MAXGPIO); i++) {
+		kpad->dat_out[i] = adp5588_read(kpad->client,
+						GPIO_DAT_OUT1 + i);
+		kpad->dir[i] = adp5588_read(kpad->client, GPIO_DIR1 + i);
+	}
+
+	if (gpio_data->setup) {
+		error = gpio_data->setup(kpad->client,
+					 kpad->gc.base, kpad->gc.ngpio,
+					 gpio_data->context);
+		if (error)
+			dev_warn(dev, "setup failed, %d\n", error);
+	}
+
+	return 0;
+}
+
+static void __devexit adp5588_gpio_remove(struct device *dev)
+{
+	struct adp5588_kpad *kpad = dev_get_drvdata(dev);
+	const struct adp5588_kpad_platform_data *pdata = dev->platform_data;
+	const struct adp5588_gpio_platform_data *gpio_data = pdata->gpio_data;
+	int error;
+
+	if (!kpad->export_gpio)
+		return;
+
+	if (gpio_data->teardown) {
+		error = gpio_data->teardown(kpad->client,
+					    kpad->gc.base, kpad->gc.ngpio,
+					    gpio_data->context);
+		if (error)
+			dev_warn(dev, "teardown failed %d\n", error);
+	}
+
+	error = gpiochip_remove(&kpad->gc);
+	if (error)
+		dev_warn(dev, "gpiochip_remove failed %d\n", error);
+}
+#else
+static inline int adp5588_gpio_add(struct device *dev)
+{
+	return 0;
+}
+
+static inline void adp5588_gpio_remove(struct device *dev)
+{
+}
+#endif
+
 static void adp5588_report_events(struct adp5588_kpad *kpad, int ev_cnt)
 {
 	int i, j;
@@ -150,7 +340,8 @@ static irqreturn_t adp5588_irq(int irq, void *handle)
 
 static int __devinit adp5588_setup(struct i2c_client *client)
 {
-	struct adp5588_kpad_platform_data *pdata = client->dev.platform_data;
+	const struct adp5588_kpad_platform_data *pdata = client->dev.platform_data;
+	const struct adp5588_gpio_platform_data *gpio_data = pdata->gpio_data;
 	int i, ret;
 	unsigned char evt_mode1 = 0, evt_mode2 = 0, evt_mode3 = 0;
 
@@ -184,6 +375,15 @@ static int __devinit adp5588_setup(struct i2c_client *client)
 		ret |= adp5588_write(client, GPI_EM3, evt_mode3);
 	}
 
+	if (gpio_data) {
+		for (i = 0; i <= ADP_BANK(MAXGPIO); i++) {
+			int pull_mask = gpio_data->pullup_dis_mask;
+
+			ret |= adp5588_write(client, GPIO_PULL1 + i,
+				(pull_mask >> (8 * i)) & 0xFF);
+		}
+	}
+
 	ret |= adp5588_write(client, INT_STAT, CMP2_INT | CMP1_INT |
 					OVR_FLOW_INT | K_LCK_INT |
 					GPI_INT | KE_INT); /* Status is W1C */
@@ -240,7 +440,7 @@ static int __devinit adp5588_probe(struct i2c_client *client,
 					const struct i2c_device_id *id)
 {
 	struct adp5588_kpad *kpad;
-	struct adp5588_kpad_platform_data *pdata = client->dev.platform_data;
+	const struct adp5588_kpad_platform_data *pdata = client->dev.platform_data;
 	struct input_dev *input;
 	unsigned int revid;
 	int ret, i;
@@ -381,6 +581,10 @@ static int __devinit adp5588_probe(struct i2c_client *client,
 	if (kpad->gpimapsize)
 		adp5588_report_switch_state(kpad);
 
+	error = adp5588_gpio_add(&client->dev);
+	if (error)
+		goto err_free_irq;
+
 	device_init_wakeup(&client->dev, 1);
 	i2c_set_clientdata(client, kpad);
 
@@ -407,6 +611,7 @@ static int __devexit adp5588_remove(struct i2c_client *client)
 	free_irq(client->irq, kpad);
 	cancel_delayed_work_sync(&kpad->work);
 	input_unregister_device(kpad->input);
+	adp5588_gpio_remove(&client->dev);
 	kfree(kpad);
 
 	return 0;
diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h
index b5f57c498e2..269181b8f62 100644
--- a/include/linux/i2c/adp5588.h
+++ b/include/linux/i2c/adp5588.h
@@ -123,6 +123,7 @@ struct adp5588_kpad_platform_data {
 	unsigned short unlock_key2;	/* Unlock Key 2 */
 	const struct adp5588_gpi_map *gpimap;
 	unsigned short gpimapsize;
+	const struct adp5588_gpio_platform_data *gpio_data;
 };
 
 struct adp5588_gpio_platform_data {
-- 
cgit v1.2.3-70-g09d2


From 3289a8368c294726659588d044e354dd3bcf44b3 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Thu, 22 Jul 2010 16:31:48 -0400
Subject: lib80211: remove unused host_build_iv option

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ipw2x00/libipw.h    |  1 -
 drivers/net/wireless/ipw2x00/libipw_tx.c | 16 +++-------------
 drivers/net/wireless/ipw2x00/libipw_wx.c |  2 +-
 include/net/lib80211.h                   |  3 ---
 net/wireless/lib80211_crypt_ccmp.c       |  1 -
 net/wireless/lib80211_crypt_tkip.c       |  1 -
 net/wireless/lib80211_crypt_wep.c        |  1 -
 7 files changed, 4 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ipw2x00/libipw.h b/drivers/net/wireless/ipw2x00/libipw.h
index 284b0e4cb81..7b9539a1b54 100644
--- a/drivers/net/wireless/ipw2x00/libipw.h
+++ b/drivers/net/wireless/ipw2x00/libipw.h
@@ -828,7 +828,6 @@ struct libipw_device {
 	int host_strip_iv_icv;
 
 	int host_open_frag;
-	int host_build_iv;
 	int ieee802_1x;		/* is IEEE 802.1X used */
 
 	/* WPA data */
diff --git a/drivers/net/wireless/ipw2x00/libipw_tx.c b/drivers/net/wireless/ipw2x00/libipw_tx.c
index da8beac7fcf..01c88a71abe 100644
--- a/drivers/net/wireless/ipw2x00/libipw_tx.c
+++ b/drivers/net/wireless/ipw2x00/libipw_tx.c
@@ -260,7 +260,7 @@ netdev_tx_t libipw_xmit(struct sk_buff *skb, struct net_device *dev)
 	int i, bytes_per_frag, nr_frags, bytes_last_frag, frag_size,
 	    rts_required;
 	unsigned long flags;
-	int encrypt, host_encrypt, host_encrypt_msdu, host_build_iv;
+	int encrypt, host_encrypt, host_encrypt_msdu;
 	__be16 ether_type;
 	int bytes, fc, hdr_len;
 	struct sk_buff *skb_frag;
@@ -301,7 +301,6 @@ netdev_tx_t libipw_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	host_encrypt = ieee->host_encrypt && encrypt && crypt;
 	host_encrypt_msdu = ieee->host_encrypt_msdu && encrypt && crypt;
-	host_build_iv = ieee->host_build_iv && encrypt && crypt;
 
 	if (!encrypt && ieee->ieee802_1x &&
 	    ieee->drop_unencrypted && ether_type != htons(ETH_P_PAE)) {
@@ -313,7 +312,7 @@ netdev_tx_t libipw_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb_copy_from_linear_data(skb, dest, ETH_ALEN);
 	skb_copy_from_linear_data_offset(skb, ETH_ALEN, src, ETH_ALEN);
 
-	if (host_encrypt || host_build_iv)
+	if (host_encrypt)
 		fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA |
 		    IEEE80211_FCTL_PROTECTED;
 	else
@@ -467,7 +466,7 @@ netdev_tx_t libipw_xmit(struct sk_buff *skb, struct net_device *dev)
 	for (; i < nr_frags; i++) {
 		skb_frag = txb->fragments[i];
 
-		if (host_encrypt || host_build_iv)
+		if (host_encrypt)
 			skb_reserve(skb_frag,
 				    crypt->ops->extra_mpdu_prefix_len);
 
@@ -502,15 +501,6 @@ netdev_tx_t libipw_xmit(struct sk_buff *skb, struct net_device *dev)
 		 * to insert the IV between the header and the payload */
 		if (host_encrypt)
 			libipw_encrypt_fragment(ieee, skb_frag, hdr_len);
-		else if (host_build_iv) {
-			atomic_inc(&crypt->refcnt);
-			if (crypt->ops->build_iv)
-				crypt->ops->build_iv(skb_frag, hdr_len,
-				      ieee->sec.keys[ieee->sec.active_key],
-				      ieee->sec.key_sizes[ieee->sec.active_key],
-				      crypt->priv);
-			atomic_dec(&crypt->refcnt);
-		}
 
 		if (ieee->config &
 		    (CFG_LIBIPW_COMPUTE_FCS | CFG_LIBIPW_RESERVE_FCS))
diff --git a/drivers/net/wireless/ipw2x00/libipw_wx.c b/drivers/net/wireless/ipw2x00/libipw_wx.c
index 8a4bae44b10..d7bd6cf00a8 100644
--- a/drivers/net/wireless/ipw2x00/libipw_wx.c
+++ b/drivers/net/wireless/ipw2x00/libipw_wx.c
@@ -320,7 +320,7 @@ int libipw_wx_set_encode(struct libipw_device *ieee,
 	};
 	int i, key, key_provided, len;
 	struct lib80211_crypt_data **crypt;
-	int host_crypto = ieee->host_encrypt || ieee->host_decrypt || ieee->host_build_iv;
+	int host_crypto = ieee->host_encrypt || ieee->host_decrypt;
 	DECLARE_SSID_BUF(ssid);
 
 	LIBIPW_DEBUG_WX("SET_ENCODE\n");
diff --git a/include/net/lib80211.h b/include/net/lib80211.h
index fb4e2784857..848cce1bb7a 100644
--- a/include/net/lib80211.h
+++ b/include/net/lib80211.h
@@ -54,9 +54,6 @@ struct lib80211_crypto_ops {
 	/* deinitialize crypto context and free allocated private data */
 	void (*deinit) (void *priv);
 
-	int (*build_iv) (struct sk_buff * skb, int hdr_len,
-			 u8 *key, int keylen, void *priv);
-
 	/* encrypt/decrypt return < 0 on error or >= 0 on success. The return
 	 * value from decrypt_mpdu is passed as the keyidx value for
 	 * decrypt_msdu. skb must have enough head and tail room for the
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c
index b7fa31d5fd1..dacb3b4b1bd 100644
--- a/net/wireless/lib80211_crypt_ccmp.c
+++ b/net/wireless/lib80211_crypt_ccmp.c
@@ -467,7 +467,6 @@ static struct lib80211_crypto_ops lib80211_crypt_ccmp = {
 	.name = "CCMP",
 	.init = lib80211_ccmp_init,
 	.deinit = lib80211_ccmp_deinit,
-	.build_iv = lib80211_ccmp_hdr,
 	.encrypt_mpdu = lib80211_ccmp_encrypt,
 	.decrypt_mpdu = lib80211_ccmp_decrypt,
 	.encrypt_msdu = NULL,
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index a7f995613f1..0fe40510e2c 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -757,7 +757,6 @@ static struct lib80211_crypto_ops lib80211_crypt_tkip = {
 	.name = "TKIP",
 	.init = lib80211_tkip_init,
 	.deinit = lib80211_tkip_deinit,
-	.build_iv = lib80211_tkip_hdr,
 	.encrypt_mpdu = lib80211_tkip_encrypt,
 	.decrypt_mpdu = lib80211_tkip_decrypt,
 	.encrypt_msdu = lib80211_michael_mic_add,
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
index 6d41e05ca33..e2e88878ba3 100644
--- a/net/wireless/lib80211_crypt_wep.c
+++ b/net/wireless/lib80211_crypt_wep.c
@@ -269,7 +269,6 @@ static struct lib80211_crypto_ops lib80211_crypt_wep = {
 	.name = "WEP",
 	.init = lib80211_wep_init,
 	.deinit = lib80211_wep_deinit,
-	.build_iv = lib80211_wep_build_iv,
 	.encrypt_mpdu = lib80211_wep_encrypt,
 	.decrypt_mpdu = lib80211_wep_decrypt,
 	.encrypt_msdu = NULL,
-- 
cgit v1.2.3-70-g09d2


From 40e2e97316af6e62affab7a392e792494b8d9dde Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Sun, 18 Jul 2010 21:17:09 +0000
Subject: direct-io: move aio_complete into ->end_io

Filesystems with unwritten extent support must not complete an AIO request
until the transaction to convert the extent has been committed.  That means
the aio_complete call needs to be moved into the ->end_io callback so
that the filesystem can control when to call it exactly.

This makes a bit of a mess out of dio_complete and makes the ->end_io
callback prototype even more complicated.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/direct-io.c              | 26 ++++++++++++++------------
 fs/ext4/inode.c             | 10 +++++++---
 fs/ocfs2/aops.c             |  7 ++++++-
 fs/xfs/linux-2.6/xfs_aops.c |  7 ++++++-
 fs/xfs/linux-2.6/xfs_aops.h |  2 ++
 include/linux/fs.h          |  3 ++-
 6 files changed, 37 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 7600aacf531..a10cb91cade 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio)
  * filesystems can use it to hold additional state between get_block calls and
  * dio_complete.
  */
-static int dio_complete(struct dio *dio, loff_t offset, int ret)
+static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async)
 {
 	ssize_t transferred = 0;
 
@@ -239,14 +239,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
 			transferred = dio->i_size - offset;
 	}
 
-	if (dio->end_io && dio->result)
-		dio->end_io(dio->iocb, offset, transferred,
-			    dio->map_bh.b_private);
-
-	if (dio->flags & DIO_LOCKING)
-		/* lockdep: non-owner release */
-		up_read_non_owner(&dio->inode->i_alloc_sem);
-
 	if (ret == 0)
 		ret = dio->page_errors;
 	if (ret == 0)
@@ -254,6 +246,17 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
 	if (ret == 0)
 		ret = transferred;
 
+	if (dio->end_io && dio->result) {
+		dio->end_io(dio->iocb, offset, transferred,
+			    dio->map_bh.b_private, ret, is_async);
+	} else if (is_async) {
+		aio_complete(dio->iocb, ret, 0);
+	}
+
+	if (dio->flags & DIO_LOCKING)
+		/* lockdep: non-owner release */
+		up_read_non_owner(&dio->inode->i_alloc_sem);
+
 	return ret;
 }
 
@@ -277,8 +280,7 @@ static void dio_bio_end_aio(struct bio *bio, int error)
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 
 	if (remaining == 0) {
-		int ret = dio_complete(dio, dio->iocb->ki_pos, 0);
-		aio_complete(dio->iocb, ret, 0);
+		dio_complete(dio, dio->iocb->ki_pos, 0, true);
 		kfree(dio);
 	}
 }
@@ -1126,7 +1128,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 
 	if (ret2 == 0) {
-		ret = dio_complete(dio, offset, ret);
+		ret = dio_complete(dio, offset, ret, false);
 		kfree(dio);
 	} else
 		BUG_ON(ret != -EIOCBQUEUED);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 42272d67955..0afc8c1d8cf 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3775,7 +3775,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
 }
 
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
-			    ssize_t size, void *private)
+			    ssize_t size, void *private, int ret,
+			    bool is_async)
 {
         ext4_io_end_t *io_end = iocb->private;
 	struct workqueue_struct *wq;
@@ -3784,7 +3785,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 
 	/* if not async direct IO or dio with 0 bytes write, just return */
 	if (!io_end || !size)
-		return;
+		goto out;
 
 	ext_debug("ext4_end_io_dio(): io_end 0x%p"
 		  "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
@@ -3795,7 +3796,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	if (io_end->flag != EXT4_IO_UNWRITTEN){
 		ext4_free_io_end(io_end);
 		iocb->private = NULL;
-		return;
+		goto out;
 	}
 
 	io_end->offset = offset;
@@ -3812,6 +3813,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	list_add_tail(&io_end->list, &ei->i_completed_io_list);
 	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
 	iocb->private = NULL;
+out:
+	if (is_async)
+		aio_complete(iocb, ret, 0);
 }
 
 static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 356e976772b..96337a4fbbd 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -578,7 +578,9 @@ bail:
 static void ocfs2_dio_end_io(struct kiocb *iocb,
 			     loff_t offset,
 			     ssize_t bytes,
-			     void *private)
+			     void *private,
+			     int ret,
+			     bool is_async)
 {
 	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
 	int level;
@@ -592,6 +594,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 	if (!level)
 		up_read(&inode->i_alloc_sem);
 	ocfs2_rw_unlock(inode, level);
+
+	if (is_async)
+		aio_complete(iocb, ret, 0);
 }
 
 /*
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 8abbf0532ea..95d1e2695c3 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1406,7 +1406,9 @@ xfs_end_io_direct(
 	struct kiocb	*iocb,
 	loff_t		offset,
 	ssize_t		size,
-	void		*private)
+	void		*private,
+	int		ret,
+	bool		is_async)
 {
 	xfs_ioend_t	*ioend = iocb->private;
 
@@ -1452,6 +1454,9 @@ xfs_end_io_direct(
 	 * against double-freeing.
 	 */
 	iocb->private = NULL;
+
+	if (is_async)
+		aio_complete(iocb, ret, 0);
 }
 
 STATIC ssize_t
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 319da173cc1..c5057fb6237 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -37,6 +37,8 @@ typedef struct xfs_ioend {
 	size_t			io_size;	/* size of the extent */
 	xfs_off_t		io_offset;	/* offset in the file */
 	struct work_struct	io_work;	/* xfsdatad work queue */
+	struct kiocb		*io_iocb;
+	int			io_result;
 } xfs_ioend_t;
 
 extern const struct address_space_operations xfs_address_space_operations;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 68ca1b0491a..f91affb7d53 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -415,7 +415,8 @@ struct buffer_head;
 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create);
 typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
-			ssize_t bytes, void *private);
+			ssize_t bytes, void *private, int ret,
+			bool is_async);
 
 /*
  * Attribute flags.  These should be or-ed together to figure out what
-- 
cgit v1.2.3-70-g09d2


From 409771d258e9dd71c30f3c9520fd2b796ffc40f0 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Fri, 14 May 2010 12:48:19 +0100
Subject: x86: Use xen_vcpuop_clockevent, xen_clocksource and xen wallclock.

Use xen_vcpuop_clockevent instead of hpet and APIC timers as the main
clockevent device on all vcpus, use the xen wallclock time as wallclock
instead of rtc and use xen_clocksource as clocksource.
The pv clock algorithm needs to work correctly for the xen_clocksource
and xen wallclock to be usable; only modern Xen versions offer a
reliable pv clock in HVM guests (XENFEAT_hvm_safe_pvclock).

Using the hpet as clocksource means a VMEXIT every time we read/write to
the hpet mmio addresses; pvclock gives us a better rating without
VMEXITs. The same goes for the xen wallclock and xen_vcpuop_clockevent.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Don Dutile <ddutile@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c         | 14 ++--------
 arch/x86/xen/suspend.c           |  6 +++++
 arch/x86/xen/time.c              | 58 ++++++++++++++++++++++++++++++++++++----
 arch/x86/xen/xen-ops.h           |  7 ++---
 include/xen/interface/features.h |  3 +++
 5 files changed, 66 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 127c95c8d15..a9017296388 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -935,10 +935,6 @@ static const struct pv_init_ops xen_init_ops __initdata = {
 	.patch = xen_patch,
 };
 
-static const struct pv_time_ops xen_time_ops __initdata = {
-	.sched_clock = xen_sched_clock,
-};
-
 static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.cpuid = xen_cpuid,
 
@@ -1076,7 +1072,6 @@ asmlinkage void __init xen_start_kernel(void)
 	/* Install Xen paravirt ops */
 	pv_info = xen_info;
 	pv_init_ops = xen_init_ops;
-	pv_time_ops = xen_time_ops;
 	pv_cpu_ops = xen_cpu_ops;
 	pv_apic_ops = xen_apic_ops;
 
@@ -1084,13 +1079,7 @@ asmlinkage void __init xen_start_kernel(void)
 	x86_init.oem.arch_setup = xen_arch_setup;
 	x86_init.oem.banner = xen_banner;
 
-	x86_init.timers.timer_init = xen_time_init;
-	x86_init.timers.setup_percpu_clockev = x86_init_noop;
-	x86_cpuinit.setup_percpu_clockev = x86_init_noop;
-
-	x86_platform.calibrate_tsc = xen_tsc_khz;
-	x86_platform.get_wallclock = xen_get_wallclock;
-	x86_platform.set_wallclock = xen_set_wallclock;
+	xen_init_time_ops();
 
 	/*
 	 * Set up some pagetable state before starting to set any ptes.
@@ -1327,6 +1316,7 @@ static void __init xen_hvm_guest_init(void)
 	register_cpu_notifier(&xen_hvm_cpu_notifier);
 	have_vcpu_info_placement = 0;
 	x86_init.irqs.intr_init = xen_init_IRQ;
+	xen_hvm_init_time_ops();
 }
 
 static bool __init xen_hvm_platform(void)
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index d07479c340f..1d789d56877 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -28,8 +28,14 @@ void xen_pre_suspend(void)
 
 void xen_hvm_post_suspend(int suspend_cancelled)
 {
+	int cpu;
 	xen_hvm_init_shared_info();
 	xen_callback_vector();
+	if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
+		for_each_online_cpu(cpu) {
+			xen_setup_runstate_info(cpu);
+		}
+	}
 }
 
 void xen_post_suspend(int suspend_cancelled)
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index b3c6c59ed30..4780e55886a 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -20,6 +20,7 @@
 #include <asm/xen/hypercall.h>
 
 #include <xen/events.h>
+#include <xen/features.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/vcpu.h>
 
@@ -160,7 +161,7 @@ static void do_stolen_accounting(void)
  * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
  * states.
  */
-unsigned long long xen_sched_clock(void)
+static unsigned long long xen_sched_clock(void)
 {
 	struct vcpu_runstate_info state;
 	cycle_t now;
@@ -195,7 +196,7 @@ unsigned long long xen_sched_clock(void)
 
 
 /* Get the TSC speed from Xen */
-unsigned long xen_tsc_khz(void)
+static unsigned long xen_tsc_khz(void)
 {
 	struct pvclock_vcpu_time_info *info =
 		&HYPERVISOR_shared_info->vcpu_info[0].time;
@@ -230,7 +231,7 @@ static void xen_read_wallclock(struct timespec *ts)
 	put_cpu_var(xen_vcpu);
 }
 
-unsigned long xen_get_wallclock(void)
+static unsigned long xen_get_wallclock(void)
 {
 	struct timespec ts;
 
@@ -238,7 +239,7 @@ unsigned long xen_get_wallclock(void)
 	return ts.tv_sec;
 }
 
-int xen_set_wallclock(unsigned long now)
+static int xen_set_wallclock(unsigned long now)
 {
 	/* do nothing for domU */
 	return -1;
@@ -473,7 +474,11 @@ void xen_timer_resume(void)
 	}
 }
 
-__init void xen_time_init(void)
+static const struct pv_time_ops xen_time_ops __initdata = {
+	.sched_clock = xen_sched_clock,
+};
+
+static __init void xen_time_init(void)
 {
 	int cpu = smp_processor_id();
 	struct timespec tp;
@@ -497,3 +502,46 @@ __init void xen_time_init(void)
 	xen_setup_timer(cpu);
 	xen_setup_cpu_clockevents();
 }
+
+__init void xen_init_time_ops(void)
+{
+	pv_time_ops = xen_time_ops;
+
+	x86_init.timers.timer_init = xen_time_init;
+	x86_init.timers.setup_percpu_clockev = x86_init_noop;
+	x86_cpuinit.setup_percpu_clockev = x86_init_noop;
+
+	x86_platform.calibrate_tsc = xen_tsc_khz;
+	x86_platform.get_wallclock = xen_get_wallclock;
+	x86_platform.set_wallclock = xen_set_wallclock;
+}
+
+static void xen_hvm_setup_cpu_clockevents(void)
+{
+	int cpu = smp_processor_id();
+	xen_setup_runstate_info(cpu);
+	xen_setup_timer(cpu);
+	xen_setup_cpu_clockevents();
+}
+
+__init void xen_hvm_init_time_ops(void)
+{
+	/* vector callback is needed otherwise we cannot receive interrupts
+	 * on cpu > 0 */
+	if (!xen_have_vector_callback && num_present_cpus() > 1)
+		return;
+	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
+		printk(KERN_INFO "Xen doesn't support pvclock on HVM, "
+				"disable pv timer\n");
+		return;
+	}
+
+	pv_time_ops = xen_time_ops;	/* installs xen_sched_clock */
+	x86_init.timers.setup_percpu_clockev = xen_time_init;
+	x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
+
+	x86_platform.calibrate_tsc = xen_tsc_khz;
+	x86_platform.get_wallclock = xen_get_wallclock;
+	x86_platform.set_wallclock = xen_set_wallclock;
+}
+
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 01c9dd38652..089d18923d2 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -49,11 +49,8 @@ void xen_setup_runstate_info(int cpu);
 void xen_teardown_timer(int cpu);
 cycle_t xen_clocksource_read(void);
 void xen_setup_cpu_clockevents(void);
-unsigned long xen_tsc_khz(void);
-void __init xen_time_init(void);
-unsigned long xen_get_wallclock(void);
-int xen_set_wallclock(unsigned long time);
-unsigned long long xen_sched_clock(void);
+void __init xen_init_time_ops(void);
+void __init xen_hvm_init_time_ops(void);
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
 
diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h
index 8ab08b91bf6..70d2563ab16 100644
--- a/include/xen/interface/features.h
+++ b/include/xen/interface/features.h
@@ -44,6 +44,9 @@
 /* x86: Does this Xen host support the HVM callback vector type? */
 #define XENFEAT_hvm_callback_vector        8
 
+/* x86: pvclock algorithm is safe to use on HVM */
+#define XENFEAT_hvm_safe_pvclock           9
+
 #define XENFEAT_NR_SUBMAPS 1
 
 #endif /* __XEN_PUBLIC_FEATURES_H__ */
-- 
cgit v1.2.3-70-g09d2


From c1c5413ad58cb73267d328e6020268aa2e50d8ca Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Fri, 14 May 2010 12:44:30 +0100
Subject: x86: Unplug emulated disks and nics.

Add a xen_emul_unplug command line option to the kernel to unplug
xen emulated disks and nics.

Set the default value of xen_emul_unplug depending on whether or
not the Xen PV frontends and the Xen platform PCI driver have
been compiled for this kernel (modules or built-in are both OK).

The user can specify xen_emul_unplug=ignore to enable PV drivers on HVM
even if the host platform doesn't support unplug.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 Documentation/kernel-parameters.txt |  11 +++
 arch/x86/xen/Makefile               |   2 +-
 arch/x86/xen/enlighten.c            |   1 +
 arch/x86/xen/platform-pci-unplug.c  | 135 ++++++++++++++++++++++++++++++++++++
 arch/x86/xen/xen-ops.h              |   1 +
 drivers/block/xen-blkfront.c        |  17 +++++
 drivers/xen/platform-pci.c          |   6 ++
 drivers/xen/xenbus/xenbus_probe.c   |   4 ++
 include/xen/platform_pci.h          |  49 +++++++++++++
 9 files changed, 225 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/xen/platform-pci-unplug.c
 create mode 100644 include/xen/platform_pci.h

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 82d6aeb5228..eefcd805102 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -115,6 +115,7 @@ parameter is applicable:
 			More X86-64 boot options can be found in
 			Documentation/x86/x86_64/boot-options.txt .
 	X86	Either 32bit or 64bit x86 (same as X86-32+X86-64)
+	XEN	Xen support is enabled
 
 In addition, the following text indicates that the option:
 
@@ -2879,6 +2880,16 @@ and is between 256 and 4096 characters. It is defined in the file
 	xd=		[HW,XT] Original XT pre-IDE (RLL encoded) disks.
 	xd_geo=		See header of drivers/block/xd.c.
 
+	xen_emul_unplug=		[HW,X86,XEN]
+			Unplug Xen emulated devices
+			Format: [unplug0,][unplug1]
+			ide-disks -- unplug primary master IDE devices
+			aux-ide-disks -- unplug non-primary-master IDE devices
+			nics -- unplug network devices
+			all -- unplug all emulated devices (NICs and IDE disks)
+			ignore -- continue loading the Xen platform PCI driver even
+				if the version check failed
+
 	xirc2ps_cs=	[NET,PCMCIA]
 			Format:
 			<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3bb4fc21f4f..93095468598 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -12,7 +12,7 @@ CFLAGS_mmu.o			:= $(nostackp)
 
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 			time.o xen-asm.o xen-asm_$(BITS).o \
-			grant-table.o suspend.o
+			grant-table.o suspend.o platform-pci-unplug.o
 
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a9017296388..157c93b62dd 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1314,6 +1314,7 @@ static void __init xen_hvm_guest_init(void)
 	if (xen_feature(XENFEAT_hvm_callback_vector))
 		xen_have_vector_callback = 1;
 	register_cpu_notifier(&xen_hvm_cpu_notifier);
+	xen_unplug_emulated_devices();
 	have_vcpu_info_placement = 0;
 	x86_init.irqs.intr_init = xen_init_IRQ;
 	xen_hvm_init_time_ops();
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
new file mode 100644
index 00000000000..2f7f3fb3477
--- /dev/null
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -0,0 +1,135 @@
+/******************************************************************************
+ * platform-pci-unplug.c
+ *
+ * Xen platform PCI device driver
+ * Copyright (c) 2010, Citrix
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+
+#include <xen/platform_pci.h>
+
+#define XEN_PLATFORM_ERR_MAGIC -1
+#define XEN_PLATFORM_ERR_PROTOCOL -2
+#define XEN_PLATFORM_ERR_BLACKLIST -3
+
+/* store the value of xen_emul_unplug after the unplug is done */
+int xen_platform_pci_unplug;
+EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
+static int xen_emul_unplug;
+
+static int __init check_platform_magic(void)
+{
+	short magic;
+	char protocol;
+
+	magic = inw(XEN_IOPORT_MAGIC);
+	if (magic != XEN_IOPORT_MAGIC_VAL) {
+		printk(KERN_ERR "Xen Platform PCI: unrecognised magic value\n");
+		return XEN_PLATFORM_ERR_MAGIC;
+	}
+
+	protocol = inb(XEN_IOPORT_PROTOVER);
+
+	printk(KERN_DEBUG "Xen Platform PCI: I/O protocol version %d\n",
+			protocol);
+
+	switch (protocol) {
+	case 1:	/* write product/driver version, then re-check the magic */
+		outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM);
+		outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER);
+		if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) {
+			printk(KERN_ERR "Xen Platform: blacklisted by host\n");
+			return XEN_PLATFORM_ERR_BLACKLIST;
+		}
+		break;
+	default:
+		printk(KERN_WARNING "Xen Platform PCI: unknown I/O protocol version\n");
+		return XEN_PLATFORM_ERR_PROTOCOL;
+	}
+
+	return 0;	/* magic and protocol version both accepted */
+}
+
+void __init xen_unplug_emulated_devices(void)
+{
+	int r;
+
+	/* check the version of the xen platform PCI device */
+	r = check_platform_magic();
+	/* If the version matches enable the Xen platform PCI driver.
+	 * Also enable the Xen platform PCI driver if the version is really old
+	 * and the user told us to ignore it. */
+	if (r && !(r == XEN_PLATFORM_ERR_MAGIC &&
+			(xen_emul_unplug & XEN_UNPLUG_IGNORE)))
+		return;
+	/* Set the default value of xen_emul_unplug depending on whether or
+	 * not the Xen PV frontends and the Xen platform PCI driver have
+	 * been compiled for this kernel (modules or built-in are both OK). */
+	if (!xen_emul_unplug) {
+		if (xen_must_unplug_nics()) {
+			printk(KERN_INFO "Netfront and the Xen platform PCI driver have "
+					"been compiled for this kernel: unplug emulated NICs.\n");
+			xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
+		}
+		if (xen_must_unplug_disks()) {
+			printk(KERN_INFO "Blkfront and the Xen platform PCI driver have "
+					"been compiled for this kernel: unplug emulated disks.\n"
+					"You might have to change the root device\n"
+					"from /dev/hd[a-d] to /dev/xvd[a-d]\n"
+					"in your root= kernel command line option\n");
+			xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS;
+		}
+	}
+	/* Now unplug the emulated devices */
+	if (!(xen_emul_unplug & XEN_UNPLUG_IGNORE))
+		outw(xen_emul_unplug, XEN_IOPORT_UNPLUG);
+	xen_platform_pci_unplug = xen_emul_unplug;
+}
+
+/* True when the l-byte token at p is exactly the option name s. */
+#define XEN_UNPLUG_OPT(p, l, s)	((l) == strlen(s) && !strncmp(p, s, l))
+
+/* early_param handler: OR the flag of every comma-separated option in arg
+ * into xen_emul_unplug; unknown tokens only trigger a warning.  Returns 0. */
+static int __init parse_xen_emul_unplug(char *arg)
+{
+	char *p, *q;
+	int l;
+
+	for (p = arg; p; p = q ? q + 1 : NULL) {
+		q = strchr(p, ',');
+		l = q ? q - p : strlen(p);	/* length of this token */
+		if (XEN_UNPLUG_OPT(p, l, "all"))
+			xen_emul_unplug |= XEN_UNPLUG_ALL;
+		else if (XEN_UNPLUG_OPT(p, l, "ide-disks"))
+			xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS;
+		else if (XEN_UNPLUG_OPT(p, l, "aux-ide-disks"))
+			xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS;
+		else if (XEN_UNPLUG_OPT(p, l, "nics"))
+			xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
+		else if (XEN_UNPLUG_OPT(p, l, "ignore"))
+			xen_emul_unplug |= XEN_UNPLUG_IGNORE;
+		else
+			printk(KERN_WARNING "unrecognised option '%.*s' "
+				 "in parameter 'xen_emul_unplug'\n", l, p);
+	}
+	return 0;
+}
+early_param("xen_emul_unplug", parse_xen_emul_unplug);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 089d18923d2..ed776949024 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -40,6 +40,7 @@ void xen_vcpu_restore(void);
 
 void xen_callback_vector(void);
 void xen_hvm_init_shared_info(void);
+void __init xen_unplug_emulated_devices(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
 
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 82ed403147c..6eb2989a9d0 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -48,6 +48,7 @@
 #include <xen/grant_table.h>
 #include <xen/events.h>
 #include <xen/page.h>
+#include <xen/platform_pci.h>
 
 #include <xen/interface/grant_table.h>
 #include <xen/interface/io/blkif.h>
@@ -737,6 +738,22 @@ static int blkfront_probe(struct xenbus_device *dev,
 		}
 	}
 
+	/* no unplug has been done: do not hook devices != xen vbds */
+	if (xen_hvm_domain() && (xen_platform_pci_unplug & XEN_UNPLUG_IGNORE)) {
+		int major;
+
+		if (!VDEV_IS_EXTENDED(vdevice))
+			major = BLKIF_MAJOR(vdevice);
+		else
+			major = XENVBD_MAJOR;
+
+		if (major != XENVBD_MAJOR) {
+			printk(KERN_INFO
+					"%s: HVM does not support vbd %d as xen block device\n",
+					__FUNCTION__, vdevice);
+			return -ENODEV;
+		}
+	}
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
 		xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index bdb44f2473e..c01b5ddce52 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 
+#include <xen/platform_pci.h>
 #include <xen/grant_table.h>
 #include <xen/xenbus.h>
 #include <xen/events.h>
@@ -195,6 +196,11 @@ static struct pci_driver platform_driver = {
 
 static int __init platform_pci_module_init(void)
 {
+	/* no unplug has been done, IGNORE hasn't been specified: just
+	 * return now */
+	if (!xen_platform_pci_unplug)
+		return -ENODEV;
+
 	return pci_register_driver(&platform_driver);
 }
 
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index a9e83c438cb..37e8894b50d 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -56,6 +56,7 @@
 #include <xen/events.h>
 #include <xen/page.h>
 
+#include <xen/platform_pci.h>
 #include <xen/hvm.h>
 
 #include "xenbus_comms.h"
@@ -977,6 +978,9 @@ static void wait_for_devices(struct xenbus_driver *xendrv)
 #ifndef MODULE
 static int __init boot_wait_for_devices(void)
 {
+	if (xen_hvm_domain() && !xen_platform_pci_unplug)
+		return -ENODEV;
+
 	ready_to_wait_for_devices = 1;
 	wait_for_devices(NULL);
 	return 0;
diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h
new file mode 100644
index 00000000000..ce9d671c636
--- /dev/null
+++ b/include/xen/platform_pci.h
@@ -0,0 +1,49 @@
+#ifndef _XEN_PLATFORM_PCI_H
+#define _XEN_PLATFORM_PCI_H
+
+#define XEN_IOPORT_MAGIC_VAL 0x49d2
+#define XEN_IOPORT_LINUX_PRODNUM 0x0003
+#define XEN_IOPORT_LINUX_DRVVER  0x0001
+
+#define XEN_IOPORT_BASE 0x10
+
+#define XEN_IOPORT_PLATFLAGS	(XEN_IOPORT_BASE + 0) /* 1 byte access (R/W) */
+#define XEN_IOPORT_MAGIC	(XEN_IOPORT_BASE + 0) /* 2 byte access (R) */
+#define XEN_IOPORT_UNPLUG	(XEN_IOPORT_BASE + 0) /* 2 byte access (W) */
+#define XEN_IOPORT_DRVVER	(XEN_IOPORT_BASE + 0) /* 4 byte access (W) */
+
+#define XEN_IOPORT_SYSLOG	(XEN_IOPORT_BASE + 2) /* 1 byte access (W) */
+#define XEN_IOPORT_PROTOVER	(XEN_IOPORT_BASE + 2) /* 1 byte access (R) */
+#define XEN_IOPORT_PRODNUM	(XEN_IOPORT_BASE + 2) /* 2 byte access (W) */
+
+#define XEN_UNPLUG_ALL_IDE_DISKS 1
+#define XEN_UNPLUG_ALL_NICS 2
+#define XEN_UNPLUG_AUX_IDE_DISKS 4
+#define XEN_UNPLUG_ALL 7
+#define XEN_UNPLUG_IGNORE 8
+
+static inline int xen_must_unplug_nics(void) {
+#if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \
+		defined(CONFIG_XEN_NETDEV_FRONTEND_MODULE)) && \
+		(defined(CONFIG_XEN_PLATFORM_PCI) || \
+		 defined(CONFIG_XEN_PLATFORM_PCI_MODULE))
+        return 1;
+#else
+        return 0;
+#endif
+}
+
+static inline int xen_must_unplug_disks(void) {
+#if (defined(CONFIG_XEN_BLKDEV_FRONTEND) || \
+		defined(CONFIG_XEN_BLKDEV_FRONTEND_MODULE)) && \
+		(defined(CONFIG_XEN_PLATFORM_PCI) || \
+		 defined(CONFIG_XEN_PLATFORM_PCI_MODULE))
+        return 1;
+#else
+        return 0;
+#endif
+}
+
+extern int xen_platform_pci_unplug;
+
+#endif /* _XEN_PLATFORM_PCI_H */
-- 
cgit v1.2.3-70-g09d2


From 5915100106b8f14a38053ad6c03a664d208aeaa2 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Thu, 17 Jun 2010 14:22:52 +0100
Subject: x86: Call HVMOP_pagetable_dying on exit_mmap.

When a pagetable is about to be destroyed, we notify Xen so that the
hypervisor can clear the related shadow pagetable.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c           |  1 +
 arch/x86/xen/mmu.c                 | 33 +++++++++++++++++++++++++++++++++
 arch/x86/xen/mmu.h                 |  1 +
 include/xen/interface/hvm/hvm_op.h | 11 +++++++++++
 4 files changed, 46 insertions(+)

(limited to 'include')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 157c93b62dd..75b479a684f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1318,6 +1318,7 @@ static void __init xen_hvm_guest_init(void)
 	have_vcpu_info_placement = 0;
 	x86_init.irqs.intr_init = xen_init_IRQ;
 	xen_hvm_init_time_ops();
+	xen_hvm_init_mmu_ops();
 }
 
 static bool __init xen_hvm_platform(void)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 914f04695ce..84648c1bf13 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -58,6 +58,7 @@
 
 #include <xen/page.h>
 #include <xen/interface/xen.h>
+#include <xen/interface/hvm/hvm_op.h>
 #include <xen/interface/version.h>
 #include <xen/hvc-console.h>
 
@@ -1941,6 +1942,38 @@ void __init xen_init_mmu_ops(void)
 	pv_mmu_ops = xen_mmu_ops;
 }
 
+static void xen_hvm_exit_mmap(struct mm_struct *mm)
+{
+	struct xen_hvm_pagetable_dying a;
+	int rc;
+
+	a.domid = DOMID_SELF;
+	a.gpa = __pa(mm->pgd);
+	rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
+	WARN_ON_ONCE(rc < 0);
+}
+
+static int is_pagetable_dying_supported(void)
+{
+	struct xen_hvm_pagetable_dying a;
+	int rc = 0;
+
+	a.domid = DOMID_SELF;
+	a.gpa = 0x00;
+	rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
+	if (rc < 0) {
+		printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n");
+		return 0;
+	}
+	return 1;
+}
+
+void __init xen_hvm_init_mmu_ops(void)
+{
+	if (is_pagetable_dying_supported())
+		pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
+}
+
 #ifdef CONFIG_XEN_DEBUG_FS
 
 static struct dentry *d_mmu_debug;
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 5fe6bc7f5ec..fa938c4aa2f 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -60,4 +60,5 @@ void  xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 unsigned long xen_read_cr2_direct(void);
 
 extern void xen_init_mmu_ops(void);
+extern void xen_hvm_init_mmu_ops(void);
 #endif	/* _XEN_MMU_H */
diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
index 73c8c7eba48..a4827f46ee9 100644
--- a/include/xen/interface/hvm/hvm_op.h
+++ b/include/xen/interface/hvm/hvm_op.h
@@ -32,4 +32,15 @@ struct xen_hvm_param {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
 
+/* Hint from PV drivers for pagetable destruction. */
+#define HVMOP_pagetable_dying       9
+struct xen_hvm_pagetable_dying {
+    /* Domain with a pagetable about to be destroyed. */
+    domid_t  domid;
+    /* guest physical address of the toplevel pagetable dying */
+    aligned_u64 gpa;
+};
+typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t);
+ 
 #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
-- 
cgit v1.2.3-70-g09d2


From 98864ff58dd2b8ef9e72b0d2c70f34e7ff24a2ee Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 22 May 2010 23:59:11 +0100
Subject: ARM: OMAP: Convert OMAPFB and VRAM SDRAM reservation to LMB

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/plat-omap/common.c            |  4 ++--
 arch/arm/plat-omap/fb.c                | 30 +++++++++++++++++++-----------
 arch/arm/plat-omap/include/plat/vram.h |  4 ++--
 drivers/video/omap2/vram.c             | 33 +++++++++++++++------------------
 include/linux/omapfb.h                 |  2 +-
 5 files changed, 39 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c
index 9f6bbc178a7..ebed82699eb 100644
--- a/arch/arm/plat-omap/common.c
+++ b/arch/arm/plat-omap/common.c
@@ -85,8 +85,8 @@ EXPORT_SYMBOL(omap_get_var_config);
 
 void __init omap_reserve(void)
 {
-	omapfb_reserve_sdram();
-	omap_vram_reserve_sdram();
+	omapfb_reserve_sdram_memblock();
+	omap_vram_reserve_sdram_memblock();
 }
 
 /*
diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c
index 97db493904f..0054b9501a5 100644
--- a/arch/arm/plat-omap/fb.c
+++ b/arch/arm/plat-omap/fb.c
@@ -26,7 +26,7 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/io.h>
 #include <linux/omapfb.h>
 
@@ -173,25 +173,27 @@ static int check_fbmem_region(int region_idx, struct omapfb_mem_region *rg,
 
 static int valid_sdram(unsigned long addr, unsigned long size)
 {
-	struct bootmem_data *bdata = NODE_DATA(0)->bdata;
-	unsigned long sdram_start, sdram_end;
+	struct memblock_property res;
 
-	sdram_start = bdata->node_min_pfn << PAGE_SHIFT;
-	sdram_end = bdata->node_low_pfn << PAGE_SHIFT;
-
-	return addr >= sdram_start && sdram_end - addr >= size;
+	res.base = addr;
+	res.size = size;
+	return !memblock_find(&res) && res.base == addr && res.size == size;
 }
 
 static int reserve_sdram(unsigned long addr, unsigned long size)
 {
-	return reserve_bootmem(addr, size, BOOTMEM_EXCLUSIVE);
+	if (memblock_is_region_reserved(addr, size))
+		return -EBUSY;
+	if (memblock_reserve(addr, size))
+		return -ENOMEM;
+	return 0;
 }
 
 /*
  * Called from map_io. We need to call to this early enough so that we
  * can reserve the fixed SDRAM regions before VM could get hold of them.
  */
-void __init omapfb_reserve_sdram(void)
+void __init omapfb_reserve_sdram_memblock(void)
 {
 	unsigned long reserved = 0;
 	int i;
@@ -386,7 +388,10 @@ static inline int omap_init_fb(void)
 
 arch_initcall(omap_init_fb);
 
-void omapfb_reserve_sdram(void) {}
+void omapfb_reserve_sdram_memblock(void)
+{
+}
+
 unsigned long omapfb_reserve_sram(unsigned long sram_pstart,
 				  unsigned long sram_vstart,
 				  unsigned long sram_size,
@@ -402,7 +407,10 @@ void omapfb_set_platform_data(struct omapfb_platform_data *data)
 {
 }
 
-void omapfb_reserve_sdram(void) {}
+void omapfb_reserve_sdram_memblock(void)
+{
+}
+
 unsigned long omapfb_reserve_sram(unsigned long sram_pstart,
 				  unsigned long sram_vstart,
 				  unsigned long sram_size,
diff --git a/arch/arm/plat-omap/include/plat/vram.h b/arch/arm/plat-omap/include/plat/vram.h
index edd4987758a..0aa4ecd12c7 100644
--- a/arch/arm/plat-omap/include/plat/vram.h
+++ b/arch/arm/plat-omap/include/plat/vram.h
@@ -38,7 +38,7 @@ extern void omap_vram_get_info(unsigned long *vram, unsigned long *free_vram,
 extern void omap_vram_set_sdram_vram(u32 size, u32 start);
 extern void omap_vram_set_sram_vram(u32 size, u32 start);
 
-extern void omap_vram_reserve_sdram(void);
+extern void omap_vram_reserve_sdram_memblock(void);
 extern unsigned long omap_vram_reserve_sram(unsigned long sram_pstart,
 					    unsigned long sram_vstart,
 					    unsigned long sram_size,
@@ -48,7 +48,7 @@ extern unsigned long omap_vram_reserve_sram(unsigned long sram_pstart,
 static inline void omap_vram_set_sdram_vram(u32 size, u32 start) { }
 static inline void omap_vram_set_sram_vram(u32 size, u32 start) { }
 
-static inline void omap_vram_reserve_sdram(void) { }
+static inline void omap_vram_reserve_sdram_memblock(void) { }
 static inline unsigned long omap_vram_reserve_sram(unsigned long sram_pstart,
 					    unsigned long sram_vstart,
 					    unsigned long sram_size,
diff --git a/drivers/video/omap2/vram.c b/drivers/video/omap2/vram.c
index 3b1237ad85e..f6fdc2085f3 100644
--- a/drivers/video/omap2/vram.c
+++ b/drivers/video/omap2/vram.c
@@ -25,7 +25,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/completion.h>
 #include <linux/debugfs.h>
 #include <linux/jiffies.h>
@@ -525,10 +525,8 @@ early_param("vram", omap_vram_early_vram);
  * Called from map_io. We need to call to this early enough so that we
  * can reserve the fixed SDRAM regions before VM could get hold of them.
  */
-void __init omap_vram_reserve_sdram(void)
+void __init omap_vram_reserve_sdram_memblock(void)
 {
-	struct bootmem_data	*bdata;
-	unsigned long		sdram_start, sdram_size;
 	u32 paddr;
 	u32 size = 0;
 
@@ -555,29 +553,28 @@ void __init omap_vram_reserve_sdram(void)
 
 	size = PAGE_ALIGN(size);
 
-	bdata = NODE_DATA(0)->bdata;
-	sdram_start = bdata->node_min_pfn << PAGE_SHIFT;
-	sdram_size = (bdata->node_low_pfn << PAGE_SHIFT) - sdram_start;
-
 	if (paddr) {
-		if ((paddr & ~PAGE_MASK) || paddr < sdram_start ||
-				paddr + size > sdram_start + sdram_size) {
+		struct memblock_property res;
+
+		res.base = paddr;
+		res.size = size;
+		if ((paddr & ~PAGE_MASK) || memblock_find(&res) ||
+		    res.base != paddr || res.size != size) {
 			pr_err("Illegal SDRAM region for VRAM\n");
 			return;
 		}
 
-		if (reserve_bootmem(paddr, size, BOOTMEM_EXCLUSIVE) < 0) {
-			pr_err("FB: failed to reserve VRAM\n");
+		if (memblock_is_region_reserved(paddr, size)) {
+			pr_err("FB: failed to reserve VRAM - busy\n");
 			return;
 		}
-	} else {
-		if (size > sdram_size) {
-			pr_err("Illegal SDRAM size for VRAM\n");
+
+		if (memblock_reserve(paddr, size) < 0) {
+			pr_err("FB: failed to reserve VRAM - no memory\n");
 			return;
 		}
-
-		paddr = virt_to_phys(alloc_bootmem_pages(size));
-		BUG_ON(paddr & ~PAGE_MASK);
+	} else {
+		paddr = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_REAL_LIMIT);
 	}
 
 	omap_vram_add_region(paddr, size);
diff --git a/include/linux/omapfb.h b/include/linux/omapfb.h
index 9bdd91486b4..7e4cd616bcb 100644
--- a/include/linux/omapfb.h
+++ b/include/linux/omapfb.h
@@ -253,7 +253,7 @@ struct omapfb_platform_data {
 /* in arch/arm/plat-omap/fb.c */
 extern void omapfb_set_platform_data(struct omapfb_platform_data *data);
 extern void omapfb_set_ctrl_platform_data(void *pdata);
-extern void omapfb_reserve_sdram(void);
+extern void omapfb_reserve_sdram_memblock(void);
 
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From ec489aa8f993f8d2ec962ce113071faac482aa27 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Wed, 2 Jun 2010 08:13:52 +0100
Subject: ARM: 6157/2: PL011 TX/RX split of LCR for ST-Ericssons derivative

In the ST-Ericsson version of the PL011 the TX and RX have different
control registers.

Cc: Alessandro Rubini <rubini@unipv.it>
Signed-off-by: Marcin Mielczarczyk <marcin.mielczarczyk@tieto.com>
Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/serial/amba-pl011.c | 61 +++++++++++++++++++++++++++++++++++++--------
 include/linux/amba/serial.h |  2 ++
 2 files changed, 52 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index eb4cb480b93..5644cf2385b 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -69,9 +69,11 @@
 struct uart_amba_port {
 	struct uart_port	port;
 	struct clk		*clk;
-	unsigned int		im;	/* interrupt mask */
+	unsigned int		im;		/* interrupt mask */
 	unsigned int		old_status;
-	unsigned int		ifls;	/* vendor-specific */
+	unsigned int		ifls;		/* vendor-specific */
+	unsigned int		lcrh_tx;	/* vendor-specific */
+	unsigned int		lcrh_rx;	/* vendor-specific */
 	bool			autorts;
 };
 
@@ -79,16 +81,22 @@ struct uart_amba_port {
 struct vendor_data {
 	unsigned int		ifls;
 	unsigned int		fifosize;
+	unsigned int		lcrh_tx;
+	unsigned int		lcrh_rx;
 };
 
 static struct vendor_data vendor_arm = {
 	.ifls			= UART011_IFLS_RX4_8|UART011_IFLS_TX4_8,
 	.fifosize		= 16,
+	.lcrh_tx		= UART011_LCRH,
+	.lcrh_rx		= UART011_LCRH,
 };
 
 static struct vendor_data vendor_st = {
 	.ifls			= UART011_IFLS_RX_HALF|UART011_IFLS_TX_HALF,
 	.fifosize		= 64,
+	.lcrh_tx		= ST_UART011_LCRH_TX,
+	.lcrh_rx		= ST_UART011_LCRH_RX,
 };
 
 static void pl011_stop_tx(struct uart_port *port)
@@ -327,12 +335,12 @@ static void pl011_break_ctl(struct uart_port *port, int break_state)
 	unsigned int lcr_h;
 
 	spin_lock_irqsave(&uap->port.lock, flags);
-	lcr_h = readw(uap->port.membase + UART011_LCRH);
+	lcr_h = readw(uap->port.membase + uap->lcrh_tx);
 	if (break_state == -1)
 		lcr_h |= UART01x_LCRH_BRK;
 	else
 		lcr_h &= ~UART01x_LCRH_BRK;
-	writew(lcr_h, uap->port.membase + UART011_LCRH);
+	writew(lcr_h, uap->port.membase + uap->lcrh_tx);
 	spin_unlock_irqrestore(&uap->port.lock, flags);
 }
 
@@ -393,7 +401,17 @@ static int pl011_startup(struct uart_port *port)
 	writew(cr, uap->port.membase + UART011_CR);
 	writew(0, uap->port.membase + UART011_FBRD);
 	writew(1, uap->port.membase + UART011_IBRD);
-	writew(0, uap->port.membase + UART011_LCRH);
+	writew(0, uap->port.membase + uap->lcrh_rx);
+	if (uap->lcrh_tx != uap->lcrh_rx) {
+		int i;
+		/*
+		 * Wait 10 PCLKs before writing LCRH_TX register,
+		 * to get this delay write read only register 10 times
+		 */
+		for (i = 0; i < 10; ++i)
+			writew(0xff, uap->port.membase + UART011_MIS);
+		writew(0, uap->port.membase + uap->lcrh_tx);
+	}
 	writew(0, uap->port.membase + UART01x_DR);
 	while (readw(uap->port.membase + UART01x_FR) & UART01x_FR_BUSY)
 		barrier();
@@ -422,10 +440,19 @@ static int pl011_startup(struct uart_port *port)
 	return retval;
 }
 
+static void pl011_shutdown_channel(struct uart_amba_port *uap,
+					unsigned int lcrh)
+{
+	unsigned long val;
+
+	val = readw(uap->port.membase + lcrh);	/* lcrh: register offset */
+	val &= ~(UART01x_LCRH_BRK | UART01x_LCRH_FEN);	/* break + FIFOs off */
+	writew(val, uap->port.membase + lcrh);
+}
+
 static void pl011_shutdown(struct uart_port *port)
 {
 	struct uart_amba_port *uap = (struct uart_amba_port *)port;
-	unsigned long val;
 
 	/*
 	 * disable all interrupts
@@ -450,9 +477,9 @@ static void pl011_shutdown(struct uart_port *port)
 	/*
 	 * disable break condition and fifos
 	 */
-	val = readw(uap->port.membase + UART011_LCRH);
-	val &= ~(UART01x_LCRH_BRK | UART01x_LCRH_FEN);
-	writew(val, uap->port.membase + UART011_LCRH);
+	pl011_shutdown_channel(uap, uap->lcrh_rx);
+	if (uap->lcrh_rx != uap->lcrh_tx)
+		pl011_shutdown_channel(uap, uap->lcrh_tx);
 
 	/*
 	 * Shut down the clock producer
@@ -561,7 +588,17 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios,
 	 * NOTE: MUST BE WRITTEN AFTER UARTLCR_M & UARTLCR_L
 	 * ----------^----------^----------^----------^-----
 	 */
-	writew(lcr_h, port->membase + UART011_LCRH);
+	writew(lcr_h, port->membase + uap->lcrh_rx);
+	if (uap->lcrh_rx != uap->lcrh_tx) {
+		int i;
+		/*
+		 * Wait 10 PCLKs before writing LCRH_TX register,
+		 * to get this delay write read only register 10 times
+		 */
+		for (i = 0; i < 10; ++i)
+			writew(0xff, uap->port.membase + UART011_MIS);
+		writew(lcr_h, port->membase + uap->lcrh_tx);
+	}
 	writew(old_cr, port->membase + UART011_CR);
 
 	spin_unlock_irqrestore(&port->lock, flags);
@@ -688,7 +725,7 @@ pl011_console_get_options(struct uart_amba_port *uap, int *baud,
 	if (readw(uap->port.membase + UART011_CR) & UART01x_CR_UARTEN) {
 		unsigned int lcr_h, ibrd, fbrd;
 
-		lcr_h = readw(uap->port.membase + UART011_LCRH);
+		lcr_h = readw(uap->port.membase + uap->lcrh_tx);
 
 		*parity = 'n';
 		if (lcr_h & UART01x_LCRH_PEN) {
@@ -800,6 +837,8 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id)
 	}
 
 	uap->ifls = vendor->ifls;
+	uap->lcrh_rx = vendor->lcrh_rx;
+	uap->lcrh_tx = vendor->lcrh_tx;
 	uap->port.dev = &dev->dev;
 	uap->port.mapbase = dev->res.start;
 	uap->port.membase = base;
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 5a5a7fd6249..93c96a66c51 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -38,10 +38,12 @@
 #define UART01x_FR		0x18	/* Flag register (Read only). */
 #define UART010_IIR		0x1C	/* Interrupt indentification register (Read). */
 #define UART010_ICR		0x1C	/* Interrupt clear register (Write). */
+#define ST_UART011_LCRH_RX	0x1C    /* Rx line control register. */
 #define UART01x_ILPR		0x20	/* IrDA low power counter register. */
 #define UART011_IBRD		0x24	/* Integer baud rate divisor register. */
 #define UART011_FBRD		0x28	/* Fractional baud rate divisor register. */
 #define UART011_LCRH		0x2c	/* Line control register. */
+#define ST_UART011_LCRH_TX	0x2c    /* Tx Line control register. */
 #define UART011_CR		0x30	/* Control register. */
 #define UART011_IFLS		0x34	/* Interrupt fifo level select. */
 #define UART011_IMSC		0x38	/* Interrupt mask. */
-- 
cgit v1.2.3-70-g09d2


From ac3e3fb424d44109dda3b1a3459e1b30fa60ac4a Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Wed, 2 Jun 2010 20:40:22 +0100
Subject: ARM: 6158/2: PL011 baudrate extension for ST-Ericssons derivative

Implementation of the ST-Ericsson baudrate extension in the PL011
block. In this modified variant it is possible to change the
sampling factor from 16 to 8, and thanks to this we can get higher
baudrates while still using the same peripheral clock.

Also compute the baud divisor with DIV_ROUND_CLOSEST() instead of the
previous simple integer division.

Cc: Alessandro Rubini <rubini@unipv.it>
Cc: Jerzy Kasenberg <jerzy.kasenberg@tieto.com>
Signed-off-by: Marcin Mielczarczyk <marcin.mielczarczyk@tieto.com>
Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/serial/amba-pl011.c | 27 +++++++++++++++++++++++++--
 include/linux/amba/serial.h |  1 +
 2 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 5644cf2385b..f67e09da6d3 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -74,6 +74,7 @@ struct uart_amba_port {
 	unsigned int		ifls;		/* vendor-specific */
 	unsigned int		lcrh_tx;	/* vendor-specific */
 	unsigned int		lcrh_rx;	/* vendor-specific */
+	bool			oversampling;   /* vendor-specific */
 	bool			autorts;
 };
 
@@ -83,6 +84,7 @@ struct vendor_data {
 	unsigned int		fifosize;
 	unsigned int		lcrh_tx;
 	unsigned int		lcrh_rx;
+	bool			oversampling;
 };
 
 static struct vendor_data vendor_arm = {
@@ -90,6 +92,7 @@ static struct vendor_data vendor_arm = {
 	.fifosize		= 16,
 	.lcrh_tx		= UART011_LCRH,
 	.lcrh_rx		= UART011_LCRH,
+	.oversampling		= false,
 };
 
 static struct vendor_data vendor_st = {
@@ -97,6 +100,7 @@ static struct vendor_data vendor_st = {
 	.fifosize		= 64,
 	.lcrh_tx		= ST_UART011_LCRH_TX,
 	.lcrh_rx		= ST_UART011_LCRH_RX,
+	.oversampling		= true,
 };
 
 static void pl011_stop_tx(struct uart_port *port)
@@ -499,8 +503,13 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios,
 	/*
 	 * Ask the core to calculate the divisor for us.
 	 */
-	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16);
-	quot = port->uartclk * 4 / baud;
+	baud = uart_get_baud_rate(port, termios, old, 0,
+				  port->uartclk/(uap->oversampling ? 8 : 16));
+
+	if (baud > port->uartclk/16)
+		quot = DIV_ROUND_CLOSEST(port->uartclk * 8, baud);
+	else
+		quot = DIV_ROUND_CLOSEST(port->uartclk * 4, baud);
 
 	switch (termios->c_cflag & CSIZE) {
 	case CS5:
@@ -579,6 +588,13 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios,
 		uap->autorts = false;
 	}
 
+	if (uap->oversampling) {
+		if (baud > port->uartclk/16)
+			old_cr |= ST_UART011_CR_OVSFACT;
+		else
+			old_cr &= ~ST_UART011_CR_OVSFACT;
+	}
+
 	/* Set baud rate */
 	writew(quot & 0x3f, port->membase + UART011_FBRD);
 	writew(quot >> 6, port->membase + UART011_IBRD);
@@ -744,6 +760,12 @@ pl011_console_get_options(struct uart_amba_port *uap, int *baud,
 		fbrd = readw(uap->port.membase + UART011_FBRD);
 
 		*baud = uap->port.uartclk * 4 / (64 * ibrd + fbrd);
+
+		if (uap->oversampling) {
+			if (readw(uap->port.membase + UART011_CR)
+				  & ST_UART011_CR_OVSFACT)
+				*baud *= 2;
+		}
 	}
 }
 
@@ -839,6 +861,7 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id)
 	uap->ifls = vendor->ifls;
 	uap->lcrh_rx = vendor->lcrh_rx;
 	uap->lcrh_tx = vendor->lcrh_tx;
+	uap->oversampling = vendor->oversampling;
 	uap->port.dev = &dev->dev;
 	uap->port.mapbase = dev->res.start;
 	uap->port.membase = base;
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 93c96a66c51..e1b634b635f 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -86,6 +86,7 @@
 #define UART010_CR_TIE 		0x0020
 #define UART010_CR_RIE 		0x0010
 #define UART010_CR_MSIE		0x0008
+#define ST_UART011_CR_OVSFACT	0x0008	/* Oversampling factor */
 #define UART01x_CR_IIRLP	0x0004	/* SIR low power mode */
 #define UART01x_CR_SIREN	0x0002	/* SIR enable */
 #define UART01x_CR_UARTEN	0x0001	/* UART enable */
-- 
cgit v1.2.3-70-g09d2


From ce3bf7ab22527183634a76512d9854a38615e4d5 Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Tue, 13 Jul 2010 17:56:19 -0700
Subject: time: Implement timespec_add

After accidentally misusing timespec_add_safe, I wanted to make sure
we don't accidentally trip over that issue again, so I created a simple
timespec_add() function which we can use to replace the instances
of timespec_add_safe() that don't want the overflow detection.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
LKML-Reference: <1279068988-21864-3-git-send-email-johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/time.h      | 16 ++++++++++++++++
 kernel/time/timekeeping.c |  6 +++---
 2 files changed, 19 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/time.h b/include/linux/time.h
index ea3559f0b3f..9072df83de1 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -76,9 +76,25 @@ extern unsigned long mktime(const unsigned int year, const unsigned int mon,
 			    const unsigned int min, const unsigned int sec);
 
 extern void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec);
+
+/*
+ * timespec_add_safe assumes both values are positive and checks
+ * for overflow. It will return TIME_T_MAX if the reutrn would be
+ * smaller then either of the arguments.
+ */
 extern struct timespec timespec_add_safe(const struct timespec lhs,
 					 const struct timespec rhs);
 
+
+static inline struct timespec timespec_add(struct timespec lhs,
+						struct timespec rhs)
+{
+	struct timespec ts_delta;
+	set_normalized_timespec(&ts_delta, lhs.tv_sec + rhs.tv_sec,
+				lhs.tv_nsec + rhs.tv_nsec);
+	return ts_delta;
+}
+
 /*
  * sub = lhs - rhs, in normalized form
  */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index caf8d4d4f5c..623fe3d504d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -579,9 +579,9 @@ static int timekeeping_resume(struct sys_device *dev)
 
 	if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
 		ts = timespec_sub(ts, timekeeping_suspend_time);
-		xtime = timespec_add_safe(xtime, ts);
+		xtime = timespec_add(xtime, ts);
 		wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
-		total_sleep_time = timespec_add_safe(total_sleep_time, ts);
+		total_sleep_time = timespec_add(total_sleep_time, ts);
 	}
 	/* re-base the last cycle value */
 	timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
@@ -887,7 +887,7 @@ EXPORT_SYMBOL_GPL(getboottime);
  */
 void monotonic_to_bootbased(struct timespec *ts)
 {
-	*ts = timespec_add_safe(*ts, total_sleep_time);
+	*ts = timespec_add(*ts, total_sleep_time);
 }
 EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
 
-- 
cgit v1.2.3-70-g09d2


From 7615856ebfee52b080c22d263ca4debbd0df0ac1 Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Tue, 13 Jul 2010 17:56:23 -0700
Subject: timekeeping: Fix update_vsyscall to provide wall_to_monotonic offset

update_vsyscall() did not provide the wall_to_monotonic offset,
so arch specific implementations tend to reference wall_to_monotonic
directly. This limits future cleanups in the timekeeping core, so
this patch fixes the update_vsyscall interface to provide
wall_to_monotonic, allowing wall_to_monotonic to be made static
as planned in Documentation/feature-removal-schedule.txt

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Tony Luck <tony.luck@intel.com>
LKML-Reference: <1279068988-21864-7-git-send-email-johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/ia64/kernel/time.c       | 7 ++++---
 arch/powerpc/kernel/time.c    | 8 ++++----
 arch/s390/kernel/time.c       | 8 ++++----
 arch/x86/kernel/vsyscall_64.c | 6 +++---
 include/linux/clocksource.h   | 6 ++++--
 kernel/time/timekeeping.c     | 9 ++++++---
 6 files changed, 25 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 653b3c46ea8..ed6f22eb5b1 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -471,7 +471,8 @@ void update_vsyscall_tz(void)
 {
 }
 
-void update_vsyscall(struct timespec *wall, struct clocksource *c, u32 mult)
+void update_vsyscall(struct timespec *wall, struct timespec *wtm,
+			struct clocksource *c, u32 mult)
 {
         unsigned long flags;
 
@@ -487,9 +488,9 @@ void update_vsyscall(struct timespec *wall, struct clocksource *c, u32 mult)
 	/* copy kernel time structures */
         fsyscall_gtod_data.wall_time.tv_sec = wall->tv_sec;
         fsyscall_gtod_data.wall_time.tv_nsec = wall->tv_nsec;
-        fsyscall_gtod_data.monotonic_time.tv_sec = wall_to_monotonic.tv_sec
+	fsyscall_gtod_data.monotonic_time.tv_sec = wtm->tv_sec
 							+ wall->tv_sec;
-        fsyscall_gtod_data.monotonic_time.tv_nsec = wall_to_monotonic.tv_nsec
+	fsyscall_gtod_data.monotonic_time.tv_nsec = wtm->tv_nsec
 							+ wall->tv_nsec;
 
 	/* normalize */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 0711d60f40b..e215f76bba1 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -849,8 +849,8 @@ static cycle_t timebase_read(struct clocksource *cs)
 	return (cycle_t)get_tb();
 }
 
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
-		     u32 mult)
+void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+			struct clocksource *clock, u32 mult)
 {
 	u64 new_tb_to_xs, new_stamp_xsec;
 
@@ -882,8 +882,8 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
 	vdso_data->tb_orig_stamp = clock->cycle_last;
 	vdso_data->stamp_xsec = new_stamp_xsec;
 	vdso_data->tb_to_xs = new_tb_to_xs;
-	vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
-	vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
+	vdso_data->wtom_clock_sec = wtm->tv_sec;
+	vdso_data->wtom_clock_nsec = wtm->tv_nsec;
 	vdso_data->stamp_xtime = *wall_time;
 	smp_wmb();
 	++(vdso_data->tb_update_count);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index a2163c95eb9..aeb30c6f279 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -207,8 +207,8 @@ struct clocksource * __init clocksource_default_clock(void)
 	return &clocksource_tod;
 }
 
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
-		     u32 mult)
+void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+			struct clocksource *clock, u32 mult)
 {
 	if (clock != &clocksource_tod)
 		return;
@@ -219,8 +219,8 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
 	vdso_data->xtime_tod_stamp = clock->cycle_last;
 	vdso_data->xtime_clock_sec = wall_time->tv_sec;
 	vdso_data->xtime_clock_nsec = wall_time->tv_nsec;
-	vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
-	vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
+	vdso_data->wtom_clock_sec = wtm->tv_sec;
+	vdso_data->wtom_clock_nsec = wtm->tv_nsec;
 	vdso_data->ntp_mult = mult;
 	smp_wmb();
 	++vdso_data->tb_update_count;
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dce0c3c5a78..dcbb28c4b69 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -73,8 +73,8 @@ void update_vsyscall_tz(void)
 	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
 
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
-		     u32 mult)
+void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+			struct clocksource *clock, u32 mult)
 {
 	unsigned long flags;
 
@@ -87,7 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
 	vsyscall_gtod_data.clock.shift = clock->shift;
 	vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
 	vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
-	vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
+	vsyscall_gtod_data.wall_to_monotonic = *wtm;
 	vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
 	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 5ea3c60c160..21677d99a16 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -313,11 +313,13 @@ clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec)
 
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
 extern void
-update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult);
+update_vsyscall(struct timespec *ts, struct timespec *wtm,
+			struct clocksource *c, u32 mult);
 extern void update_vsyscall_tz(void);
 #else
 static inline void
-update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult)
+update_vsyscall(struct timespec *ts, struct timespec *wtm,
+			struct clocksource *c, u32 mult)
 {
 }
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 73edd4074b5..b15c3acafd5 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -170,7 +170,8 @@ void timekeeping_leap_insert(int leapsecond)
 {
 	xtime.tv_sec += leapsecond;
 	wall_to_monotonic.tv_sec -= leapsecond;
-	update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
+	update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
+			timekeeper.mult);
 }
 
 /**
@@ -326,7 +327,8 @@ int do_settimeofday(struct timespec *tv)
 	timekeeper.ntp_error = 0;
 	ntp_clear();
 
-	update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
+	update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
+				timekeeper.mult);
 
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 
@@ -809,7 +811,8 @@ void update_wall_time(void)
 	}
 
 	/* check to see if there is a new clocksource to use */
-	update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
+	update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
+				timekeeper.mult);
 }
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 8ab4351a4c888016620f43bde605b3d0964af339 Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Tue, 13 Jul 2010 17:56:25 -0700
Subject: hrtimer: Cleanup direct access to wall_to_monotonic

Provides an accessor function to replace hrtimer.c's
direct access of wall_to_monotonic.

This will allow wall_to_monotonic to be made static as
planned in Documentation/feature-removal-schedule.txt

Signed-off-by: John Stultz <johnstul@us.ibm.com>
LKML-Reference: <1279068988-21864-9-git-send-email-johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/time.h      | 3 ++-
 kernel/hrtimer.c          | 9 ++++-----
 kernel/time/timekeeping.c | 5 +++++
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/time.h b/include/linux/time.h
index 9072df83de1..a57e0f67b3d 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -126,7 +126,8 @@ extern int timekeeping_suspended;
 
 unsigned long get_seconds(void);
 struct timespec current_kernel_time(void);
-struct timespec __current_kernel_time(void); /* does not hold xtime_lock */
+struct timespec __current_kernel_time(void); /* does not take xtime_lock */
+struct timespec __get_wall_to_monotonic(void); /* does not take xtime_lock */
 struct timespec get_monotonic_coarse(void);
 
 #define CURRENT_TIME		(current_kernel_time())
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 5c69e996bd0..809f48c7055 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -90,7 +90,7 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 	do {
 		seq = read_seqbegin(&xtime_lock);
 		xts = __current_kernel_time();
-		tom = wall_to_monotonic;
+		tom = __get_wall_to_monotonic();
 	} while (read_seqretry(&xtime_lock, seq));
 
 	xtim = timespec_to_ktime(xts);
@@ -612,7 +612,7 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 static void retrigger_next_event(void *arg)
 {
 	struct hrtimer_cpu_base *base;
-	struct timespec realtime_offset;
+	struct timespec realtime_offset, wtm;
 	unsigned long seq;
 
 	if (!hrtimer_hres_active())
@@ -620,10 +620,9 @@ static void retrigger_next_event(void *arg)
 
 	do {
 		seq = read_seqbegin(&xtime_lock);
-		set_normalized_timespec(&realtime_offset,
-					-wall_to_monotonic.tv_sec,
-					-wall_to_monotonic.tv_nsec);
+		wtm = __get_wall_to_monotonic();
 	} while (read_seqretry(&xtime_lock, seq));
+	set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
 
 	base = &__get_cpu_var(hrtimer_bases);
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index b15c3acafd5..fb61c2ed366 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -858,6 +858,11 @@ struct timespec __current_kernel_time(void)
 	return xtime;
 }
 
+struct timespec __get_wall_to_monotonic(void)
+{
+	return wall_to_monotonic;
+}
+
 struct timespec current_kernel_time(void)
 {
 	struct timespec now;
-- 
cgit v1.2.3-70-g09d2


From 0fb86b06298b6cd3205cac2e68a499f269282dac Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Tue, 13 Jul 2010 17:56:26 -0700
Subject: timekeeping: Make xtime and wall_to_monotonic static

This patch makes xtime and wall_to_monotonic static, as planned in
Documentation/feature-removal-schedule.txt. This will allow for
further cleanups to the timekeeping core.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
LKML-Reference: <1279068988-21864-10-git-send-email-johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/feature-removal-schedule.txt | 10 ----------
 include/linux/time.h                       |  2 --
 kernel/time/timekeeping.c                  |  4 ++--
 3 files changed, 2 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 1571c0c83db..cd648dbb514 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -549,16 +549,6 @@ Who:	Avi Kivity <avi@redhat.com>
 
 ----------------------------
 
-What:	xtime, wall_to_monotonic
-When:	2.6.36+
-Files:	kernel/time/timekeeping.c include/linux/time.h
-Why:	Cleaning up timekeeping internal values. Please use
-	existing timekeeping accessor functions to access
-	the equivalent functionality.
-Who:	John Stultz <johnstul@us.ibm.com>
-
-----------------------------
-
 What:	KVM kernel-allocated memory slots
 When:	July 2010
 Why:	Since 2.6.25, kvm supports user-allocated memory slots, which are
diff --git a/include/linux/time.h b/include/linux/time.h
index a57e0f67b3d..cb34e35faba 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -113,8 +113,6 @@ static inline struct timespec timespec_sub(struct timespec lhs,
 #define timespec_valid(ts) \
 	(((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC))
 
-extern struct timespec xtime;
-extern struct timespec wall_to_monotonic;
 extern seqlock_t xtime_lock;
 
 extern void read_persistent_clock(struct timespec *ts);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index fb61c2ed366..e14c839e9fa 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -153,8 +153,8 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
  * - wall_to_monotonic is no longer the boot time, getboottime must be
  * used instead.
  */
-struct timespec xtime __attribute__ ((aligned (16)));
-struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
+static struct timespec xtime __attribute__ ((aligned (16)));
+static struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
 static struct timespec total_sleep_time;
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 852db46d55e85b475a72e665ca08d3317769ceef Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Tue, 13 Jul 2010 17:56:28 -0700
Subject: clocksource: Add __clocksource_updatefreq_hz/khz methods

To properly handle clocksources that change frequencies
at the clocksource->enable() point, this patch adds
a method that will update the clocksource's mult/shift and
max_idle_ns values.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
LKML-Reference: <1279068988-21864-12-git-send-email-johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clocksource.h | 11 +++++++++++
 kernel/time/clocksource.c   | 29 ++++++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 21677d99a16..c37b21ad5a3 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -292,6 +292,8 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
  */
 extern int
 __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);
+extern void
+__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq);
 
 static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
 {
@@ -303,6 +305,15 @@ static inline int clocksource_register_khz(struct clocksource *cs, u32 khz)
 	return __clocksource_register_scale(cs, 1000, khz);
 }
 
+static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz)
+{
+	__clocksource_updatefreq_scale(cs, 1, hz);
+}
+
+static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz)
+{
+	__clocksource_updatefreq_scale(cs, 1000, khz);
+}
 
 static inline void
 clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec)
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index c543d21b4e5..c18d7efa1b4 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -639,19 +639,18 @@ static void clocksource_enqueue(struct clocksource *cs)
 #define MAX_UPDATE_LENGTH 5 /* Seconds */
 
 /**
- * __clocksource_register_scale - Used to install new clocksources
+ * __clocksource_updatefreq_scale - Used update clocksource with new freq
  * @t:		clocksource to be registered
  * @scale:	Scale factor multiplied against freq to get clocksource hz
  * @freq:	clocksource frequency (cycles per second) divided by scale
  *
- * Returns -EBUSY if registration fails, zero otherwise.
+ * This should only be called from the clocksource->enable() method.
  *
  * This *SHOULD NOT* be called directly! Please use the
- * clocksource_register_hz() or clocksource_register_khz helper functions.
+ * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions.
  */
-int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
+void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
-
 	/*
 	 * Ideally we want to use  some of the limits used in
 	 * clocksource_max_deferment, to provide a more informed
@@ -662,7 +661,27 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 				      NSEC_PER_SEC/scale,
 				      MAX_UPDATE_LENGTH*scale);
 	cs->max_idle_ns = clocksource_max_deferment(cs);
+}
+EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
+
+/**
+ * __clocksource_register_scale - Used to install new clocksources
+ * @t:		clocksource to be registered
+ * @scale:	Scale factor multiplied against freq to get clocksource hz
+ * @freq:	clocksource frequency (cycles per second) divided by scale
+ *
+ * Returns -EBUSY if registration fails, zero otherwise.
+ *
+ * This *SHOULD NOT* be called directly! Please use the
+ * clocksource_register_hz() or clocksource_register_khz helper functions.
+ */
+int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
+{
+
+	/* Intialize mult/shift and max_idle_ns */
+	__clocksource_updatefreq_scale(cs, scale, freq);
 
+	/* Add clocksource to the clcoksource list */
 	mutex_lock(&clocksource_mutex);
 	clocksource_enqueue(cs);
 	clocksource_select();
-- 
cgit v1.2.3-70-g09d2


From e5880d76aea443b04e07da19830da0f6f7494eef Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 27 Jul 2010 11:56:04 -0400
Subject: ext4: fix potential NULL dereference while tracing

The allocation_context pointer can be NULL.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/mballoc.c           |  4 ++--
 include/trace/events/ext4.h | 20 ++++++++++++--------
 2 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 3dfad95f0f9..8b3b9344a59 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3575,7 +3575,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 			trace_ext4_mballoc_discard(ac);
 		}
 
-		trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit,
+		trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
 					       next - bit);
 		mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
 		bit = next + 1;
@@ -3606,7 +3606,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
 	ext4_group_t group;
 	ext4_grpblk_t bit;
 
-	trace_ext4_mb_release_group_pa(ac, pa);
+	trace_ext4_mb_release_group_pa(sb, ac, pa);
 	BUG_ON(pa->pa_deleted == 0);
 	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
 	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index f3865c7b416..01e9e0076a9 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -395,11 +395,12 @@ DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa,
 );
 
 TRACE_EVENT(ext4_mb_release_inode_pa,
-	TP_PROTO(struct ext4_allocation_context *ac,
+	TP_PROTO(struct super_block *sb,
+		 struct ext4_allocation_context *ac,
 		 struct ext4_prealloc_space *pa,
 		 unsigned long long block, unsigned int count),
 
-	TP_ARGS(ac, pa, block, count),
+	TP_ARGS(sb, ac, pa, block, count),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
@@ -410,8 +411,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
 	),
 
 	TP_fast_assign(
-		__entry->dev		= ac->ac_sb->s_dev;
-		__entry->ino		= ac->ac_inode->i_ino;
+		__entry->dev		= sb->s_dev;
+		__entry->ino		= (ac && ac->ac_inode) ? 
+						ac->ac_inode->i_ino : 0;
 		__entry->block		= block;
 		__entry->count		= count;
 	),
@@ -422,10 +424,11 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
 );
 
 TRACE_EVENT(ext4_mb_release_group_pa,
-	TP_PROTO(struct ext4_allocation_context *ac,
+	TP_PROTO(struct super_block *sb,
+		 struct ext4_allocation_context *ac,
 		 struct ext4_prealloc_space *pa),
 
-	TP_ARGS(ac, pa),
+	TP_ARGS(sb, ac, pa),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
@@ -436,8 +439,9 @@ TRACE_EVENT(ext4_mb_release_group_pa,
 	),
 
 	TP_fast_assign(
-		__entry->dev		= ac->ac_sb->s_dev;
-		__entry->ino		= ac->ac_inode->i_ino;
+		__entry->dev		= sb->s_dev;
+		__entry->ino		= (ac && ac->ac_inode) ?
+						ac->ac_inode->i_ino : 0;
 		__entry->pa_pstart	= pa->pa_pstart;
 		__entry->pa_len		= pa->pa_len;
 	),
-- 
cgit v1.2.3-70-g09d2


From 47def82672b3ba4e7c5e9a4fe48a556f8684d0d6 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 27 Jul 2010 11:56:05 -0400
Subject: jbd2: Remove __GFP_NOFAIL from jbd2 layer

__GFP_NOFAIL is going away, so add our own retry loop.  Also add
jbd2__journal_start() and jbd2__journal_restart() which take a gfp
mask, so that file systems can optionally (re)start transaction
handles using GFP_KERNEL.  If they do this, then they need to be
prepared to handle receiving an ERR_PTR(-ENOMEM) error, and be ready
to reflect that error up to userspace.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/journal.c     | 15 ++++++++++---
 fs/jbd2/transaction.c | 61 +++++++++++++++++++++++++++++++++++----------------
 include/linux/jbd2.h  |  4 +++-
 3 files changed, 57 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f7bf15787d6..a79d3345b55 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -41,6 +41,7 @@
 #include <linux/hash.h>
 #include <linux/log2.h>
 #include <linux/vmalloc.h>
+#include <linux/backing-dev.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/jbd2.h>
@@ -48,8 +49,6 @@
 #include <asm/uaccess.h>
 #include <asm/page.h>
 
-EXPORT_SYMBOL(jbd2_journal_start);
-EXPORT_SYMBOL(jbd2_journal_restart);
 EXPORT_SYMBOL(jbd2_journal_extend);
 EXPORT_SYMBOL(jbd2_journal_stop);
 EXPORT_SYMBOL(jbd2_journal_lock_updates);
@@ -311,7 +310,17 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	 */
 	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
 
-	new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
+retry_alloc:
+	new_bh = alloc_buffer_head(GFP_NOFS);
+	if (!new_bh) {
+		/*
+		 * Failure is not an option, but __GFP_NOFAIL is going
+		 * away; so we retry ourselves here.
+		 */
+		congestion_wait(BLK_RW_ASYNC, HZ/50);
+		goto retry_alloc;
+	}
+
 	/* keep subsequent assertions sane */
 	new_bh->b_state = 0;
 	init_buffer(new_bh, NULL, NULL);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e214d68620a..001e95fb0fe 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -26,6 +26,8 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/hrtimer.h>
+#include <linux/backing-dev.h>
+#include <linux/module.h>
 
 static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
 
@@ -83,30 +85,38 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
  * transaction's buffer credits.
  */
 
-static int start_this_handle(journal_t *journal, handle_t *handle)
+static int start_this_handle(journal_t *journal, handle_t *handle,
+			     int gfp_mask)
 {
 	transaction_t *transaction;
 	int needed;
 	int nblocks = handle->h_buffer_credits;
 	transaction_t *new_transaction = NULL;
-	int ret = 0;
 	unsigned long ts = jiffies;
 
 	if (nblocks > journal->j_max_transaction_buffers) {
 		printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
 		       current->comm, nblocks,
 		       journal->j_max_transaction_buffers);
-		ret = -ENOSPC;
-		goto out;
+		return -ENOSPC;
 	}
 
 alloc_transaction:
 	if (!journal->j_running_transaction) {
-		new_transaction = kzalloc(sizeof(*new_transaction),
-						GFP_NOFS|__GFP_NOFAIL);
+		new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask);
 		if (!new_transaction) {
-			ret = -ENOMEM;
-			goto out;
+			/*
+			 * If __GFP_FS is not present, then we may be
+			 * being called from inside the fs writeback
+			 * layer, so we MUST NOT fail.  Since
+			 * __GFP_NOFAIL is going away, we will arrange
+			 * to retry the allocation ourselves.
+			 */
+			if ((gfp_mask & __GFP_FS) == 0) {
+				congestion_wait(BLK_RW_ASYNC, HZ/50);
+				goto alloc_transaction;
+			}
+			return -ENOMEM;
 		}
 	}
 
@@ -123,8 +133,8 @@ repeat_locked:
 	if (is_journal_aborted(journal) ||
 	    (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
 		spin_unlock(&journal->j_state_lock);
-		ret = -EROFS;
-		goto out;
+		kfree(new_transaction);
+		return -EROFS;
 	}
 
 	/* Wait on the journal's transaction barrier if necessary */
@@ -240,10 +250,8 @@ repeat_locked:
 	spin_unlock(&journal->j_state_lock);
 
 	lock_map_acquire(&handle->h_lockdep_map);
-out:
-	if (unlikely(new_transaction))		/* It's usually NULL */
-		kfree(new_transaction);
-	return ret;
+	kfree(new_transaction);
+	return 0;
 }
 
 static struct lock_class_key jbd2_handle_key;
@@ -278,7 +286,7 @@ static handle_t *new_handle(int nblocks)
  *
  * Return a pointer to a newly allocated handle, or NULL on failure
  */
-handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
+handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
 {
 	handle_t *handle = journal_current_handle();
 	int err;
@@ -298,7 +306,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
 
 	current->journal_info = handle;
 
-	err = start_this_handle(journal, handle);
+	err = start_this_handle(journal, handle, gfp_mask);
 	if (err < 0) {
 		jbd2_free_handle(handle);
 		current->journal_info = NULL;
@@ -308,6 +316,15 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
 out:
 	return handle;
 }
+EXPORT_SYMBOL(jbd2__journal_start);
+
+
+handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
+{
+	return jbd2__journal_start(journal, nblocks, GFP_NOFS);
+}
+EXPORT_SYMBOL(jbd2_journal_start);
+
 
 /**
  * int jbd2_journal_extend() - extend buffer credits.
@@ -394,8 +411,7 @@ out:
  * transaction capabable of guaranteeing the requested number of
  * credits.
  */
-
-int jbd2_journal_restart(handle_t *handle, int nblocks)
+int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
 {
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
@@ -428,10 +444,17 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
 
 	lock_map_release(&handle->h_lockdep_map);
 	handle->h_buffer_credits = nblocks;
-	ret = start_this_handle(journal, handle);
+	ret = start_this_handle(journal, handle, gfp_mask);
 	return ret;
 }
+EXPORT_SYMBOL(jbd2__journal_restart);
+
 
+int jbd2_journal_restart(handle_t *handle, int nblocks)
+{
+	return jbd2__journal_restart(handle, nblocks, GFP_NOFS);
+}
+EXPORT_SYMBOL(jbd2_journal_restart);
 
 /**
  * void jbd2_journal_lock_updates () - establish a transaction barrier.
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index a4d2e9f7088..5a72bc75b27 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1081,7 +1081,9 @@ static inline handle_t *journal_current_handle(void)
  */
 
 extern handle_t *jbd2_journal_start(journal_t *, int nblocks);
-extern int	 jbd2_journal_restart (handle_t *, int nblocks);
+extern handle_t *jbd2__journal_start(journal_t *, int nblocks, int gfp_mask);
+extern int	 jbd2_journal_restart(handle_t *, int nblocks);
+extern int	 jbd2__journal_restart(handle_t *, int nblocks, int gfp_mask);
 extern int	 jbd2_journal_extend (handle_t *, int nblocks);
 extern int	 jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
-- 
cgit v1.2.3-70-g09d2


From 552ef8024f909d9b3a7442d0ab0d48a22de24e9e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 27 Jul 2010 11:56:06 -0400
Subject: direct-io: move aio_complete into ->end_io

Filesystems with unwritten extent support must not complete an AIO request
until the transaction to convert the extent has been committed.  That means
the aio_complete call needs to be moved into the ->end_io callback so
that the filesystem can control when to call it exactly.

This makes a bit of a mess out of dio_complete and makes the ->end_io
callback prototype even more complicated.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/direct-io.c              | 26 ++++++++++++++------------
 fs/ext4/inode.c             | 10 +++++++---
 fs/ocfs2/aops.c             |  7 ++++++-
 fs/xfs/linux-2.6/xfs_aops.c |  7 ++++++-
 fs/xfs/linux-2.6/xfs_aops.h |  2 ++
 include/linux/fs.h          |  3 ++-
 6 files changed, 37 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 7600aacf531..a10cb91cade 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio)
  * filesystems can use it to hold additional state between get_block calls and
  * dio_complete.
  */
-static int dio_complete(struct dio *dio, loff_t offset, int ret)
+static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async)
 {
 	ssize_t transferred = 0;
 
@@ -239,14 +239,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
 			transferred = dio->i_size - offset;
 	}
 
-	if (dio->end_io && dio->result)
-		dio->end_io(dio->iocb, offset, transferred,
-			    dio->map_bh.b_private);
-
-	if (dio->flags & DIO_LOCKING)
-		/* lockdep: non-owner release */
-		up_read_non_owner(&dio->inode->i_alloc_sem);
-
 	if (ret == 0)
 		ret = dio->page_errors;
 	if (ret == 0)
@@ -254,6 +246,17 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
 	if (ret == 0)
 		ret = transferred;
 
+	if (dio->end_io && dio->result) {
+		dio->end_io(dio->iocb, offset, transferred,
+			    dio->map_bh.b_private, ret, is_async);
+	} else if (is_async) {
+		aio_complete(dio->iocb, ret, 0);
+	}
+
+	if (dio->flags & DIO_LOCKING)
+		/* lockdep: non-owner release */
+		up_read_non_owner(&dio->inode->i_alloc_sem);
+
 	return ret;
 }
 
@@ -277,8 +280,7 @@ static void dio_bio_end_aio(struct bio *bio, int error)
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 
 	if (remaining == 0) {
-		int ret = dio_complete(dio, dio->iocb->ki_pos, 0);
-		aio_complete(dio->iocb, ret, 0);
+		dio_complete(dio, dio->iocb->ki_pos, 0, true);
 		kfree(dio);
 	}
 }
@@ -1126,7 +1128,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 
 	if (ret2 == 0) {
-		ret = dio_complete(dio, offset, ret);
+		ret = dio_complete(dio, offset, ret, false);
 		kfree(dio);
 	} else
 		BUG_ON(ret != -EIOCBQUEUED);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 699d1d01c5d..609159e990d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3775,7 +3775,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
 }
 
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
-			    ssize_t size, void *private)
+			    ssize_t size, void *private, int ret,
+			    bool is_async)
 {
         ext4_io_end_t *io_end = iocb->private;
 	struct workqueue_struct *wq;
@@ -3784,7 +3785,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 
 	/* if not async direct IO or dio with 0 bytes write, just return */
 	if (!io_end || !size)
-		return;
+		goto out;
 
 	ext_debug("ext4_end_io_dio(): io_end 0x%p"
 		  "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
@@ -3795,7 +3796,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	if (io_end->flag != EXT4_IO_UNWRITTEN){
 		ext4_free_io_end(io_end);
 		iocb->private = NULL;
-		return;
+		goto out;
 	}
 
 	io_end->offset = offset;
@@ -3812,6 +3813,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	list_add_tail(&io_end->list, &ei->i_completed_io_list);
 	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
 	iocb->private = NULL;
+out:
+	if (is_async)
+		aio_complete(iocb, ret, 0);
 }
 
 static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3623ca20cc1..1d2b1f156bc 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -609,7 +609,9 @@ bail:
 static void ocfs2_dio_end_io(struct kiocb *iocb,
 			     loff_t offset,
 			     ssize_t bytes,
-			     void *private)
+			     void *private,
+			     int ret,
+			     bool is_async)
 {
 	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
 	int level;
@@ -623,6 +625,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 	if (!level)
 		up_read(&inode->i_alloc_sem);
 	ocfs2_rw_unlock(inode, level);
+
+	if (is_async)
+		aio_complete(iocb, ret, 0);
 }
 
 /*
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 34640d6dbdc..5895aaf62ac 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1599,7 +1599,9 @@ xfs_end_io_direct(
 	struct kiocb	*iocb,
 	loff_t		offset,
 	ssize_t		size,
-	void		*private)
+	void		*private,
+	int		ret,
+	bool		is_async)
 {
 	xfs_ioend_t	*ioend = iocb->private;
 
@@ -1645,6 +1647,9 @@ xfs_end_io_direct(
 	 * against double-freeing.
 	 */
 	iocb->private = NULL;
+
+	if (is_async)
+		aio_complete(iocb, ret, 0);
 }
 
 STATIC ssize_t
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 4cfc6ea87df..9f566d92ae3 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -37,6 +37,8 @@ typedef struct xfs_ioend {
 	size_t			io_size;	/* size of the extent */
 	xfs_off_t		io_offset;	/* offset in the file */
 	struct work_struct	io_work;	/* xfsdatad work queue */
+	struct kiocb		*io_iocb;
+	int			io_result;
 } xfs_ioend_t;
 
 extern const struct address_space_operations xfs_address_space_operations;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 471e1ff5079..a0912f6075e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -415,7 +415,8 @@ struct buffer_head;
 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create);
 typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
-			ssize_t bytes, void *private);
+			ssize_t bytes, void *private, int ret,
+			bool is_async);
 
 /*
  * Attribute flags.  These should be or-ed together to figure out what
-- 
cgit v1.2.3-70-g09d2


From 5d4a2e29fba5b2bef95b96a46b338ec4d76fa4fd Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Fri, 11 Jun 2010 16:43:59 -0700
Subject: [SCSI] fcoe: clean up TBD comments in FCoE prototype header

Some old comments in fc_fcoe.h say TBD long after the
standard has been passed by T11.  Clean them up.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 include/scsi/fc/fc_fcoe.h | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/scsi/fc/fc_fcoe.h b/include/scsi/fc/fc_fcoe.h
index e6ad3d2ae47..d5dcd606281 100644
--- a/include/scsi/fc/fc_fcoe.h
+++ b/include/scsi/fc/fc_fcoe.h
@@ -22,23 +22,18 @@
 
 /*
  * FCoE - Fibre Channel over Ethernet.
+ * See T11 FC-BB-5 Rev 2.00 (09-056v5.pdf)
  */
 
 /*
- * FC_FCOE_OUI hasn't been standardized yet.   XXX TBD.
+ * Default FC_FCOE_OUI / FC-MAP value.
  */
-#ifndef FC_FCOE_OUI
-#define	FC_FCOE_OUI	0x0efc00	/* upper 24 bits of FCOE dest MAC TBD */
-#endif
+#define	FC_FCOE_OUI	0x0efc00	/* upper 24 bits of FCOE MAC */
 
 /*
- * The destination MAC address for the fabric login may get a different OUI.
- * This isn't standardized yet.
+ * Fabric Login (FLOGI) MAC for non-FIP use.  Non-FIP use is deprecated.
  */
-#ifndef FC_FCOE_FLOGI_MAC
-/* gateway MAC - TBD */
 #define	FC_FCOE_FLOGI_MAC { 0x0e, 0xfc, 0x00, 0xff, 0xff, 0xfe }
-#endif
 
 #define	FC_FCOE_VER	0			/* version */
 
@@ -51,8 +46,6 @@
 
 /*
  * FCoE frame header - 14 bytes
- *
- * This is the August 2007 version of the FCoE header as defined by T11.
  * This follows the VLAN header, which includes the ethertype.
  */
 struct fcoe_hdr {
-- 
cgit v1.2.3-70-g09d2


From f8fc6c2c99b8085368119d6cf39b997255052826 Mon Sep 17 00:00:00 2001
From: Bhanu Prakash Gollapudi <bprakash@broadcom.com>
Date: Fri, 11 Jun 2010 16:44:04 -0700
Subject: [SCSI] libfc: Handle unsolicited PRLO request

Resubmitting after incorporating Joe's review comment.

Unsolicited PRLO request is now handled by sending LS_ACC,
and then relogin to the remote port if an N-port login
session exists for that remote port.

Note that this patch should be applied on top of Joe Eykholt's
"Fix remote port restart problem" patch.

Signed-off-by: Bhanu Prakash Gollapudi <bprakash@broadcom.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_rport.c | 71 ++++++++++++++++++++++++++++++++++++++-----
 include/scsi/fc/fc_els.h      |  9 ++++++
 2 files changed, 72 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index e33c5c7961a..df85e19079f 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -1573,30 +1573,85 @@ drop:
  * fc_rport_recv_prlo_req() - Handler for process logout (PRLO) requests
  * @rdata: The remote port that sent the PRLO request
  * @sp:	   The sequence that the PRLO was on
- * @fp:	   The PRLO request frame
+ * @rx_fp: The PRLO request frame
  *
  * Locking Note: The rport lock is exected to be held before calling
  * this function.
  */
 static void fc_rport_recv_prlo_req(struct fc_rport_priv *rdata,
 				   struct fc_seq *sp,
-				   struct fc_frame *fp)
+				   struct fc_frame *rx_fp)
 {
 	struct fc_lport *lport = rdata->local_port;
-
 	struct fc_frame_header *fh;
+	struct fc_exch *ep;
+	struct fc_frame *fp;
+	struct {
+		struct fc_els_prlo prlo;
+		struct fc_els_spp spp;
+	} *pp;
+	struct fc_els_spp *rspp;	/* request service param page */
+	struct fc_els_spp *spp;		/* response spp */
+	unsigned int len;
+	unsigned int plen;
+	u32 f_ctl;
 	struct fc_seq_els_data rjt_data;
 
-	fh = fc_frame_header_get(fp);
+	rjt_data.fp = NULL;
+	fh = fc_frame_header_get(rx_fp);
 
 	FC_RPORT_DBG(rdata, "Received PRLO request while in state %s\n",
 		     fc_rport_state(rdata));
 
-	rjt_data.fp = NULL;
-	rjt_data.reason = ELS_RJT_UNAB;
-	rjt_data.explan = ELS_EXPL_NONE;
+	len = fr_len(rx_fp) - sizeof(*fh);
+	pp = fc_frame_payload_get(rx_fp, sizeof(*pp));
+	if (!pp)
+		goto reject_len;
+	plen = ntohs(pp->prlo.prlo_len);
+	if (plen != 20)
+		goto reject_len;
+	if (plen < len)
+		len = plen;
+
+	rspp = &pp->spp;
+
+	fp = fc_frame_alloc(lport, len);
+	if (!fp) {
+		rjt_data.reason = ELS_RJT_UNAB;
+		rjt_data.explan = ELS_EXPL_INSUF_RES;
+		goto reject;
+	}
+
+	sp = lport->tt.seq_start_next(sp);
+	WARN_ON(!sp);
+	pp = fc_frame_payload_get(fp, len);
+	WARN_ON(!pp);
+	memset(pp, 0, len);
+	pp->prlo.prlo_cmd = ELS_LS_ACC;
+	pp->prlo.prlo_obs = 0x10;
+	pp->prlo.prlo_len = htons(len);
+	spp = &pp->spp;
+	spp->spp_type = rspp->spp_type;
+	spp->spp_type_ext = rspp->spp_type_ext;
+	spp->spp_flags = FC_SPP_RESP_ACK;
+
+	fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
+
+	f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ;
+	f_ctl |= FC_FC_END_SEQ | FC_FC_SEQ_INIT;
+	ep = fc_seq_exch(sp);
+	fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
+		       FC_TYPE_ELS, f_ctl, 0);
+	lport->tt.seq_send(lport, sp, fp);
+	goto drop;
+
+reject_len:
+	rjt_data.reason = ELS_RJT_PROT;
+	rjt_data.explan = ELS_EXPL_INV_LEN;
+reject:
 	lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
-	fc_frame_free(fp);
+drop:
+	fc_frame_free(rx_fp);
 }
 
 /**
diff --git a/include/scsi/fc/fc_els.h b/include/scsi/fc/fc_els.h
index f94328132a2..70a7e92a766 100644
--- a/include/scsi/fc/fc_els.h
+++ b/include/scsi/fc/fc_els.h
@@ -404,6 +404,15 @@ struct fc_els_prli {
 	/* service parameter pages follow */
 };
 
+/*
+ * ELS_PRLO - Process logout request and response.
+ */
+struct fc_els_prlo {
+	__u8            prlo_cmd;       /* command */
+	__u8            prlo_obs;       /* obsolete, but shall be set to 10h */
+	__be16          prlo_len;       /* payload length */
+};
+
 /*
  * ELS_ADISC payload
  */
-- 
cgit v1.2.3-70-g09d2


From 4b2164d4d212e437c9f080023a67f8f9356d2c4c Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Fri, 11 Jun 2010 16:44:51 -0700
Subject: [SCSI] libfc: Fix remote port restart problem

This patch somewhat combines two fixes to remote port handling in libfc.

The first problem was that rport work could be queued on a deleted
and freed rport.  This is handled by not resetting rdata->event
to NONE if the rdata is about to be deleted.

However, that fix led to the second problem, described by
Bhanu Gollapudi, as follows:
> Here is the sequence of events. T1 is first LOGO receive thread, T2 is
> fc_rport_work() scheduled by T1 and T3 is second LOGO receive thread and
> T4 is fc_rport_work scheduled by T3.
>
> 1. (T1)Received 1st LOGO in state Ready
> 2. (T1)Delete port & enter to RESTART state.
> 3. (T1)schedule event_work, since event is RPORT_EV_NONE.
> 4. (T1)set event = RPORT_EV_LOGO
> 5. (T1)Enter RESTART state as disc_id is set.
> 6. (T2)remember to PLOGI, and set event = RPORT_EV_NONE
> 6. (T3)Received 2nd LOGO
> 7. (T3)Delete Port & enter to RESTART state.
> 8. (T3)schedule event_work, since event is RPORT_EV_NONE.
> 9. (T3)Enter RESTART state as disc_id is set.
> 9. (T3)set event = RPORT_EV_LOGO
> 10.(T2)work restart, enter PLOGI state and issues PLOGI
> 11.(T4)Since state is not RESTART anymore, restart is not set, and the
> event is not reset to RPORT_EV_NONE. (current event is RPORT_EV_LOGO).
> 12. Now, PLOGI succeeds and fc_rport_enter_ready() will not schedule
> event_work, and hence the rport will never be created, eventually losing
> the target after dev_loss_tmo.

So, the problem here is that we were tracking the desire for
the rport to be restarted by state RESTART, which was otherwise
equivalent to DELETE.  A contributing factor is that we dropped
the lock between steps 6 and 10 in thread T2, which allows the
state to change, and we didn't completely re-evaluate then.

This is hopefully corrected by the following minor redesign:

Simplify the rport restart logic by making the decision to
restart after deleting the transport rport.  That decision
is based on a new STARTED flag that indicates fc_rport_login()
has been called and fc_rport_logoff() has not been called
since then.  This replaces the need for the RESTART state.

Only restart if the rdata is still in DELETED state
and only if it still has the STARTED flag set.

Also now, since we clear the event code much later in the
work thread, allow for the possibility that the rport may
have become READY again via incoming PLOGI, and if so,
queue another event to handle that.

In the problem scenario, the second LOGO received will
cause the LOGO event to occur again.

Reported-by: Bhanu Gollapudi <bprakash@broadcom.com>
Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_rport.c | 75 ++++++++++++++++++-------------------------
 include/scsi/libfc.h          |  5 ++-
 2 files changed, 33 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index df85e19079f..d385efc68c1 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -89,7 +89,6 @@ static const char *fc_rport_state_names[] = {
 	[RPORT_ST_LOGO] = "LOGO",
 	[RPORT_ST_ADISC] = "ADISC",
 	[RPORT_ST_DELETE] = "Delete",
-	[RPORT_ST_RESTART] = "Restart",
 };
 
 /**
@@ -246,7 +245,6 @@ static void fc_rport_work(struct work_struct *work)
 	struct fc_rport_operations *rport_ops;
 	struct fc_rport_identifiers ids;
 	struct fc_rport *rport;
-	int restart = 0;
 
 	mutex_lock(&rdata->rp_mutex);
 	event = rdata->event;
@@ -298,24 +296,6 @@ static void fc_rport_work(struct work_struct *work)
 		port_id = rdata->ids.port_id;
 		mutex_unlock(&rdata->rp_mutex);
 
-		if (port_id != FC_FID_DIR_SERV) {
-			/*
-			 * We must drop rp_mutex before taking disc_mutex.
-			 * Re-evaluate state to allow for restart.
-			 * A transition to RESTART state must only happen
-			 * while disc_mutex is held and rdata is on the list.
-			 */
-			mutex_lock(&lport->disc.disc_mutex);
-			mutex_lock(&rdata->rp_mutex);
-			if (rdata->rp_state == RPORT_ST_RESTART)
-				restart = 1;
-			else
-				list_del(&rdata->peers);
-			rdata->event = RPORT_EV_NONE;
-			mutex_unlock(&rdata->rp_mutex);
-			mutex_unlock(&lport->disc.disc_mutex);
-		}
-
 		if (rport_ops && rport_ops->event_callback) {
 			FC_RPORT_DBG(rdata, "callback ev %d\n", event);
 			rport_ops->event_callback(lport, rdata, event);
@@ -336,13 +316,34 @@ static void fc_rport_work(struct work_struct *work)
 			mutex_unlock(&rdata->rp_mutex);
 			fc_remote_port_delete(rport);
 		}
-		if (restart) {
-			mutex_lock(&rdata->rp_mutex);
-			FC_RPORT_DBG(rdata, "work restart\n");
-			fc_rport_enter_plogi(rdata);
+
+		mutex_lock(&lport->disc.disc_mutex);
+		mutex_lock(&rdata->rp_mutex);
+		if (rdata->rp_state == RPORT_ST_DELETE) {
+			if (port_id == FC_FID_DIR_SERV) {
+				rdata->event = RPORT_EV_NONE;
+				mutex_unlock(&rdata->rp_mutex);
+			} else if (rdata->flags & FC_RP_STARTED) {
+				rdata->event = RPORT_EV_NONE;
+				FC_RPORT_DBG(rdata, "work restart\n");
+				fc_rport_enter_plogi(rdata);
+				mutex_unlock(&rdata->rp_mutex);
+			} else {
+				FC_RPORT_DBG(rdata, "work delete\n");
+				list_del(&rdata->peers);
+				mutex_unlock(&rdata->rp_mutex);
+				kref_put(&rdata->kref, lport->tt.rport_destroy);
+			}
+		} else {
+			/*
+			 * Re-open for events.  Reissue READY event if ready.
+			 */
+			rdata->event = RPORT_EV_NONE;
+			if (rdata->rp_state == RPORT_ST_READY)
+				fc_rport_enter_ready(rdata);
 			mutex_unlock(&rdata->rp_mutex);
-		} else
-			kref_put(&rdata->kref, lport->tt.rport_destroy);
+		}
+		mutex_unlock(&lport->disc.disc_mutex);
 		break;
 
 	default:
@@ -367,16 +368,14 @@ int fc_rport_login(struct fc_rport_priv *rdata)
 {
 	mutex_lock(&rdata->rp_mutex);
 
+	rdata->flags |= FC_RP_STARTED;
 	switch (rdata->rp_state) {
 	case RPORT_ST_READY:
 		FC_RPORT_DBG(rdata, "ADISC port\n");
 		fc_rport_enter_adisc(rdata);
 		break;
-	case RPORT_ST_RESTART:
-		break;
 	case RPORT_ST_DELETE:
 		FC_RPORT_DBG(rdata, "Restart deleted port\n");
-		fc_rport_state_enter(rdata, RPORT_ST_RESTART);
 		break;
 	default:
 		FC_RPORT_DBG(rdata, "Login to port\n");
@@ -431,15 +430,12 @@ int fc_rport_logoff(struct fc_rport_priv *rdata)
 
 	FC_RPORT_DBG(rdata, "Remove port\n");
 
+	rdata->flags &= ~FC_RP_STARTED;
 	if (rdata->rp_state == RPORT_ST_DELETE) {
 		FC_RPORT_DBG(rdata, "Port in Delete state, not removing\n");
 		goto out;
 	}
-
-	if (rdata->rp_state == RPORT_ST_RESTART)
-		FC_RPORT_DBG(rdata, "Port in Restart state, deleting\n");
-	else
-		fc_rport_enter_logo(rdata);
+	fc_rport_enter_logo(rdata);
 
 	/*
 	 * Change the state to Delete so that we discard
@@ -503,7 +499,6 @@ static void fc_rport_timeout(struct work_struct *work)
 	case RPORT_ST_READY:
 	case RPORT_ST_INIT:
 	case RPORT_ST_DELETE:
-	case RPORT_ST_RESTART:
 		break;
 	}
 
@@ -527,6 +522,7 @@ static void fc_rport_error(struct fc_rport_priv *rdata, struct fc_frame *fp)
 	switch (rdata->rp_state) {
 	case RPORT_ST_PLOGI:
 	case RPORT_ST_LOGO:
+		rdata->flags &= ~FC_RP_STARTED;
 		fc_rport_enter_delete(rdata, RPORT_EV_FAILED);
 		break;
 	case RPORT_ST_RTV:
@@ -537,7 +533,6 @@ static void fc_rport_error(struct fc_rport_priv *rdata, struct fc_frame *fp)
 		fc_rport_enter_logo(rdata);
 		break;
 	case RPORT_ST_DELETE:
-	case RPORT_ST_RESTART:
 	case RPORT_ST_READY:
 	case RPORT_ST_INIT:
 		break;
@@ -1392,7 +1387,6 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
 		break;
 	case RPORT_ST_DELETE:
 	case RPORT_ST_LOGO:
-	case RPORT_ST_RESTART:
 		FC_RPORT_DBG(rdata, "Received PLOGI in state %s - send busy\n",
 			     fc_rport_state(rdata));
 		mutex_unlock(&rdata->rp_mutex);
@@ -1684,13 +1678,6 @@ static void fc_rport_recv_logo_req(struct fc_lport *lport,
 			     fc_rport_state(rdata));
 
 		fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
-
-		/*
-		 * If the remote port was created due to discovery, set state
-		 * to log back in.  It may have seen a stale RSCN about us.
-		 */
-		if (rdata->disc_id)
-			fc_rport_state_enter(rdata, RPORT_ST_RESTART);
 		mutex_unlock(&rdata->rp_mutex);
 	} else
 		FC_RPORT_ID_DBG(lport, sid,
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 7495c0ba67e..db54c4a2d14 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -104,7 +104,6 @@ enum fc_disc_event {
  * @RPORT_ST_LOGO:    Remote port logout (LOGO) sent
  * @RPORT_ST_ADISC:   Discover Address sent
  * @RPORT_ST_DELETE:  Remote port being deleted
- * @RPORT_ST_RESTART: Remote port being deleted and will restart
 */
 enum fc_rport_state {
 	RPORT_ST_INIT,
@@ -115,7 +114,6 @@ enum fc_rport_state {
 	RPORT_ST_LOGO,
 	RPORT_ST_ADISC,
 	RPORT_ST_DELETE,
-	RPORT_ST_RESTART,
 };
 
 /**
@@ -173,6 +171,7 @@ struct fc_rport_libfc_priv {
 	u16			   flags;
 	#define FC_RP_FLAGS_REC_SUPPORTED	(1 << 0)
 	#define FC_RP_FLAGS_RETRY		(1 << 1)
+	#define FC_RP_STARTED			(1 << 2)
 	unsigned int	           e_d_tov;
 	unsigned int	           r_a_tov;
 };
@@ -185,7 +184,7 @@ struct fc_rport_libfc_priv {
  * @rp_state:       Enumeration that tracks progress of PLOGI, PRLI,
  *                  and RTV exchanges
  * @ids:            The remote port identifiers and roles
- * @flags:          REC and RETRY supported flags
+ * @flags:          STARTED, REC and RETRY_SUPPORTED flags
  * @max_seq:        Maximum number of concurrent sequences
  * @disc_id:        The discovery identifier
  * @maxframe_size:  The maximum frame size
-- 
cgit v1.2.3-70-g09d2


From f034260db330bb3ffc815fcb682b1c84aca09591 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Fri, 11 Jun 2010 16:44:57 -0700
Subject: [SCSI] libfc: fix indefinite rport restart

Remote ports were restarting indefinitely after getting
rejects in PRLI.

Fix by adding a counter of restarts and limiting that with
the port login retry limit as well.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_rport.c | 6 +++++-
 include/scsi/libfc.h          | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index d385efc68c1..363cde30c94 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -257,6 +257,7 @@ static void fc_rport_work(struct work_struct *work)
 	case RPORT_EV_READY:
 		ids = rdata->ids;
 		rdata->event = RPORT_EV_NONE;
+		rdata->major_retries = 0;
 		kref_get(&rdata->kref);
 		mutex_unlock(&rdata->rp_mutex);
 
@@ -323,7 +324,10 @@ static void fc_rport_work(struct work_struct *work)
 			if (port_id == FC_FID_DIR_SERV) {
 				rdata->event = RPORT_EV_NONE;
 				mutex_unlock(&rdata->rp_mutex);
-			} else if (rdata->flags & FC_RP_STARTED) {
+			} else if ((rdata->flags & FC_RP_STARTED) &&
+				   rdata->major_retries <
+				   lport->max_rport_retry_count) {
+				rdata->major_retries++;
 				rdata->event = RPORT_EV_NONE;
 				FC_RPORT_DBG(rdata, "work restart\n");
 				fc_rport_enter_plogi(rdata);
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index db54c4a2d14..6d78df77dab 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -189,6 +189,7 @@ struct fc_rport_libfc_priv {
  * @disc_id:        The discovery identifier
  * @maxframe_size:  The maximum frame size
  * @retries:        The retry count for the current state
+ * @major_retries:  The retry count for the entire PLOGI/PRLI state machine
  * @e_d_tov:        Error detect timeout value (in msec)
  * @r_a_tov:        Resource allocation timeout value (in msec)
  * @rp_mutex:       The mutex that protects the remote port
@@ -206,6 +207,7 @@ struct fc_rport_priv {
 	u16			    disc_id;
 	u16			    maxframe_size;
 	unsigned int	            retries;
+	unsigned int	            major_retries;
 	unsigned int	            e_d_tov;
 	unsigned int	            r_a_tov;
 	struct mutex                rp_mutex;
-- 
cgit v1.2.3-70-g09d2


From e1db74fcc3d95c8a051ec457241b5aa65a01a603 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 26 Jul 2010 14:39:57 -0700
Subject: include/net/cfg80211.h: Add wiphy_<level> printk equivalents

Simplify logging messages for wiphy devices

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 64 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 4d1f19d7079..7fe774c2d43 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1416,7 +1416,7 @@ static inline struct device *wiphy_dev(struct wiphy *wiphy)
  *
  * @wiphy: The wiphy whose name to return
  */
-static inline const char *wiphy_name(struct wiphy *wiphy)
+static inline const char *wiphy_name(const struct wiphy *wiphy)
 {
 	return dev_name(&wiphy->dev);
 }
@@ -2420,4 +2420,66 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
 			      enum nl80211_cqm_rssi_threshold_event rssi_event,
 			      gfp_t gfp);
 
+#ifdef __KERNEL__
+
+/* Logging, debugging and troubleshooting/diagnostic helpers. */
+
+/* wiphy_printk helpers, similar to dev_printk */
+
+#define wiphy_printk(level, wiphy, format, args...)		\
+	printk(level "%s: " format, wiphy_name(wiphy), ##args)
+#define wiphy_emerg(wiphy, format, args...)			\
+	wiphy_printk(KERN_EMERG, wiphy, format, ##args)
+#define wiphy_alert(wiphy, format, args...)			\
+	wiphy_printk(KERN_ALERT, wiphy, format, ##args)
+#define wiphy_crit(wiphy, format, args...)			\
+	wiphy_printk(KERN_CRIT, wiphy, format, ##args)
+#define wiphy_err(wiphy, format, args...)			\
+	wiphy_printk(KERN_ERR, wiphy, format, ##args)
+#define wiphy_warn(wiphy, format, args...)			\
+	wiphy_printk(KERN_WARNING, wiphy, format, ##args)
+#define wiphy_notice(wiphy, format, args...)			\
+	wiphy_printk(KERN_NOTICE, wiphy, format, ##args)
+#define wiphy_info(wiphy, format, args...)			\
+	wiphy_printk(KERN_INFO, wiphy, format, ##args)
+#define wiphy_debug(wiphy, format, args...)			\
+	wiphy_printk(KERN_DEBUG, wiphy, format, ##args)
+
+#if defined(DEBUG)
+#define wiphy_dbg(wiphy, format, args...)			\
+	wiphy_printk(KERN_DEBUG, wiphy, format, ##args)
+#elif defined(CONFIG_DYNAMIC_DEBUG)
+#define wiphy_dbg(wiphy, format, args...)			\
+	dynamic_pr_debug("%s: " format,	wiphy_name(dev), ##args)
+#else
+#define wiphy_dbg(wiphy, format, args...)				\
+({									\
+	if (0)								\
+		wiphy_printk(KERN_DEBUG, wiphy, format, ##args);	\
+	0;								\
+})
+#endif
+
+#if defined(VERBOSE_DEBUG)
+#define wiphy_vdbg	wiphy_dbg
+#else
+
+#define wiphy_vdbg(wiphy, format, args...)				\
+({									\
+	if (0)								\
+		wiphy_printk(KERN_DEBUG, wiphy, format, ##args);	\
+		0;							\
+})
+#endif
+
+/*
+ * wiphy_WARN() acts like wiphy_printk(), but with the key difference
+ * of using a WARN/WARN_ON to get the message out, including the
+ * file/line information and a backtrace.
+ */
+#define wiphy_WARN(wiphy, format, args...)			\
+	WARN(1, "wiphy: %s\n" format, wiphy_name(wiphy), ##args);
+
+#endif
+
 #endif /* __NET_CFG80211_H */
-- 
cgit v1.2.3-70-g09d2


From 073730d771d97bb5bbef080bd5d6d0a5af7cba7d Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 26 Jul 2010 14:40:00 -0700
Subject: wireless: Convert wiphy_debug macro to function

Save a few bytes of text

(allyesconfig)
$ size drivers/net/wireless/built-in.o*
   text	   data	    bss	    dec	    hex	filename
3924568	 100548	 871056	4896172	 4ab5ac	drivers/net/wireless/built-in.o.new
3926520	 100548	 871464	4898532	 4abee4	drivers/net/wireless/built-in.o.old

$ size net/wireless/core.o*
   text	   data	    bss	    dec	    hex	filename
  12843	    216	   3768	  16827	   41bb	net/wireless/core.o.new
  12328	    216	   3656	  16200	   3f48	net/wireless/core.o

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h |  5 +++--
 net/wireless/core.c    | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7fe774c2d43..ae80f8fb17f 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2442,8 +2442,9 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
 	wiphy_printk(KERN_NOTICE, wiphy, format, ##args)
 #define wiphy_info(wiphy, format, args...)			\
 	wiphy_printk(KERN_INFO, wiphy, format, ##args)
-#define wiphy_debug(wiphy, format, args...)			\
-	wiphy_printk(KERN_DEBUG, wiphy, format, ##args)
+
+int wiphy_debug(const struct wiphy *wiphy, const char *format, ...)
+	__attribute__ ((format (printf, 2, 3)));
 
 #if defined(DEBUG)
 #define wiphy_dbg(wiphy, format, args...)			\
diff --git a/net/wireless/core.c b/net/wireless/core.c
index f65c6494ede..541e2fff5e9 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -907,3 +907,52 @@ static void __exit cfg80211_exit(void)
 	destroy_workqueue(cfg80211_wq);
 }
 module_exit(cfg80211_exit);
+
+static int ___wiphy_printk(const char *level, const struct wiphy *wiphy,
+			   struct va_format *vaf)
+{
+	if (!wiphy)
+		return printk("%s(NULL wiphy *): %pV", level, vaf);
+
+	return printk("%s%s: %pV", level, wiphy_name(wiphy), vaf);
+}
+
+int __wiphy_printk(const char *level, const struct wiphy *wiphy,
+		   const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+	int r;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	r = ___wiphy_printk(level, wiphy, &vaf);
+	va_end(args);
+
+	return r;
+}
+EXPORT_SYMBOL(__wiphy_printk);
+
+#define define_wiphy_printk_level(func, kern_level)		\
+int func(const struct wiphy *wiphy, const char *fmt, ...)	\
+{								\
+	struct va_format vaf;					\
+	va_list args;						\
+	int r;							\
+								\
+	va_start(args, fmt);					\
+								\
+	vaf.fmt = fmt;						\
+	vaf.va = &args;						\
+								\
+	r = ___wiphy_printk(kern_level, wiphy, &vaf);		\
+	va_end(args);						\
+								\
+	return r;						\
+}								\
+EXPORT_SYMBOL(func);
+
+define_wiphy_printk_level(wiphy_debug, KERN_DEBUG);
-- 
cgit v1.2.3-70-g09d2


From e73439d8c0e4c522c843b8bb98c0eb5700da6b05 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 26 Jul 2010 10:06:00 -0400
Subject: Bluetooth: Defer SCO setup if mode change is pending

Certain headsets such as the Motorola H350 will reject SCO and eSCO
connection requests while the ACL is transitioning from sniff mode
to active mode. Add synchronization so that SCO and eSCO connection
requests will wait until the ACL has fully transitioned to active mode.

< HCI Command: Exit Sniff Mode (0x02|0x0004) plen 2
    handle 12
> HCI Event: Command Status (0x0f) plen 4
    Exit Sniff Mode (0x02|0x0004) status 0x00 ncmd 1
< HCI Command:  Setup Synchronous Connection (0x01|0x0028) plen 17
    handle 12 voice setting 0x0040
> HCI Event: Command Status (0x0f) plen 4
    Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1
> HCI Event: Number of Completed Packets (0x13) plen 5
    handle 12 packets 1
> HCI Event: Mode Change (0x14) plen 6
    status 0x00 handle 12 mode 0x00 interval 0
    Mode: Active
> HCI Event: Synchronous Connect Complete (0x2c) plen 17
    status 0x10 handle 14 bdaddr 00:1A:0E:50:28:A4 type SCO
    Error: Connection Accept Timeout Exceeded

Signed-off-by: Ron Shaffer <rshaffer@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  2 ++
 net/bluetooth/hci_conn.c         | 32 ++++++++++++++++++++++++++++----
 net/bluetooth/hci_event.c        | 31 +++++++++++++++----------------
 3 files changed, 45 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 350b3e6964b..8b28962e737 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -256,6 +256,7 @@ enum {
 	HCI_CONN_ENCRYPT_PEND,
 	HCI_CONN_RSWITCH_PEND,
 	HCI_CONN_MODE_CHANGE_PEND,
+	HCI_CONN_SCO_SETUP_PEND,
 };
 
 static inline void hci_conn_hash_init(struct hci_dev *hdev)
@@ -336,6 +337,7 @@ void hci_acl_connect(struct hci_conn *conn);
 void hci_acl_disconn(struct hci_conn *conn, __u8 reason);
 void hci_add_sco(struct hci_conn *conn, __u16 handle);
 void hci_setup_sync(struct hci_conn *conn, __u16 handle);
+void hci_sco_setup(struct hci_conn *conn, __u8 status);
 
 struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst);
 int hci_conn_del(struct hci_conn *conn);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index e9fef83449f..0b1e460fe44 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -155,6 +155,27 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle)
 	hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp);
 }
 
+/* Device _must_ be locked */
+void hci_sco_setup(struct hci_conn *conn, __u8 status)
+{
+	struct hci_conn *sco = conn->link;
+
+	BT_DBG("%p", conn);
+
+	if (!sco)
+		return;
+
+	if (!status) {
+		if (lmp_esco_capable(conn->hdev))
+			hci_setup_sync(sco, conn->handle);
+		else
+			hci_add_sco(sco, conn->handle);
+	} else {
+		hci_proto_connect_cfm(sco, status);
+		hci_conn_del(sco);
+	}
+}
+
 static void hci_conn_timeout(unsigned long arg)
 {
 	struct hci_conn *conn = (void *) arg;
@@ -385,10 +406,13 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
 		acl->power_save = 1;
 		hci_conn_enter_active_mode(acl);
 
-		if (lmp_esco_capable(hdev))
-			hci_setup_sync(sco, acl->handle);
-		else
-			hci_add_sco(sco, acl->handle);
+		if (test_bit(HCI_CONN_MODE_CHANGE_PEND, &acl->pend)) {
+			/* defer SCO setup until mode change completed */
+			set_bit(HCI_CONN_SCO_SETUP_PEND, &acl->pend);
+			return sco;
+		}
+
+		hci_sco_setup(acl, 0x00);
 	}
 
 	return sco;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 2069c3b05fd..bfef5bae0b3 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -785,9 +785,13 @@ static void hci_cs_sniff_mode(struct hci_dev *hdev, __u8 status)
 	hci_dev_lock(hdev);
 
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
-	if (conn)
+	if (conn) {
 		clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend);
 
+		if (test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend))
+			hci_sco_setup(conn, status);
+	}
+
 	hci_dev_unlock(hdev);
 }
 
@@ -808,9 +812,13 @@ static void hci_cs_exit_sniff_mode(struct hci_dev *hdev, __u8 status)
 	hci_dev_lock(hdev);
 
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
-	if (conn)
+	if (conn) {
 		clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend);
 
+		if (test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend))
+			hci_sco_setup(conn, status);
+	}
+
 	hci_dev_unlock(hdev);
 }
 
@@ -915,20 +923,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 	} else
 		conn->state = BT_CLOSED;
 
-	if (conn->type == ACL_LINK) {
-		struct hci_conn *sco = conn->link;
-		if (sco) {
-			if (!ev->status) {
-				if (lmp_esco_capable(hdev))
-					hci_setup_sync(sco, conn->handle);
-				else
-					hci_add_sco(sco, conn->handle);
-			} else {
-				hci_proto_connect_cfm(sco, ev->status);
-				hci_conn_del(sco);
-			}
-		}
-	}
+	if (conn->type == ACL_LINK)
+		hci_sco_setup(conn, ev->status);
 
 	if (ev->status) {
 		hci_proto_connect_cfm(conn, ev->status);
@@ -1481,6 +1477,9 @@ static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb
 			else
 				conn->power_save = 0;
 		}
+
+		if (test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend))
+			hci_sco_setup(conn, ev->status);
 	}
 
 	hci_dev_unlock(hdev);
-- 
cgit v1.2.3-70-g09d2


From 94fe8c683cea97fe2c59a5f0dc206aa329c5763c Mon Sep 17 00:00:00 2001
From: Richard Röjfors <richard.rojfors@pelagicore.com>
Date: Tue, 27 Jul 2010 12:57:01 +0000
Subject: ks8842: Support DMA when accessed via timberdale
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds support for RX and TX DMA via the DMA API,
this is only supported when the KS8842 is accessed via timberdale.

There is no support for DMA on the generic bus interface itself;
a state machine inside the FPGA handles RX and TX transfers to/from
buffers in the FPGA. The host CPU can do DMA to and from these buffers.

The FPGA has to handle the RX interrupts, so these must be enabled in
the ks8842 but not in the FPGA. The driver must not disable the RX interrupt,
since that would stop the data transfers into the FPGA buffers.

The host shall not enable TX interrupts, since TX is handled by the FPGA;
the host is notified by DMA callbacks when transfers are finished.

The DMA channels to use are passed as parameters in the platform data struct.

Signed-off-by: Richard Röjfors <richard.rojfors@pelagicore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ks8842.c   | 464 ++++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/ks8842.h |   4 +
 2 files changed, 447 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ks8842.c b/drivers/net/ks8842.c
index 289b0bee346..3fe38c787f2 100644
--- a/drivers/net/ks8842.c
+++ b/drivers/net/ks8842.c
@@ -30,6 +30,9 @@
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <linux/ks8842.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
 
 #define DRV_NAME "ks8842"
 
@@ -82,6 +85,15 @@
 #define IRQ_RX_ERROR	0x0080
 #define ENABLED_IRQS	(IRQ_LINK_CHANGE | IRQ_TX | IRQ_RX | IRQ_RX_STOPPED | \
 		IRQ_TX_STOPPED | IRQ_RX_OVERRUN | IRQ_RX_ERROR)
+/* When running via timberdale in DMA mode, the RX interrupt should be
+   enabled in the KS8842, but not in the FPGA IP, since the IP handles
+   RX DMA internally.
+   TX interrupts are not needed it is handled by the FPGA the driver is
+   notified via DMA callbacks.
+*/
+#define ENABLED_IRQS_DMA_IP	(IRQ_LINK_CHANGE | IRQ_RX_STOPPED | \
+	IRQ_TX_STOPPED | IRQ_RX_OVERRUN | IRQ_RX_ERROR)
+#define ENABLED_IRQS_DMA	(ENABLED_IRQS_DMA_IP | IRQ_RX)
 #define REG_ISR		0x02
 #define REG_RXSR	0x04
 #define RXSR_VALID	0x8000
@@ -124,6 +136,28 @@
 #define	MICREL_KS884X		0x01	/* 0=Timeberdale(FPGA), 1=Micrel */
 #define	KS884X_16BIT		0x02	/*  1=16bit, 0=32bit */
 
+#define DMA_BUFFER_SIZE		2048
+
+struct ks8842_tx_dma_ctl {
+	struct dma_chan *chan;
+	struct dma_async_tx_descriptor *adesc;
+	void *buf;
+	struct scatterlist sg;
+	int channel;
+};
+
+struct ks8842_rx_dma_ctl {
+	struct dma_chan *chan;
+	struct dma_async_tx_descriptor *adesc;
+	struct sk_buff  *skb;
+	struct scatterlist sg;
+	struct tasklet_struct tasklet;
+	int channel;
+};
+
+#define KS8842_USE_DMA(adapter) (((adapter)->dma_tx.channel != -1) && \
+	 ((adapter)->dma_rx.channel != -1))
+
 struct ks8842_adapter {
 	void __iomem	*hw_addr;
 	int		irq;
@@ -132,8 +166,19 @@ struct ks8842_adapter {
 	spinlock_t	lock; /* spinlock to be interrupt safe */
 	struct work_struct timeout_work;
 	struct net_device *netdev;
+	struct device *dev;
+	struct ks8842_tx_dma_ctl	dma_tx;
+	struct ks8842_rx_dma_ctl	dma_rx;
 };
 
+static void ks8842_dma_rx_cb(void *data);
+static void ks8842_dma_tx_cb(void *data);
+
+static inline void ks8842_resume_dma(struct ks8842_adapter *adapter)
+{
+	iowrite32(1, adapter->hw_addr + REQ_TIMB_DMA_RESUME);
+}
+
 static inline void ks8842_select_bank(struct ks8842_adapter *adapter, u16 bank)
 {
 	iowrite16(bank, adapter->hw_addr + REG_SELECT_BANK);
@@ -297,8 +342,19 @@ static void ks8842_reset_hw(struct ks8842_adapter *adapter)
 	ks8842_write16(adapter, 18, 0xffff, REG_ISR);
 
 	/* enable interrupts */
-	ks8842_write16(adapter, 18, ENABLED_IRQS, REG_IER);
-
+	if (KS8842_USE_DMA(adapter)) {
+		/* When running in DMA Mode the RX interrupt is not enabled in
+		   timberdale because RX data is received by DMA callbacks
+		   it must still be enabled in the KS8842 because it indicates
+		   to timberdale when there is RX data for it's DMA FIFOs */
+		iowrite16(ENABLED_IRQS_DMA_IP, adapter->hw_addr + REG_TIMB_IER);
+		ks8842_write16(adapter, 18, ENABLED_IRQS_DMA, REG_IER);
+	} else {
+		if (!(adapter->conf_flags & MICREL_KS884X))
+			iowrite16(ENABLED_IRQS,
+				adapter->hw_addr + REG_TIMB_IER);
+		ks8842_write16(adapter, 18, ENABLED_IRQS, REG_IER);
+	}
 	/* enable the switch */
 	ks8842_write16(adapter, 32, 0x1, REG_SW_ID_AND_ENABLE);
 }
@@ -371,6 +427,53 @@ static inline u16 ks8842_tx_fifo_space(struct ks8842_adapter *adapter)
 	return ks8842_read16(adapter, 16, REG_TXMIR) & 0x1fff;
 }
 
+static int ks8842_tx_frame_dma(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct ks8842_adapter *adapter = netdev_priv(netdev);
+	struct ks8842_tx_dma_ctl *ctl = &adapter->dma_tx;
+	u8 *buf = ctl->buf;
+
+	if (ctl->adesc) {
+		netdev_dbg(netdev, "%s: TX ongoing\n", __func__);
+		/* transfer ongoing */
+		return NETDEV_TX_BUSY;
+	}
+
+	sg_dma_len(&ctl->sg) = skb->len + sizeof(u32);
+
+	/* copy data to the TX buffer */
+	/* the control word, enable IRQ, port 1 and the length */
+	*buf++ = 0x00;
+	*buf++ = 0x01; /* Port 1 */
+	*buf++ = skb->len & 0xff;
+	*buf++ = (skb->len >> 8) & 0xff;
+	skb_copy_from_linear_data(skb, buf, skb->len);
+
+	dma_sync_single_range_for_device(adapter->dev,
+		sg_dma_address(&ctl->sg), 0, sg_dma_len(&ctl->sg),
+		DMA_TO_DEVICE);
+
+	/* make sure the length is a multiple of 4 */
+	if (sg_dma_len(&ctl->sg) % 4)
+		sg_dma_len(&ctl->sg) += 4 - sg_dma_len(&ctl->sg) % 4;
+
+	ctl->adesc = ctl->chan->device->device_prep_slave_sg(ctl->chan,
+		&ctl->sg, 1, DMA_TO_DEVICE,
+		DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+	if (!ctl->adesc)
+		return NETDEV_TX_BUSY;
+
+	ctl->adesc->callback_param = netdev;
+	ctl->adesc->callback = ks8842_dma_tx_cb;
+	ctl->adesc->tx_submit(ctl->adesc);
+
+	netdev->stats.tx_bytes += skb->len;
+
+	dev_kfree_skb(skb);
+
+	return NETDEV_TX_OK;
+}
+
 static int ks8842_tx_frame(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct ks8842_adapter *adapter = netdev_priv(netdev);
@@ -422,6 +525,121 @@ static int ks8842_tx_frame(struct sk_buff *skb, struct net_device *netdev)
 	return NETDEV_TX_OK;
 }
 
+static void ks8842_update_rx_err_counters(struct net_device *netdev, u32 status)
+{
+	netdev_dbg(netdev, "RX error, status: %x\n", status);
+
+	netdev->stats.rx_errors++;
+	if (status & RXSR_TOO_LONG)
+		netdev->stats.rx_length_errors++;
+	if (status & RXSR_CRC_ERROR)
+		netdev->stats.rx_crc_errors++;
+	if (status & RXSR_RUNT)
+		netdev->stats.rx_frame_errors++;
+}
+
+static void ks8842_update_rx_counters(struct net_device *netdev, u32 status,
+	int len)
+{
+	netdev_dbg(netdev, "RX packet, len: %d\n", len);
+
+	netdev->stats.rx_packets++;
+	netdev->stats.rx_bytes += len;
+	if (status & RXSR_MULTICAST)
+		netdev->stats.multicast++;
+}
+
+static int __ks8842_start_new_rx_dma(struct net_device *netdev)
+{
+	struct ks8842_adapter *adapter = netdev_priv(netdev);
+	struct ks8842_rx_dma_ctl *ctl = &adapter->dma_rx;
+	struct scatterlist *sg = &ctl->sg;
+	int err;
+
+	ctl->skb = netdev_alloc_skb(netdev, DMA_BUFFER_SIZE);
+	if (ctl->skb) {
+		sg_init_table(sg, 1);
+		sg_dma_address(sg) = dma_map_single(adapter->dev,
+			ctl->skb->data, DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
+		err = dma_mapping_error(adapter->dev, sg_dma_address(sg));
+		if (unlikely(err)) {
+			sg_dma_address(sg) = 0;
+			goto out;
+		}
+
+		sg_dma_len(sg) = DMA_BUFFER_SIZE;
+
+		ctl->adesc = ctl->chan->device->device_prep_slave_sg(ctl->chan,
+			sg, 1, DMA_FROM_DEVICE,
+			DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+
+		if (!ctl->adesc)
+			goto out;
+
+		ctl->adesc->callback_param = netdev;
+		ctl->adesc->callback = ks8842_dma_rx_cb;
+		ctl->adesc->tx_submit(ctl->adesc);
+	} else {
+		err = -ENOMEM;
+		sg_dma_address(sg) = 0;
+		goto out;
+	}
+
+	return err;
+out:
+	if (sg_dma_address(sg))
+		dma_unmap_single(adapter->dev, sg_dma_address(sg),
+			DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
+	sg_dma_address(sg) = 0;
+	if (ctl->skb)
+		dev_kfree_skb(ctl->skb);
+
+	ctl->skb = NULL;
+
+	printk(KERN_ERR DRV_NAME": Failed to start RX DMA: %d\n", err);
+	return err;
+}
+
+static void ks8842_rx_frame_dma_tasklet(unsigned long arg)
+{
+	struct net_device *netdev = (struct net_device *)arg;
+	struct ks8842_adapter *adapter = netdev_priv(netdev);
+	struct ks8842_rx_dma_ctl *ctl = &adapter->dma_rx;
+	struct sk_buff *skb = ctl->skb;
+	dma_addr_t addr = sg_dma_address(&ctl->sg);
+	u32 status;
+
+	ctl->adesc = NULL;
+
+	/* kick next transfer going */
+	__ks8842_start_new_rx_dma(netdev);
+
+	/* now handle the data we got */
+	dma_unmap_single(adapter->dev, addr, DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
+
+	status = *((u32 *)skb->data);
+
+	netdev_dbg(netdev, "%s - rx_data: status: %x\n",
+		__func__, status & 0xffff);
+
+	/* check the status */
+	if ((status & RXSR_VALID) && !(status & RXSR_ERROR)) {
+		int len = (status >> 16) & 0x7ff;
+
+		ks8842_update_rx_counters(netdev, status, len);
+
+		/* reserve 4 bytes which is the status word */
+		skb_reserve(skb, 4);
+		skb_put(skb, len);
+
+		skb->protocol = eth_type_trans(skb, netdev);
+		netif_rx(skb);
+	} else {
+		ks8842_update_rx_err_counters(netdev, status);
+		dev_kfree_skb(skb);
+	}
+}
+
 static void ks8842_rx_frame(struct net_device *netdev,
 	struct ks8842_adapter *adapter)
 {
@@ -445,13 +663,9 @@ static void ks8842_rx_frame(struct net_device *netdev,
 	if ((status & RXSR_VALID) && !(status & RXSR_ERROR)) {
 		struct sk_buff *skb = netdev_alloc_skb_ip_align(netdev, len);
 
-		netdev_dbg(netdev, "%s, got package, len: %d\n", __func__, len);
 		if (skb) {
 
-			netdev->stats.rx_packets++;
-			netdev->stats.rx_bytes += len;
-			if (status & RXSR_MULTICAST)
-				netdev->stats.multicast++;
+			ks8842_update_rx_counters(netdev, status, len);
 
 			if (adapter->conf_flags & KS884X_16BIT) {
 				u16 *data16 = (u16 *)skb_put(skb, len);
@@ -477,16 +691,8 @@ static void ks8842_rx_frame(struct net_device *netdev,
 			netif_rx(skb);
 		} else
 			netdev->stats.rx_dropped++;
-	} else {
-		netdev_dbg(netdev, "RX error, status: %x\n", status);
-		netdev->stats.rx_errors++;
-		if (status & RXSR_TOO_LONG)
-			netdev->stats.rx_length_errors++;
-		if (status & RXSR_CRC_ERROR)
-			netdev->stats.rx_crc_errors++;
-		if (status & RXSR_RUNT)
-			netdev->stats.rx_frame_errors++;
-	}
+	} else
+		ks8842_update_rx_err_counters(netdev, status);
 
 	/* set high watermark to 3K */
 	ks8842_clear_bits(adapter, 0, 1 << 12, REG_QRFCR);
@@ -541,6 +747,12 @@ void ks8842_tasklet(unsigned long arg)
 	isr = ks8842_read16(adapter, 18, REG_ISR);
 	netdev_dbg(netdev, "%s - ISR: 0x%x\n", __func__, isr);
 
+	/* when running in DMA mode, do not ack RX interrupts, it is handled
+	   internally by timberdale, otherwise it's DMA FIFO:s would stop
+	*/
+	if (KS8842_USE_DMA(adapter))
+		isr &= ~IRQ_RX;
+
 	/* Ack */
 	ks8842_write16(adapter, 18, isr, REG_ISR);
 
@@ -554,9 +766,11 @@ void ks8842_tasklet(unsigned long arg)
 	if (isr & IRQ_LINK_CHANGE)
 		ks8842_update_link_status(netdev, adapter);
 
-	if (isr & (IRQ_RX | IRQ_RX_ERROR))
+	/* should not get IRQ_RX when running DMA mode */
+	if (isr & (IRQ_RX | IRQ_RX_ERROR) && !KS8842_USE_DMA(adapter))
 		ks8842_handle_rx(netdev, adapter);
 
+	/* should only happen when in PIO mode */
 	if (isr & IRQ_TX)
 		ks8842_handle_tx(netdev, adapter);
 
@@ -575,8 +789,17 @@ void ks8842_tasklet(unsigned long arg)
 
 	/* re-enable interrupts, put back the bank selection register */
 	spin_lock_irqsave(&adapter->lock, flags);
-	ks8842_write16(adapter, 18, ENABLED_IRQS, REG_IER);
+	if (KS8842_USE_DMA(adapter))
+		ks8842_write16(adapter, 18, ENABLED_IRQS_DMA, REG_IER);
+	else
+		ks8842_write16(adapter, 18, ENABLED_IRQS, REG_IER);
 	iowrite16(entry_bank, adapter->hw_addr + REG_SELECT_BANK);
+
+	/* Make sure timberdale continues DMA operations, they are stopped while
+	   we are handling the ks8842 because we might change bank */
+	if (KS8842_USE_DMA(adapter))
+		ks8842_resume_dma(adapter);
+
 	spin_unlock_irqrestore(&adapter->lock, flags);
 }
 
@@ -592,8 +815,12 @@ static irqreturn_t ks8842_irq(int irq, void *devid)
 	netdev_dbg(netdev, "%s - ISR: 0x%x\n", __func__, isr);
 
 	if (isr) {
-		/* disable IRQ */
-		ks8842_write16(adapter, 18, 0x00, REG_IER);
+		if (KS8842_USE_DMA(adapter))
+			/* disable all but RX IRQ, since the FPGA relies on it*/
+			ks8842_write16(adapter, 18, IRQ_RX, REG_IER);
+		else
+			/* disable IRQ */
+			ks8842_write16(adapter, 18, 0x00, REG_IER);
 
 		/* schedule tasklet */
 		tasklet_schedule(&adapter->tasklet);
@@ -603,9 +830,151 @@ static irqreturn_t ks8842_irq(int irq, void *devid)
 
 	iowrite16(entry_bank, adapter->hw_addr + REG_SELECT_BANK);
 
+	/* After an interrupt, tell timberdale to continue DMA operations.
+	   DMA is disabled while we are handling the ks8842 because we might
+	   change bank */
+	ks8842_resume_dma(adapter);
+
 	return ret;
 }
 
+static void ks8842_dma_rx_cb(void *data)
+{
+	struct net_device	*netdev = data;
+	struct ks8842_adapter	*adapter = netdev_priv(netdev);
+
+	netdev_dbg(netdev, "RX DMA finished\n");
+	/* schedule tasklet */
+	if (adapter->dma_rx.adesc)
+		tasklet_schedule(&adapter->dma_rx.tasklet);
+}
+
+static void ks8842_dma_tx_cb(void *data)
+{
+	struct net_device		*netdev = data;
+	struct ks8842_adapter		*adapter = netdev_priv(netdev);
+	struct ks8842_tx_dma_ctl	*ctl = &adapter->dma_tx;
+
+	netdev_dbg(netdev, "TX DMA finished\n");
+
+	if (!ctl->adesc)
+		return;
+
+	netdev->stats.tx_packets++;
+	ctl->adesc = NULL;
+
+	if (netif_queue_stopped(netdev))
+		netif_wake_queue(netdev);
+}
+
+static void ks8842_stop_dma(struct ks8842_adapter *adapter)
+{
+	struct ks8842_tx_dma_ctl *tx_ctl = &adapter->dma_tx;
+	struct ks8842_rx_dma_ctl *rx_ctl = &adapter->dma_rx;
+
+	tx_ctl->adesc = NULL;
+	if (tx_ctl->chan)
+		tx_ctl->chan->device->device_control(tx_ctl->chan,
+			DMA_TERMINATE_ALL, 0);
+
+	rx_ctl->adesc = NULL;
+	if (rx_ctl->chan)
+		rx_ctl->chan->device->device_control(rx_ctl->chan,
+			DMA_TERMINATE_ALL, 0);
+
+	if (sg_dma_address(&rx_ctl->sg))
+		dma_unmap_single(adapter->dev, sg_dma_address(&rx_ctl->sg),
+			DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
+	sg_dma_address(&rx_ctl->sg) = 0;
+
+	dev_kfree_skb(rx_ctl->skb);
+	rx_ctl->skb = NULL;
+}
+
+static void ks8842_dealloc_dma_bufs(struct ks8842_adapter *adapter)
+{
+	struct ks8842_tx_dma_ctl *tx_ctl = &adapter->dma_tx;
+	struct ks8842_rx_dma_ctl *rx_ctl = &adapter->dma_rx;
+
+	ks8842_stop_dma(adapter);
+
+	if (tx_ctl->chan)
+		dma_release_channel(tx_ctl->chan);
+	tx_ctl->chan = NULL;
+
+	if (rx_ctl->chan)
+		dma_release_channel(rx_ctl->chan);
+	rx_ctl->chan = NULL;
+
+	tasklet_kill(&rx_ctl->tasklet);
+
+	if (sg_dma_address(&tx_ctl->sg))
+		dma_unmap_single(adapter->dev, sg_dma_address(&tx_ctl->sg),
+			DMA_BUFFER_SIZE, DMA_TO_DEVICE);
+	sg_dma_address(&tx_ctl->sg) = 0;
+
+	kfree(tx_ctl->buf);
+	tx_ctl->buf = NULL;
+}
+
+static bool ks8842_dma_filter_fn(struct dma_chan *chan, void *filter_param)
+{
+	return chan->chan_id == (int)filter_param;
+}
+
+static int ks8842_alloc_dma_bufs(struct net_device *netdev)
+{
+	struct ks8842_adapter *adapter = netdev_priv(netdev);
+	struct ks8842_tx_dma_ctl *tx_ctl = &adapter->dma_tx;
+	struct ks8842_rx_dma_ctl *rx_ctl = &adapter->dma_rx;
+	int err;
+
+	dma_cap_mask_t mask;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	dma_cap_set(DMA_PRIVATE, mask);
+
+	sg_init_table(&tx_ctl->sg, 1);
+
+	tx_ctl->chan = dma_request_channel(mask, ks8842_dma_filter_fn,
+		(void *)tx_ctl->channel);
+	if (!tx_ctl->chan) {
+		err = -ENODEV;
+		goto err;
+	}
+
+	/* allocate DMA buffer */
+	tx_ctl->buf = kmalloc(DMA_BUFFER_SIZE, GFP_KERNEL);
+	if (!tx_ctl->buf) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	sg_dma_address(&tx_ctl->sg) = dma_map_single(adapter->dev,
+		tx_ctl->buf, DMA_BUFFER_SIZE, DMA_TO_DEVICE);
+	err = dma_mapping_error(adapter->dev,
+		sg_dma_address(&tx_ctl->sg));
+	if (err) {
+		sg_dma_address(&tx_ctl->sg) = 0;
+		goto err;
+	}
+
+	rx_ctl->chan = dma_request_channel(mask, ks8842_dma_filter_fn,
+		(void *)rx_ctl->channel);
+	if (!rx_ctl->chan) {
+		err = -ENODEV;
+		goto err;
+	}
+
+	tasklet_init(&rx_ctl->tasklet, ks8842_rx_frame_dma_tasklet,
+		(unsigned long)netdev);
+
+	return 0;
+err:
+	ks8842_dealloc_dma_bufs(adapter);
+	return err;
+}
 
 /* Netdevice operations */
 
@@ -616,6 +985,25 @@ static int ks8842_open(struct net_device *netdev)
 
 	netdev_dbg(netdev, "%s - entry\n", __func__);
 
+	if (KS8842_USE_DMA(adapter)) {
+		err = ks8842_alloc_dma_bufs(netdev);
+
+		if (!err) {
+			/* start RX dma */
+			err = __ks8842_start_new_rx_dma(netdev);
+			if (err)
+				ks8842_dealloc_dma_bufs(adapter);
+		}
+
+		if (err) {
+			printk(KERN_WARNING DRV_NAME
+				": Failed to initiate DMA, running PIO\n");
+			ks8842_dealloc_dma_bufs(adapter);
+			adapter->dma_rx.channel = -1;
+			adapter->dma_tx.channel = -1;
+		}
+	}
+
 	/* reset the HW */
 	ks8842_reset_hw(adapter);
 
@@ -641,6 +1029,9 @@ static int ks8842_close(struct net_device *netdev)
 
 	cancel_work_sync(&adapter->timeout_work);
 
+	if (KS8842_USE_DMA(adapter))
+		ks8842_dealloc_dma_bufs(adapter);
+
 	/* free the irq */
 	free_irq(adapter->irq, netdev);
 
@@ -658,6 +1049,17 @@ static netdev_tx_t ks8842_xmit_frame(struct sk_buff *skb,
 
 	netdev_dbg(netdev, "%s: entry\n", __func__);
 
+	if (KS8842_USE_DMA(adapter)) {
+		unsigned long flags;
+		ret = ks8842_tx_frame_dma(skb, netdev);
+		/* for now only allow one transfer at the time */
+		spin_lock_irqsave(&adapter->lock, flags);
+		if (adapter->dma_tx.adesc)
+			netif_stop_queue(netdev);
+		spin_unlock_irqrestore(&adapter->lock, flags);
+		return ret;
+	}
+
 	ret = ks8842_tx_frame(skb, netdev);
 
 	if (ks8842_tx_fifo_space(adapter) <  netdev->mtu + 8)
@@ -693,6 +1095,10 @@ static void ks8842_tx_timeout_work(struct work_struct *work)
 	netdev_dbg(netdev, "%s: entry\n", __func__);
 
 	spin_lock_irqsave(&adapter->lock, flags);
+
+	if (KS8842_USE_DMA(adapter))
+		ks8842_stop_dma(adapter);
+
 	/* disable interrupts */
 	ks8842_write16(adapter, 18, 0, REG_IER);
 	ks8842_write16(adapter, 18, 0xFFFF, REG_ISR);
@@ -706,6 +1112,9 @@ static void ks8842_tx_timeout_work(struct work_struct *work)
 	ks8842_write_mac_addr(adapter, netdev->dev_addr);
 
 	ks8842_update_link_status(netdev, adapter);
+
+	if (KS8842_USE_DMA(adapter))
+		__ks8842_start_new_rx_dma(netdev);
 }
 
 static void ks8842_tx_timeout(struct net_device *netdev)
@@ -765,6 +1174,19 @@ static int __devinit ks8842_probe(struct platform_device *pdev)
 		goto err_get_irq;
 	}
 
+	adapter->dev = (pdev->dev.parent) ? pdev->dev.parent : &pdev->dev;
+
+	/* DMA is only supported when accessed via timberdale */
+	if (!(adapter->conf_flags & MICREL_KS884X) && pdata &&
+		(pdata->tx_dma_channel != -1) &&
+		(pdata->rx_dma_channel != -1)) {
+		adapter->dma_rx.channel = pdata->rx_dma_channel;
+		adapter->dma_tx.channel = pdata->tx_dma_channel;
+	} else {
+		adapter->dma_rx.channel = -1;
+		adapter->dma_tx.channel = -1;
+	}
+
 	tasklet_init(&adapter->tasklet, ks8842_tasklet, (unsigned long)netdev);
 	spin_lock_init(&adapter->lock);
 
diff --git a/include/linux/ks8842.h b/include/linux/ks8842.h
index da0341b8ca0..14ba4452296 100644
--- a/include/linux/ks8842.h
+++ b/include/linux/ks8842.h
@@ -25,10 +25,14 @@
  * struct ks8842_platform_data - Platform data of the KS8842 network driver
  * @macaddr:	The MAC address of the device, set to all 0:s to use the on in
  *		the chip.
+ * @rx_dma_channel:	The DMA channel to use for RX, -1 for none.
+ * @tx_dma_channel:	The DMA channel to use for TX, -1 for none.
  *
  */
 struct ks8842_platform_data {
 	u8 macaddr[ETH_ALEN];
+	int rx_dma_channel;
+	int tx_dma_channel;
 };
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From c7f52cdc2f3e1733d3864e439ac2e92edd99ef31 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 22 Jul 2010 22:58:01 -0700
Subject: support multiple .discard.* sections to avoid section type conflicts

gcc 4.4.4 will complain if you use a .discard section for both text and
data ("causes a section type conflict").  Add support for ".discard.*"
sections, and use .discard.text for a dummy function in the x86
RESERVE_BRK() macro.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/include/asm/setup.h      | 2 +-
 include/asm-generic/vmlinux.lds.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 86b1506f417..ef292c792d7 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -82,7 +82,7 @@ void *extend_brk(size_t size, size_t align);
  * executable.)
  */
 #define RESERVE_BRK(name,sz)						\
-	static void __section(.discard) __used				\
+	static void __section(.discard.text) __used			\
 	__brk_reservation_fn_##name##__(void) {				\
 		asm volatile (						\
 			".pushsection .brk_reservation,\"aw\",@nobits;" \
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 48c5299cbf2..ae6b88eb1de 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -643,6 +643,7 @@
 	EXIT_DATA							\
 	EXIT_CALL							\
 	*(.discard)							\
+	*(.discard.*)							\
 	}
 
 /**
-- 
cgit v1.2.3-70-g09d2


From d7926ee38f5c6e0bbebe712304f99a4c67e40f84 Mon Sep 17 00:00:00 2001
From: Sridhar Samudrala <samudrala.sridhar@gmail.com>
Date: Sun, 30 May 2010 22:24:39 +0200
Subject: cgroups: Add an API to attach a task to current task's cgroup

Add a new kernel API to attach a task to current task's cgroup
in all the active hierarchies.

Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paul Menage <menage@google.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
---
 include/linux/cgroup.h |  7 +++++++
 kernel/cgroup.c        | 23 +++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 0c621604baa..e0aa067d1b1 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -570,6 +570,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
 void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
 int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task(struct cgroup *, struct task_struct *);
+int cgroup_attach_task_current_cg(struct task_struct *);
 
 /*
  * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
@@ -626,6 +627,12 @@ static inline int cgroupstats_build(struct cgroupstats *stats,
 	return -EINVAL;
 }
 
+/* No cgroups - nothing to do */
+static inline int cgroup_attach_task_current_cg(struct task_struct *t)
+{
+	return 0;
+}
+
 #endif /* !CONFIG_CGROUPS */
 
 #endif /* _LINUX_CGROUP_H */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 422cb19f156..37642ad9cca 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1788,6 +1788,29 @@ out:
 	return retval;
 }
 
+/**
+ * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup
+ * @tsk: the task to be attached
+ */
+int cgroup_attach_task_current_cg(struct task_struct *tsk)
+{
+	struct cgroupfs_root *root;
+	struct cgroup *cur_cg;
+	int retval = 0;
+
+	cgroup_lock();
+	for_each_active_root(root) {
+		cur_cg = task_cgroup_from_root(current, root);
+		retval = cgroup_attach_task(cur_cg, tsk);
+		if (retval)
+			break;
+	}
+	cgroup_unlock();
+
+	return retval;
+}
+EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg);
+
 /*
  * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
  * held. May take task_lock of task
-- 
cgit v1.2.3-70-g09d2


From 3b2bef1fc85f127a99ad6b90a94b033fdc57341c Mon Sep 17 00:00:00 2001
From: Vikas Chaudhary <vikas.chaudhary@qlogic.com>
Date: Sat, 10 Jul 2010 14:51:30 +0530
Subject: [SCSI] iscsi_transport: added new iscsi_param to display target alias
 in sysfs

Signed-off-by: Vikas Chaudhary <vikas.chaudhary@qlogic.com>
Signed-off-by: Ravi Anand <ravi.anand@qlogic.com>
Reviewed-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/qla4xxx/ql4_init.c     | 3 +++
 drivers/scsi/qla4xxx/ql4_os.c       | 7 ++++++-
 drivers/scsi/scsi_transport_iscsi.c | 6 ++++--
 include/scsi/iscsi_if.h             | 2 ++
 4 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/qla4xxx/ql4_init.c b/drivers/scsi/qla4xxx/ql4_init.c
index e6b73b9fcc5..266ebd45396 100644
--- a/drivers/scsi/qla4xxx/ql4_init.c
+++ b/drivers/scsi/qla4xxx/ql4_init.c
@@ -594,6 +594,9 @@ static int qla4xxx_update_ddb_entry(struct scsi_qla_host *ha,
 	memcpy(&ddb_entry->iscsi_name[0], &fw_ddb_entry->iscsi_name[0],
 	       min(sizeof(ddb_entry->iscsi_name),
 		   sizeof(fw_ddb_entry->iscsi_name)));
+	memcpy(&ddb_entry->iscsi_alias[0], &fw_ddb_entry->iscsi_alias[0],
+	       min(sizeof(ddb_entry->iscsi_alias),
+		   sizeof(fw_ddb_entry->iscsi_alias)));
 	memcpy(&ddb_entry->ip_addr[0], &fw_ddb_entry->ip_addr[0],
 	       min(sizeof(ddb_entry->ip_addr), sizeof(fw_ddb_entry->ip_addr)));
 
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index daf5a4bf9b0..821384147a4 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -126,7 +126,8 @@ static struct iscsi_transport qla4xxx_iscsi_transport = {
 	.caps			= CAP_FW_DB | CAP_SENDTARGETS_OFFLOAD |
 				  CAP_DATA_PATH_OFFLOAD,
 	.param_mask		= ISCSI_CONN_PORT | ISCSI_CONN_ADDRESS |
-				  ISCSI_TARGET_NAME | ISCSI_TPGT,
+				  ISCSI_TARGET_NAME | ISCSI_TPGT |
+				  ISCSI_TARGET_ALIAS,
 	.host_param_mask	= ISCSI_HOST_HWADDRESS |
 				  ISCSI_HOST_IPADDRESS |
 				  ISCSI_HOST_INITIATOR_NAME,
@@ -210,6 +211,10 @@ static int qla4xxx_sess_get_param(struct iscsi_cls_session *sess,
 	case ISCSI_PARAM_TPGT:
 		len = sprintf(buf, "%u\n", ddb_entry->tpgt);
 		break;
+	case ISCSI_PARAM_TARGET_ALIAS:
+		len = snprintf(buf, PAGE_SIZE - 1, "%s\n",
+		    ddb_entry->iscsi_alias);
+		break;
 	default:
 		return -ENOSYS;
 	}
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 1e6d4793542..b9aec304872 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -31,7 +31,7 @@
 #include <scsi/scsi_transport_iscsi.h>
 #include <scsi/iscsi_if.h>
 
-#define ISCSI_SESSION_ATTRS 22
+#define ISCSI_SESSION_ATTRS 23
 #define ISCSI_CONN_ATTRS 13
 #define ISCSI_HOST_ATTRS 4
 
@@ -1763,7 +1763,8 @@ iscsi_session_attr(abort_tmo, ISCSI_PARAM_ABORT_TMO, 0);
 iscsi_session_attr(lu_reset_tmo, ISCSI_PARAM_LU_RESET_TMO, 0);
 iscsi_session_attr(tgt_reset_tmo, ISCSI_PARAM_TGT_RESET_TMO, 0);
 iscsi_session_attr(ifacename, ISCSI_PARAM_IFACE_NAME, 0);
-iscsi_session_attr(initiatorname, ISCSI_PARAM_INITIATOR_NAME, 0)
+iscsi_session_attr(initiatorname, ISCSI_PARAM_INITIATOR_NAME, 0);
+iscsi_session_attr(targetalias, ISCSI_PARAM_TARGET_ALIAS, 0);
 
 static ssize_t
 show_priv_session_state(struct device *dev, struct device_attribute *attr,
@@ -2006,6 +2007,7 @@ iscsi_register_transport(struct iscsi_transport *tt)
 	SETUP_SESSION_RD_ATTR(tgt_reset_tmo,ISCSI_TGT_RESET_TMO);
 	SETUP_SESSION_RD_ATTR(ifacename, ISCSI_IFACE_NAME);
 	SETUP_SESSION_RD_ATTR(initiatorname, ISCSI_INITIATOR_NAME);
+	SETUP_SESSION_RD_ATTR(targetalias, ISCSI_TARGET_ALIAS);
 	SETUP_PRIV_SESSION_RD_ATTR(recovery_tmo);
 	SETUP_PRIV_SESSION_RD_ATTR(state);
 
diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h
index 66d377b9c72..a8631acd37c 100644
--- a/include/scsi/iscsi_if.h
+++ b/include/scsi/iscsi_if.h
@@ -313,6 +313,7 @@ enum iscsi_param {
 	ISCSI_PARAM_INITIATOR_NAME,
 
 	ISCSI_PARAM_TGT_RESET_TMO,
+	ISCSI_PARAM_TARGET_ALIAS,
 	/* must always be last */
 	ISCSI_PARAM_MAX,
 };
@@ -353,6 +354,7 @@ enum iscsi_param {
 #define ISCSI_ISID			(1ULL << ISCSI_PARAM_ISID)
 #define ISCSI_INITIATOR_NAME		(1ULL << ISCSI_PARAM_INITIATOR_NAME)
 #define ISCSI_TGT_RESET_TMO		(1ULL << ISCSI_PARAM_TGT_RESET_TMO)
+#define ISCSI_TARGET_ALIAS		(1ULL << ISCSI_PARAM_TARGET_ALIAS)
 
 /* iSCSI HBA params */
 enum iscsi_host_param {
-- 
cgit v1.2.3-70-g09d2


From d058fd31c7f44960b00566bda39c85377f461a7b Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 20 Jul 2010 15:19:15 -0700
Subject: [SCSI] fcoe: make it possible to verify fcoe with sparse

Analyzing fcoe with sparse currently fails. This is because struct
fcoe_rcv_info contains two enum members that have been declared with
__attribute__((packed)). Apparently gcc honors this attribute while sparse
ignores it. The result is that sizeof(struct fcoe_rcv_info)
== sizeof(struct sk_buff::cb) == 48 on a 64-bit system according to gcc, but
not according to sparse. The patch below modifies the definition of
struct fcoe_rcv_info such that gcc and sparse interpret this structure
definition in the same way. The current sparse output is as follows:

$ cd linux-2.6.34
$ make C=2 M=drivers/scsi/fcoe modules
 CHECK   drivers/scsi/fcoe/fcoe.c

include/scsi/fc_frame.h:81:9: error: invalid bitfield width, -1.
 CC [M]  drivers/scsi/fcoe/fcoe.o
 CHECK   drivers/scsi/fcoe/libfcoe.c

include/scsi/fc_frame.h:81:9: error: invalid bitfield width, -1.
drivers/scsi/fcoe/libfcoe.c:56:37: error: invalid initializer

Signed-off-by: Bart Van Assche <bart.vanassche@gmail.com>
Cc: jeykholt@cisco.com
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 include/scsi/fc_frame.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/scsi/fc_frame.h b/include/scsi/fc_frame.h
index 4d3e9c7b7c5..15427fab8a5 100644
--- a/include/scsi/fc_frame.h
+++ b/include/scsi/fc_frame.h
@@ -66,8 +66,8 @@ struct fcoe_rcv_info {
 	struct fc_fcp_pkt *fr_fsp;	/* for the corresponding fcp I/O */
 	u32		fr_crc;
 	u16		fr_max_payload;	/* max FC payload */
-	enum fc_sof	fr_sof;		/* start of frame delimiter */
-	enum fc_eof	fr_eof;		/* end of frame delimiter */
+	u8		fr_sof;		/* start of frame delimiter */
+	u8		fr_eof;		/* end of frame delimiter */
 	u8		fr_flags;	/* flags - see below */
 	u8		granted_mac[ETH_ALEN]; /* FCoE MAC address */
 };
-- 
cgit v1.2.3-70-g09d2


From 519e5135e2537c9dbc1cbcc0891b0a936ff5dcd2 Mon Sep 17 00:00:00 2001
From: Vasu Dev <vasu.dev@intel.com>
Date: Tue, 20 Jul 2010 15:19:32 -0700
Subject: [SCSI] fcoe: adds src and dest mac address checking for fcoe frames

This follows the FC-BB-5 Annex-D recommendation: if address
checking fails, drop the frame.

FIP code paths are already doing this so only needed for fcoe
frames.

The source address check is limited to FIP mode only, since
applying it in non-FIP mode could break p2p, where some code
paths use OUI-based addressing. Going forward FIP will be the
only mode, so restricting the check to FIP mode avoids breaking
non-FIP p2p mode for now.

-v2
Removes FCOE packet type checking since fcoe_rcv is
registered to receive only FCoE type packets from netdev
and it is already checked by netdev.

Signed-off-by: Vasu Dev <vasu.dev@intel.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/fcoe/fcoe.c | 20 +++++++++++++++++---
 include/scsi/libfcoe.h   | 10 ++++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index d340cf2d857..a120962b25b 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -1210,6 +1210,8 @@ int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev,
 	struct fcoe_interface *fcoe;
 	struct fc_frame_header *fh;
 	struct fcoe_percpu_s *fps;
+	struct fcoe_port *port;
+	struct ethhdr *eh;
 	unsigned int cpu;
 
 	fcoe = container_of(ptype, struct fcoe_interface, fcoe_packet_type);
@@ -1227,9 +1229,21 @@ int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev,
 			skb_tail_pointer(skb), skb_end_pointer(skb),
 			skb->csum, skb->dev ? skb->dev->name : "<NULL>");
 
-	/* check for FCOE packet type */
-	if (unlikely(eth_hdr(skb)->h_proto != htons(ETH_P_FCOE))) {
-		FCOE_NETDEV_DBG(netdev, "Wrong FC type frame");
+	/* check for mac addresses */
+	eh = eth_hdr(skb);
+	port = lport_priv(lport);
+	if (compare_ether_addr(eh->h_dest, port->data_src_addr) &&
+	    compare_ether_addr(eh->h_dest, fcoe->ctlr.ctl_src_addr) &&
+	    compare_ether_addr(eh->h_dest, (u8[6])FC_FCOE_FLOGI_MAC)) {
+		FCOE_NETDEV_DBG(netdev, "wrong destination mac address:%pM\n",
+				eh->h_dest);
+		goto err;
+	}
+
+	if (is_fip_mode(&fcoe->ctlr) &&
+	    compare_ether_addr(eh->h_source, fcoe->ctlr.dest_addr)) {
+		FCOE_NETDEV_DBG(netdev, "wrong source mac address:%pM\n",
+				eh->h_source);
 		goto err;
 	}
 
diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h
index ec13f51531f..81aee1c4c2f 100644
--- a/include/scsi/libfcoe.h
+++ b/include/scsi/libfcoe.h
@@ -170,4 +170,14 @@ int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *, struct fc_lport *,
 u64 fcoe_wwn_from_mac(unsigned char mac[], unsigned int, unsigned int);
 int fcoe_libfc_config(struct fc_lport *, struct libfc_function_template *);
 
+/**
+ * is_fip_mode() - returns true if FIP mode selected.
+ * @fip:	FCoE controller.
+ */
+static inline bool is_fip_mode(struct fcoe_ctlr *fip)
+{
+	return fip->state == FIP_ST_ENABLED;
+}
+
+
 #endif /* _LIBFCOE_H */
-- 
cgit v1.2.3-70-g09d2


From 42e9041467cf5fd33501b91b27e26807c259c896 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Tue, 20 Jul 2010 15:19:37 -0700
Subject: [SCSI] libfc: convert rport lookup to be RCU safe

To allow LLD to do lookups on rports without grabbing a mutex,
make them RCU-safe.  The caller of lport->tt.rport_lookup will
have the choice of holding disc_mutex or the rcu_read_lock().

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_disc.c  |  6 +++---
 drivers/scsi/libfc/fc_rport.c | 22 ++++++++++++++++++----
 include/scsi/libfc.h          |  2 ++
 3 files changed, 23 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c
index c7985da8809..d0fa9a0ddc8 100644
--- a/drivers/scsi/libfc/fc_disc.c
+++ b/drivers/scsi/libfc/fc_disc.c
@@ -63,12 +63,12 @@ static void fc_disc_restart(struct fc_disc *);
 void fc_disc_stop_rports(struct fc_disc *disc)
 {
 	struct fc_lport *lport;
-	struct fc_rport_priv *rdata, *next;
+	struct fc_rport_priv *rdata;
 
 	lport = disc->lport;
 
 	mutex_lock(&disc->disc_mutex);
-	list_for_each_entry_safe(rdata, next, &disc->rports, peers)
+	list_for_each_entry_rcu(rdata, &disc->rports, peers)
 		lport->tt.rport_logoff(rdata);
 	mutex_unlock(&disc->disc_mutex);
 }
@@ -292,7 +292,7 @@ static void fc_disc_done(struct fc_disc *disc, enum fc_disc_event event)
 	 * Skip ports which were never discovered.  These are the dNS port
 	 * and ports which were created by PLOGI.
 	 */
-	list_for_each_entry(rdata, &disc->rports, peers) {
+	list_for_each_entry_rcu(rdata, &disc->rports, peers) {
 		if (!rdata->disc_id)
 			continue;
 		if (rdata->disc_id == disc->disc_id)
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 363cde30c94..6b569732f89 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -95,13 +95,15 @@ static const char *fc_rport_state_names[] = {
  * fc_rport_lookup() - Lookup a remote port by port_id
  * @lport:   The local port to lookup the remote port on
  * @port_id: The remote port ID to look up
+ *
+ * The caller must hold either disc_mutex or rcu_read_lock().
  */
 static struct fc_rport_priv *fc_rport_lookup(const struct fc_lport *lport,
 					     u32 port_id)
 {
 	struct fc_rport_priv *rdata;
 
-	list_for_each_entry(rdata, &lport->disc.rports, peers)
+	list_for_each_entry_rcu(rdata, &lport->disc.rports, peers)
 		if (rdata->ids.port_id == port_id)
 			return rdata;
 	return NULL;
@@ -146,10 +148,22 @@ static struct fc_rport_priv *fc_rport_create(struct fc_lport *lport,
 	INIT_DELAYED_WORK(&rdata->retry_work, fc_rport_timeout);
 	INIT_WORK(&rdata->event_work, fc_rport_work);
 	if (port_id != FC_FID_DIR_SERV)
-		list_add(&rdata->peers, &lport->disc.rports);
+		list_add_rcu(&rdata->peers, &lport->disc.rports);
 	return rdata;
 }
 
+/**
+ * fc_rport_free_rcu() - Free a remote port
+ * @rcu: The rcu_head structure inside the remote port
+ */
+static void fc_rport_free_rcu(struct rcu_head *rcu)
+{
+	struct fc_rport_priv *rdata;
+
+	rdata = container_of(rcu, struct fc_rport_priv, rcu);
+	kfree(rdata);
+}
+
 /**
  * fc_rport_destroy() - Free a remote port after last reference is released
  * @kref: The remote port's kref
@@ -159,7 +173,7 @@ static void fc_rport_destroy(struct kref *kref)
 	struct fc_rport_priv *rdata;
 
 	rdata = container_of(kref, struct fc_rport_priv, kref);
-	kfree(rdata);
+	call_rcu(&rdata->rcu, fc_rport_free_rcu);
 }
 
 /**
@@ -334,7 +348,7 @@ static void fc_rport_work(struct work_struct *work)
 				mutex_unlock(&rdata->rp_mutex);
 			} else {
 				FC_RPORT_DBG(rdata, "work delete\n");
-				list_del(&rdata->peers);
+				list_del_rcu(&rdata->peers);
 				mutex_unlock(&rdata->rp_mutex);
 				kref_put(&rdata->kref, lport->tt.rport_destroy);
 			}
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 6d78df77dab..b0310b9b346 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -195,6 +195,7 @@ struct fc_rport_libfc_priv {
  * @rp_mutex:       The mutex that protects the remote port
  * @retry_work:     Handle for retries
  * @event_callback: Callback when READY, FAILED or LOGO states complete
+ * @rcu:	    Structure used for freeing in an RCU-safe manner
  */
 struct fc_rport_priv {
 	struct fc_lport		    *local_port;
@@ -217,6 +218,7 @@ struct fc_rport_priv {
 	struct list_head            peers;
 	struct work_struct          event_work;
 	u32			    supported_classes;
+	struct rcu_head		    rcu;
 };
 
 /**
-- 
cgit v1.2.3-70-g09d2


From f90377abcab2e305450ee76a0f9042907560c5d8 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Tue, 20 Jul 2010 15:19:42 -0700
Subject: [SCSI] libfc: provide space for LLD after remote port structure

Add pre-zeroed space after the allocation for fc_rport_priv
for use by the lower-level driver.

This is primarily for VN2VN FIP mode, but could be used in
other ways someday.

The space required is specified in lport->rport_priv_size.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_rport.c | 2 +-
 include/scsi/libfc.h          | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 6b569732f89..6d68482649c 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -127,7 +127,7 @@ static struct fc_rport_priv *fc_rport_create(struct fc_lport *lport,
 	if (rdata)
 		return rdata;
 
-	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
+	rdata = kzalloc(sizeof(*rdata) + lport->rport_priv_size, GFP_KERNEL);
 	if (!rdata)
 		return NULL;
 
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index b0310b9b346..fcbee8c38b0 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -799,6 +799,7 @@ struct fc_disc {
  * @mfs:                   The maximum Fibre Channel payload size
  * @max_retry_count:       The maximum retry attempts
  * @max_rport_retry_count: The maximum remote port retry attempts
+ * @rport_priv_size:       Size needed by driver after struct fc_rport_priv
  * @lro_xid:               The maximum XID for LRO
  * @lso_max:               The maximum large offload send size
  * @fcts:                  FC-4 type mask
@@ -848,6 +849,7 @@ struct fc_lport {
 	u32			       mfs;
 	u8			       max_retry_count;
 	u8			       max_rport_retry_count;
+	u16			       rport_priv_size;
 	u16			       link_speed;
 	u16			       link_supported_speeds;
 	u16			       lro_xid;
-- 
cgit v1.2.3-70-g09d2


From fdb068c6cd6e30d43664f856d3530715a5742713 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Tue, 20 Jul 2010 15:19:47 -0700
Subject: [SCSI] libfcoe: convert FIP to lock with mutex instead of spin lock

It turns out most of the FIP work is now done from worker threads
or process context, so there's no need to use a spin lock.

Change to use mutex instead of spin lock and delayed_work instead
of a timer.

This will make it nicer for the VN_port to VN_port feature that
will interact more with the libfc layers requiring that
spinlocks not be held.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/fcoe/libfcoe.c | 116 +++++++++++++++++++++-----------------------
 include/scsi/libfcoe.h      |   9 +---
 2 files changed, 57 insertions(+), 68 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c
index f009191063f..e510888e78c 100644
--- a/drivers/scsi/fcoe/libfcoe.c
+++ b/drivers/scsi/fcoe/libfcoe.c
@@ -113,7 +113,7 @@ void fcoe_ctlr_init(struct fcoe_ctlr *fip)
 	fip->state = FIP_ST_LINK_WAIT;
 	fip->mode = FIP_ST_AUTO;
 	INIT_LIST_HEAD(&fip->fcfs);
-	spin_lock_init(&fip->lock);
+	mutex_init(&fip->ctlr_mutex);
 	fip->flogi_oxid = FC_XID_UNKNOWN;
 	setup_timer(&fip->timer, fcoe_ctlr_timeout, (unsigned long)fip);
 	INIT_WORK(&fip->timer_work, fcoe_ctlr_timer_work);
@@ -159,10 +159,10 @@ void fcoe_ctlr_destroy(struct fcoe_ctlr *fip)
 	cancel_work_sync(&fip->recv_work);
 	skb_queue_purge(&fip->fip_recv_list);
 
-	spin_lock_bh(&fip->lock);
+	mutex_lock(&fip->ctlr_mutex);
 	fip->state = FIP_ST_DISABLED;
 	fcoe_ctlr_reset_fcfs(fip);
-	spin_unlock_bh(&fip->lock);
+	mutex_unlock(&fip->ctlr_mutex);
 	del_timer_sync(&fip->timer);
 	cancel_work_sync(&fip->timer_work);
 }
@@ -255,19 +255,19 @@ static void fcoe_ctlr_solicit(struct fcoe_ctlr *fip, struct fcoe_fcf *fcf)
  */
 void fcoe_ctlr_link_up(struct fcoe_ctlr *fip)
 {
-	spin_lock_bh(&fip->lock);
+	mutex_lock(&fip->ctlr_mutex);
 	if (fip->state == FIP_ST_NON_FIP || fip->state == FIP_ST_AUTO) {
-		spin_unlock_bh(&fip->lock);
+		mutex_unlock(&fip->ctlr_mutex);
 		fc_linkup(fip->lp);
 	} else if (fip->state == FIP_ST_LINK_WAIT) {
 		fip->state = fip->mode;
-		spin_unlock_bh(&fip->lock);
+		mutex_unlock(&fip->ctlr_mutex);
 		if (fip->state == FIP_ST_AUTO)
 			LIBFCOE_FIP_DBG(fip, "%s", "setting AUTO mode.\n");
 		fc_linkup(fip->lp);
 		fcoe_ctlr_solicit(fip, NULL);
 	} else
-		spin_unlock_bh(&fip->lock);
+		mutex_unlock(&fip->ctlr_mutex);
 }
 EXPORT_SYMBOL(fcoe_ctlr_link_up);
 
@@ -300,11 +300,11 @@ int fcoe_ctlr_link_down(struct fcoe_ctlr *fip)
 	int link_dropped;
 
 	LIBFCOE_FIP_DBG(fip, "link down.\n");
-	spin_lock_bh(&fip->lock);
+	mutex_lock(&fip->ctlr_mutex);
 	fcoe_ctlr_reset(fip);
 	link_dropped = fip->state != FIP_ST_LINK_WAIT;
 	fip->state = FIP_ST_LINK_WAIT;
-	spin_unlock_bh(&fip->lock);
+	mutex_unlock(&fip->ctlr_mutex);
 
 	if (link_dropped)
 		fc_linkdown(fip->lp);
@@ -577,12 +577,12 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
 	unsigned long sel_time = 0;
 	struct fcoe_dev_stats *stats;
 
+	stats = per_cpu_ptr(fip->lp->dev_stats, get_cpu());
+
 	list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
 		deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
 		if (fip->sel_fcf == fcf) {
 			if (time_after(jiffies, deadline)) {
-				stats = per_cpu_ptr(fip->lp->dev_stats,
-						    smp_processor_id());
 				stats->MissDiscAdvCount++;
 				printk(KERN_INFO "libfcoe: host%d: "
 				       "Missing Discovery Advertisement "
@@ -601,8 +601,6 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
 			WARN_ON(!fip->fcf_count);
 			fip->fcf_count--;
 			kfree(fcf);
-			stats = per_cpu_ptr(fip->lp->dev_stats,
-					    smp_processor_id());
 			stats->VLinkFailureCount++;
 		} else {
 			if (time_after(next_timer, deadline))
@@ -612,6 +610,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
 				sel_time = fcf->time;
 		}
 	}
+	put_cpu();
 	if (sel_time && !fip->sel_fcf && !fip->sel_time) {
 		sel_time += msecs_to_jiffies(FCOE_CTLR_START_DELAY);
 		fip->sel_time = sel_time;
@@ -768,7 +767,7 @@ static void fcoe_ctlr_recv_adv(struct fcoe_ctlr *fip, struct sk_buff *skb)
 	if (fcoe_ctlr_parse_adv(fip, skb, &new))
 		return;
 
-	spin_lock_bh(&fip->lock);
+	mutex_lock(&fip->ctlr_mutex);
 	first = list_empty(&fip->fcfs);
 	found = NULL;
 	list_for_each_entry(fcf, &fip->fcfs, list) {
@@ -847,7 +846,7 @@ static void fcoe_ctlr_recv_adv(struct fcoe_ctlr *fip, struct sk_buff *skb)
 			mod_timer(&fip->timer, fip->sel_time);
 	}
 out:
-	spin_unlock_bh(&fip->lock);
+	mutex_unlock(&fip->ctlr_mutex);
 }
 
 /**
@@ -1108,11 +1107,12 @@ static void fcoe_ctlr_recv_clr_vlink(struct fcoe_ctlr *fip,
 		if (is_vn_port)
 			fc_lport_reset(vn_port);
 		else {
-			spin_lock_bh(&fip->lock);
+			mutex_lock(&fip->ctlr_mutex);
 			per_cpu_ptr(lport->dev_stats,
-				    smp_processor_id())->VLinkFailureCount++;
+				    get_cpu())->VLinkFailureCount++;
+			put_cpu();
 			fcoe_ctlr_reset(fip);
-			spin_unlock_bh(&fip->lock);
+			mutex_unlock(&fip->ctlr_mutex);
 
 			fc_lport_reset(fip->lp);
 			fcoe_ctlr_solicit(fip, NULL);
@@ -1166,7 +1166,7 @@ static int fcoe_ctlr_recv_handler(struct fcoe_ctlr *fip, struct sk_buff *skb)
 	if (ntohs(fiph->fip_dl_len) * FIP_BPW + sizeof(*fiph) > skb->len)
 		goto drop;
 
-	spin_lock_bh(&fip->lock);
+	mutex_lock(&fip->ctlr_mutex);
 	state = fip->state;
 	if (state == FIP_ST_AUTO) {
 		fip->map_dest = 0;
@@ -1174,7 +1174,7 @@ static int fcoe_ctlr_recv_handler(struct fcoe_ctlr *fip, struct sk_buff *skb)
 		state = FIP_ST_ENABLED;
 		LIBFCOE_FIP_DBG(fip, "Using FIP mode\n");
 	}
-	spin_unlock_bh(&fip->lock);
+	mutex_unlock(&fip->ctlr_mutex);
 	if (state != FIP_ST_ENABLED)
 		goto drop;
 
@@ -1240,19 +1240,38 @@ static void fcoe_ctlr_select(struct fcoe_ctlr *fip)
 /**
  * fcoe_ctlr_timeout() - FIP timeout handler
  * @arg: The FCoE controller that timed out
- *
- * Ages FCFs.  Triggers FCF selection if possible.  Sends keep-alives.
  */
 static void fcoe_ctlr_timeout(unsigned long arg)
 {
 	struct fcoe_ctlr *fip = (struct fcoe_ctlr *)arg;
+
+	schedule_work(&fip->timer_work);
+}
+
+/**
+ * fcoe_ctlr_timer_work() - Worker thread function for timer work
+ * @work: Handle to a FCoE controller
+ *
+ * Ages FCFs.  Triggers FCF selection if possible.
+ * Sends keep-alives and resets.
+ */
+static void fcoe_ctlr_timer_work(struct work_struct *work)
+{
+	struct fcoe_ctlr *fip;
+	struct fc_lport *vport;
+	u8 *mac;
+	u8 reset = 0;
+	u8 send_ctlr_ka = 0;
+	u8 send_port_ka = 0;
 	struct fcoe_fcf *sel;
 	struct fcoe_fcf *fcf;
 	unsigned long next_timer;
 
-	spin_lock_bh(&fip->lock);
+	fip = container_of(work, struct fcoe_ctlr, timer_work);
+
+	mutex_lock(&fip->ctlr_mutex);
 	if (fip->state == FIP_ST_DISABLED) {
-		spin_unlock_bh(&fip->lock);
+		mutex_unlock(&fip->ctlr_mutex);
 		return;
 	}
 
@@ -1286,7 +1305,7 @@ static void fcoe_ctlr_timeout(unsigned long arg)
 			       "FIP Fibre-Channel Forwarder timed out.	"
 			       "Starting FCF discovery.\n",
 			       fip->lp->host->host_no);
-			fip->reset_req = 1;
+			reset = 1;
 			schedule_work(&fip->timer_work);
 		}
 	}
@@ -1294,7 +1313,7 @@ static void fcoe_ctlr_timeout(unsigned long arg)
 	if (sel && !sel->fd_flags) {
 		if (time_after_eq(jiffies, fip->ctlr_ka_time)) {
 			fip->ctlr_ka_time = jiffies + sel->fka_period;
-			fip->send_ctlr_ka = 1;
+			send_ctlr_ka = 1;
 		}
 		if (time_after(next_timer, fip->ctlr_ka_time))
 			next_timer = fip->ctlr_ka_time;
@@ -1302,37 +1321,14 @@ static void fcoe_ctlr_timeout(unsigned long arg)
 		if (time_after_eq(jiffies, fip->port_ka_time)) {
 			fip->port_ka_time = jiffies +
 				msecs_to_jiffies(FIP_VN_KA_PERIOD);
-			fip->send_port_ka = 1;
+			send_port_ka = 1;
 		}
 		if (time_after(next_timer, fip->port_ka_time))
 			next_timer = fip->port_ka_time;
 	}
 	if (!list_empty(&fip->fcfs))
 		mod_timer(&fip->timer, next_timer);
-	if (fip->send_ctlr_ka || fip->send_port_ka)
-		schedule_work(&fip->timer_work);
-	spin_unlock_bh(&fip->lock);
-}
-
-/**
- * fcoe_ctlr_timer_work() - Worker thread function for timer work
- * @work: Handle to a FCoE controller
- *
- * Sends keep-alives and resets which must not
- * be called from the timer directly, since they use a mutex.
- */
-static void fcoe_ctlr_timer_work(struct work_struct *work)
-{
-	struct fcoe_ctlr *fip;
-	struct fc_lport *vport;
-	u8 *mac;
-	int reset;
-
-	fip = container_of(work, struct fcoe_ctlr, timer_work);
-	spin_lock_bh(&fip->lock);
-	reset = fip->reset_req;
-	fip->reset_req = 0;
-	spin_unlock_bh(&fip->lock);
+	mutex_unlock(&fip->ctlr_mutex);
 
 	if (reset) {
 		fc_lport_reset(fip->lp);
@@ -1340,12 +1336,10 @@ static void fcoe_ctlr_timer_work(struct work_struct *work)
 		fcoe_ctlr_solicit(fip, NULL);
 	}
 
-	if (fip->send_ctlr_ka) {
-		fip->send_ctlr_ka = 0;
+	if (send_ctlr_ka)
 		fcoe_ctlr_send_keep_alive(fip, NULL, 0, fip->ctl_src_addr);
-	}
-	if (fip->send_port_ka) {
-		fip->send_port_ka = 0;
+
+	if (send_port_ka) {
 		mutex_lock(&fip->lp->lp_mutex);
 		mac = fip->get_src_addr(fip->lp);
 		fcoe_ctlr_send_keep_alive(fip, fip->lp, 1, mac);
@@ -1402,9 +1396,9 @@ int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *fip, struct fc_lport *lport,
 	if (op == ELS_LS_ACC && fh->fh_r_ctl == FC_RCTL_ELS_REP &&
 	    fip->flogi_oxid == ntohs(fh->fh_ox_id)) {
 
-		spin_lock_bh(&fip->lock);
+		mutex_lock(&fip->ctlr_mutex);
 		if (fip->state != FIP_ST_AUTO && fip->state != FIP_ST_NON_FIP) {
-			spin_unlock_bh(&fip->lock);
+			mutex_unlock(&fip->ctlr_mutex);
 			return -EINVAL;
 		}
 		fip->state = FIP_ST_NON_FIP;
@@ -1424,13 +1418,13 @@ int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *fip, struct fc_lport *lport,
 			fip->map_dest = 0;
 		}
 		fip->flogi_oxid = FC_XID_UNKNOWN;
-		spin_unlock_bh(&fip->lock);
+		mutex_unlock(&fip->ctlr_mutex);
 		fc_fcoe_set_mac(fr_cb(fp)->granted_mac, fh->fh_d_id);
 	} else if (op == ELS_FLOGI && fh->fh_r_ctl == FC_RCTL_ELS_REQ && sa) {
 		/*
 		 * Save source MAC for point-to-point responses.
 		 */
-		spin_lock_bh(&fip->lock);
+		mutex_lock(&fip->ctlr_mutex);
 		if (fip->state == FIP_ST_AUTO || fip->state == FIP_ST_NON_FIP) {
 			memcpy(fip->dest_addr, sa, ETH_ALEN);
 			fip->map_dest = 0;
@@ -1439,7 +1433,7 @@ int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *fip, struct fc_lport *lport,
 						"Setting non-FIP mode\n");
 			fip->state = FIP_ST_NON_FIP;
 		}
-		spin_unlock_bh(&fip->lock);
+		mutex_unlock(&fip->ctlr_mutex);
 	}
 	return 0;
 }
diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h
index 81aee1c4c2f..7d18b500f2c 100644
--- a/include/scsi/libfcoe.h
+++ b/include/scsi/libfcoe.h
@@ -75,14 +75,12 @@ enum fip_state {
  * @flogi_count:   number of FLOGI attempts in AUTO mode.
  * @map_dest:	   use the FC_MAP mode for destination MAC addresses.
  * @spma:	   supports SPMA server-provided MACs mode
- * @send_ctlr_ka:  need to send controller keep alive
- * @send_port_ka:  need to send port keep alives
  * @dest_addr:	   MAC address of the selected FC forwarder.
  * @ctl_src_addr:  the native MAC address of our local port.
  * @send:	   LLD-supplied function to handle sending FIP Ethernet frames
  * @update_mac:    LLD-supplied function to handle changes to MAC addresses.
  * @get_src_addr:  LLD-supplied function to supply a source MAC address.
- * @lock:	   lock protecting this structure.
+ * @ctlr_mutex:	   lock protecting this structure.
  *
  * This structure is used by all FCoE drivers.  It contains information
  * needed by all FCoE low-level drivers (LLDs) as well as internal state
@@ -106,18 +104,15 @@ struct fcoe_ctlr {
 	u16 user_mfs;
 	u16 flogi_oxid;
 	u8 flogi_count;
-	u8 reset_req;
 	u8 map_dest;
 	u8 spma;
-	u8 send_ctlr_ka;
-	u8 send_port_ka;
 	u8 dest_addr[ETH_ALEN];
 	u8 ctl_src_addr[ETH_ALEN];
 
 	void (*send)(struct fcoe_ctlr *, struct sk_buff *);
 	void (*update_mac)(struct fc_lport *, u8 *addr);
 	u8 * (*get_src_addr)(struct fc_lport *);
-	spinlock_t lock;
+	struct mutex ctlr_mutex;
 };
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 0685230c59b5482e04ab50e7afc51119ceaba651 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Tue, 20 Jul 2010 15:19:53 -0700
Subject: [SCSI] libfc: add discovery-private pointer for LLD

For VN_port to VN_port mode, FIP will do discovery and needs a
way to find its state from the local port or discovery structure.
It seems that any other LLD that implements its own discovery
would also need something like this.

Replace disc->lport with disc->priv, and use container_of to
find the lport.  We could use disc->priv for that, but
container_of is smaller and faster.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_disc.c  | 14 +++++++-------
 drivers/scsi/libfc/fc_libfc.h |  2 +-
 include/scsi/libfc.h          |  9 +++++++--
 3 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c
index d0fa9a0ddc8..04474556f2d 100644
--- a/drivers/scsi/libfc/fc_disc.c
+++ b/drivers/scsi/libfc/fc_disc.c
@@ -65,7 +65,7 @@ void fc_disc_stop_rports(struct fc_disc *disc)
 	struct fc_lport *lport;
 	struct fc_rport_priv *rdata;
 
-	lport = disc->lport;
+	lport = fc_disc_lport(disc);
 
 	mutex_lock(&disc->disc_mutex);
 	list_for_each_entry_rcu(rdata, &disc->rports, peers)
@@ -96,7 +96,7 @@ static void fc_disc_recv_rscn_req(struct fc_seq *sp, struct fc_frame *fp,
 	LIST_HEAD(disc_ports);
 	struct fc_disc_port *dp, *next;
 
-	lport = disc->lport;
+	lport = fc_disc_lport(disc);
 
 	FC_DISC_DBG(disc, "Received an RSCN event\n");
 
@@ -275,7 +275,7 @@ static void fc_disc_start(void (*disc_callback)(struct fc_lport *,
  */
 static void fc_disc_done(struct fc_disc *disc, enum fc_disc_event event)
 {
-	struct fc_lport *lport = disc->lport;
+	struct fc_lport *lport = fc_disc_lport(disc);
 	struct fc_rport_priv *rdata;
 
 	FC_DISC_DBG(disc, "Discovery complete\n");
@@ -313,7 +313,7 @@ static void fc_disc_done(struct fc_disc *disc, enum fc_disc_event event)
  */
 static void fc_disc_error(struct fc_disc *disc, struct fc_frame *fp)
 {
-	struct fc_lport *lport = disc->lport;
+	struct fc_lport *lport = fc_disc_lport(disc);
 	unsigned long delay = 0;
 
 	FC_DISC_DBG(disc, "Error %ld, retries %d/%d\n",
@@ -353,7 +353,7 @@ static void fc_disc_error(struct fc_disc *disc, struct fc_frame *fp)
 static void fc_disc_gpn_ft_req(struct fc_disc *disc)
 {
 	struct fc_frame *fp;
-	struct fc_lport *lport = disc->lport;
+	struct fc_lport *lport = fc_disc_lport(disc);
 
 	WARN_ON(!fc_lport_test_ready(lport));
 
@@ -396,7 +396,7 @@ static int fc_disc_gpn_ft_parse(struct fc_disc *disc, void *buf, size_t len)
 	struct fc_rport_identifiers ids;
 	struct fc_rport_priv *rdata;
 
-	lport = disc->lport;
+	lport = fc_disc_lport(disc);
 	disc->seq_count++;
 
 	/*
@@ -733,7 +733,7 @@ int fc_disc_init(struct fc_lport *lport)
 	mutex_init(&disc->disc_mutex);
 	INIT_LIST_HEAD(&disc->rports);
 
-	disc->lport = lport;
+	disc->priv = lport;
 
 	return 0;
 }
diff --git a/drivers/scsi/libfc/fc_libfc.h b/drivers/scsi/libfc/fc_libfc.h
index f5c0ca4b6ef..16d2162dda1 100644
--- a/drivers/scsi/libfc/fc_libfc.h
+++ b/drivers/scsi/libfc/fc_libfc.h
@@ -52,7 +52,7 @@ extern unsigned int fc_debug_logging;
 #define FC_DISC_DBG(disc, fmt, args...)				\
 	FC_CHECK_LOGGING(FC_DISC_LOGGING,			\
 			 printk(KERN_INFO "host%u: disc: " fmt,	\
-				(disc)->lport->host->host_no,	\
+				fc_disc_lport(disc)->host->host_no,	\
 				##args))
 
 #define FC_RPORT_ID_DBG(lport, port_id, fmt, args...)			\
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index fcbee8c38b0..5f64e593cca 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -739,7 +739,7 @@ struct libfc_function_template {
  * @buf_len:       Length of the discovery buffer
  * @disc_id:       Discovery ID
  * @rports:        List of discovered remote ports
- * @lport:         The local port that discovery is for
+ * @priv:          Private pointer for use by discovery code
  * @disc_mutex:    Mutex that protects the discovery context
  * @partial_buf:   Partial name buffer (if names are returned
  *                 in multiple frames)
@@ -755,7 +755,7 @@ struct fc_disc {
 	u16                   disc_id;
 
 	struct list_head      rports;
-	struct fc_lport	      *lport;
+	void		      *priv;
 	struct mutex	      disc_mutex;
 	struct fc_gpn_ft_resp partial_buf;
 	struct delayed_work   disc_work;
@@ -1003,6 +1003,11 @@ void fc_rport_terminate_io(struct fc_rport *);
  *****************************/
 int fc_disc_init(struct fc_lport *);
 
+static inline struct fc_lport *fc_disc_lport(struct fc_disc *disc)
+{
+	return container_of(disc, struct fc_lport, disc);
+}
+
 /*
  * FCP LAYER
  *****************************/
-- 
cgit v1.2.3-70-g09d2


From 3d902ac09a2812b359edf633425d1327a18399e9 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Tue, 20 Jul 2010 15:19:58 -0700
Subject: [SCSI] libfcoe: fcoe: fnic: change fcoe_ctlr_init interface to
 specify mode

There are three modes that libfcoe currently supports, and a new one
is coming.  Change the fcoe_ctlr_init() interface to add the mode
desired.  This should not change any functionality.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/fcoe/fcoe.c      |  2 +-
 drivers/scsi/fcoe/libfcoe.c   |  4 ++--
 drivers/scsi/fnic/fnic_main.c |  4 ++--
 include/scsi/libfcoe.h        | 11 ++++++++++-
 4 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index a120962b25b..9d64e08305c 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -357,7 +357,7 @@ static struct fcoe_interface *fcoe_interface_create(struct net_device *netdev)
 	/*
 	 * Initialize FIP.
 	 */
-	fcoe_ctlr_init(&fcoe->ctlr);
+	fcoe_ctlr_init(&fcoe->ctlr, FIP_MODE_AUTO);
 	fcoe->ctlr.send = fcoe_fip_send;
 	fcoe->ctlr.update_mac = fcoe_update_src_mac;
 	fcoe->ctlr.get_src_addr = fcoe_get_src_mac;
diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c
index e510888e78c..76056e4c929 100644
--- a/drivers/scsi/fcoe/libfcoe.c
+++ b/drivers/scsi/fcoe/libfcoe.c
@@ -108,10 +108,10 @@ static inline int fcoe_ctlr_fcf_usable(struct fcoe_fcf *fcf)
  * fcoe_ctlr_init() - Initialize the FCoE Controller instance
  * @fip: The FCoE controller to initialize
  */
-void fcoe_ctlr_init(struct fcoe_ctlr *fip)
+void fcoe_ctlr_init(struct fcoe_ctlr *fip, enum fip_state mode)
 {
 	fip->state = FIP_ST_LINK_WAIT;
-	fip->mode = FIP_ST_AUTO;
+	fip->mode = mode;
 	INIT_LIST_HEAD(&fip->fcfs);
 	mutex_init(&fip->ctlr_mutex);
 	fip->flogi_oxid = FC_XID_UNKNOWN;
diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
index 265e73d9cd6..d0fe1c3345b 100644
--- a/drivers/scsi/fnic/fnic_main.c
+++ b/drivers/scsi/fnic/fnic_main.c
@@ -617,7 +617,6 @@ static int __devinit fnic_probe(struct pci_dev *pdev,
 	fnic->ctlr.send = fnic_eth_send;
 	fnic->ctlr.update_mac = fnic_update_mac;
 	fnic->ctlr.get_src_addr = fnic_get_mac;
-	fcoe_ctlr_init(&fnic->ctlr);
 	if (fnic->config.flags & VFCF_FIP_CAPABLE) {
 		shost_printk(KERN_INFO, fnic->lport->host,
 			     "firmware supports FIP\n");
@@ -625,10 +624,11 @@ static int __devinit fnic_probe(struct pci_dev *pdev,
 		vnic_dev_packet_filter(fnic->vdev, 1, 1, 0, 0, 0);
 		vnic_dev_add_addr(fnic->vdev, FIP_ALL_ENODE_MACS);
 		vnic_dev_add_addr(fnic->vdev, fnic->ctlr.ctl_src_addr);
+		fcoe_ctlr_init(&fnic->ctlr, FIP_MODE_AUTO);
 	} else {
 		shost_printk(KERN_INFO, fnic->lport->host,
 			     "firmware uses non-FIP mode\n");
-		fnic->ctlr.mode = FIP_ST_NON_FIP;
+		fcoe_ctlr_init(&fnic->ctlr, FIP_MODE_NON_FIP);
 	}
 	fnic->state = FNIC_IN_FC_MODE;
 
diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h
index 7d18b500f2c..1a84a3182da 100644
--- a/include/scsi/libfcoe.h
+++ b/include/scsi/libfcoe.h
@@ -54,6 +54,15 @@ enum fip_state {
 	FIP_ST_ENABLED,
 };
 
+/*
+ * Modes:
+ * The mode is the state that is to be entered after link up.
+ * It must not change after fcoe_ctlr_init() sets it.
+ */
+#define FIP_MODE_AUTO		FIP_ST_AUTO
+#define FIP_MODE_NON_FIP	FIP_ST_NON_FIP
+#define FIP_MODE_FABRIC		FIP_ST_ENABLED
+
 /**
  * struct fcoe_ctlr - FCoE Controller and FIP state
  * @state:	   internal FIP state for network link and FIP or non-FIP mode.
@@ -152,7 +161,7 @@ struct fcoe_fcf {
 };
 
 /* FIP API functions */
-void fcoe_ctlr_init(struct fcoe_ctlr *);
+void fcoe_ctlr_init(struct fcoe_ctlr *, enum fip_state);
 void fcoe_ctlr_destroy(struct fcoe_ctlr *);
 void fcoe_ctlr_link_up(struct fcoe_ctlr *);
 int fcoe_ctlr_link_down(struct fcoe_ctlr *);
-- 
cgit v1.2.3-70-g09d2


From 3726f3584e113697b68d3d4ff1ecf1042a06f800 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Tue, 20 Jul 2010 15:20:03 -0700
Subject: [SCSI] libfc: Add local port point-to-multipoint flag

For VN_port to VN_port mode, the transport sets the port_id and
there's no lport FLOGI.  This is similar to FC loop mode.

Add a point_to_multipoint flag that indicates the local port is in
point-to-multipoint mode.  This skips FLOGI and discovery.
It also skips resetting the port_id on resets other than link down.

Add function fc_lport_set_local_id() that sets the local port_id.
This is called by libfcoe on behalf of the low-level driver
to set the port_id when the link comes up.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_lport.c | 36 +++++++++++++++++++++++++++++++++++-
 include/scsi/libfc.h          |  2 ++
 2 files changed, 37 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index 79c9e3ccd34..f7bff2cad4e 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -754,6 +754,34 @@ static void fc_lport_set_port_id(struct fc_lport *lport, u32 port_id,
 		lport->tt.lport_set_port_id(lport, port_id, fp);
 }
 
+/**
+ * fc_lport_set_local_id() - set the local port Port ID for point-to-multipoint
+ * @lport: The local port which will have its Port ID set.
+ * @port_id: The new port ID.
+ *
+ * Called by the lower-level driver when transport sets the local port_id.
+ * This is used in VN_port to VN_port mode for FCoE, and causes FLOGI and
+ * discovery to be skipped.
+ */
+void fc_lport_set_local_id(struct fc_lport *lport, u32 port_id)
+{
+	mutex_lock(&lport->lp_mutex);
+
+	fc_lport_set_port_id(lport, port_id, NULL);
+
+	switch (lport->state) {
+	case LPORT_ST_RESET:
+	case LPORT_ST_FLOGI:
+		if (port_id)
+			fc_lport_enter_ready(lport);
+		break;
+	default:
+		break;
+	}
+	mutex_unlock(&lport->lp_mutex);
+}
+EXPORT_SYMBOL(fc_lport_set_local_id);
+
 /**
  * fc_lport_recv_flogi_req() - Receive a FLOGI request
  * @sp_in: The sequence the FLOGI is on
@@ -954,7 +982,7 @@ static void fc_lport_reset_locked(struct fc_lport *lport)
 	lport->tt.exch_mgr_reset(lport, 0, 0);
 	fc_host_fabric_name(lport->host) = 0;
 
-	if (lport->port_id)
+	if (lport->port_id && (!lport->point_to_multipoint || !lport->link_up))
 		fc_lport_set_port_id(lport, 0, NULL);
 }
 
@@ -1536,6 +1564,12 @@ void fc_lport_enter_flogi(struct fc_lport *lport)
 
 	fc_lport_state_enter(lport, LPORT_ST_FLOGI);
 
+	if (lport->point_to_multipoint) {
+		if (lport->port_id)
+			fc_lport_enter_ready(lport);
+		return;
+	}
+
 	fp = fc_frame_alloc(lport, sizeof(struct fc_els_flogi));
 	if (!fp)
 		return fc_lport_error(lport, fp);
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 5f64e593cca..bd0560509ce 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -846,6 +846,7 @@ struct fc_lport {
 	u32			       lro_enabled:1;
 	u32			       does_npiv:1;
 	u32			       npiv_enabled:1;
+	u32			       point_to_multipoint:1;
 	u32			       mfs;
 	u8			       max_retry_count;
 	u8			       max_rport_retry_count;
@@ -991,6 +992,7 @@ int fc_set_mfs(struct fc_lport *, u32 mfs);
 struct fc_lport *libfc_vport_create(struct fc_vport *, int privsize);
 struct fc_lport *fc_vport_id_lookup(struct fc_lport *, u32 port_id);
 int fc_lport_bsg_request(struct fc_bsg_job *);
+void fc_lport_set_local_id(struct fc_lport *, u32 port_id);
 
 /*
  * REMOTE PORT LAYER
-- 
cgit v1.2.3-70-g09d2


From a7b12a279faaad26837276065104a1f9cf60e962 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Tue, 20 Jul 2010 15:20:08 -0700
Subject: [SCSI] libfc: add FLOGI state to rport for VN2VN

The FIP proposal for VN_port to VN_port point-to-multipoint
operation requires a FLOGI be sent to each remote port.
The FLOGI is sent with the assigned S_ID and D_IDs of the
local and remote ports.  This and the response get
FIP-encapsulated for Ethernet.

Add FLOGI state to the remote port state machine.
This will be skipped if not in point-to-multipoint mode.

To reduce a little duplication between PLOGI and FLOGI
response handling, added fc_rport_login_complete(), which
handles the parameters for the rdata struct.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_lport.c |   4 +-
 drivers/scsi/libfc/fc_rport.c | 290 ++++++++++++++++++++++++++++++++++++++++--
 include/scsi/fc/fc_els.h      |   2 +
 include/scsi/libfc.h          |   4 +
 4 files changed, 286 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index f7bff2cad4e..ec9850c4617 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -906,10 +906,10 @@ static void fc_lport_recv_req(struct fc_lport *lport, struct fc_seq *sp,
 		recv = lport->tt.rport_recv_req;
 		switch (fc_frame_payload_op(fp)) {
 		case ELS_FLOGI:
-			recv = fc_lport_recv_flogi_req;
+			if (!lport->point_to_multipoint)
+				recv = fc_lport_recv_flogi_req;
 			break;
 		case ELS_LOGO:
-			fh = fc_frame_header_get(fp);
 			if (ntoh24(fh->fh_s_id) == FC_FID_FLOGI)
 				recv = fc_lport_recv_logo_req;
 			break;
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 6d68482649c..4d6adf29b4f 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -60,6 +60,7 @@
 
 struct workqueue_struct *rport_event_queue;
 
+static void fc_rport_enter_flogi(struct fc_rport_priv *);
 static void fc_rport_enter_plogi(struct fc_rport_priv *);
 static void fc_rport_enter_prli(struct fc_rport_priv *);
 static void fc_rport_enter_rtv(struct fc_rport_priv *);
@@ -82,6 +83,8 @@ static void fc_rport_work(struct work_struct *);
 
 static const char *fc_rport_state_names[] = {
 	[RPORT_ST_INIT] = "Init",
+	[RPORT_ST_FLOGI] = "FLOGI",
+	[RPORT_ST_PLOGI_WAIT] = "PLOGI_WAIT",
 	[RPORT_ST_PLOGI] = "PLOGI",
 	[RPORT_ST_PRLI] = "PRLI",
 	[RPORT_ST_RTV] = "RTV",
@@ -207,7 +210,7 @@ EXPORT_SYMBOL(fc_set_rport_loss_tmo);
 /**
  * fc_plogi_get_maxframe() - Get the maximum payload from the common service
  *			     parameters in a FLOGI frame
- * @flp:    The FLOGI payload
+ * @flp:    The FLOGI or PLOGI payload
  * @maxval: The maximum frame size upper limit; this may be less than what
  *	    is in the service parameters
  */
@@ -344,7 +347,7 @@ static void fc_rport_work(struct work_struct *work)
 				rdata->major_retries++;
 				rdata->event = RPORT_EV_NONE;
 				FC_RPORT_DBG(rdata, "work restart\n");
-				fc_rport_enter_plogi(rdata);
+				fc_rport_enter_flogi(rdata);
 				mutex_unlock(&rdata->rp_mutex);
 			} else {
 				FC_RPORT_DBG(rdata, "work delete\n");
@@ -397,7 +400,7 @@ int fc_rport_login(struct fc_rport_priv *rdata)
 		break;
 	default:
 		FC_RPORT_DBG(rdata, "Login to port\n");
-		fc_rport_enter_plogi(rdata);
+		fc_rport_enter_flogi(rdata);
 		break;
 	}
 	mutex_unlock(&rdata->rp_mutex);
@@ -499,6 +502,9 @@ static void fc_rport_timeout(struct work_struct *work)
 	mutex_lock(&rdata->rp_mutex);
 
 	switch (rdata->rp_state) {
+	case RPORT_ST_FLOGI:
+		fc_rport_enter_flogi(rdata);
+		break;
 	case RPORT_ST_PLOGI:
 		fc_rport_enter_plogi(rdata);
 		break;
@@ -514,6 +520,7 @@ static void fc_rport_timeout(struct work_struct *work)
 	case RPORT_ST_ADISC:
 		fc_rport_enter_adisc(rdata);
 		break;
+	case RPORT_ST_PLOGI_WAIT:
 	case RPORT_ST_READY:
 	case RPORT_ST_INIT:
 	case RPORT_ST_DELETE:
@@ -538,6 +545,7 @@ static void fc_rport_error(struct fc_rport_priv *rdata, struct fc_frame *fp)
 		     fc_rport_state(rdata), rdata->retries);
 
 	switch (rdata->rp_state) {
+	case RPORT_ST_FLOGI:
 	case RPORT_ST_PLOGI:
 	case RPORT_ST_LOGO:
 		rdata->flags &= ~FC_RP_STARTED;
@@ -550,6 +558,7 @@ static void fc_rport_error(struct fc_rport_priv *rdata, struct fc_frame *fp)
 	case RPORT_ST_ADISC:
 		fc_rport_enter_logo(rdata);
 		break;
+	case RPORT_ST_PLOGI_WAIT:
 	case RPORT_ST_DELETE:
 	case RPORT_ST_READY:
 	case RPORT_ST_INIT:
@@ -592,7 +601,260 @@ static void fc_rport_error_retry(struct fc_rport_priv *rdata,
 }
 
 /**
- * fc_rport_plogi_recv_resp() - Handler for ELS PLOGI responses
+ * fc_rport_login_complete() - Handle parameters and completion of p-mp login.
+ * @rdata:  The remote port which we logged into or which logged into us.
+ * @fp:     The FLOGI or PLOGI request or response frame
+ *
+ * Returns non-zero error if a problem is detected with the frame.
+ * Does not free the frame.
+ *
+ * This is only used in point-to-multipoint mode for FIP currently.
+ */
+static int fc_rport_login_complete(struct fc_rport_priv *rdata,
+				   struct fc_frame *fp)
+{
+	struct fc_lport *lport = rdata->local_port;
+	struct fc_els_flogi *flogi;
+	unsigned int e_d_tov;
+	u16 csp_flags;
+
+	flogi = fc_frame_payload_get(fp, sizeof(*flogi));
+	if (!flogi)
+		return -EINVAL;
+
+	csp_flags = ntohs(flogi->fl_csp.sp_features);
+
+	if (fc_frame_payload_op(fp) == ELS_FLOGI) {
+		if (csp_flags & FC_SP_FT_FPORT) {
+			FC_RPORT_DBG(rdata, "Fabric bit set in FLOGI\n");
+			return -EINVAL;
+		}
+	} else {
+
+		/*
+		 * E_D_TOV is not valid on an incoming FLOGI request.
+		 */
+		e_d_tov = ntohl(flogi->fl_csp.sp_e_d_tov);
+		if (csp_flags & FC_SP_FT_EDTR)
+			e_d_tov /= 1000000;
+		if (e_d_tov > rdata->e_d_tov)
+			rdata->e_d_tov = e_d_tov;
+	}
+	rdata->maxframe_size = fc_plogi_get_maxframe(flogi, lport->mfs);
+	return 0;
+}
+
+/**
+ * fc_rport_flogi_resp() - Handle response to FLOGI request for p-mp mode
+ * @sp:	    The sequence that the FLOGI was on
+ * @fp:	    The FLOGI response frame
+ * @rp_arg: The remote port that received the FLOGI response
+ */
+void fc_rport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
+			 void *rp_arg)
+{
+	struct fc_rport_priv *rdata = rp_arg;
+	struct fc_lport *lport = rdata->local_port;
+	struct fc_els_flogi *flogi;
+	unsigned int r_a_tov;
+
+	FC_RPORT_DBG(rdata, "Received a FLOGI %s\n", fc_els_resp_type(fp));
+
+	if (fp == ERR_PTR(-FC_EX_CLOSED))
+		return;
+
+	mutex_lock(&rdata->rp_mutex);
+
+	if (rdata->rp_state != RPORT_ST_FLOGI) {
+		FC_RPORT_DBG(rdata, "Received a FLOGI response, but in state "
+			     "%s\n", fc_rport_state(rdata));
+		if (IS_ERR(fp))
+			goto err;
+		goto out;
+	}
+
+	if (IS_ERR(fp)) {
+		fc_rport_error(rdata, fp);
+		goto err;
+	}
+
+	if (fc_frame_payload_op(fp) != ELS_LS_ACC)
+		goto bad;
+	if (fc_rport_login_complete(rdata, fp))
+		goto bad;
+
+	flogi = fc_frame_payload_get(fp, sizeof(*flogi));
+	if (!flogi)
+		goto bad;
+	r_a_tov = ntohl(flogi->fl_csp.sp_r_a_tov);
+	if (r_a_tov > rdata->r_a_tov)
+		rdata->r_a_tov = r_a_tov;
+
+	if (rdata->ids.port_name < lport->wwpn)
+		fc_rport_enter_plogi(rdata);
+	else
+		fc_rport_state_enter(rdata, RPORT_ST_PLOGI_WAIT);
+out:
+	fc_frame_free(fp);
+err:
+	mutex_unlock(&rdata->rp_mutex);
+	kref_put(&rdata->kref, rdata->local_port->tt.rport_destroy);
+	return;
+bad:
+	FC_RPORT_DBG(rdata, "Bad FLOGI response\n");
+	fc_rport_error_retry(rdata, fp);
+	goto out;
+}
+
+/**
+ * fc_rport_enter_flogi() - Send a FLOGI request to the remote port for p-mp
+ * @rdata: The remote port to send a FLOGI to
+ *
+ * Locking Note: The rport lock is expected to be held before calling
+ * this routine.
+ */
+static void fc_rport_enter_flogi(struct fc_rport_priv *rdata)
+{
+	struct fc_lport *lport = rdata->local_port;
+	struct fc_frame *fp;
+
+	if (!lport->point_to_multipoint)
+		return fc_rport_enter_plogi(rdata);
+
+	FC_RPORT_DBG(rdata, "Entered FLOGI state from %s state\n",
+		     fc_rport_state(rdata));
+
+	fc_rport_state_enter(rdata, RPORT_ST_FLOGI);
+
+	fp = fc_frame_alloc(lport, sizeof(struct fc_els_flogi));
+	if (!fp)
+		return fc_rport_error_retry(rdata, fp);
+
+	if (!lport->tt.elsct_send(lport, rdata->ids.port_id, fp, ELS_FLOGI,
+				  fc_rport_flogi_resp, rdata,
+				  2 * lport->r_a_tov))
+		fc_rport_error_retry(rdata, NULL);
+	else
+		kref_get(&rdata->kref);
+}
+
+/**
+ * fc_rport_recv_flogi_req() - Handle Fabric Login (FLOGI) request in p-mp mode
+ * @lport: The local port that received the FLOGI request
+ * @sp:	   The sequence that the FLOGI request was on
+ * @rx_fp: The FLOGI request frame
+ */
+static void fc_rport_recv_flogi_req(struct fc_lport *lport,
+				    struct fc_seq *sp, struct fc_frame *rx_fp)
+{
+	struct fc_disc *disc;
+	struct fc_els_flogi *flp;
+	struct fc_rport_priv *rdata;
+	struct fc_frame *fp = rx_fp;
+	struct fc_exch *ep;
+	struct fc_frame_header *fh;
+	struct fc_seq_els_data rjt_data;
+	u32 sid, f_ctl;
+
+	rjt_data.fp = NULL;
+	fh = fc_frame_header_get(fp);
+	sid = ntoh24(fh->fh_s_id);
+
+	FC_RPORT_ID_DBG(lport, sid, "Received FLOGI request\n");
+
+	disc = &lport->disc;
+	mutex_lock(&disc->disc_mutex);
+
+	if (!lport->point_to_multipoint) {
+		rjt_data.reason = ELS_RJT_UNSUP;
+		rjt_data.explan = ELS_EXPL_NONE;
+		goto reject;
+	}
+
+	flp = fc_frame_payload_get(fp, sizeof(*flp));
+	if (!flp) {
+		rjt_data.reason = ELS_RJT_LOGIC;
+		rjt_data.explan = ELS_EXPL_INV_LEN;
+		goto reject;
+	}
+
+	rdata = lport->tt.rport_lookup(lport, sid);
+	if (!rdata) {
+		rjt_data.reason = ELS_RJT_FIP;
+		rjt_data.explan = ELS_EXPL_NOT_NEIGHBOR;
+		goto reject;
+	}
+	mutex_lock(&rdata->rp_mutex);
+
+	FC_RPORT_DBG(rdata, "Received FLOGI in %s state\n",
+		     fc_rport_state(rdata));
+
+	switch (rdata->rp_state) {
+	case RPORT_ST_INIT:
+	case RPORT_ST_LOGO:
+	case RPORT_ST_DELETE:
+		mutex_unlock(&rdata->rp_mutex);
+		rjt_data.reason = ELS_RJT_FIP;
+		rjt_data.explan = ELS_EXPL_NOT_NEIGHBOR;
+		goto reject;
+	case RPORT_ST_FLOGI:
+	case RPORT_ST_PLOGI_WAIT:
+	case RPORT_ST_PLOGI:
+		break;
+	case RPORT_ST_PRLI:
+	case RPORT_ST_RTV:
+	case RPORT_ST_READY:
+	case RPORT_ST_ADISC:
+		/*
+		 * Set the remote port to be deleted and to then restart.
+		 * This queues work to be sure exchanges are reset.
+		 */
+		fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
+		mutex_unlock(&rdata->rp_mutex);
+		rjt_data.reason = ELS_RJT_BUSY;
+		rjt_data.explan = ELS_EXPL_NONE;
+		goto reject;
+	}
+	if (fc_rport_login_complete(rdata, fp)) {
+		mutex_unlock(&rdata->rp_mutex);
+		rjt_data.reason = ELS_RJT_LOGIC;
+		rjt_data.explan = ELS_EXPL_NONE;
+		goto reject;
+	}
+	fc_frame_free(rx_fp);
+
+	fp = fc_frame_alloc(lport, sizeof(*flp));
+	if (!fp)
+		goto out;
+
+	sp = lport->tt.seq_start_next(sp);
+	fc_flogi_fill(lport, fp);
+	flp = fc_frame_payload_get(fp, sizeof(*flp));
+	flp->fl_cmd = ELS_LS_ACC;
+
+	f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT;
+	ep = fc_seq_exch(sp);
+	fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
+		       FC_TYPE_ELS, f_ctl, 0);
+	lport->tt.seq_send(lport, sp, fp);
+
+	if (rdata->ids.port_name < lport->wwpn)
+		fc_rport_enter_plogi(rdata);
+	else
+		fc_rport_state_enter(rdata, RPORT_ST_PLOGI_WAIT);
+out:
+	mutex_unlock(&rdata->rp_mutex);
+	mutex_unlock(&disc->disc_mutex);
+	return;
+
+reject:
+	mutex_unlock(&disc->disc_mutex);
+	lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+	fc_frame_free(fp);
+}
+
+/**
+ * fc_rport_plogi_resp() - Handler for ELS PLOGI responses
  * @sp:	       The sequence the PLOGI is on
  * @fp:	       The PLOGI response frame
  * @rdata_arg: The remote port that sent the PLOGI response
@@ -607,7 +869,6 @@ static void fc_rport_plogi_resp(struct fc_seq *sp, struct fc_frame *fp,
 	struct fc_rport_priv *rdata = rdata_arg;
 	struct fc_lport *lport = rdata->local_port;
 	struct fc_els_flogi *plp = NULL;
-	unsigned int tov;
 	u16 csp_seq;
 	u16 cssp_seq;
 	u8 op;
@@ -635,11 +896,8 @@ static void fc_rport_plogi_resp(struct fc_seq *sp, struct fc_frame *fp,
 		rdata->ids.port_name = get_unaligned_be64(&plp->fl_wwpn);
 		rdata->ids.node_name = get_unaligned_be64(&plp->fl_wwnn);
 
-		tov = ntohl(plp->fl_csp.sp_e_d_tov);
-		if (ntohs(plp->fl_csp.sp_features) & FC_SP_FT_EDTR)
-			tov /= 1000000;
-		if (tov > rdata->e_d_tov)
-			rdata->e_d_tov = tov;
+		if (lport->point_to_multipoint)
+			fc_rport_login_complete(rdata, fp);
 		csp_seq = ntohs(plp->fl_csp.sp_tot_seq);
 		cssp_seq = ntohs(plp->fl_cssp[3 - 1].cp_con_seq);
 		if (cssp_seq < csp_seq)
@@ -677,6 +935,7 @@ static void fc_rport_enter_plogi(struct fc_rport_priv *rdata)
 	rdata->maxframe_size = FC_MIN_MAX_PAYLOAD;
 	fp = fc_frame_alloc(lport, sizeof(struct fc_els_flogi));
 	if (!fp) {
+		FC_RPORT_DBG(rdata, "%s frame alloc failed\n", __func__);
 		fc_rport_error_retry(rdata, fp);
 		return;
 	}
@@ -1041,7 +1300,7 @@ static void fc_rport_adisc_resp(struct fc_seq *sp, struct fc_frame *fp,
 	    get_unaligned_be64(&adisc->adisc_wwpn) != rdata->ids.port_name ||
 	    get_unaligned_be64(&adisc->adisc_wwnn) != rdata->ids.node_name) {
 		FC_RPORT_DBG(rdata, "ADISC error or mismatch\n");
-		fc_rport_enter_plogi(rdata);
+		fc_rport_enter_flogi(rdata);
 	} else {
 		FC_RPORT_DBG(rdata, "ADISC OK\n");
 		fc_rport_enter_ready(rdata);
@@ -1291,12 +1550,15 @@ void fc_rport_recv_req(struct fc_seq *sp, struct fc_frame *fp,
 	struct fc_seq_els_data els_data;
 
 	/*
-	 * Handle PLOGI and LOGO requests separately, since they
+	 * Handle FLOGI, PLOGI and LOGO requests separately, since they
 	 * don't require prior login.
 	 * Check for unsupported opcodes first and reject them.
 	 * For some ops, it would be incorrect to reject with "PLOGI required".
 	 */
 	switch (fc_frame_payload_op(fp)) {
+	case ELS_FLOGI:
+		fc_rport_recv_flogi_req(lport, sp, fp);
+		break;
 	case ELS_PLOGI:
 		fc_rport_recv_plogi_req(lport, sp, fp);
 		break;
@@ -1386,6 +1648,9 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
 	case RPORT_ST_INIT:
 		FC_RPORT_DBG(rdata, "Received PLOGI in INIT state\n");
 		break;
+	case RPORT_ST_PLOGI_WAIT:
+		FC_RPORT_DBG(rdata, "Received PLOGI in PLOGI_WAIT state\n");
+		break;
 	case RPORT_ST_PLOGI:
 		FC_RPORT_DBG(rdata, "Received PLOGI in PLOGI state\n");
 		if (rdata->ids.port_name < lport->wwpn) {
@@ -1403,6 +1668,7 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
 			     "- ignored for now\n", rdata->rp_state);
 		/* XXX TBD - should reset */
 		break;
+	case RPORT_ST_FLOGI:
 	case RPORT_ST_DELETE:
 	case RPORT_ST_LOGO:
 		FC_RPORT_DBG(rdata, "Received PLOGI in state %s - send busy\n",
diff --git a/include/scsi/fc/fc_els.h b/include/scsi/fc/fc_els.h
index 70a7e92a766..481abbd48e3 100644
--- a/include/scsi/fc/fc_els.h
+++ b/include/scsi/fc/fc_els.h
@@ -191,6 +191,7 @@ enum fc_els_rjt_reason {
 	ELS_RJT_UNAB =		0x09,	/* unable to perform command request */
 	ELS_RJT_UNSUP =		0x0b,	/* command not supported */
 	ELS_RJT_INPROG =	0x0e,	/* command already in progress */
+	ELS_RJT_FIP =		0x20,	/* FIP error */
 	ELS_RJT_VENDOR =	0xff,	/* vendor specific error */
 };
 
@@ -212,6 +213,7 @@ enum fc_els_rjt_explan {
 	ELS_EXPL_UNAB_DATA =	0x2a,	/* unable to supply requested data */
 	ELS_EXPL_UNSUPR =	0x2c,	/* Request not supported */
 	ELS_EXPL_INV_LEN =	0x2d,	/* Invalid payload length */
+	ELS_EXPL_NOT_NEIGHBOR = 0x62,	/* VN2VN_Port not in neighbor set */
 	/* TBD - above definitions incomplete */
 };
 
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index bd0560509ce..24b91c92205 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -97,6 +97,8 @@ enum fc_disc_event {
 /**
  * enum fc_rport_state - Remote port states
  * @RPORT_ST_INIT:    Initialized
+ * @RPORT_ST_FLOGI:   Waiting for FLOGI completion for point-to-multipoint
+ * @RPORT_ST_PLOGI_WAIT:   Waiting for peer to login for point-to-multipoint
  * @RPORT_ST_PLOGI:   Waiting for PLOGI completion
  * @RPORT_ST_PRLI:    Waiting for PRLI completion
  * @RPORT_ST_RTV:     Waiting for RTV completion
@@ -107,6 +109,8 @@ enum fc_disc_event {
 */
 enum fc_rport_state {
 	RPORT_ST_INIT,
+	RPORT_ST_FLOGI,
+	RPORT_ST_PLOGI_WAIT,
 	RPORT_ST_PLOGI,
 	RPORT_ST_PRLI,
 	RPORT_ST_RTV,
-- 
cgit v1.2.3-70-g09d2


From f60e12e9c778c8256a646f80603d1b88ba5ce891 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Tue, 20 Jul 2010 15:20:14 -0700
Subject: [SCSI] libfc: track FIP exchanges

When an exchange is received with a FIP encapsulation, we need
to know that the response must be sent via FIP and what the original
ELS opcode was.  This becomes important for VN2VN mode, where we may
receive FLOGI or LOGO from several peer VN_ports, and the LS_ACC or
LS_RJT must be sent FIP-encapsulated with the correct sub-type.

Add a field to the struct fc_frame, fr_encaps, to indicate the
encapsulation values.  That term is chosen to be neutral and
LLD-agnostic in case non-FCoE/FIP LLDs might find it useful.

The frame fr_encaps is transferred from the ingress frame to the
exchange by fc_exch_recv_req(), and back to the outgoing frame
by fc_seq_send().

This is taking the last byte in the skb->cb array.  If needed,
we could combine the info in sof, eof, flags, and encaps
together into one field, but it'd be better to do that if
and when it's needed.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_exch.c | 2 ++
 include/scsi/fc_frame.h      | 3 +++
 include/scsi/libfc.h         | 2 ++
 3 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index 104e0fba7c4..61eabd3ce43 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -464,6 +464,7 @@ static int fc_seq_send(struct fc_lport *lport, struct fc_seq *sp,
 
 	f_ctl = ntoh24(fh->fh_f_ctl);
 	fc_exch_setup_hdr(ep, fp, f_ctl);
+	fr_encaps(fp) = ep->encaps;
 
 	/*
 	 * update sequence count if this frame is carrying
@@ -1259,6 +1260,7 @@ static void fc_exch_recv_req(struct fc_lport *lport, struct fc_exch_mgr *mp,
 		sp = fr_seq(fp);	/* sequence will be held */
 		ep = fc_seq_exch(sp);
 		fc_seq_send_ack(sp, fp);
+		ep->encaps = fr_encaps(fp);
 
 		/*
 		 * Call the receive function.
diff --git a/include/scsi/fc_frame.h b/include/scsi/fc_frame.h
index 15427fab8a5..29dd97d5b53 100644
--- a/include/scsi/fc_frame.h
+++ b/include/scsi/fc_frame.h
@@ -51,6 +51,7 @@
 #define fr_sof(fp)	(fr_cb(fp)->fr_sof)
 #define fr_eof(fp)	(fr_cb(fp)->fr_eof)
 #define fr_flags(fp)	(fr_cb(fp)->fr_flags)
+#define fr_encaps(fp)	(fr_cb(fp)->fr_encaps)
 #define fr_max_payload(fp)	(fr_cb(fp)->fr_max_payload)
 #define fr_fsp(fp)	(fr_cb(fp)->fr_fsp)
 #define fr_crc(fp)	(fr_cb(fp)->fr_crc)
@@ -69,6 +70,7 @@ struct fcoe_rcv_info {
 	u8		fr_sof;		/* start of frame delimiter */
 	u8		fr_eof;		/* end of frame delimiter */
 	u8		fr_flags;	/* flags - see below */
+	u8		fr_encaps;	/* LLD encapsulation info (e.g. FIP) */
 	u8		granted_mac[ETH_ALEN]; /* FCoE MAC address */
 };
 
@@ -97,6 +99,7 @@ static inline void fc_frame_init(struct fc_frame *fp)
 	fr_dev(fp) = NULL;
 	fr_seq(fp) = NULL;
 	fr_flags(fp) = 0;
+	fr_encaps(fp) = 0;
 }
 
 struct fc_frame *fc_frame_alloc_fill(struct fc_lport *, size_t payload_len);
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 24b91c92205..8d297f9a0a4 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -412,6 +412,7 @@ struct fc_seq {
  * @esb_stat:     ESB exchange status
  * @r_a_tov:      Resouce allocation time out value (in msecs)
  * @seq_id:       The next sequence ID to use
+ * @encaps:       encapsulation information for lower-level driver
  * @f_ctl:        F_CTL flags for the sequence
  * @fh_type:      The frame type
  * @class:        The class of service
@@ -443,6 +444,7 @@ struct fc_exch {
 	u32		    esb_stat;
 	u32		    r_a_tov;
 	u8		    seq_id;
+	u8		    encaps;
 	u32		    f_ctl;
 	u8		    fh_type;
 	enum fc_class	    class;
-- 
cgit v1.2.3-70-g09d2


From edcbb4395ecd2f2731fbf38ecbff5be0316513cb Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Tue, 20 Jul 2010 15:20:19 -0700
Subject: [SCSI] libfcoe: add protocol description of FIP VN2VN mode

The FC-BB-6 committee is proposing a new FIP usage model called
VN_port to VN_port mode.  It allows VN_ports to discover each other
over a loss-free L2 Ethernet without any FCF or Fibre-channel fabric
services.  This is point-to-multipoint.  There is also a variant
of this called point-to-point which provides for making sure there
is just one pair of ports operating over the Ethernet fabric.

This patch defines the new message type and subtypes as well as
one new descriptor type used by VN2VN mode.

These are all still at the proposed stage and subject to change.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 include/scsi/fc/fc_fip.h | 46 +++++++++++++++++++++++++++++++++++++++++++---
 include/scsi/fc/fc_ns.h  |  7 +++++++
 2 files changed, 50 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/scsi/fc/fc_fip.h b/include/scsi/fc/fc_fip.h
index 17baa19380f..ae25d4ab254 100644
--- a/include/scsi/fc/fc_fip.h
+++ b/include/scsi/fc/fc_fip.h
@@ -17,9 +17,12 @@
 #ifndef _FC_FIP_H_
 #define _FC_FIP_H_
 
+#include <scsi/fc/fc_ns.h>
+
 /*
  * This version is based on:
  * http://www.t11.org/ftp/t11/pub/fc/bb-5/08-543v1.pdf
+ * and T11 FC-BB-6 10-019v4.pdf (June 2010 VN2VN proposal)
  */
 
 #define FIP_DEF_PRI	128	/* default selection priority */
@@ -28,12 +31,25 @@
 #define FIP_VN_KA_PERIOD 90000	/* required VN_port keep-alive period (mS) */
 #define FIP_FCF_FUZZ	100	/* random time added by FCF (mS) */
 
+/*
+ * VN2VN proposed-standard values.
+ */
+#define FIP_VN_FC_MAP	0x0efd00 /* MAC OUI for VN2VN use */
+#define FIP_VN_PROBE_WAIT 100	/* interval between VN2VN probes (ms) */
+#define FIP_VN_ANN_WAIT 400	/* interval between VN2VN announcements (ms) */
+#define FIP_VN_RLIM_INT 10000	/* interval between probes when rate limited */
+#define FIP_VN_RLIM_COUNT 10	/* number of probes before rate limiting */
+#define FIP_VN_BEACON_INT 8000	/* interval between VN2VN beacons */
+#define FIP_VN_BEACON_FUZZ 100	/* random time to add to beacon period (ms) */
+
 /*
  * Multicast MAC addresses.  T11-adopted.
  */
-#define FIP_ALL_FCOE_MACS	((u8[6]) { 1, 0x10, 0x18, 1, 0, 0 })
-#define FIP_ALL_ENODE_MACS	((u8[6]) { 1, 0x10, 0x18, 1, 0, 1 })
-#define FIP_ALL_FCF_MACS	((u8[6]) { 1, 0x10, 0x18, 1, 0, 2 })
+#define FIP_ALL_FCOE_MACS	((__u8[6]) { 1, 0x10, 0x18, 1, 0, 0 })
+#define FIP_ALL_ENODE_MACS	((__u8[6]) { 1, 0x10, 0x18, 1, 0, 1 })
+#define FIP_ALL_FCF_MACS	((__u8[6]) { 1, 0x10, 0x18, 1, 0, 2 })
+#define FIP_ALL_VN2VN_MACS	((__u8[6]) { 1, 0x10, 0x18, 1, 0, 4 })
+#define FIP_ALL_P2P_MACS	((__u8[6]) { 1, 0x10, 0x18, 1, 0, 5 })
 
 #define FIP_VER		1		/* version for fip_header */
 
@@ -60,6 +76,7 @@ enum fip_opcode {
 	FIP_OP_LS =	2,		/* Link Service request or reply */
 	FIP_OP_CTRL =	3,		/* Keep Alive / Link Reset */
 	FIP_OP_VLAN =	4,		/* VLAN discovery */
+	FIP_OP_VN2VN =	5,		/* VN2VN operation */
 	FIP_OP_VENDOR_MIN = 0xfff8,	/* min vendor-specific opcode */
 	FIP_OP_VENDOR_MAX = 0xfffe,	/* max vendor-specific opcode */
 };
@@ -96,12 +113,24 @@ enum fip_vlan_subcode {
 	FIP_SC_VL_REP =	2,		/* reply */
 };
 
+/*
+ * Subcodes for FIP_OP_VN2VN.
+ */
+enum fip_vn2vn_subcode {
+	FIP_SC_VN_PROBE_REQ = 1,	/* probe request */
+	FIP_SC_VN_PROBE_REP = 2,	/* probe reply */
+	FIP_SC_VN_CLAIM_NOTIFY = 3,	/* claim notification */
+	FIP_SC_VN_CLAIM_REP = 4,	/* claim response */
+	FIP_SC_VN_BEACON = 5,		/* beacon */
+};
+
 /*
  * flags in header fip_flags.
  */
 enum fip_flag {
 	FIP_FL_FPMA =	0x8000,		/* supports FPMA fabric-provided MACs */
 	FIP_FL_SPMA =	0x4000,		/* supports SPMA server-provided MACs */
+	FIP_FL_REC_OR_P2P = 0x0008,	/* configured addr or point-to-point */
 	FIP_FL_AVAIL =	0x0004,		/* available for FLOGI/ELP */
 	FIP_FL_SOL =	0x0002,		/* this is a solicited message */
 	FIP_FL_FPORT =	0x0001,		/* sent from an F port */
@@ -130,6 +159,7 @@ enum fip_desc_type {
 	FIP_DT_FKA =	12,		/* advertisement keep-alive period */
 	FIP_DT_VENDOR =	13,		/* vendor ID */
 	FIP_DT_VLAN =	14,		/* vlan number */
+	FIP_DT_FC4F =	15,		/* FC-4 features */
 	FIP_DT_LIMIT,			/* max defined desc_type + 1 */
 	FIP_DT_VENDOR_BASE = 128,	/* first vendor-specific desc_type */
 };
@@ -228,6 +258,16 @@ enum fip_fka_flags {
 
 /* FIP_DT_FKA flags */
 
+/*
+ * FIP_DT_FC4F - FC-4 features.
+ */
+struct fip_fc4_feat {
+	struct fip_desc fd_desc;
+	__u8		fd_resvd[2];
+	struct fc_ns_fts fd_fts;
+	struct fc_ns_ff	fd_ff;
+} __attribute__((packed));
+
 /*
  * FIP_DT_VENDOR descriptor.
  */
diff --git a/include/scsi/fc/fc_ns.h b/include/scsi/fc/fc_ns.h
index e7d3ac497d7..185015dd116 100644
--- a/include/scsi/fc/fc_ns.h
+++ b/include/scsi/fc/fc_ns.h
@@ -99,6 +99,13 @@ struct fc_ns_fts {
 	__be32	ff_type_map[FC_NS_TYPES / FC_NS_BPW]; /* bitmap of FC-4 types */
 };
 
+/*
+ * FC4-features object.
+ */
+struct fc_ns_ff	{
+	__be32	fd_feat[FC_NS_TYPES * 4 / FC_NS_BPW]; /* 4-bits per FC-type */
+};
+
 /*
  * GID_PT request.
  */
-- 
cgit v1.2.3-70-g09d2


From e10f8c667b874a57512c936089092a3d1ef7ab8a Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Tue, 20 Jul 2010 15:20:30 -0700
Subject: [SCSI] libfcoe: fcoe: fnic: add FIP VN2VN point-to-multipoint support

The FC-BB-6 committee is proposing a new FIP usage model called
VN_port to VN_port mode.  It allows VN_ports to discover each other
over a loss-free L2 Ethernet without any FCF or Fibre-channel fabric
services.  This is point-to-multipoint.  There is also a variant
of this called point-to-point which provides for making sure there
is just one pair of ports operating over the Ethernet fabric.

We add these new states:  VNMP_START, _PROBE1, _PROBE2, _CLAIM, and _UP.
These usually go quickly in that sequence.  After waiting a random
amount of time up to 100 ms in START, we select a pseudo-random
proposed locally-unique port ID and send out probes in states PROBE1
and PROBE2, 100 ms apart.  If no probe responses are heard, we
proceed to CLAIM state 400 ms later and send a claim notification.
We wait another 400 ms to receive claim responses, which give us
a list of the other nodes on the network, including their FC-4
capabilities.  After another 400 ms we go to VNMP_UP state and
should start interoperating with any of the nodes for which we
received claim responses.  More details are in the spec.

Add the new mode as FIP_MODE_VN2VN.  The driver must specify
explicitly that it wants to operate in this mode.  There is
no automatic detection between point-to-multipoint and fabric
mode, and the local port initialization is affected, so it isn't
anticipated that there will ever be any such automatic switchover.

It may eventually be possible to have both fabric and VN2VN
modes on the same L2 network, which may be done by two separate
local VN_ports (lports).

When in VN2VN mode, FIP replaces libfc's fabric-oriented discovery
module with its own simple code that adds remote ports as they
are discovered from incoming claim notifications and responses.
These hooks are placed by fcoe_disc_init().

A linear list of discovered vn_ports is maintained under the
fcoe_ctlr struct.  It is expected to be short for now, and
accessed infrequently.  It is kept under RCU for lock-ordering
reasons.  The lport and/or rport mutexes may be held when we
need to lookup a fcoe_vnport during an ELS send.

Change fcoe_ctlr_encaps() to lookup the destination vn_port in
the list of peers for the destination MAC address of the
FIP-encapsulated frame.

Add a new function fcoe_disc_init() to initialize just the
discovery portion of libfcoe for VN2VN mode.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/fcoe/fcoe.c      |   16 +-
 drivers/scsi/fcoe/libfcoe.c   | 1075 ++++++++++++++++++++++++++++++++++++++---
 drivers/scsi/fnic/fnic_main.c |    7 +-
 include/scsi/libfcoe.h        |   42 +-
 4 files changed, 1071 insertions(+), 69 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 9d64e08305c..216aba375fe 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -315,7 +315,11 @@ static int fcoe_interface_setup(struct fcoe_interface *fcoe,
 	dev_uc_add(netdev, flogi_maddr);
 	if (fip->spma)
 		dev_uc_add(netdev, fip->ctl_src_addr);
-	dev_mc_add(netdev, FIP_ALL_ENODE_MACS);
+	if (fip->mode == FIP_MODE_VN2VN) {
+		dev_mc_add(netdev, FIP_ALL_VN2VN_MACS);
+		dev_mc_add(netdev, FIP_ALL_P2P_MACS);
+	} else
+		dev_mc_add(netdev, FIP_ALL_ENODE_MACS);
 
 	/*
 	 * setup the receive function from ethernet driver
@@ -401,7 +405,11 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
 	dev_uc_del(netdev, flogi_maddr);
 	if (fip->spma)
 		dev_uc_del(netdev, fip->ctl_src_addr);
-	dev_mc_del(netdev, FIP_ALL_ENODE_MACS);
+	if (fip->mode == FIP_MODE_VN2VN) {
+		dev_mc_del(netdev, FIP_ALL_VN2VN_MACS);
+		dev_mc_del(netdev, FIP_ALL_P2P_MACS);
+	} else
+		dev_mc_del(netdev, FIP_ALL_ENODE_MACS);
 
 	/* Tell the LLD we are done w/ FCoE */
 	ops = netdev->netdev_ops;
@@ -967,7 +975,7 @@ static struct fc_lport *fcoe_if_create(struct fcoe_interface *fcoe,
 	}
 
 	/* Initialize the library */
-	rc = fcoe_libfc_config(lport, &fcoe_libfc_fcn_templ);
+	rc = fcoe_libfc_config(lport, &fcoe->ctlr, &fcoe_libfc_fcn_templ, 1);
 	if (rc) {
 		FCOE_NETDEV_DBG(netdev, "Could not configure libfc for the "
 				"interface\n");
@@ -2533,6 +2541,8 @@ static struct fc_seq *fcoe_elsct_send(struct fc_lport *lport, u32 did,
 	switch (op) {
 	case ELS_FLOGI:
 	case ELS_FDISC:
+		if (lport->point_to_multipoint)
+			break;
 		return fc_elsct_send(lport, did, fp, op, fcoe_flogi_resp,
 				     fip, timeout);
 	case ELS_LOGO:
diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c
index 11f3db5e506..79df78f2b08 100644
--- a/drivers/scsi/fcoe/libfcoe.c
+++ b/drivers/scsi/fcoe/libfcoe.c
@@ -39,6 +39,7 @@
 #include <scsi/fc/fc_fip.h>
 #include <scsi/fc/fc_encaps.h>
 #include <scsi/fc/fc_fcoe.h>
+#include <scsi/fc/fc_fcp.h>
 
 #include <scsi/libfc.h>
 #include <scsi/libfcoe.h>
@@ -54,7 +55,15 @@ static void fcoe_ctlr_timeout(unsigned long);
 static void fcoe_ctlr_timer_work(struct work_struct *);
 static void fcoe_ctlr_recv_work(struct work_struct *);
 
+static void fcoe_ctlr_vn_start(struct fcoe_ctlr *);
+static int fcoe_ctlr_vn_recv(struct fcoe_ctlr *, struct sk_buff *);
+static void fcoe_ctlr_vn_timeout(struct fcoe_ctlr *);
+static int fcoe_ctlr_vn_lookup(struct fcoe_ctlr *, u32, u8 *);
+
 static u8 fcoe_all_fcfs[ETH_ALEN] = FIP_ALL_FCF_MACS;
+static u8 fcoe_all_enode[ETH_ALEN] = FIP_ALL_ENODE_MACS;
+static u8 fcoe_all_vn2vn[ETH_ALEN] = FIP_ALL_VN2VN_MACS;
+static u8 fcoe_all_p2p[ETH_ALEN] = FIP_ALL_P2P_MACS;
 
 unsigned int libfcoe_debug_logging;
 module_param_named(debug_logging, libfcoe_debug_logging, int, S_IRUGO|S_IWUSR);
@@ -86,6 +95,11 @@ static const char *fcoe_ctlr_states[] = {
 	[FIP_ST_AUTO] =		"AUTO",
 	[FIP_ST_NON_FIP] =	"NON_FIP",
 	[FIP_ST_ENABLED] =	"ENABLED",
+	[FIP_ST_VNMP_START] =	"VNMP_START",
+	[FIP_ST_VNMP_PROBE1] =	"VNMP_PROBE1",
+	[FIP_ST_VNMP_PROBE2] =	"VNMP_PROBE2",
+	[FIP_ST_VNMP_CLAIM] =	"VNMP_CLAIM",
+	[FIP_ST_VNMP_UP] =	"VNMP_UP",
 };
 
 static const char *fcoe_ctlr_state(enum fip_state state)
@@ -295,11 +309,25 @@ void fcoe_ctlr_link_up(struct fcoe_ctlr *fip)
 		fc_linkup(fip->lp);
 	} else if (fip->state == FIP_ST_LINK_WAIT) {
 		fcoe_ctlr_set_state(fip, fip->mode);
-		mutex_unlock(&fip->ctlr_mutex);
-		if (fip->state == FIP_ST_AUTO)
+		switch (fip->mode) {
+		default:
+			LIBFCOE_FIP_DBG(fip, "invalid mode %d\n", fip->mode);
+			/* fall-through */
+		case FIP_MODE_AUTO:
 			LIBFCOE_FIP_DBG(fip, "%s", "setting AUTO mode.\n");
-		fc_linkup(fip->lp);
-		fcoe_ctlr_solicit(fip, NULL);
+			/* fall-through */
+		case FIP_MODE_FABRIC:
+		case FIP_MODE_NON_FIP:
+			mutex_unlock(&fip->ctlr_mutex);
+			fc_linkup(fip->lp);
+			fcoe_ctlr_solicit(fip, NULL);
+			break;
+		case FIP_MODE_VN2VN:
+			fcoe_ctlr_vn_start(fip);
+			mutex_unlock(&fip->ctlr_mutex);
+			fc_linkup(fip->lp);
+			break;
+		}
 	} else
 		mutex_unlock(&fip->ctlr_mutex);
 }
@@ -423,6 +451,7 @@ static void fcoe_ctlr_send_keep_alive(struct fcoe_ctlr *fip,
  * @fip:   The FCoE controller for the ELS frame
  * @dtype: The FIP descriptor type for the frame
  * @skb:   The FCoE ELS frame including FC header but no FCoE headers
+ * @d_id:  The destination port ID.
  *
  * Returns non-zero error code on failure.
  *
@@ -433,7 +462,7 @@ static void fcoe_ctlr_send_keep_alive(struct fcoe_ctlr *fip,
  * Ethernet header.  The tailroom is for the FIP MAC descriptor.
  */
 static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip, struct fc_lport *lport,
-			    u8 dtype, struct sk_buff *skb)
+			    u8 dtype, struct sk_buff *skb, u32 d_id)
 {
 	struct fip_encaps_head {
 		struct ethhdr eth;
@@ -445,21 +474,24 @@ static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip, struct fc_lport *lport,
 	size_t dlen;
 	u16 fip_flags;
 
-	fcf = fip->sel_fcf;
-	if (!fcf)
-		return -ENODEV;
-
-	/* set flags according to both FCF and lport's capability on SPMA */
-	fip_flags = fcf->flags;
-	fip_flags &= fip->spma ? FIP_FL_SPMA | FIP_FL_FPMA : FIP_FL_FPMA;
-	if (!fip_flags)
-		return -ENODEV;
-
 	dlen = sizeof(struct fip_encaps) + skb->len;	/* len before push */
 	cap = (struct fip_encaps_head *)skb_push(skb, sizeof(*cap));
-
 	memset(cap, 0, sizeof(*cap));
-	memcpy(cap->eth.h_dest, fcf->fcf_mac, ETH_ALEN);
+
+	if (lport->point_to_multipoint) {
+		if (fcoe_ctlr_vn_lookup(fip, d_id, cap->eth.h_dest))
+			return -ENODEV;
+	} else {
+		fcf = fip->sel_fcf;
+		if (!fcf)
+			return -ENODEV;
+		fip_flags = fcf->flags;
+		fip_flags &= fip->spma ? FIP_FL_SPMA | FIP_FL_FPMA :
+					 FIP_FL_FPMA;
+		if (!fip_flags)
+			return -ENODEV;
+		memcpy(cap->eth.h_dest, fcf->fcf_mac, ETH_ALEN);
+	}
 	memcpy(cap->eth.h_source, fip->ctl_src_addr, ETH_ALEN);
 	cap->eth.h_proto = htons(ETH_P_FIP);
 
@@ -503,19 +535,22 @@ static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip, struct fc_lport *lport,
  *
  * The caller must check that the length is a multiple of 4.
  * The SKB must have enough headroom (28 bytes) and tailroom (8 bytes).
+ * The skb must also be an fc_frame.
  */
 int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport,
 		       struct sk_buff *skb)
 {
+	struct fc_frame *fp;
 	struct fc_frame_header *fh;
 	u16 old_xid;
 	u8 op;
 	u8 mac[ETH_ALEN];
 
+	fp = container_of(skb, struct fc_frame, skb);
 	fh = (struct fc_frame_header *)skb->data;
 	op = *(u8 *)(fh + 1);
 
-	if (op == ELS_FLOGI) {
+	if (op == ELS_FLOGI && fip->mode != FIP_MODE_VN2VN) {
 		old_xid = fip->flogi_oxid;
 		fip->flogi_oxid = ntohs(fh->fh_ox_id);
 		if (fip->state == FIP_ST_AUTO) {
@@ -533,9 +568,8 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport,
 
 	if (fip->state == FIP_ST_NON_FIP)
 		return 0;
-	if (!fip->sel_fcf)
+	if (!fip->sel_fcf && fip->mode != FIP_MODE_VN2VN)
 		goto drop;
-
 	switch (op) {
 	case ELS_FLOGI:
 		op = FIP_DT_FLOGI;
@@ -546,36 +580,49 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport,
 		op = FIP_DT_FDISC;
 		break;
 	case ELS_LOGO:
-		if (fip->state != FIP_ST_ENABLED)
-			return 0;
-		if (ntoh24(fh->fh_d_id) != FC_FID_FLOGI)
-			return 0;
+		if (fip->mode == FIP_MODE_VN2VN) {
+			if (fip->state != FIP_ST_VNMP_UP)
+				return -EINVAL;
+			if (ntoh24(fh->fh_d_id) == FC_FID_FLOGI)
+				return -EINVAL;
+		} else {
+			if (fip->state != FIP_ST_ENABLED)
+				return 0;
+			if (ntoh24(fh->fh_d_id) != FC_FID_FLOGI)
+				return 0;
+		}
 		op = FIP_DT_LOGO;
 		break;
 	case ELS_LS_ACC:
-		if (fip->flogi_oxid == FC_XID_UNKNOWN)
-			return 0;
-		if (!ntoh24(fh->fh_s_id))
-			return 0;
-		if (fip->state == FIP_ST_AUTO)
-			return 0;
 		/*
-		 * Here we must've gotten an SID by accepting an FLOGI
+		 * If non-FIP, we may have gotten an SID by accepting an FLOGI
 		 * from a point-to-point connection.  Switch to using
 		 * the source mac based on the SID.  The destination
 		 * MAC in this case would have been set by receving the
 		 * FLOGI.
 		 */
-		fip->flogi_oxid = FC_XID_UNKNOWN;
-		fc_fcoe_set_mac(mac, fh->fh_d_id);
-		fip->update_mac(lport, mac);
+		if (fip->state == FIP_ST_NON_FIP) {
+			if (fip->flogi_oxid == FC_XID_UNKNOWN)
+				return 0;
+			fip->flogi_oxid = FC_XID_UNKNOWN;
+			fc_fcoe_set_mac(mac, fh->fh_d_id);
+			fip->update_mac(lport, mac);
+		}
+		/* fall through */
+	case ELS_LS_RJT:
+		op = fr_encaps(fp);
+		if (op)
+			break;
 		return 0;
 	default:
-		if (fip->state != FIP_ST_ENABLED)
+		if (fip->state != FIP_ST_ENABLED &&
+		    fip->state != FIP_ST_VNMP_UP)
 			goto drop;
 		return 0;
 	}
-	if (fcoe_ctlr_encaps(fip, lport, op, skb))
+	LIBFCOE_FIP_DBG(fip, "els_send op %u d_id %x\n",
+			op, ntoh24(fh->fh_d_id));
+	if (fcoe_ctlr_encaps(fip, lport, op, skb, ntoh24(fh->fh_d_id)))
 		goto drop;
 	fip->send(fip, skb);
 	return -EINPROGRESS;
@@ -717,8 +764,9 @@ static int fcoe_ctlr_parse_adv(struct fcoe_ctlr *fip,
 			       ((struct fip_mac_desc *)desc)->fd_mac,
 			       ETH_ALEN);
 			if (!is_valid_ether_addr(fcf->fcf_mac)) {
-				LIBFCOE_FIP_DBG(fip, "Invalid MAC address "
-						"in FIP adv\n");
+				LIBFCOE_FIP_DBG(fip,
+					"Invalid MAC addr %pM in FIP adv\n",
+					fcf->fcf_mac);
 				return -EINVAL;
 			}
 			desc_mask &= ~BIT(FIP_DT_MAC);
@@ -944,12 +992,6 @@ static void fcoe_ctlr_recv_els(struct fcoe_ctlr *fip, struct sk_buff *skb)
 			memcpy(granted_mac,
 			       ((struct fip_mac_desc *)desc)->fd_mac,
 			       ETH_ALEN);
-			if (!is_valid_ether_addr(granted_mac)) {
-				LIBFCOE_FIP_DBG(fip, "Invalid MAC address "
-						"in FIP ELS\n");
-				goto drop;
-			}
-			memcpy(fr_cb(fp)->granted_mac, granted_mac, ETH_ALEN);
 			break;
 		case FIP_DT_FLOGI:
 		case FIP_DT_FDISC:
@@ -990,10 +1032,20 @@ static void fcoe_ctlr_recv_els(struct fcoe_ctlr *fip, struct sk_buff *skb)
 		goto drop;
 	els_op = *(u8 *)(fh + 1);
 
-	if (els_dtype == FIP_DT_FLOGI && sub == FIP_SC_REP &&
-	    fip->flogi_oxid == ntohs(fh->fh_ox_id) &&
-	    els_op == ELS_LS_ACC && is_valid_ether_addr(granted_mac))
-		fip->flogi_oxid = FC_XID_UNKNOWN;
+	if ((els_dtype == FIP_DT_FLOGI || els_dtype == FIP_DT_FDISC) &&
+	    sub == FIP_SC_REP && els_op == ELS_LS_ACC &&
+	    fip->mode != FIP_MODE_VN2VN) {
+		if (!is_valid_ether_addr(granted_mac)) {
+			LIBFCOE_FIP_DBG(fip,
+				"Invalid MAC address %pM in FIP ELS\n",
+				granted_mac);
+			goto drop;
+		}
+		memcpy(fr_cb(fp)->granted_mac, granted_mac, ETH_ALEN);
+
+		if (fip->flogi_oxid == ntohs(fh->fh_ox_id))
+			fip->flogi_oxid = FC_XID_UNKNOWN;
+	}
 
 	if ((desc_cnt == 0) || ((els_op != ELS_LS_RJT) &&
 	    (!(1U << FIP_DT_MAC & desc_mask)))) {
@@ -1012,6 +1064,7 @@ static void fcoe_ctlr_recv_els(struct fcoe_ctlr *fip, struct sk_buff *skb)
 	fr_sof(fp) = FC_SOF_I3;
 	fr_eof(fp) = FC_EOF_T;
 	fr_dev(fp) = lport;
+	fr_encaps(fp) = els_dtype;
 
 	stats = per_cpu_ptr(lport->dev_stats, get_cpu());
 	stats->RxFrames++;
@@ -1188,8 +1241,13 @@ static int fcoe_ctlr_recv_handler(struct fcoe_ctlr *fip, struct sk_buff *skb)
 	if (skb->len < sizeof(*fiph))
 		goto drop;
 	eh = eth_hdr(skb);
-	if (compare_ether_addr(eh->h_dest, fip->ctl_src_addr) &&
-	    compare_ether_addr(eh->h_dest, FIP_ALL_ENODE_MACS))
+	if (fip->mode == FIP_MODE_VN2VN) {
+		if (compare_ether_addr(eh->h_dest, fip->ctl_src_addr) &&
+		    compare_ether_addr(eh->h_dest, fcoe_all_vn2vn) &&
+		    compare_ether_addr(eh->h_dest, fcoe_all_p2p))
+			goto drop;
+	} else if (compare_ether_addr(eh->h_dest, fip->ctl_src_addr) &&
+		   compare_ether_addr(eh->h_dest, fcoe_all_enode))
 		goto drop;
 	fiph = (struct fip_header *)skb->data;
 	op = ntohs(fiph->fip_op);
@@ -1209,13 +1267,22 @@ static int fcoe_ctlr_recv_handler(struct fcoe_ctlr *fip, struct sk_buff *skb)
 		LIBFCOE_FIP_DBG(fip, "Using FIP mode\n");
 	}
 	mutex_unlock(&fip->ctlr_mutex);
-	if (state != FIP_ST_ENABLED)
+
+	if (fip->mode == FIP_MODE_VN2VN && op == FIP_OP_VN2VN)
+		return fcoe_ctlr_vn_recv(fip, skb);
+
+	if (state != FIP_ST_ENABLED && state != FIP_ST_VNMP_UP &&
+	    state != FIP_ST_VNMP_CLAIM)
 		goto drop;
 
 	if (op == FIP_OP_LS) {
 		fcoe_ctlr_recv_els(fip, skb);	/* consumes skb */
 		return 0;
 	}
+
+	if (state != FIP_ST_ENABLED)
+		goto drop;
+
 	if (op == FIP_OP_DISC && sub == FIP_SC_ADV)
 		fcoe_ctlr_recv_adv(fip, skb);
 	else if (op == FIP_OP_CTRL && sub == FIP_SC_CLR_VLINK)
@@ -1302,7 +1369,8 @@ static void fcoe_ctlr_timer_work(struct work_struct *work)
 	unsigned long next_timer;
 
 	fip = container_of(work, struct fcoe_ctlr, timer_work);
-
+	if (fip->mode == FIP_MODE_VN2VN)
+		return fcoe_ctlr_vn_timeout(fip);
 	mutex_lock(&fip->ctlr_mutex);
 	if (fip->state == FIP_ST_DISABLED) {
 		mutex_unlock(&fip->ctlr_mutex);
@@ -1340,7 +1408,6 @@ static void fcoe_ctlr_timer_work(struct work_struct *work)
 			       "Starting FCF discovery.\n",
 			       fip->lp->host->host_no);
 			reset = 1;
-			schedule_work(&fip->timer_work);
 		}
 	}
 
@@ -1514,27 +1581,917 @@ u64 fcoe_wwn_from_mac(unsigned char mac[MAX_ADDR_LEN],
 }
 EXPORT_SYMBOL_GPL(fcoe_wwn_from_mac);
 
+/**
+ * fcoe_ctlr_rport() - return the fcoe_rport for a given fc_rport_priv
+ * @rdata: libfc remote port
+ */
+static inline struct fcoe_rport *fcoe_ctlr_rport(struct fc_rport_priv *rdata)
+{
+	return (struct fcoe_rport *)(rdata + 1);
+}
+
+/**
+ * fcoe_ctlr_vn_send() - Send a FIP VN2VN Probe Request or Reply.
+ * @fip: The FCoE controller
+ * @sub: sub-opcode for probe request, reply, or advertisement.
+ * @dest: The destination Ethernet MAC address
+ * @min_len: minimum size of the Ethernet payload to be sent
+ */
+static void fcoe_ctlr_vn_send(struct fcoe_ctlr *fip,
+			      enum fip_vn2vn_subcode sub,
+			      const u8 *dest, size_t min_len)
+{
+	struct sk_buff *skb;
+	struct fip_frame {
+		struct ethhdr eth;
+		struct fip_header fip;
+		struct fip_mac_desc mac;
+		struct fip_wwn_desc wwnn;
+		struct fip_vn_desc vn;
+	} __attribute__((packed)) *frame;
+	struct fip_fc4_feat *ff;
+	struct fip_size_desc *size;
+	u32 fcp_feat;
+	size_t len;
+	size_t dlen;
+
+	len = sizeof(*frame);
+	dlen = 0;
+	if (sub == FIP_SC_VN_CLAIM_NOTIFY || sub == FIP_SC_VN_CLAIM_REP) {
+		dlen = sizeof(struct fip_fc4_feat) +
+		       sizeof(struct fip_size_desc);
+		len += dlen;
+	}
+	dlen += sizeof(frame->mac) + sizeof(frame->wwnn) + sizeof(frame->vn);
+	len = max(len, min_len + sizeof(struct ethhdr));
+
+	skb = dev_alloc_skb(len);
+	if (!skb)
+		return;
+
+	frame = (struct fip_frame *)skb->data;
+	memset(frame, 0, len);
+	memcpy(frame->eth.h_dest, dest, ETH_ALEN);
+	memcpy(frame->eth.h_source, fip->ctl_src_addr, ETH_ALEN);
+	frame->eth.h_proto = htons(ETH_P_FIP);
+
+	frame->fip.fip_ver = FIP_VER_ENCAPS(FIP_VER);
+	frame->fip.fip_op = htons(FIP_OP_VN2VN);
+	frame->fip.fip_subcode = sub;
+	frame->fip.fip_dl_len = htons(dlen / FIP_BPW);
+
+	frame->mac.fd_desc.fip_dtype = FIP_DT_MAC;
+	frame->mac.fd_desc.fip_dlen = sizeof(frame->mac) / FIP_BPW;
+	memcpy(frame->mac.fd_mac, fip->ctl_src_addr, ETH_ALEN);
+
+	frame->wwnn.fd_desc.fip_dtype = FIP_DT_NAME;
+	frame->wwnn.fd_desc.fip_dlen = sizeof(frame->wwnn) / FIP_BPW;
+	put_unaligned_be64(fip->lp->wwnn, &frame->wwnn.fd_wwn);
+
+	frame->vn.fd_desc.fip_dtype = FIP_DT_VN_ID;
+	frame->vn.fd_desc.fip_dlen = sizeof(frame->vn) / FIP_BPW;
+	hton24(frame->vn.fd_mac, FIP_VN_FC_MAP);
+	hton24(frame->vn.fd_mac + 3, fip->port_id);
+	hton24(frame->vn.fd_fc_id, fip->port_id);
+	put_unaligned_be64(fip->lp->wwpn, &frame->vn.fd_wwpn);
+
+	/*
+	 * For claims, add FC-4 features.
+	 * TBD: Add interface to get fc-4 types and features from libfc.
+	 */
+	if (sub == FIP_SC_VN_CLAIM_NOTIFY || sub == FIP_SC_VN_CLAIM_REP) {
+		ff = (struct fip_fc4_feat *)(frame + 1);
+		ff->fd_desc.fip_dtype = FIP_DT_FC4F;
+		ff->fd_desc.fip_dlen = sizeof(*ff) / FIP_BPW;
+		ff->fd_fts = fip->lp->fcts;
+
+		fcp_feat = 0;
+		if (fip->lp->service_params & FCP_SPPF_INIT_FCN)
+			fcp_feat |= FCP_FEAT_INIT;
+		if (fip->lp->service_params & FCP_SPPF_TARG_FCN)
+			fcp_feat |= FCP_FEAT_TARG;
+		fcp_feat <<= (FC_TYPE_FCP * 4) % 32;
+		ff->fd_ff.fd_feat[FC_TYPE_FCP * 4 / 32] = htonl(fcp_feat);
+
+		size = (struct fip_size_desc *)(ff + 1);
+		size->fd_desc.fip_dtype = FIP_DT_FCOE_SIZE;
+		size->fd_desc.fip_dlen = sizeof(*size) / FIP_BPW;
+		size->fd_size = htons(fcoe_ctlr_fcoe_size(fip));
+	}
+
+	skb_put(skb, len);
+	skb->protocol = htons(ETH_P_FIP);
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+
+	fip->send(fip, skb);
+}
+
+/**
+ * fcoe_ctlr_vn_rport_callback - Event handler for rport events.
+ * @lport: The lport which is receiving the event
+ * @rdata: remote port private data
+ * @event: The event that occurred
+ *
+ * Locking Note:  The rport lock must not be held when calling this function.
+ */
+static void fcoe_ctlr_vn_rport_callback(struct fc_lport *lport,
+					struct fc_rport_priv *rdata,
+					enum fc_rport_event event)
+{
+	struct fcoe_ctlr *fip = lport->disc.priv;
+	struct fcoe_rport *frport = fcoe_ctlr_rport(rdata);
+
+	LIBFCOE_FIP_DBG(fip, "vn_rport_callback %x event %d\n",
+			rdata->ids.port_id, event);
+
+	mutex_lock(&fip->ctlr_mutex);
+	switch (event) {
+	case RPORT_EV_READY:
+		frport->login_count = 0;
+		break;
+	case RPORT_EV_LOGO:
+	case RPORT_EV_FAILED:
+	case RPORT_EV_STOP:
+		frport->login_count++;
+		if (frport->login_count > FCOE_CTLR_VN2VN_LOGIN_LIMIT) {
+			LIBFCOE_FIP_DBG(fip,
+					"rport FLOGI limited port_id %6.6x\n",
+					rdata->ids.port_id);
+			lport->tt.rport_logoff(rdata);
+		}
+		break;
+	default:
+		break;
+	}
+	mutex_unlock(&fip->ctlr_mutex);
+}
+
+static struct fc_rport_operations fcoe_ctlr_vn_rport_ops = {
+	.event_callback = fcoe_ctlr_vn_rport_callback,
+};
+
+/**
+ * fcoe_ctlr_disc_stop_locked() - stop discovery in VN2VN mode
+ * @lport: The local port
+ *
+ * Called with ctlr_mutex held.
+ */
+static void fcoe_ctlr_disc_stop_locked(struct fc_lport *lport)
+{
+	mutex_lock(&lport->disc.disc_mutex);
+	lport->disc.disc_callback = NULL;
+	mutex_unlock(&lport->disc.disc_mutex);
+}
+
+/**
+ * fcoe_ctlr_disc_stop() - stop discovery in VN2VN mode
+ * @lport: The local port
+ *
+ * Called through the local port template for discovery.
+ * Called without the ctlr_mutex held.
+ */
+static void fcoe_ctlr_disc_stop(struct fc_lport *lport)
+{
+	struct fcoe_ctlr *fip = lport->disc.priv;
+
+	mutex_lock(&fip->ctlr_mutex);
+	fcoe_ctlr_disc_stop_locked(lport);
+	mutex_unlock(&fip->ctlr_mutex);
+}
+
+/**
+ * fcoe_ctlr_disc_stop_final() - stop discovery for shutdown in VN2VN mode
+ * @lport: The local port
+ *
+ * Called through the local port template for discovery.
+ * Called without the ctlr_mutex held.
+ */
+static void fcoe_ctlr_disc_stop_final(struct fc_lport *lport)
+{
+	fcoe_ctlr_disc_stop(lport);
+	lport->tt.rport_flush_queue();
+	synchronize_rcu();
+}
+
+/**
+ * fcoe_ctlr_vn_restart() - VN2VN probe restart with new port_id
+ * @fip: The FCoE controller
+ *
+ * Called with fcoe_ctlr lock held.
+ */
+static void fcoe_ctlr_vn_restart(struct fcoe_ctlr *fip)
+{
+	unsigned long wait;
+	u32 port_id;
+
+	fcoe_ctlr_disc_stop_locked(fip->lp);
+
+	/*
+	 * Get proposed port ID.
+	 * If this is the first try after link up, use any previous port_id.
+	 * If there was none, use the low bits of the port_name.
+	 * On subsequent tries, get the next random one.
+	 * Don't use reserved IDs, use another non-zero value, just as random.
+	 */
+	port_id = fip->port_id;
+	if (fip->probe_tries)
+		port_id = prandom32(&fip->rnd_state) & 0xffff;
+	else if (!port_id)
+		port_id = fip->lp->wwpn & 0xffff;
+	if (!port_id || port_id == 0xffff)
+		port_id = 1;
+	fip->port_id = port_id;
+
+	if (fip->probe_tries < FIP_VN_RLIM_COUNT) {
+		fip->probe_tries++;
+		wait = random32() % FIP_VN_PROBE_WAIT;
+	} else
+		wait = FIP_VN_RLIM_INT;
+	mod_timer(&fip->timer, jiffies + msecs_to_jiffies(wait));
+	fcoe_ctlr_set_state(fip, FIP_ST_VNMP_START);
+}
+
+/**
+ * fcoe_ctlr_vn_start() - Start in VN2VN mode
+ * @fip: The FCoE controller
+ *
+ * Called with fcoe_ctlr lock held.
+ */
+static void fcoe_ctlr_vn_start(struct fcoe_ctlr *fip)
+{
+	fip->probe_tries = 0;
+	prandom32_seed(&fip->rnd_state, fip->lp->wwpn);
+	fcoe_ctlr_vn_restart(fip);
+}
+
+/**
+ * fcoe_ctlr_vn_parse - parse probe request or response
+ * @fip: The FCoE controller
+ * @skb: incoming packet
+ * @rdata: buffer for resulting parsed VN entry plus fcoe_rport
+ *
+ * Returns non-zero error number on error.
+ * Does not consume the packet.
+ */
+static int fcoe_ctlr_vn_parse(struct fcoe_ctlr *fip,
+			      struct sk_buff *skb,
+			      struct fc_rport_priv *rdata)
+{
+	struct fip_header *fiph;
+	struct fip_desc *desc = NULL;
+	struct fip_mac_desc *macd = NULL;
+	struct fip_wwn_desc *wwn = NULL;
+	struct fip_vn_desc *vn = NULL;
+	struct fip_size_desc *size = NULL;
+	struct fcoe_rport *frport;
+	size_t rlen;
+	size_t dlen;
+	u32 desc_mask = 0;
+	u32 dtype;
+	u8 sub;
+
+	memset(rdata, 0, sizeof(*rdata) + sizeof(*frport));
+	frport = fcoe_ctlr_rport(rdata);
+
+	fiph = (struct fip_header *)skb->data;
+	frport->flags = ntohs(fiph->fip_flags);
+
+	sub = fiph->fip_subcode;
+	switch (sub) {
+	case FIP_SC_VN_PROBE_REQ:
+	case FIP_SC_VN_PROBE_REP:
+	case FIP_SC_VN_BEACON:
+		desc_mask = BIT(FIP_DT_MAC) | BIT(FIP_DT_NAME) |
+			    BIT(FIP_DT_VN_ID);
+		break;
+	case FIP_SC_VN_CLAIM_NOTIFY:
+	case FIP_SC_VN_CLAIM_REP:
+		desc_mask = BIT(FIP_DT_MAC) | BIT(FIP_DT_NAME) |
+			    BIT(FIP_DT_VN_ID) | BIT(FIP_DT_FC4F) |
+			    BIT(FIP_DT_FCOE_SIZE);
+		break;
+	default:
+		LIBFCOE_FIP_DBG(fip, "vn_parse unknown subcode %u\n", sub);
+		return -EINVAL;
+	}
+
+	rlen = ntohs(fiph->fip_dl_len) * 4;
+	if (rlen + sizeof(*fiph) > skb->len)
+		return -EINVAL;
+
+	desc = (struct fip_desc *)(fiph + 1);
+	while (rlen > 0) {
+		dlen = desc->fip_dlen * FIP_BPW;
+		if (dlen < sizeof(*desc) || dlen > rlen)
+			return -EINVAL;
+
+		dtype = desc->fip_dtype;
+		if (dtype < 32) {
+			if (!(desc_mask & BIT(dtype))) {
+				LIBFCOE_FIP_DBG(fip,
+						"unexpected or duplicated desc "
+						"desc type %u in "
+						"FIP VN2VN subtype %u\n",
+						dtype, sub);
+				return -EINVAL;
+			}
+			desc_mask &= ~BIT(dtype);
+		}
+
+		switch (dtype) {
+		case FIP_DT_MAC:
+			if (dlen != sizeof(struct fip_mac_desc))
+				goto len_err;
+			macd = (struct fip_mac_desc *)desc;
+			if (!is_valid_ether_addr(macd->fd_mac)) {
+				LIBFCOE_FIP_DBG(fip,
+					"Invalid MAC addr %pM in FIP VN2VN\n",
+					 macd->fd_mac);
+				return -EINVAL;
+			}
+			memcpy(frport->enode_mac, macd->fd_mac, ETH_ALEN);
+			break;
+		case FIP_DT_NAME:
+			if (dlen != sizeof(struct fip_wwn_desc))
+				goto len_err;
+			wwn = (struct fip_wwn_desc *)desc;
+			rdata->ids.node_name = get_unaligned_be64(&wwn->fd_wwn);
+			break;
+		case FIP_DT_VN_ID:
+			if (dlen != sizeof(struct fip_vn_desc))
+				goto len_err;
+			vn = (struct fip_vn_desc *)desc;
+			memcpy(frport->vn_mac, vn->fd_mac, ETH_ALEN);
+			rdata->ids.port_id = ntoh24(vn->fd_fc_id);
+			rdata->ids.port_name = get_unaligned_be64(&vn->fd_wwpn);
+			break;
+		case FIP_DT_FC4F:
+			if (dlen != sizeof(struct fip_fc4_feat))
+				goto len_err;
+			break;
+		case FIP_DT_FCOE_SIZE:
+			if (dlen != sizeof(struct fip_size_desc))
+				goto len_err;
+			size = (struct fip_size_desc *)desc;
+			frport->fcoe_len = ntohs(size->fd_size);
+			break;
+		default:
+			LIBFCOE_FIP_DBG(fip, "unexpected descriptor type %x "
+					"in FIP probe\n", dtype);
+			/* standard says ignore unknown descriptors >= 128 */
+			if (dtype < FIP_DT_VENDOR_BASE)
+				return -EINVAL;
+			break;
+		}
+		desc = (struct fip_desc *)((char *)desc + dlen);
+		rlen -= dlen;
+	}
+	return 0;
+
+len_err:
+	LIBFCOE_FIP_DBG(fip, "FIP length error in descriptor type %x len %zu\n",
+			dtype, dlen);
+	return -EINVAL;
+}
+
+/**
+ * fcoe_ctlr_vn_send_claim() - send multicast FIP VN2VN Claim Notification.
+ * @fip: The FCoE controller
+ *
+ * Called with ctlr_mutex held.
+ */
+static void fcoe_ctlr_vn_send_claim(struct fcoe_ctlr *fip)
+{
+	fcoe_ctlr_vn_send(fip, FIP_SC_VN_CLAIM_NOTIFY, fcoe_all_vn2vn, 0);
+	fip->sol_time = jiffies;
+}
+
+/**
+ * fcoe_ctlr_vn_probe_req() - handle incoming VN2VN probe request.
+ * @fip: The FCoE controller
+ * @rdata: parsed remote port with frport from the probe request
+ *
+ * Called with ctlr_mutex held.
+ */
+static void fcoe_ctlr_vn_probe_req(struct fcoe_ctlr *fip,
+				   struct fc_rport_priv *rdata)
+{
+	struct fcoe_rport *frport = fcoe_ctlr_rport(rdata);
+
+	if (rdata->ids.port_id != fip->port_id)
+		return;
+
+	switch (fip->state) {
+	case FIP_ST_VNMP_CLAIM:
+	case FIP_ST_VNMP_UP:
+		fcoe_ctlr_vn_send(fip, FIP_SC_VN_PROBE_REP,
+				  frport->enode_mac, 0);
+		break;
+	case FIP_ST_VNMP_PROBE1:
+	case FIP_ST_VNMP_PROBE2:
+		/*
+		 * Decide whether to reply to the Probe.
+		 * Our selected address is never a "recorded" one, so
+		 * only reply if our WWPN is greater and the
+		 * Probe's REC bit is not set.
+		 * If we don't reply, we will change our address.
+		 */
+		if (fip->lp->wwpn > rdata->ids.port_name &&
+		    !(frport->flags & FIP_FL_REC_OR_P2P)) {
+			fcoe_ctlr_vn_send(fip, FIP_SC_VN_PROBE_REP,
+					  frport->enode_mac, 0);
+			break;
+		}
+		/* fall through */
+	case FIP_ST_VNMP_START:
+		fcoe_ctlr_vn_restart(fip);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * fcoe_ctlr_vn_probe_reply() - handle incoming VN2VN probe reply.
+ * @fip: The FCoE controller
+ * @rdata: parsed remote port with frport from the probe reply
+ *
+ * Called with ctlr_mutex held.
+ */
+static void fcoe_ctlr_vn_probe_reply(struct fcoe_ctlr *fip,
+				   struct fc_rport_priv *rdata)
+{
+	if (rdata->ids.port_id != fip->port_id)
+		return;
+	switch (fip->state) {
+	case FIP_ST_VNMP_START:
+	case FIP_ST_VNMP_PROBE1:
+	case FIP_ST_VNMP_PROBE2:
+	case FIP_ST_VNMP_CLAIM:
+		fcoe_ctlr_vn_restart(fip);
+		break;
+	case FIP_ST_VNMP_UP:
+		fcoe_ctlr_vn_send_claim(fip);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * fcoe_ctlr_vn_add() - Add a VN2VN entry to the list, based on a claim reply.
+ * @fip: The FCoE controller
+ * @new: newly-parsed remote port with frport as a template for new rdata
+ *
+ * Called with ctlr_mutex held.
+ */
+static void fcoe_ctlr_vn_add(struct fcoe_ctlr *fip, struct fc_rport_priv *new)
+{
+	struct fc_lport *lport = fip->lp;
+	struct fc_rport_priv *rdata;
+	struct fc_rport_identifiers *ids;
+	struct fcoe_rport *frport;
+	u32 port_id;
+
+	port_id = new->ids.port_id;
+	if (port_id == fip->port_id)
+		return;
+
+	mutex_lock(&lport->disc.disc_mutex);
+	rdata = lport->tt.rport_create(lport, port_id);
+	if (!rdata) {
+		mutex_unlock(&lport->disc.disc_mutex);
+		return;
+	}
+
+	rdata->ops = &fcoe_ctlr_vn_rport_ops;
+	rdata->disc_id = lport->disc.disc_id;
+
+	ids = &rdata->ids;
+	if ((ids->port_name != -1 && ids->port_name != new->ids.port_name) ||
+	    (ids->node_name != -1 && ids->node_name != new->ids.node_name))
+		lport->tt.rport_logoff(rdata);
+	ids->port_name = new->ids.port_name;
+	ids->node_name = new->ids.node_name;
+	mutex_unlock(&lport->disc.disc_mutex);
+
+	frport = fcoe_ctlr_rport(rdata);
+	LIBFCOE_FIP_DBG(fip, "vn_add rport %6.6x %s\n",
+			port_id, frport->fcoe_len ? "old" : "new");
+	*frport = *fcoe_ctlr_rport(new);
+	frport->time = 0;
+}
+
+/**
+ * fcoe_ctlr_vn_lookup() - Find VN remote port's MAC address
+ * @fip: The FCoE controller
+ * @port_id:  The port_id of the remote VN_node
+ * @mac: buffer which will hold the VN_NODE destination MAC address, if found.
+ *
+ * Returns non-zero error if no remote port found.
+ */
+static int fcoe_ctlr_vn_lookup(struct fcoe_ctlr *fip, u32 port_id, u8 *mac)
+{
+	struct fc_lport *lport = fip->lp;
+	struct fc_rport_priv *rdata;
+	struct fcoe_rport *frport;
+	int ret = -1;
+
+	rcu_read_lock();
+	rdata = lport->tt.rport_lookup(lport, port_id);
+	if (rdata) {
+		frport = fcoe_ctlr_rport(rdata);
+		memcpy(mac, frport->enode_mac, ETH_ALEN);
+		ret = 0;
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
+ * fcoe_ctlr_vn_claim_notify() - handle received FIP VN2VN Claim Notification
+ * @fip: The FCoE controller
+ * @new: newly-parsed remote port with frport as a template for new rdata
+ *
+ * Called with ctlr_mutex held.
+ */
+static void fcoe_ctlr_vn_claim_notify(struct fcoe_ctlr *fip,
+				      struct fc_rport_priv *new)
+{
+	struct fcoe_rport *frport = fcoe_ctlr_rport(new);
+
+	if (frport->flags & FIP_FL_REC_OR_P2P) {
+		fcoe_ctlr_vn_send(fip, FIP_SC_VN_PROBE_REQ, fcoe_all_vn2vn, 0);
+		return;
+	}
+	switch (fip->state) {
+	case FIP_ST_VNMP_START:
+	case FIP_ST_VNMP_PROBE1:
+	case FIP_ST_VNMP_PROBE2:
+		if (new->ids.port_id == fip->port_id)
+			fcoe_ctlr_vn_restart(fip);
+		break;
+	case FIP_ST_VNMP_CLAIM:
+	case FIP_ST_VNMP_UP:
+		if (new->ids.port_id == fip->port_id) {
+			if (new->ids.port_name > fip->lp->wwpn) {
+				fcoe_ctlr_vn_restart(fip);
+				break;
+			}
+			fcoe_ctlr_vn_send_claim(fip);
+			break;
+		}
+		fcoe_ctlr_vn_send(fip, FIP_SC_VN_CLAIM_REP, frport->enode_mac,
+				  min((u32)frport->fcoe_len,
+				      fcoe_ctlr_fcoe_size(fip)));
+		fcoe_ctlr_vn_add(fip, new);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * fcoe_ctlr_vn_claim_resp() - handle received Claim Response
+ * @fip: The FCoE controller that received the frame
+ * @new: newly-parsed remote port with frport from the Claim Response
+ *
+ * Called with ctlr_mutex held; hands @new to fcoe_ctlr_vn_add() in CLAIM/UP.
+ */
+static void fcoe_ctlr_vn_claim_resp(struct fcoe_ctlr *fip,
+				    struct fc_rport_priv *new)
+{
+	LIBFCOE_FIP_DBG(fip, "claim resp from rport %x - state %s\n",
+			new->ids.port_id, fcoe_ctlr_state(fip->state));
+	if (fip->state == FIP_ST_VNMP_UP || fip->state == FIP_ST_VNMP_CLAIM)
+		fcoe_ctlr_vn_add(fip, new);
+}
+
+/**
+ * fcoe_ctlr_vn_beacon() - handle received beacon.
+ * @fip: The FCoE controller that received the frame
+ * @new: newly-parsed remote port with frport from the Beacon
+ *
+ * Called with ctlr_mutex held.
+ */
+static void fcoe_ctlr_vn_beacon(struct fcoe_ctlr *fip,
+				struct fc_rport_priv *new)
+{
+	struct fc_lport *lport = fip->lp;
+	struct fc_rport_priv *rdata;
+	struct fcoe_rport *frport;
+
+	frport = fcoe_ctlr_rport(new);
+	if (frport->flags & FIP_FL_REC_OR_P2P) {
+		fcoe_ctlr_vn_send(fip, FIP_SC_VN_PROBE_REQ, fcoe_all_vn2vn, 0);
+		return;
+	}
+	mutex_lock(&lport->disc.disc_mutex);
+	rdata = lport->tt.rport_lookup(lport, new->ids.port_id);
+	if (rdata)
+		kref_get(&rdata->kref);
+	mutex_unlock(&lport->disc.disc_mutex);
+	if (rdata) {
+		if (rdata->ids.node_name == new->ids.node_name &&
+		    rdata->ids.port_name == new->ids.port_name) {
+			frport = fcoe_ctlr_rport(rdata);
+			if (!frport->time && fip->state == FIP_ST_VNMP_UP)
+				lport->tt.rport_login(rdata);
+			frport->time = jiffies;
+		}
+		kref_put(&rdata->kref, lport->tt.rport_destroy);
+		return;
+	}
+	if (fip->state != FIP_ST_VNMP_UP)
+		return;
+
+	/*
+	 * Beacon from a new neighbor.
+	 * Send a claim notify if one hasn't been sent recently.
+	 * Don't add the neighbor yet.
+	 */
+	LIBFCOE_FIP_DBG(fip, "beacon from new rport %x. sending claim notify\n",
+			new->ids.port_id);
+	if (time_after(jiffies,
+		       fip->sol_time + msecs_to_jiffies(FIP_VN_ANN_WAIT)))
+		fcoe_ctlr_vn_send_claim(fip);
+}
+
+/**
+ * fcoe_ctlr_vn_age() - Check for VN_ports without recent beacons
+ * @fip: The FCoE controller
+ *
+ * Called with ctlr_mutex held.
+ * Called only in state FIP_ST_VNMP_UP.
+ * Returns the soonest time for next age-out or a time far in the future.
+ */
+static unsigned long fcoe_ctlr_vn_age(struct fcoe_ctlr *fip)
+{
+	struct fc_lport *lport = fip->lp;
+	struct fc_rport_priv *rdata;
+	struct fcoe_rport *frport;
+	unsigned long next_time;
+	unsigned long deadline;
+
+	next_time = jiffies + msecs_to_jiffies(FIP_VN_BEACON_INT * 10);
+	mutex_lock(&lport->disc.disc_mutex);
+	list_for_each_entry_rcu(rdata, &lport->disc.rports, peers) {
+		frport = fcoe_ctlr_rport(rdata);
+		if (!frport->time)
+			continue;
+		deadline = frport->time +
+			   msecs_to_jiffies(FIP_VN_BEACON_INT * 25 / 10);
+		if (time_after_eq(jiffies, deadline)) {
+			frport->time = 0;
+			LIBFCOE_FIP_DBG(fip,
+				"port %16.16llx fc_id %6.6x beacon expired\n",
+				rdata->ids.port_name, rdata->ids.port_id);
+			lport->tt.rport_logoff(rdata);
+		} else if (time_before(deadline, next_time))
+			next_time = deadline;
+	}
+	mutex_unlock(&lport->disc.disc_mutex);
+	return next_time;
+}
+
+/**
+ * fcoe_ctlr_vn_recv() - Receive a FIP frame
+ * @fip: The FCoE controller that received the frame
+ * @skb: The received FIP frame
+ *
+ * Parses the frame into an on-stack buffer and dispatches on the FIP subcode
+ * under ctlr_mutex.  Returns non-zero if dropped.  Always consumes the frame.
+ */
+static int fcoe_ctlr_vn_recv(struct fcoe_ctlr *fip, struct sk_buff *skb)
+{
+	struct fip_header *fiph;
+	enum fip_vn2vn_subcode sub;
+	struct {	/* not a union: frport must not overlay rdata */
+		struct fc_rport_priv rdata;
+		struct fcoe_rport frport;
+	} buf;
+	int rc;
+
+	fiph = (struct fip_header *)skb->data;
+	sub = fiph->fip_subcode;
+
+	rc = fcoe_ctlr_vn_parse(fip, skb, &buf.rdata);
+	if (rc) {
+		LIBFCOE_FIP_DBG(fip, "vn_recv vn_parse error %d\n", rc);
+		goto drop;
+	}
+
+	mutex_lock(&fip->ctlr_mutex);
+	switch (sub) {
+	case FIP_SC_VN_PROBE_REQ:
+		fcoe_ctlr_vn_probe_req(fip, &buf.rdata);
+		break;
+	case FIP_SC_VN_PROBE_REP:
+		fcoe_ctlr_vn_probe_reply(fip, &buf.rdata);
+		break;
+	case FIP_SC_VN_CLAIM_NOTIFY:
+		fcoe_ctlr_vn_claim_notify(fip, &buf.rdata);
+		break;
+	case FIP_SC_VN_CLAIM_REP:
+		fcoe_ctlr_vn_claim_resp(fip, &buf.rdata);
+		break;
+	case FIP_SC_VN_BEACON:
+		fcoe_ctlr_vn_beacon(fip, &buf.rdata);
+		break;
+	default:
+		LIBFCOE_FIP_DBG(fip, "vn_recv unknown subcode %d\n", sub);
+		rc = -1;
+		break;
+	}
+	mutex_unlock(&fip->ctlr_mutex);
+drop:
+	kfree_skb(skb);
+	return rc;
+}
+
+/**
+ * fcoe_ctlr_disc_recv - discovery receive handler for VN2VN mode.
+ * @fip: The FCoE controller
+ *
+ * This should never be called since we don't see RSCNs or other
+ * fabric-generated ELSes.
+ */
+static void fcoe_ctlr_disc_recv(struct fc_seq *seq, struct fc_frame *fp,
+				struct fc_lport *lport)
+{
+	struct fc_seq_els_data rjt_data;
+
+	rjt_data.fp = NULL;
+	rjt_data.reason = ELS_RJT_UNSUP;
+	rjt_data.explan = ELS_EXPL_NONE;
+	lport->tt.seq_els_rsp_send(seq, ELS_LS_RJT, &rjt_data);
+	fc_frame_free(fp);
+}
+
+/**
+ * fcoe_ctlr_disc_start - start discovery for VN2VN mode.
+ * @callback: The discovery-event callback, saved in disc->disc_callback
+ * @lport: The local port; lport->disc.priv is the FCoE controller
+ *
+ * This sets a flag indicating that remote ports should be created
+ * and started for the peers we discover.  We use the disc_callback
+ * pointer as that flag.  Peers already discovered are created here.
+ *
+ * The lport lock is held during this call, and the fcoe_ctlr lock may be too.
+ * The callback must be done later, without the lport or discovery locks held.
+ */
+static void fcoe_ctlr_disc_start(void (*callback)(struct fc_lport *,
+						  enum fc_disc_event),
+				 struct fc_lport *lport)
+{
+	struct fc_disc *disc = &lport->disc;
+	struct fcoe_ctlr *fip = disc->priv;
+
+	mutex_lock(&disc->disc_mutex);
+	disc->disc_callback = callback;
+	disc->disc_id = (disc->disc_id + 2) | 1;
+	disc->pending = 1;
+	schedule_work(&fip->timer_work);
+	mutex_unlock(&disc->disc_mutex);
+}
+
+/**
+ * fcoe_ctlr_vn_disc() - report FIP VN_port discovery results after claim state.
+ * @fip: The FCoE controller
+ *
+ * Starts the FLOGI and PLOGI login process to each discovered rport for which
+ * we've received at least one beacon.
+ * Performs the discovery complete callback.
+ */
+static void fcoe_ctlr_vn_disc(struct fcoe_ctlr *fip)
+{
+	struct fc_lport *lport = fip->lp;
+	struct fc_disc *disc = &lport->disc;
+	struct fc_rport_priv *rdata;
+	struct fcoe_rport *frport;
+	void (*callback)(struct fc_lport *, enum fc_disc_event);
+
+	mutex_lock(&disc->disc_mutex);
+	callback = disc->pending ? disc->disc_callback : NULL;
+	disc->pending = 0;
+	list_for_each_entry_rcu(rdata, &disc->rports, peers) {
+		frport = fcoe_ctlr_rport(rdata);
+		if (frport->time)
+			lport->tt.rport_login(rdata);
+	}
+	mutex_unlock(&disc->disc_mutex);
+	if (callback)
+		callback(lport, DISC_EV_SUCCESS);
+}
+
+/**
+ * fcoe_ctlr_vn_timeout - timer work function for VN2VN mode.
+ * @fip: The FCoE controller
+ */
+static void fcoe_ctlr_vn_timeout(struct fcoe_ctlr *fip)
+{
+	unsigned long next_time;
+	u8 mac[ETH_ALEN];
+	u32 new_port_id = 0;
+
+	mutex_lock(&fip->ctlr_mutex);
+	switch (fip->state) {
+	case FIP_ST_VNMP_START:
+		fcoe_ctlr_set_state(fip, FIP_ST_VNMP_PROBE1);
+		fcoe_ctlr_vn_send(fip, FIP_SC_VN_PROBE_REQ, fcoe_all_vn2vn, 0);
+		next_time = jiffies + msecs_to_jiffies(FIP_VN_PROBE_WAIT);
+		break;
+	case FIP_ST_VNMP_PROBE1:
+		fcoe_ctlr_set_state(fip, FIP_ST_VNMP_PROBE2);
+		fcoe_ctlr_vn_send(fip, FIP_SC_VN_PROBE_REQ, fcoe_all_vn2vn, 0);
+		next_time = jiffies + msecs_to_jiffies(FIP_VN_ANN_WAIT);
+		break;
+	case FIP_ST_VNMP_PROBE2:
+		fcoe_ctlr_set_state(fip, FIP_ST_VNMP_CLAIM);
+		new_port_id = fip->port_id;
+		hton24(mac, FIP_VN_FC_MAP);
+		hton24(mac + 3, new_port_id);
+		fip->update_mac(fip->lp, mac);
+		fcoe_ctlr_vn_send_claim(fip);
+		next_time = jiffies + msecs_to_jiffies(FIP_VN_ANN_WAIT);
+		break;
+	case FIP_ST_VNMP_CLAIM:
+		/*
+		 * This may also be invoked by starting discovery, so don't
+		 * go to the next state unless it's been long enough.
+		 */
+		next_time = fip->sol_time + msecs_to_jiffies(FIP_VN_ANN_WAIT);
+		if (time_after_eq(jiffies, next_time)) {
+			fcoe_ctlr_set_state(fip, FIP_ST_VNMP_UP);
+			fcoe_ctlr_vn_send(fip, FIP_SC_VN_BEACON,
+					  fcoe_all_vn2vn, 0);
+			next_time = jiffies + msecs_to_jiffies(FIP_VN_ANN_WAIT);
+			fip->port_ka_time = next_time;
+		}
+		fcoe_ctlr_vn_disc(fip);
+		break;
+	case FIP_ST_VNMP_UP:
+		next_time = fcoe_ctlr_vn_age(fip);
+		if (time_after_eq(jiffies, fip->port_ka_time)) {
+			fcoe_ctlr_vn_send(fip, FIP_SC_VN_BEACON,
+					  fcoe_all_vn2vn, 0);
+			fip->port_ka_time = jiffies +
+				 msecs_to_jiffies(FIP_VN_BEACON_INT +
+					(random32() % FIP_VN_BEACON_FUZZ));
+		}
+		if (time_before(fip->port_ka_time, next_time))
+			next_time = fip->port_ka_time;
+		break;
+	case FIP_ST_LINK_WAIT:
+		goto unlock;
+	default:
+		WARN(1, "unexpected state %d", fip->state);
+		goto unlock;
+	}
+	mod_timer(&fip->timer, next_time);
+unlock:
+	mutex_unlock(&fip->ctlr_mutex);
+
+	/* If port ID is new, notify local port after dropping ctlr_mutex */
+	if (new_port_id)
+		fc_lport_set_local_id(fip->lp, new_port_id);
+}
+
 /**
  * fcoe_libfc_config() - Sets up libfc related properties for local port
  * @lp: The local port to configure libfc for
+ * @fip: The FCoE controller in use by the local port
  * @tt: The libfc function template
+ * @init_fcp: If non-zero, the FCP portion of libfc should be initialized
  *
  * Returns : 0 for success
  */
-int fcoe_libfc_config(struct fc_lport *lport,
-		      struct libfc_function_template *tt)
+int fcoe_libfc_config(struct fc_lport *lport, struct fcoe_ctlr *fip,
+		      const struct libfc_function_template *tt, int init_fcp)
 {
 	/* Set the function pointers set by the LLDD */
 	memcpy(&lport->tt, tt, sizeof(*tt));
-	if (fc_fcp_init(lport))
+	if (init_fcp && fc_fcp_init(lport))
 		return -ENOMEM;
 	fc_exch_init(lport);
 	fc_elsct_init(lport);
 	fc_lport_init(lport);
+	if (fip->mode == FIP_MODE_VN2VN)
+		lport->rport_priv_size = sizeof(struct fcoe_rport);
 	fc_rport_init(lport);
-	fc_disc_init(lport);
-
+	if (fip->mode == FIP_MODE_VN2VN) {
+		lport->point_to_multipoint = 1;
+		lport->tt.disc_recv_req = fcoe_ctlr_disc_recv;
+		lport->tt.disc_start = fcoe_ctlr_disc_start;
+		lport->tt.disc_stop = fcoe_ctlr_disc_stop;
+		lport->tt.disc_stop_final = fcoe_ctlr_disc_stop_final;
+		mutex_init(&lport->disc.disc_mutex);
+		INIT_LIST_HEAD(&lport->disc.rports);
+		lport->disc.priv = fip;
+	} else {
+		fc_disc_init(lport);
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(fcoe_libfc_config);
-
diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
index d0fe1c3345b..9eb7a9ebcca 100644
--- a/drivers/scsi/fnic/fnic_main.c
+++ b/drivers/scsi/fnic/fnic_main.c
@@ -673,7 +673,6 @@ static int __devinit fnic_probe(struct pci_dev *pdev,
 	/* Start local port initiatialization */
 
 	lp->link_up = 0;
-	lp->tt = fnic_transport_template;
 
 	lp->max_retry_count = fnic->config.flogi_retries;
 	lp->max_rport_retry_count = fnic->config.plogi_retries;
@@ -689,11 +688,7 @@ static int __devinit fnic_probe(struct pci_dev *pdev,
 	fc_set_wwnn(lp, fnic->config.node_wwn);
 	fc_set_wwpn(lp, fnic->config.port_wwn);
 
-	fc_lport_init(lp);
-	fc_exch_init(lp);
-	fc_elsct_init(lp);
-	fc_rport_init(lp);
-	fc_disc_init(lp);
+	fcoe_libfc_config(lp, &fnic->ctlr, &fnic_transport_template, 0);
 
 	if (!fc_exch_mgr_alloc(lp, FC_CLASS_3, FCPIO_HOST_EXCH_RANGE_START,
 			       FCPIO_HOST_EXCH_RANGE_END, NULL)) {
diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h
index 1a84a3182da..06f1b5a8ed1 100644
--- a/include/scsi/libfcoe.h
+++ b/include/scsi/libfcoe.h
@@ -26,6 +26,7 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/workqueue.h>
+#include <linux/random.h>
 #include <scsi/fc/fc_fcoe.h>
 #include <scsi/libfc.h>
 
@@ -37,6 +38,7 @@
 #define FCOE_CTLR_START_DELAY	2000	/* mS after first adv. to choose FCF */
 #define FCOE_CTRL_SOL_TOV	2000	/* min. solicitation interval (mS) */
 #define FCOE_CTLR_FCF_LIMIT	20	/* max. number of FCF entries */
+#define FCOE_CTLR_VN2VN_LOGIN_LIMIT 3	/* max. VN2VN rport login retries */
 
 /**
  * enum fip_state - internal state of FCoE controller.
@@ -45,6 +47,11 @@
  * @FIP_ST_AUTO:	determining whether to use FIP or non-FIP mode.
  * @FIP_ST_NON_FIP:	non-FIP mode selected.
  * @FIP_ST_ENABLED:	FIP mode selected.
+ * @FIP_ST_VNMP_START:	VN2VN multipath mode start, wait
+ * @FIP_ST_VNMP_PROBE1:	VN2VN sent first probe, listening
+ * @FIP_ST_VNMP_PROBE2:	VN2VN sent second probe, listening
+ * @FIP_ST_VNMP_CLAIM:	VN2VN sent claim, waiting for responses
+ * @FIP_ST_VNMP_UP:	VN2VN multipath mode operation
  */
 enum fip_state {
 	FIP_ST_DISABLED,
@@ -52,6 +59,11 @@ enum fip_state {
 	FIP_ST_AUTO,
 	FIP_ST_NON_FIP,
 	FIP_ST_ENABLED,
+	FIP_ST_VNMP_START,
+	FIP_ST_VNMP_PROBE1,
+	FIP_ST_VNMP_PROBE2,
+	FIP_ST_VNMP_CLAIM,
+	FIP_ST_VNMP_UP,
 };
 
 /*
@@ -62,6 +74,7 @@ enum fip_state {
 #define FIP_MODE_AUTO		FIP_ST_AUTO
 #define FIP_MODE_NON_FIP	FIP_ST_NON_FIP
 #define FIP_MODE_FABRIC		FIP_ST_ENABLED
+#define FIP_MODE_VN2VN		FIP_ST_VNMP_START
 
 /**
  * struct fcoe_ctlr - FCoE Controller and FIP state
@@ -79,11 +92,14 @@ enum fip_state {
  * @timer_work:	   &work_struct for doing keep-alives and resets.
  * @recv_work:	   &work_struct for receiving FIP frames.
  * @fip_recv_list: list of received FIP frames.
+ * @rnd_state:	   state for pseudo-random number generator.
+ * @port_id:	   proposed or selected local-port ID.
  * @user_mfs:	   configured maximum FC frame size, including FC header.
  * @flogi_oxid:    exchange ID of most recent fabric login.
  * @flogi_count:   number of FLOGI attempts in AUTO mode.
  * @map_dest:	   use the FC_MAP mode for destination MAC addresses.
  * @spma:	   supports SPMA server-provided MACs mode
+ * @probe_tries:   number of FC_IDs probed
  * @dest_addr:	   MAC address of the selected FC forwarder.
  * @ctl_src_addr:  the native MAC address of our local port.
  * @send:	   LLD-supplied function to handle sending FIP Ethernet frames
@@ -110,11 +126,16 @@ struct fcoe_ctlr {
 	struct work_struct timer_work;
 	struct work_struct recv_work;
 	struct sk_buff_head fip_recv_list;
+
+	struct rnd_state rnd_state;
+	u32 port_id;
+
 	u16 user_mfs;
 	u16 flogi_oxid;
 	u8 flogi_count;
 	u8 map_dest;
 	u8 spma;
+	u8 probe_tries;
 	u8 dest_addr[ETH_ALEN];
 	u8 ctl_src_addr[ETH_ALEN];
 
@@ -160,6 +181,24 @@ struct fcoe_fcf {
 	u8 fd_flags:1;
 };
 
+/**
+ * struct fcoe_rport - VN2VN remote port
+ * @time:	time of create or last beacon packet received from node
+ * @fcoe_len:	max FCoE frame size, not including VLAN or Ethernet headers
+ * @flags:	flags from probe or claim
+ * @login_count: number of unsuccessful rport logins to this port
+ * @enode_mac:	E_Node control MAC address
+ * @vn_mac:	VN_Node assigned MAC address for data
+ */
+struct fcoe_rport {
+	unsigned long time;
+	u16 fcoe_len;
+	u16 flags;
+	u8 login_count;
+	u8 enode_mac[ETH_ALEN];
+	u8 vn_mac[ETH_ALEN];
+};
+
 /* FIP API functions */
 void fcoe_ctlr_init(struct fcoe_ctlr *, enum fip_state);
 void fcoe_ctlr_destroy(struct fcoe_ctlr *);
@@ -172,7 +211,8 @@ int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *, struct fc_lport *,
 
 /* libfcoe funcs */
 u64 fcoe_wwn_from_mac(unsigned char mac[], unsigned int, unsigned int);
-int fcoe_libfc_config(struct fc_lport *, struct libfc_function_template *);
+int fcoe_libfc_config(struct fc_lport *, struct fcoe_ctlr *,
+		      const struct libfc_function_template *, int init_fcp);
 
 /**
  * is_fip_mode() - returns true if FIP mode selected.
-- 
cgit v1.2.3-70-g09d2


From 079ecd8cfe95dfd28b74f3a00d66fdbcdfc8c611 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Tue, 20 Jul 2010 15:20:51 -0700
Subject: [SCSI] libfc: eliminate rport LOGO state

The LOGO state hasn't been used in a while, except in a brief
transition to DELETE state while holding the rport mutex.
All port LOGO responses have been ignored as well as any timeout
if we don't get a response.

So this patch just removes LOGO state and simplifies the response handler.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_rport.c | 88 +++++++++++--------------------------------
 include/scsi/libfc.h          |  2 -
 2 files changed, 22 insertions(+), 68 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 4d6adf29b4f..c06d63e4a00 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -89,7 +89,6 @@ static const char *fc_rport_state_names[] = {
 	[RPORT_ST_PRLI] = "PRLI",
 	[RPORT_ST_RTV] = "RTV",
 	[RPORT_ST_READY] = "Ready",
-	[RPORT_ST_LOGO] = "LOGO",
 	[RPORT_ST_ADISC] = "ADISC",
 	[RPORT_ST_DELETE] = "Delete",
 };
@@ -514,9 +513,6 @@ static void fc_rport_timeout(struct work_struct *work)
 	case RPORT_ST_RTV:
 		fc_rport_enter_rtv(rdata);
 		break;
-	case RPORT_ST_LOGO:
-		fc_rport_enter_logo(rdata);
-		break;
 	case RPORT_ST_ADISC:
 		fc_rport_enter_adisc(rdata);
 		break;
@@ -547,7 +543,6 @@ static void fc_rport_error(struct fc_rport_priv *rdata, struct fc_frame *fp)
 	switch (rdata->rp_state) {
 	case RPORT_ST_FLOGI:
 	case RPORT_ST_PLOGI:
-	case RPORT_ST_LOGO:
 		rdata->flags &= ~FC_RP_STARTED;
 		fc_rport_enter_delete(rdata, RPORT_EV_FAILED);
 		break;
@@ -791,7 +786,6 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
 
 	switch (rdata->rp_state) {
 	case RPORT_ST_INIT:
-	case RPORT_ST_LOGO:
 	case RPORT_ST_DELETE:
 		mutex_unlock(&rdata->rp_mutex);
 		rjt_data.reason = ELS_RJT_FIP;
@@ -1036,52 +1030,6 @@ err:
 	kref_put(&rdata->kref, rdata->local_port->tt.rport_destroy);
 }
 
-/**
- * fc_rport_logo_resp() - Handler for logout (LOGO) responses
- * @sp:	       The sequence the LOGO was on
- * @fp:	       The LOGO response frame
- * @rdata_arg: The remote port that sent the LOGO response
- *
- * Locking Note: This function will be called without the rport lock
- * held, but it will lock, call an _enter_* function or fc_rport_error
- * and then unlock the rport.
- */
-static void fc_rport_logo_resp(struct fc_seq *sp, struct fc_frame *fp,
-			       void *rdata_arg)
-{
-	struct fc_rport_priv *rdata = rdata_arg;
-	u8 op;
-
-	mutex_lock(&rdata->rp_mutex);
-
-	FC_RPORT_DBG(rdata, "Received a LOGO %s\n", fc_els_resp_type(fp));
-
-	if (rdata->rp_state != RPORT_ST_LOGO) {
-		FC_RPORT_DBG(rdata, "Received a LOGO response, but in state "
-			     "%s\n", fc_rport_state(rdata));
-		if (IS_ERR(fp))
-			goto err;
-		goto out;
-	}
-
-	if (IS_ERR(fp)) {
-		fc_rport_error_retry(rdata, fp);
-		goto err;
-	}
-
-	op = fc_frame_payload_op(fp);
-	if (op != ELS_LS_ACC)
-		FC_RPORT_DBG(rdata, "Bad ELS response op %x for LOGO command\n",
-			     op);
-	fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
-
-out:
-	fc_frame_free(fp);
-err:
-	mutex_unlock(&rdata->rp_mutex);
-	kref_put(&rdata->kref, rdata->local_port->tt.rport_destroy);
-}
-
 /**
  * fc_rport_enter_prli() - Send Process Login (PRLI) request
  * @rdata: The remote port to send the PRLI request to
@@ -1223,6 +1171,24 @@ static void fc_rport_enter_rtv(struct fc_rport_priv *rdata)
 		kref_get(&rdata->kref);
 }
 
+/**
+ * fc_rport_logo_resp() - Handler for logout (LOGO) responses
+ * @sp:	       The sequence the LOGO was on
+ * @fp:	       The LOGO response frame
+ * @lport_arg: The local port
+ */
+static void fc_rport_logo_resp(struct fc_seq *sp, struct fc_frame *fp,
+			       void *lport_arg)
+{
+	struct fc_lport *lport = lport_arg;
+
+	FC_RPORT_ID_DBG(lport, fc_seq_exch(sp)->did,
+			"Received a LOGO %s\n", fc_els_resp_type(fp));
+	if (IS_ERR(fp))
+		return;
+	fc_frame_free(fp);
+}
+
 /**
  * fc_rport_enter_logo() - Send a logout (LOGO) request
  * @rdata: The remote port to send the LOGO request to
@@ -1235,23 +1201,14 @@ static void fc_rport_enter_logo(struct fc_rport_priv *rdata)
 	struct fc_lport *lport = rdata->local_port;
 	struct fc_frame *fp;
 
-	FC_RPORT_DBG(rdata, "Port entered LOGO state from %s state\n",
+	FC_RPORT_DBG(rdata, "Port sending LOGO from %s state\n",
 		     fc_rport_state(rdata));
 
-	fc_rport_state_enter(rdata, RPORT_ST_LOGO);
-
 	fp = fc_frame_alloc(lport, sizeof(struct fc_els_logo));
-	if (!fp) {
-		fc_rport_error_retry(rdata, fp);
+	if (!fp)
 		return;
-	}
-
-	if (!lport->tt.elsct_send(lport, rdata->ids.port_id, fp, ELS_LOGO,
-				  fc_rport_logo_resp, rdata,
-				  2 * lport->r_a_tov))
-		fc_rport_error_retry(rdata, NULL);
-	else
-		kref_get(&rdata->kref);
+	(void)lport->tt.elsct_send(lport, rdata->ids.port_id, fp, ELS_LOGO,
+				   fc_rport_logo_resp, lport, 0);
 }
 
 /**
@@ -1670,7 +1627,6 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
 		break;
 	case RPORT_ST_FLOGI:
 	case RPORT_ST_DELETE:
-	case RPORT_ST_LOGO:
 		FC_RPORT_DBG(rdata, "Received PLOGI in state %s - send busy\n",
 			     fc_rport_state(rdata));
 		mutex_unlock(&rdata->rp_mutex);
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 8d297f9a0a4..e6f07fba432 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -103,7 +103,6 @@ enum fc_disc_event {
  * @RPORT_ST_PRLI:    Waiting for PRLI completion
  * @RPORT_ST_RTV:     Waiting for RTV completion
  * @RPORT_ST_READY:   Ready for use
- * @RPORT_ST_LOGO:    Remote port logout (LOGO) sent
  * @RPORT_ST_ADISC:   Discover Address sent
  * @RPORT_ST_DELETE:  Remote port being deleted
 */
@@ -115,7 +114,6 @@ enum fc_rport_state {
 	RPORT_ST_PRLI,
 	RPORT_ST_RTV,
 	RPORT_ST_READY,
-	RPORT_ST_LOGO,
 	RPORT_ST_ADISC,
 	RPORT_ST_DELETE,
 };
-- 
cgit v1.2.3-70-g09d2


From 251748a99e631a2c46edcf9e519cfc60fae8153d Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Tue, 20 Jul 2010 15:20:56 -0700
Subject: [SCSI] libfc: add fc_frame_sid() and fc_frame_did() functions

To pave the way for eliminating exchanges from incoming requests,
add simple inline fc_frame_sid() and fc_frame_did() functions
which get the FC_IDs from the frame header.  This can be almost
as efficient as getting them from the sequence/exchange.

Move ntohll, htonll, ntoh24 and hton24 to <scsi/fc_frame.h>
since we need them there and that's included by <scsi/libfc.h>

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_lport.c | 12 ++++--------
 drivers/scsi/libfc/fc_rport.c | 26 ++++++-------------------
 include/scsi/fc_frame.h       | 45 ++++++++++++++++++++++++++++++++++++++++++-
 include/scsi/libfc.h          | 18 -----------------
 4 files changed, 54 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index ec9850c4617..be3c2cee829 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -800,7 +800,6 @@ static void fc_lport_recv_flogi_req(struct fc_seq *sp_in,
 				    struct fc_lport *lport)
 {
 	struct fc_frame *fp;
-	struct fc_frame_header *fh;
 	struct fc_seq *sp;
 	struct fc_exch *ep;
 	struct fc_els_flogi *flp;
@@ -813,8 +812,7 @@ static void fc_lport_recv_flogi_req(struct fc_seq *sp_in,
 	FC_LPORT_DBG(lport, "Received FLOGI request while in state %s\n",
 		     fc_lport_state(lport));
 
-	fh = fc_frame_header_get(rx_fp);
-	remote_fid = ntoh24(fh->fh_s_id);
+	remote_fid = fc_frame_sid(rx_fp);
 	flp = fc_frame_payload_get(rx_fp, sizeof(*flp));
 	if (!flp)
 		goto out;
@@ -910,7 +908,7 @@ static void fc_lport_recv_req(struct fc_lport *lport, struct fc_seq *sp,
 				recv = fc_lport_recv_flogi_req;
 			break;
 		case ELS_LOGO:
-			if (ntoh24(fh->fh_s_id) == FC_FID_FLOGI)
+			if (fc_frame_sid(fp) == FC_FID_FLOGI)
 				recv = fc_lport_recv_logo_req;
 			break;
 		case ELS_RSCN:
@@ -1468,7 +1466,6 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
 			 void *lp_arg)
 {
 	struct fc_lport *lport = lp_arg;
-	struct fc_frame_header *fh;
 	struct fc_els_flogi *flp;
 	u32 did;
 	u16 csp_flags;
@@ -1496,8 +1493,7 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
 		goto err;
 	}
 
-	fh = fc_frame_header_get(fp);
-	did = ntoh24(fh->fh_d_id);
+	did = fc_frame_did(fp);
 	if (fc_frame_payload_op(fp) == ELS_LS_ACC && did != 0) {
 		flp = fc_frame_payload_get(fp, sizeof(*flp));
 		if (flp) {
@@ -1523,7 +1519,7 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
 				       "Port (%6.6x) entered "
 				       "point-to-point mode\n",
 				       lport->host->host_no, did);
-				fc_lport_ptp_setup(lport, ntoh24(fh->fh_s_id),
+				fc_lport_ptp_setup(lport, fc_frame_sid(fp),
 						   get_unaligned_be64(
 							   &flp->fl_wwpn),
 						   get_unaligned_be64(
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index c06d63e4a00..12349316682 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -747,13 +747,11 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
 	struct fc_rport_priv *rdata;
 	struct fc_frame *fp = rx_fp;
 	struct fc_exch *ep;
-	struct fc_frame_header *fh;
 	struct fc_seq_els_data rjt_data;
 	u32 sid, f_ctl;
 
 	rjt_data.fp = NULL;
-	fh = fc_frame_header_get(fp);
-	sid = ntoh24(fh->fh_s_id);
+	sid = fc_frame_sid(fp);
 
 	FC_RPORT_ID_DBG(lport, sid, "Received FLOGI request\n");
 
@@ -1430,17 +1428,14 @@ static void fc_rport_recv_els_req(struct fc_lport *lport,
 				  struct fc_seq *sp, struct fc_frame *fp)
 {
 	struct fc_rport_priv *rdata;
-	struct fc_frame_header *fh;
 	struct fc_seq_els_data els_data;
 
 	els_data.fp = NULL;
 	els_data.reason = ELS_RJT_UNAB;
 	els_data.explan = ELS_EXPL_PLOGI_REQD;
 
-	fh = fc_frame_header_get(fp);
-
 	mutex_lock(&lport->disc.disc_mutex);
-	rdata = lport->tt.rport_lookup(lport, ntoh24(fh->fh_s_id));
+	rdata = lport->tt.rport_lookup(lport, fc_frame_sid(fp));
 	if (!rdata) {
 		mutex_unlock(&lport->disc.disc_mutex);
 		goto reject;
@@ -1555,14 +1550,12 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
 	struct fc_rport_priv *rdata;
 	struct fc_frame *fp = rx_fp;
 	struct fc_exch *ep;
-	struct fc_frame_header *fh;
 	struct fc_els_flogi *pl;
 	struct fc_seq_els_data rjt_data;
 	u32 sid, f_ctl;
 
 	rjt_data.fp = NULL;
-	fh = fc_frame_header_get(fp);
-	sid = ntoh24(fh->fh_s_id);
+	sid = fc_frame_sid(fp);
 
 	FC_RPORT_ID_DBG(lport, sid, "Received PLOGI request\n");
 
@@ -1682,7 +1675,6 @@ static void fc_rport_recv_prli_req(struct fc_rport_priv *rdata,
 	struct fc_lport *lport = rdata->local_port;
 	struct fc_exch *ep;
 	struct fc_frame *fp;
-	struct fc_frame_header *fh;
 	struct {
 		struct fc_els_prli prli;
 		struct fc_els_spp spp;
@@ -1698,12 +1690,10 @@ static void fc_rport_recv_prli_req(struct fc_rport_priv *rdata,
 	u32 roles = FC_RPORT_ROLE_UNKNOWN;
 
 	rjt_data.fp = NULL;
-	fh = fc_frame_header_get(rx_fp);
-
 	FC_RPORT_DBG(rdata, "Received PRLI request while in state %s\n",
 		     fc_rport_state(rdata));
 
-	len = fr_len(rx_fp) - sizeof(*fh);
+	len = fr_len(rx_fp) - sizeof(struct fc_frame_header);
 	pp = fc_frame_payload_get(rx_fp, sizeof(*pp));
 	if (!pp)
 		goto reject_len;
@@ -1817,7 +1807,6 @@ static void fc_rport_recv_prlo_req(struct fc_rport_priv *rdata,
 				   struct fc_frame *rx_fp)
 {
 	struct fc_lport *lport = rdata->local_port;
-	struct fc_frame_header *fh;
 	struct fc_exch *ep;
 	struct fc_frame *fp;
 	struct {
@@ -1832,12 +1821,11 @@ static void fc_rport_recv_prlo_req(struct fc_rport_priv *rdata,
 	struct fc_seq_els_data rjt_data;
 
 	rjt_data.fp = NULL;
-	fh = fc_frame_header_get(rx_fp);
 
 	FC_RPORT_DBG(rdata, "Received PRLO request while in state %s\n",
 		     fc_rport_state(rdata));
 
-	len = fr_len(rx_fp) - sizeof(*fh);
+	len = fr_len(rx_fp) - sizeof(struct fc_frame_header);
 	pp = fc_frame_payload_get(rx_fp, sizeof(*pp));
 	if (!pp)
 		goto reject_len;
@@ -1901,14 +1889,12 @@ static void fc_rport_recv_logo_req(struct fc_lport *lport,
 				   struct fc_seq *sp,
 				   struct fc_frame *fp)
 {
-	struct fc_frame_header *fh;
 	struct fc_rport_priv *rdata;
 	u32 sid;
 
 	lport->tt.seq_els_rsp_send(sp, ELS_LS_ACC, NULL);
 
-	fh = fc_frame_header_get(fp);
-	sid = ntoh24(fh->fh_s_id);
+	sid = fc_frame_sid(fp);
 
 	mutex_lock(&lport->disc.disc_mutex);
 	rdata = lport->tt.rport_lookup(lport, sid);
diff --git a/include/scsi/fc_frame.h b/include/scsi/fc_frame.h
index 29dd97d5b53..4ad02041b66 100644
--- a/include/scsi/fc_frame.h
+++ b/include/scsi/fc_frame.h
@@ -30,6 +30,23 @@
 
 #include <linux/if_ether.h>
 
+/* some helpful macros */
+
+#define ntohll(x) be64_to_cpu(x)
+#define htonll(x) cpu_to_be64(x)
+
+static inline u32 ntoh24(const u8 *p)
+{
+	return (p[0] << 16) | (p[1] << 8) | p[2];
+}
+
+static inline void hton24(u8 *p, u32 v)
+{
+	p[0] = (v >> 16) & 0xff;
+	p[1] = (v >> 8) & 0xff;
+	p[2] = v & 0xff;
+}
+
 /*
  * The fc_frame interface is used to pass frame data between functions.
  * The frame includes the data buffer, length, and SOF / EOF delimiter types.
@@ -137,6 +154,16 @@ static inline int fc_frame_is_linear(struct fc_frame *fp)
 	return !skb_is_nonlinear(fp_skb(fp));
 }
 
+/*
+ * Get frame header from message in fc_frame structure.
+ * This version doesn't do a length check.
+ */
+static inline
+struct fc_frame_header *__fc_frame_header_get(const struct fc_frame *fp)
+{
+	return (struct fc_frame_header *)fr_hdr(fp);
+}
+
 /*
  * Get frame header from message in fc_frame structure.
  * This hides a cast and provides a place to add some checking.
@@ -145,7 +172,23 @@ static inline
 struct fc_frame_header *fc_frame_header_get(const struct fc_frame *fp)
 {
 	WARN_ON(fr_len(fp) < sizeof(struct fc_frame_header));
-	return (struct fc_frame_header *) fr_hdr(fp);
+	return __fc_frame_header_get(fp);
+}
+
+/*
+ * Get source FC_ID (S_ID) from frame header in message.
+ */
+static inline u32 fc_frame_sid(const struct fc_frame *fp)
+{
+	return ntoh24(__fc_frame_header_get(fp)->fh_s_id);
+}
+
+/*
+ * Get destination FC_ID (D_ID) from frame header in message.
+ */
+static inline u32 fc_frame_did(const struct fc_frame *fp)
+{
+	return ntoh24(__fc_frame_header_get(fp)->fh_d_id);
 }
 
 /*
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index e6f07fba432..f1ce793f33b 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -42,24 +42,6 @@
 #define	FC_EX_TIMEOUT	1	/* Exchange timeout */
 #define	FC_EX_CLOSED	2	/* Exchange closed */
 
-/* some helpful macros */
-
-#define ntohll(x) be64_to_cpu(x)
-#define htonll(x) cpu_to_be64(x)
-
-
-static inline u32 ntoh24(const u8 *p)
-{
-	return (p[0] << 16) | (p[1] << 8) | p[2];
-}
-
-static inline void hton24(u8 *p, u32 v)
-{
-	p[0] = (v >> 16) & 0xff;
-	p[1] = (v >> 8) & 0xff;
-	p[2] = v & 0xff;
-}
-
 /**
  * enum fc_lport_state - Local port states
  * @LPORT_ST_DISABLED: Disabled
-- 
cgit v1.2.3-70-g09d2


From 24f089e2f2c800f88039e9d536d558ec6e349fad Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Tue, 20 Jul 2010 15:21:01 -0700
Subject: [SCSI] libfc: add fc_fill_reply_hdr() and fc_fill_hdr()

Add functions to fill in an FC header given a request header.
These reduce code lines in fc_lport and fc_rport and work
without an exchange/sequence assigned.

fc_fill_reply_hdr() fills a header for a final reply frame.

fc_fill_hdr() is similar but allows specifying the
f_ctl parameter.

Add defines for F_CTL values FC_FCTL_REQ and FC_FCTL_RESP.
These can be used for most request and response sequences.

v2 of patch adds a line to copy the frame encapsulation
info from the received frame.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_elsct.c |  2 +-
 drivers/scsi/libfc/fc_fcp.c   |  6 ++--
 drivers/scsi/libfc/fc_libfc.c | 78 +++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/libfc/fc_lport.c | 39 +++++++---------------
 drivers/scsi/libfc/fc_rport.c | 64 +++++++++--------------------------
 include/scsi/fc_encode.h      |  7 ++++
 include/scsi/libfc.h          |  4 +++
 7 files changed, 121 insertions(+), 79 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libfc/fc_elsct.c b/drivers/scsi/libfc/fc_elsct.c
index e9412b710fa..9b25969e2ad 100644
--- a/drivers/scsi/libfc/fc_elsct.c
+++ b/drivers/scsi/libfc/fc_elsct.c
@@ -64,7 +64,7 @@ struct fc_seq *fc_elsct_send(struct fc_lport *lport, u32 did,
 	}
 
 	fc_fill_fc_hdr(fp, r_ctl, did, lport->port_id, fh_type,
-		       FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0);
+		       FC_FCTL_REQ, 0);
 
 	return lport->tt.exch_seq_send(lport, fp, resp, NULL, arg, timer_msec);
 }
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index 61a12970bd1..eac4d09314e 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -1108,7 +1108,7 @@ static int fc_fcp_cmd_send(struct fc_lport *lport, struct fc_fcp_pkt *fsp,
 
 	fc_fill_fc_hdr(fp, FC_RCTL_DD_UNSOL_CMD, rport->port_id,
 		       rpriv->local_port->port_id, FC_TYPE_FCP,
-		       FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0);
+		       FC_FCTL_REQ, 0);
 
 	seq = lport->tt.exch_seq_send(lport, fp, resp, fc_fcp_pkt_destroy,
 				      fsp, 0);
@@ -1381,7 +1381,7 @@ static void fc_fcp_rec(struct fc_fcp_pkt *fsp)
 	fr_seq(fp) = fsp->seq_ptr;
 	fc_fill_fc_hdr(fp, FC_RCTL_ELS_REQ, rport->port_id,
 		       rpriv->local_port->port_id, FC_TYPE_ELS,
-		       FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0);
+		       FC_FCTL_REQ, 0);
 	if (lport->tt.elsct_send(lport, rport->port_id, fp, ELS_REC,
 				 fc_fcp_rec_resp, fsp,
 				 jiffies_to_msecs(FC_SCSI_REC_TOV))) {
@@ -1639,7 +1639,7 @@ static void fc_fcp_srr(struct fc_fcp_pkt *fsp, enum fc_rctl r_ctl, u32 offset)
 
 	fc_fill_fc_hdr(fp, FC_RCTL_ELS4_REQ, rport->port_id,
 		       rpriv->local_port->port_id, FC_TYPE_FCP,
-		       FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0);
+		       FC_FCTL_REQ, 0);
 
 	seq = lport->tt.exch_seq_send(lport, fp, fc_fcp_srr_resp, NULL,
 				      fsp, jiffies_to_msecs(FC_SCSI_REC_TOV));
diff --git a/drivers/scsi/libfc/fc_libfc.c b/drivers/scsi/libfc/fc_libfc.c
index 39f4b6ab04b..6a48c28e442 100644
--- a/drivers/scsi/libfc/fc_libfc.c
+++ b/drivers/scsi/libfc/fc_libfc.c
@@ -23,6 +23,7 @@
 #include <linux/crc32.h>
 
 #include <scsi/libfc.h>
+#include <scsi/fc_encode.h>
 
 #include "fc_libfc.h"
 
@@ -132,3 +133,80 @@ u32 fc_copy_buffer_to_sglist(void *buf, size_t len,
 	}
 	return copy_len;
 }
+
+/**
+ * fc_fill_hdr() -  fill FC header fields based on request
+ * @fp: reply frame containing header to be filled in
+ * @in_fp: request frame containing header to use in filling in reply
+ * @r_ctl: R_CTL value for header
+ * @f_ctl: F_CTL value for header, with 0 pad
+ * @seq_cnt: sequence count for the header, ignored if @in_fp has a sequence
+ * @parm_offset: parameter / offset value
+ */
+void fc_fill_hdr(struct fc_frame *fp, const struct fc_frame *in_fp,
+		 enum fc_rctl r_ctl, u32 f_ctl, u16 seq_cnt, u32 parm_offset)
+{
+	struct fc_frame_header *fh;
+	struct fc_frame_header *in_fh;
+	struct fc_seq *sp;
+	u32 fill;
+
+	fh = __fc_frame_header_get(fp);
+	in_fh = __fc_frame_header_get(in_fp);
+
+	if (f_ctl & FC_FC_END_SEQ) {
+		fill = -fr_len(fp) & 3;
+		if (fill) {
+			/* TODO, this may be a problem with fragmented skb */
+			memset(skb_put(fp_skb(fp), fill), 0, fill);
+			f_ctl |= fill;
+		}
+		fr_eof(fp) = FC_EOF_T;
+	} else {
+		WARN_ON(fr_len(fp) % 4 != 0);	/* no pad to non last frame */
+		fr_eof(fp) = FC_EOF_N;
+	}
+
+	fh->fh_r_ctl = r_ctl;
+	memcpy(fh->fh_d_id, in_fh->fh_s_id, sizeof(fh->fh_d_id));
+	memcpy(fh->fh_s_id, in_fh->fh_d_id, sizeof(fh->fh_s_id));
+	fh->fh_type = in_fh->fh_type;
+	hton24(fh->fh_f_ctl, f_ctl);
+	fh->fh_ox_id = in_fh->fh_ox_id;
+	fh->fh_rx_id = in_fh->fh_rx_id;
+	fh->fh_cs_ctl = 0;
+	fh->fh_df_ctl = 0;
+	fh->fh_parm_offset = htonl(parm_offset);
+
+	sp = fr_seq(in_fp);
+	if (sp) {
+		fr_seq(fp) = sp;
+		fh->fh_seq_id = sp->id;
+		seq_cnt = sp->cnt;
+	} else {
+		fh->fh_seq_id = 0;
+	}
+	fh->fh_seq_cnt = htons(seq_cnt);	/* host order to wire order */
+	fr_sof(fp) = seq_cnt ? FC_SOF_N3 : FC_SOF_I3;
+	fr_encaps(fp) = fr_encaps(in_fp);
+}
+EXPORT_SYMBOL(fc_fill_hdr);
+
+/**
+ * fc_fill_reply_hdr() -  fill FC reply header fields based on request
+ * @fp: reply frame containing header to be filled in
+ * @in_fp: request frame containing header to use in filling in reply
+ * @r_ctl: R_CTL value for reply
+ * @parm_offset: parameter / offset value
+ */
+void fc_fill_reply_hdr(struct fc_frame *fp, const struct fc_frame *in_fp,
+		       enum fc_rctl r_ctl, u32 parm_offset)
+{
+	struct fc_seq *sp;
+
+	sp = fr_seq(in_fp);
+	if (sp)
+		fr_seq(fp) = fr_dev(in_fp)->tt.seq_start_next(sp);
+	fc_fill_hdr(fp, in_fp, r_ctl, FC_FCTL_RESP, 0, parm_offset);
+}
+EXPORT_SYMBOL(fc_fill_reply_hdr);
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index be3c2cee829..e50a6606d4b 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -405,11 +405,9 @@ static void fc_lport_recv_echo_req(struct fc_seq *sp, struct fc_frame *in_fp,
 				   struct fc_lport *lport)
 {
 	struct fc_frame *fp;
-	struct fc_exch *ep = fc_seq_exch(sp);
 	unsigned int len;
 	void *pp;
 	void *dp;
-	u32 f_ctl;
 
 	FC_LPORT_DBG(lport, "Received ECHO request while in state %s\n",
 		     fc_lport_state(lport));
@@ -425,11 +423,8 @@ static void fc_lport_recv_echo_req(struct fc_seq *sp, struct fc_frame *in_fp,
 		dp = fc_frame_payload_get(fp, len);
 		memcpy(dp, pp, len);
 		*((__be32 *)dp) = htonl(ELS_LS_ACC << 24);
-		sp = lport->tt.seq_start_next(sp);
-		f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ | FC_FC_END_SEQ;
-		fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
-			       FC_TYPE_ELS, f_ctl, 0);
-		lport->tt.seq_send(lport, sp, fp);
+		fc_fill_reply_hdr(fp, in_fp, FC_RCTL_ELS_REP, 0);
+		lport->tt.frame_send(lport, fp);
 	}
 	fc_frame_free(in_fp);
 }
@@ -447,7 +442,6 @@ static void fc_lport_recv_rnid_req(struct fc_seq *sp, struct fc_frame *in_fp,
 				   struct fc_lport *lport)
 {
 	struct fc_frame *fp;
-	struct fc_exch *ep = fc_seq_exch(sp);
 	struct fc_els_rnid *req;
 	struct {
 		struct fc_els_rnid_resp rnid;
@@ -457,7 +451,6 @@ static void fc_lport_recv_rnid_req(struct fc_seq *sp, struct fc_frame *in_fp,
 	struct fc_seq_els_data rjt_data;
 	u8 fmt;
 	size_t len;
-	u32 f_ctl;
 
 	FC_LPORT_DBG(lport, "Received RNID request while in state %s\n",
 		     fc_lport_state(lport));
@@ -490,12 +483,8 @@ static void fc_lport_recv_rnid_req(struct fc_seq *sp, struct fc_frame *in_fp,
 				memcpy(&rp->gen, &lport->rnid_gen,
 				       sizeof(rp->gen));
 			}
-			sp = lport->tt.seq_start_next(sp);
-			f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ;
-			f_ctl |= FC_FC_END_SEQ | FC_FC_SEQ_INIT;
-			fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
-				       FC_TYPE_ELS, f_ctl, 0);
-			lport->tt.seq_send(lport, sp, fp);
+			fc_fill_reply_hdr(fp, in_fp, FC_RCTL_ELS_REP, 0);
+			lport->tt.frame_send(lport, fp);
 		}
 	}
 	fc_frame_free(in_fp);
@@ -800,14 +789,13 @@ static void fc_lport_recv_flogi_req(struct fc_seq *sp_in,
 				    struct fc_lport *lport)
 {
 	struct fc_frame *fp;
+	struct fc_frame_header *fh;
 	struct fc_seq *sp;
-	struct fc_exch *ep;
 	struct fc_els_flogi *flp;
 	struct fc_els_flogi *new_flp;
 	u64 remote_wwpn;
 	u32 remote_fid;
 	u32 local_fid;
-	u32 f_ctl;
 
 	FC_LPORT_DBG(lport, "Received FLOGI request while in state %s\n",
 		     fc_lport_state(lport));
@@ -843,7 +831,6 @@ static void fc_lport_recv_flogi_req(struct fc_seq *sp_in,
 
 	fp = fc_frame_alloc(lport, sizeof(*flp));
 	if (fp) {
-		sp = lport->tt.seq_start_next(fr_seq(rx_fp));
 		new_flp = fc_frame_payload_get(fp, sizeof(*flp));
 		fc_lport_flogi_fill(lport, new_flp, ELS_FLOGI);
 		new_flp->fl_cmd = (u8) ELS_LS_ACC;
@@ -852,11 +839,11 @@ static void fc_lport_recv_flogi_req(struct fc_seq *sp_in,
 		 * Send the response.  If this fails, the originator should
 		 * repeat the sequence.
 		 */
-		f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ | FC_FC_END_SEQ;
-		ep = fc_seq_exch(sp);
-		fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, remote_fid, local_fid,
-			       FC_TYPE_ELS, f_ctl, 0);
-		lport->tt.seq_send(lport, sp, fp);
+		fc_fill_reply_hdr(fp, rx_fp, FC_RCTL_ELS_REP, 0);
+		fh = fc_frame_header_get(fp);
+		hton24(fh->fh_s_id, local_fid);
+		hton24(fh->fh_d_id, remote_fid);
+		lport->tt.frame_send(lport, fp);
 
 	} else {
 		fc_lport_error(lport, fp);
@@ -1731,8 +1718,7 @@ static int fc_lport_els_request(struct fc_bsg_job *job,
 	hton24(fh->fh_d_id, did);
 	hton24(fh->fh_s_id, lport->port_id);
 	fh->fh_type = FC_TYPE_ELS;
-	hton24(fh->fh_f_ctl, FC_FC_FIRST_SEQ |
-	       FC_FC_END_SEQ | FC_FC_SEQ_INIT);
+	hton24(fh->fh_f_ctl, FC_FCTL_REQ);
 	fh->fh_cs_ctl = 0;
 	fh->fh_df_ctl = 0;
 	fh->fh_parm_offset = 0;
@@ -1791,8 +1777,7 @@ static int fc_lport_ct_request(struct fc_bsg_job *job,
 	hton24(fh->fh_d_id, did);
 	hton24(fh->fh_s_id, lport->port_id);
 	fh->fh_type = FC_TYPE_CT;
-	hton24(fh->fh_f_ctl, FC_FC_FIRST_SEQ |
-	       FC_FC_END_SEQ | FC_FC_SEQ_INIT);
+	hton24(fh->fh_f_ctl, FC_FCTL_REQ);
 	fh->fh_cs_ctl = 0;
 	fh->fh_df_ctl = 0;
 	fh->fh_parm_offset = 0;
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 12349316682..59879512321 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -746,9 +746,8 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
 	struct fc_els_flogi *flp;
 	struct fc_rport_priv *rdata;
 	struct fc_frame *fp = rx_fp;
-	struct fc_exch *ep;
 	struct fc_seq_els_data rjt_data;
-	u32 sid, f_ctl;
+	u32 sid;
 
 	rjt_data.fp = NULL;
 	sid = fc_frame_sid(fp);
@@ -813,7 +812,6 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
 		rjt_data.explan = ELS_EXPL_NONE;
 		goto reject;
 	}
-	fc_frame_free(rx_fp);
 
 	fp = fc_frame_alloc(lport, sizeof(*flp));
 	if (!fp)
@@ -824,11 +822,8 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
 	flp = fc_frame_payload_get(fp, sizeof(*flp));
 	flp->fl_cmd = ELS_LS_ACC;
 
-	f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT;
-	ep = fc_seq_exch(sp);
-	fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
-		       FC_TYPE_ELS, f_ctl, 0);
-	lport->tt.seq_send(lport, sp, fp);
+	fc_fill_reply_hdr(fp, rx_fp, FC_RCTL_ELS_REP, 0);
+	lport->tt.frame_send(lport, fp);
 
 	if (rdata->ids.port_name < lport->wwpn)
 		fc_rport_enter_plogi(rdata);
@@ -837,12 +832,13 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
 out:
 	mutex_unlock(&rdata->rp_mutex);
 	mutex_unlock(&disc->disc_mutex);
+	fc_frame_free(rx_fp);
 	return;
 
 reject:
 	mutex_unlock(&disc->disc_mutex);
 	lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
-	fc_frame_free(fp);
+	fc_frame_free(rx_fp);
 }
 
 /**
@@ -1310,10 +1306,8 @@ static void fc_rport_recv_adisc_req(struct fc_rport_priv *rdata,
 {
 	struct fc_lport *lport = rdata->local_port;
 	struct fc_frame *fp;
-	struct fc_exch *ep = fc_seq_exch(sp);
 	struct fc_els_adisc *adisc;
 	struct fc_seq_els_data rjt_data;
-	u32 f_ctl;
 
 	FC_RPORT_DBG(rdata, "Received ADISC request\n");
 
@@ -1332,11 +1326,8 @@ static void fc_rport_recv_adisc_req(struct fc_rport_priv *rdata,
 	fc_adisc_fill(lport, fp);
 	adisc = fc_frame_payload_get(fp, sizeof(*adisc));
 	adisc->adisc_cmd = ELS_LS_ACC;
-	sp = lport->tt.seq_start_next(sp);
-	f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT;
-	fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
-		       FC_TYPE_ELS, f_ctl, 0);
-	lport->tt.seq_send(lport, sp, fp);
+	fc_fill_reply_hdr(fp, in_fp, FC_RCTL_ELS_REP, 0);
+	lport->tt.frame_send(lport, fp);
 drop:
 	fc_frame_free(in_fp);
 }
@@ -1356,13 +1347,11 @@ static void fc_rport_recv_rls_req(struct fc_rport_priv *rdata,
 {
 	struct fc_lport *lport = rdata->local_port;
 	struct fc_frame *fp;
-	struct fc_exch *ep = fc_seq_exch(sp);
 	struct fc_els_rls *rls;
 	struct fc_els_rls_resp *rsp;
 	struct fc_els_lesb *lesb;
 	struct fc_seq_els_data rjt_data;
 	struct fc_host_statistics *hst;
-	u32 f_ctl;
 
 	FC_RPORT_DBG(rdata, "Received RLS request while in state %s\n",
 		     fc_rport_state(rdata));
@@ -1399,11 +1388,8 @@ static void fc_rport_recv_rls_req(struct fc_rport_priv *rdata,
 		lesb->lesb_inv_crc = htonl(hst->invalid_crc_count);
 	}
 
-	sp = lport->tt.seq_start_next(sp);
-	f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ | FC_FC_END_SEQ;
-	fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
-		       FC_TYPE_ELS, f_ctl, 0);
-	lport->tt.seq_send(lport, sp, fp);
+	fc_fill_reply_hdr(fp, rx_fp, FC_RCTL_ELS_REP, 0);
+	lport->tt.frame_send(lport, fp);
 	goto out;
 
 out_rjt:
@@ -1549,10 +1535,9 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
 	struct fc_disc *disc;
 	struct fc_rport_priv *rdata;
 	struct fc_frame *fp = rx_fp;
-	struct fc_exch *ep;
 	struct fc_els_flogi *pl;
 	struct fc_seq_els_data rjt_data;
-	u32 sid, f_ctl;
+	u32 sid;
 
 	rjt_data.fp = NULL;
 	sid = fc_frame_sid(fp);
@@ -1632,27 +1617,21 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
 	 * Get session payload size from incoming PLOGI.
 	 */
 	rdata->maxframe_size = fc_plogi_get_maxframe(pl, lport->mfs);
-	fc_frame_free(rx_fp);
 
 	/*
 	 * Send LS_ACC.	 If this fails, the originator should retry.
 	 */
-	sp = lport->tt.seq_start_next(sp);
-	if (!sp)
-		goto out;
 	fp = fc_frame_alloc(lport, sizeof(*pl));
 	if (!fp)
 		goto out;
 
 	fc_plogi_fill(lport, fp, ELS_LS_ACC);
-	f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT;
-	ep = fc_seq_exch(sp);
-	fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
-		       FC_TYPE_ELS, f_ctl, 0);
-	lport->tt.seq_send(lport, sp, fp);
+	fc_fill_reply_hdr(fp, rx_fp, FC_RCTL_ELS_REP, 0);
+	lport->tt.frame_send(lport, fp);
 	fc_rport_enter_prli(rdata);
 out:
 	mutex_unlock(&rdata->rp_mutex);
+	fc_frame_free(rx_fp);
 	return;
 
 reject:
@@ -1673,7 +1652,6 @@ static void fc_rport_recv_prli_req(struct fc_rport_priv *rdata,
 				   struct fc_seq *sp, struct fc_frame *rx_fp)
 {
 	struct fc_lport *lport = rdata->local_port;
-	struct fc_exch *ep;
 	struct fc_frame *fp;
 	struct {
 		struct fc_els_prli prli;
@@ -1685,7 +1663,6 @@ static void fc_rport_recv_prli_req(struct fc_rport_priv *rdata,
 	unsigned int plen;
 	enum fc_els_spp_resp resp;
 	struct fc_seq_els_data rjt_data;
-	u32 f_ctl;
 	u32 fcp_parm;
 	u32 roles = FC_RPORT_ROLE_UNKNOWN;
 
@@ -1714,8 +1691,6 @@ static void fc_rport_recv_prli_req(struct fc_rport_priv *rdata,
 		rjt_data.explan = ELS_EXPL_INSUF_RES;
 		goto reject;
 	}
-	sp = lport->tt.seq_start_next(sp);
-	WARN_ON(!sp);
 	pp = fc_frame_payload_get(fp, len);
 	WARN_ON(!pp);
 	memset(pp, 0, len);
@@ -1768,12 +1743,8 @@ static void fc_rport_recv_prli_req(struct fc_rport_priv *rdata,
 	/*
 	 * Send LS_ACC.	 If this fails, the originator should retry.
 	 */
-	f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ;
-	f_ctl |= FC_FC_END_SEQ | FC_FC_SEQ_INIT;
-	ep = fc_seq_exch(sp);
-	fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
-		       FC_TYPE_ELS, f_ctl, 0);
-	lport->tt.seq_send(lport, sp, fp);
+	fc_fill_reply_hdr(fp, rx_fp, FC_RCTL_ELS_REP, 0);
+	lport->tt.frame_send(lport, fp);
 
 	switch (rdata->rp_state) {
 	case RPORT_ST_PRLI:
@@ -1817,7 +1788,6 @@ static void fc_rport_recv_prlo_req(struct fc_rport_priv *rdata,
 	struct fc_els_spp *spp;		/* response spp */
 	unsigned int len;
 	unsigned int plen;
-	u32 f_ctl;
 	struct fc_seq_els_data rjt_data;
 
 	rjt_data.fp = NULL;
@@ -1859,11 +1829,9 @@ static void fc_rport_recv_prlo_req(struct fc_rport_priv *rdata,
 
 	fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
 
-	f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ;
-	f_ctl |= FC_FC_END_SEQ | FC_FC_SEQ_INIT;
 	ep = fc_seq_exch(sp);
 	fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
-		       FC_TYPE_ELS, f_ctl, 0);
+		       FC_TYPE_ELS, FC_FCTL_RESP, 0);
 	lport->tt.seq_send(lport, sp, fp);
 	goto drop;
 
diff --git a/include/scsi/fc_encode.h b/include/scsi/fc_encode.h
index 9b4867c9c2d..6d293c846a4 100644
--- a/include/scsi/fc_encode.h
+++ b/include/scsi/fc_encode.h
@@ -21,6 +21,13 @@
 #define _FC_ENCODE_H_
 #include <asm/unaligned.h>
 
+/*
+ * F_CTL values for simple requests and responses.
+ */
+#define FC_FCTL_REQ	(FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)
+#define FC_FCTL_RESP	(FC_FC_EX_CTX | FC_FC_LAST_SEQ | \
+			FC_FC_END_SEQ | FC_FC_SEQ_INIT)
+
 struct fc_ns_rft {
 	struct fc_ns_fid fid;	/* port ID object */
 	struct fc_ns_fts fts;	/* FC4-types object */
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index f1ce793f33b..a6414ec6380 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -1027,6 +1027,10 @@ struct fc_seq *fc_elsct_send(struct fc_lport *, u32 did,
 				    void *arg, u32 timer_msec);
 void fc_lport_flogi_resp(struct fc_seq *, struct fc_frame *, void *);
 void fc_lport_logo_resp(struct fc_seq *, struct fc_frame *, void *);
+void fc_fill_reply_hdr(struct fc_frame *, const struct fc_frame *,
+		       enum fc_rctl, u32 parm_offset);
+void fc_fill_hdr(struct fc_frame *, const struct fc_frame *,
+		 enum fc_rctl, u32 f_ctl, u16 seq_cnt, u32 parm_offset);
 
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 239e81048b7dcd27448db40c845f88ac7c68424e Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Tue, 20 Jul 2010 15:21:07 -0700
Subject: [SCSI] libfc: add interface to allocate a sequence for incoming
 requests

For incoming ELS and FCP requests, we often don't require an
exchange and sequence; however, sometimes we do.  For those cases
(primarily FCP requests for targets), add a function to set up
the exchange and sequence.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_exch.c | 25 +++++++++++++++++++++++++
 include/scsi/libfc.h         |  7 +++++++
 2 files changed, 32 insertions(+)

(limited to 'include')

diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index 61eabd3ce43..027042a6de3 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -1230,6 +1230,28 @@ free:
 	fc_frame_free(rx_fp);
 }
 
+/**
+ * fc_seq_assign() - Assign exchange and sequence for incoming request
+ * @lport: The local port that received the request
+ * @fp:    The request frame
+ *
+ * Returns the sequence, also left in fr_seq(@fp), or NULL on failure.
+ */
+static struct fc_seq *fc_seq_assign(struct fc_lport *lport, struct fc_frame *fp)
+{
+	struct fc_exch_mgr_anchor *ema;
+
+	WARN_ON(lport != fr_dev(fp));
+	WARN_ON(fr_seq(fp));
+	fr_seq(fp) = NULL;
+	/* Stop at the first matching EM whose lookup succeeds (FC_RJT_NONE). */
+	list_for_each_entry(ema, &lport->ema_list, ema_list)
+		if ((!ema->match || ema->match(fp)) &&
+		    fc_seq_lookup_recip(lport, ema->mp, fp) == FC_RJT_NONE)
+			break;
+	return fr_seq(fp);
+}
+
 /**
  * fc_exch_recv_req() - Handler for an incoming request where is other
  *			end is originating the sequence
@@ -2283,6 +2305,9 @@ int fc_exch_init(struct fc_lport *lport)
 	if (!lport->tt.seq_exch_abort)
 		lport->tt.seq_exch_abort = fc_seq_exch_abort;
 
+	if (!lport->tt.seq_assign)
+		lport->tt.seq_assign = fc_seq_assign;
+
 	return 0;
 }
 EXPORT_SYMBOL(fc_exch_init);
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index a6414ec6380..605f1d7861a 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -555,6 +555,13 @@ struct libfc_function_template {
 	 */
 	struct fc_seq *(*seq_start_next)(struct fc_seq *);
 
+	/*
+	 * Assign a sequence for an incoming request frame.
+	 *
+	 * STATUS: OPTIONAL
+	 */
+	struct fc_seq *(*seq_assign)(struct fc_lport *, struct fc_frame *);
+
 	/*
 	 * Reset an exchange manager, completing all sequences and exchanges.
 	 * If s_id is non-zero, reset only exchanges originating from that FID.
-- 
cgit v1.2.3-70-g09d2


From 922611569572d3c1aa0ed6491d21583fb3fcca22 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Tue, 20 Jul 2010 15:21:12 -0700
Subject: [SCSI] libfc: don't require a local exchange for incoming requests

Incoming requests shouldn't require a local exchange if we're
just going to reply with one or two frames and don't expect
anything further.  Don't allocate exchanges for such requests
until requested by the upper-layer protocol.

The sequence is always NULL for new requests, so remove
that as an argument to request handlers.

Also change the first argument to lport->tt.seq_els_rsp_send
from the sequence pointer to the received frame pointer, to
supply the exchange IDs and destination ID info.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/fcoe/libfcoe.c   |   9 +-
 drivers/scsi/libfc/fc_disc.c  |  19 ++---
 drivers/scsi/libfc/fc_exch.c  | 188 ++++++++++++++++++++++--------------------
 drivers/scsi/libfc/fc_lport.c |  58 +++++--------
 drivers/scsi/libfc/fc_rport.c | 112 +++++++++----------------
 include/scsi/libfc.h          |  16 ++--
 6 files changed, 174 insertions(+), 228 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c
index 4de8ced1fee..2c265fe9ab3 100644
--- a/drivers/scsi/fcoe/libfcoe.c
+++ b/drivers/scsi/fcoe/libfcoe.c
@@ -2341,20 +2341,19 @@ drop:
 
 /**
  * fcoe_ctlr_disc_recv - discovery receive handler for VN2VN mode.
- * @fip: The FCoE controller
+ * @lport: The local port
+ * @fp: The received frame
  *
  * This should never be called since we don't see RSCNs or other
  * fabric-generated ELSes.
  */
-static void fcoe_ctlr_disc_recv(struct fc_seq *seq, struct fc_frame *fp,
-				struct fc_lport *lport)
+static void fcoe_ctlr_disc_recv(struct fc_lport *lport, struct fc_frame *fp)
 {
 	struct fc_seq_els_data rjt_data;
 
-	rjt_data.fp = NULL;
 	rjt_data.reason = ELS_RJT_UNSUP;
 	rjt_data.explan = ELS_EXPL_NONE;
-	lport->tt.seq_els_rsp_send(seq, ELS_LS_RJT, &rjt_data);
+	lport->tt.seq_els_rsp_send(fp, ELS_LS_RJT, &rjt_data);
 	fc_frame_free(fp);
 }
 
diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c
index 04474556f2d..32f67c4b03f 100644
--- a/drivers/scsi/libfc/fc_disc.c
+++ b/drivers/scsi/libfc/fc_disc.c
@@ -75,15 +75,13 @@ void fc_disc_stop_rports(struct fc_disc *disc)
 
 /**
  * fc_disc_recv_rscn_req() - Handle Registered State Change Notification (RSCN)
- * @sp:	   The sequence of the RSCN exchange
+ * @disc:  The discovery object to which the RSCN applies
  * @fp:	   The RSCN frame
- * @lport: The local port that the request will be sent on
  *
  * Locking Note: This function expects that the disc_mutex is locked
  *		 before it is called.
  */
-static void fc_disc_recv_rscn_req(struct fc_seq *sp, struct fc_frame *fp,
-				  struct fc_disc *disc)
+static void fc_disc_recv_rscn_req(struct fc_disc *disc, struct fc_frame *fp)
 {
 	struct fc_lport *lport;
 	struct fc_els_rscn *rp;
@@ -151,7 +149,7 @@ static void fc_disc_recv_rscn_req(struct fc_seq *sp, struct fc_frame *fp,
 			break;
 		}
 	}
-	lport->tt.seq_els_rsp_send(sp, ELS_LS_ACC, NULL);
+	lport->tt.seq_els_rsp_send(fp, ELS_LS_ACC, NULL);
 
 	/*
 	 * If not doing a complete rediscovery, do GPN_ID on
@@ -177,25 +175,22 @@ static void fc_disc_recv_rscn_req(struct fc_seq *sp, struct fc_frame *fp,
 	return;
 reject:
 	FC_DISC_DBG(disc, "Received a bad RSCN frame\n");
-	rjt_data.fp = NULL;
 	rjt_data.reason = ELS_RJT_LOGIC;
 	rjt_data.explan = ELS_EXPL_NONE;
-	lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+	lport->tt.seq_els_rsp_send(fp, ELS_LS_RJT, &rjt_data);
 	fc_frame_free(fp);
 }
 
 /**
  * fc_disc_recv_req() - Handle incoming requests
- * @sp:	   The sequence of the request exchange
- * @fp:	   The request frame
  * @lport: The local port receiving the request
+ * @fp:	   The request frame
  *
  * Locking Note: This function is called from the EM and will lock
  *		 the disc_mutex before calling the handler for the
  *		 request.
  */
-static void fc_disc_recv_req(struct fc_seq *sp, struct fc_frame *fp,
-			     struct fc_lport *lport)
+static void fc_disc_recv_req(struct fc_lport *lport, struct fc_frame *fp)
 {
 	u8 op;
 	struct fc_disc *disc = &lport->disc;
@@ -204,7 +199,7 @@ static void fc_disc_recv_req(struct fc_seq *sp, struct fc_frame *fp,
 	switch (op) {
 	case ELS_RSCN:
 		mutex_lock(&disc->disc_mutex);
-		fc_disc_recv_rscn_req(sp, fp, disc);
+		fc_disc_recv_rscn_req(disc, fp);
 		mutex_unlock(&disc->disc_mutex);
 		break;
 	default:
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index 027042a6de3..b8560ad8cf6 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -129,11 +129,11 @@ struct fc_exch_mgr_anchor {
 };
 
 static void fc_exch_rrq(struct fc_exch *);
-static void fc_seq_ls_acc(struct fc_seq *);
-static void fc_seq_ls_rjt(struct fc_seq *, enum fc_els_rjt_reason,
+static void fc_seq_ls_acc(struct fc_frame *);
+static void fc_seq_ls_rjt(struct fc_frame *, enum fc_els_rjt_reason,
 			  enum fc_els_rjt_explan);
-static void fc_exch_els_rec(struct fc_seq *, struct fc_frame *);
-static void fc_exch_els_rrq(struct fc_seq *, struct fc_frame *);
+static void fc_exch_els_rec(struct fc_frame *);
+static void fc_exch_els_rrq(struct fc_frame *);
 
 /*
  * Internal implementation notes.
@@ -1003,28 +1003,30 @@ static void fc_exch_set_addr(struct fc_exch *ep,
 /**
  * fc_seq_els_rsp_send() - Send an ELS response using infomation from
  *			   the existing sequence/exchange.
- * @sp:	      The sequence/exchange to get information from
+ * @fp:	      The received frame
  * @els_cmd:  The ELS command to be sent
  * @els_data: The ELS data to be sent
+ *
+ * The received frame is not freed.
  */
-static void fc_seq_els_rsp_send(struct fc_seq *sp, enum fc_els_cmd els_cmd,
+static void fc_seq_els_rsp_send(struct fc_frame *fp, enum fc_els_cmd els_cmd,
 				struct fc_seq_els_data *els_data)
 {
 	switch (els_cmd) {
 	case ELS_LS_RJT:
-		fc_seq_ls_rjt(sp, els_data->reason, els_data->explan);
+		fc_seq_ls_rjt(fp, els_data->reason, els_data->explan);
 		break;
 	case ELS_LS_ACC:
-		fc_seq_ls_acc(sp);
+		fc_seq_ls_acc(fp);
 		break;
 	case ELS_RRQ:
-		fc_exch_els_rrq(sp, els_data->fp);
+		fc_exch_els_rrq(fp);
 		break;
 	case ELS_REC:
-		fc_exch_els_rec(sp, els_data->fp);
+		fc_exch_els_rec(fp);
 		break;
 	default:
-		FC_EXCH_DBG(fc_seq_exch(sp), "Invalid ELS CMD:%x\n", els_cmd);
+		FC_LPORT_DBG(fr_dev(fp), "Invalid ELS CMD:%x\n", els_cmd);
 	}
 }
 
@@ -1253,11 +1255,13 @@ static struct fc_seq *fc_seq_assign(struct fc_lport *lport, struct fc_frame *fp)
 }
 
 /**
- * fc_exch_recv_req() - Handler for an incoming request where is other
- *			end is originating the sequence
+ * fc_exch_recv_req() - Handler for an incoming request
  * @lport: The local port that received the request
  * @mp:	   The EM that the exchange is on
  * @fp:	   The request frame
+ *
+ * This is used when the other end is originating the exchange
+ * and the sequence.
  */
 static void fc_exch_recv_req(struct fc_lport *lport, struct fc_exch_mgr *mp,
 			     struct fc_frame *fp)
@@ -1275,8 +1279,17 @@ static void fc_exch_recv_req(struct fc_lport *lport, struct fc_exch_mgr *mp,
 		fc_frame_free(fp);
 		return;
 	}
+	fr_dev(fp) = lport;
+
+	BUG_ON(fr_seq(fp));		/* XXX remove later */
+
+	/*
+	 * If the RX_ID is 0xffff, don't allocate an exchange.
+	 * The upper-level protocol may request one later, if needed.
+	 */
+	if (fh->fh_rx_id == htons(FC_XID_UNKNOWN))
+		return lport->tt.lport_recv(lport, fp);
 
-	fr_seq(fp) = NULL;
 	reject = fc_seq_lookup_recip(lport, mp, fp);
 	if (reject == FC_RJT_NONE) {
 		sp = fr_seq(fp);	/* sequence will be held */
@@ -1298,7 +1311,7 @@ static void fc_exch_recv_req(struct fc_lport *lport, struct fc_exch_mgr *mp,
 		if (ep->resp)
 			ep->resp(sp, fp, ep->arg);
 		else
-			lport->tt.lport_recv(lport, sp, fp);
+			lport->tt.lport_recv(lport, fp);
 		fc_exch_release(ep);	/* release from lookup */
 	} else {
 		FC_LPORT_DBG(lport, "exch/seq lookup failed: reject %x\n",
@@ -1566,53 +1579,55 @@ static void fc_exch_recv_bls(struct fc_exch_mgr *mp, struct fc_frame *fp)
 
 /**
  * fc_seq_ls_acc() - Accept sequence with LS_ACC
- * @req_sp: The request sequence
+ * @rx_fp: The received frame, not freed here.
  *
  * If this fails due to allocation or transmit congestion, assume the
  * originator will repeat the sequence.
  */
-static void fc_seq_ls_acc(struct fc_seq *req_sp)
+static void fc_seq_ls_acc(struct fc_frame *rx_fp)
 {
-	struct fc_seq *sp;
+	struct fc_lport *lport;
 	struct fc_els_ls_acc *acc;
 	struct fc_frame *fp;
 
-	sp = fc_seq_start_next(req_sp);
-	fp = fc_frame_alloc(fc_seq_exch(sp)->lp, sizeof(*acc));
-	if (fp) {
-		acc = fc_frame_payload_get(fp, sizeof(*acc));
-		memset(acc, 0, sizeof(*acc));
-		acc->la_cmd = ELS_LS_ACC;
-		fc_seq_send_last(sp, fp, FC_RCTL_ELS_REP, FC_TYPE_ELS);
-	}
+	lport = fr_dev(rx_fp);
+	fp = fc_frame_alloc(lport, sizeof(*acc));
+	if (!fp)
+		return;
+	acc = fc_frame_payload_get(fp, sizeof(*acc));
+	memset(acc, 0, sizeof(*acc));
+	acc->la_cmd = ELS_LS_ACC;
+	fc_fill_reply_hdr(fp, rx_fp, FC_RCTL_ELS_REP, 0);
+	lport->tt.frame_send(lport, fp);
 }
 
 /**
  * fc_seq_ls_rjt() - Reject a sequence with ELS LS_RJT
- * @req_sp: The request sequence
+ * @rx_fp: The received frame, not freed here.
  * @reason: The reason the sequence is being rejected
- * @explan: The explaination for the rejection
+ * @explan: The explanation for the rejection
  *
  * If this fails due to allocation or transmit congestion, assume the
  * originator will repeat the sequence.
  */
-static void fc_seq_ls_rjt(struct fc_seq *req_sp, enum fc_els_rjt_reason reason,
+static void fc_seq_ls_rjt(struct fc_frame *rx_fp, enum fc_els_rjt_reason reason,
 			  enum fc_els_rjt_explan explan)
 {
-	struct fc_seq *sp;
+	struct fc_lport *lport;
 	struct fc_els_ls_rjt *rjt;
 	struct fc_frame *fp;
 
-	sp = fc_seq_start_next(req_sp);
-	fp = fc_frame_alloc(fc_seq_exch(sp)->lp, sizeof(*rjt));
-	if (fp) {
-		rjt = fc_frame_payload_get(fp, sizeof(*rjt));
-		memset(rjt, 0, sizeof(*rjt));
-		rjt->er_cmd = ELS_LS_RJT;
-		rjt->er_reason = reason;
-		rjt->er_explan = explan;
-		fc_seq_send_last(sp, fp, FC_RCTL_ELS_REP, FC_TYPE_ELS);
-	}
+	lport = fr_dev(rx_fp);
+	fp = fc_frame_alloc(lport, sizeof(*rjt));
+	if (!fp)
+		return;
+	rjt = fc_frame_payload_get(fp, sizeof(*rjt));
+	memset(rjt, 0, sizeof(*rjt));
+	rjt->er_cmd = ELS_LS_RJT;
+	rjt->er_reason = reason;
+	rjt->er_explan = explan;
+	fc_fill_reply_hdr(fp, rx_fp, FC_RCTL_ELS_REP, 0);
+	lport->tt.frame_send(lport, fp);
 }
 
 /**
@@ -1714,18 +1729,34 @@ void fc_exch_mgr_reset(struct fc_lport *lport, u32 sid, u32 did)
 }
 EXPORT_SYMBOL(fc_exch_mgr_reset);
 
+/**
+ * fc_exch_lookup() - find an exchange
+ * @lport: The local port
+ * @xid: The exchange ID
+ *
+ * Returns exchange pointer with hold for caller, or NULL if not found.
+ */
+static struct fc_exch *fc_exch_lookup(struct fc_lport *lport, u32 xid)
+{
+	struct fc_exch_mgr_anchor *ema;
+
+	list_for_each_entry(ema, &lport->ema_list, ema_list)
+		if (ema->mp->min_xid <= xid && xid <= ema->mp->max_xid)
+			return fc_exch_find(ema->mp, xid);
+	return NULL;
+}
+
 /**
  * fc_exch_els_rec() - Handler for ELS REC (Read Exchange Concise) requests
- * @sp:	 The sequence the REC is on
- * @rfp: The REC frame
+ * @rfp: The REC frame, not freed here.
  *
  * Note that the requesting port may be different than the S_ID in the request.
  */
-static void fc_exch_els_rec(struct fc_seq *sp, struct fc_frame *rfp)
+static void fc_exch_els_rec(struct fc_frame *rfp)
 {
+	struct fc_lport *lport;
 	struct fc_frame *fp;
 	struct fc_exch *ep;
-	struct fc_exch_mgr *em;
 	struct fc_els_rec *rp;
 	struct fc_els_rec_acc *acc;
 	enum fc_els_rjt_reason reason = ELS_RJT_LOGIC;
@@ -1734,6 +1765,7 @@ static void fc_exch_els_rec(struct fc_seq *sp, struct fc_frame *rfp)
 	u16 rxid;
 	u16 oxid;
 
+	lport = fr_dev(rfp);
 	rp = fc_frame_payload_get(rfp, sizeof(*rp));
 	explan = ELS_EXPL_INV_LEN;
 	if (!rp)
@@ -1742,35 +1774,19 @@ static void fc_exch_els_rec(struct fc_seq *sp, struct fc_frame *rfp)
 	rxid = ntohs(rp->rec_rx_id);
 	oxid = ntohs(rp->rec_ox_id);
 
-	/*
-	 * Currently it's hard to find the local S_ID from the exchange
-	 * manager.  This will eventually be fixed, but for now it's easier
-	 * to lookup the subject exchange twice, once as if we were
-	 * the initiator, and then again if we weren't.
-	 */
-	em = fc_seq_exch(sp)->em;
-	ep = fc_exch_find(em, oxid);
+	ep = fc_exch_lookup(lport,
+			    sid == fc_host_port_id(lport->host) ? oxid : rxid);
 	explan = ELS_EXPL_OXID_RXID;
-	if (ep && ep->oid == sid) {
-		if (ep->rxid != FC_XID_UNKNOWN &&
-		    rxid != FC_XID_UNKNOWN &&
-		    ep->rxid != rxid)
-			goto rel;
-	} else {
-		if (ep)
-			fc_exch_release(ep);
-		ep = NULL;
-		if (rxid != FC_XID_UNKNOWN)
-			ep = fc_exch_find(em, rxid);
-		if (!ep)
-			goto reject;
-	}
-
-	fp = fc_frame_alloc(fc_seq_exch(sp)->lp, sizeof(*acc));
-	if (!fp) {
-		fc_exch_done(sp);
+	if (!ep)
+		goto reject;
+	if (ep->oid != sid || oxid != ep->oxid)
+		goto rel;
+	if (rxid != FC_XID_UNKNOWN && rxid != ep->rxid)
+		goto rel;
+	fp = fc_frame_alloc(lport, sizeof(*acc));
+	if (!fp)
 		goto out;
-	}
+
 	acc = fc_frame_payload_get(fp, sizeof(*acc));
 	memset(acc, 0, sizeof(*acc));
 	acc->reca_cmd = ELS_LS_ACC;
@@ -1785,18 +1801,16 @@ static void fc_exch_els_rec(struct fc_seq *sp, struct fc_frame *rfp)
 	acc->reca_e_stat = htonl(ep->esb_stat & (ESB_ST_RESP |
 						 ESB_ST_SEQ_INIT |
 						 ESB_ST_COMPLETE));
-	sp = fc_seq_start_next(sp);
-	fc_seq_send_last(sp, fp, FC_RCTL_ELS_REP, FC_TYPE_ELS);
+	fc_fill_reply_hdr(fp, rfp, FC_RCTL_ELS_REP, 0);
+	lport->tt.frame_send(lport, fp);
 out:
 	fc_exch_release(ep);
-	fc_frame_free(rfp);
 	return;
 
 rel:
 	fc_exch_release(ep);
 reject:
-	fc_seq_ls_rjt(sp, reason, explan);
-	fc_frame_free(rfp);
+	fc_seq_ls_rjt(rfp, reason, explan);
 }
 
 /**
@@ -1971,20 +1985,20 @@ retry:
 	spin_unlock_bh(&ep->ex_lock);
 }
 
-
 /**
  * fc_exch_els_rrq() - Handler for ELS RRQ (Reset Recovery Qualifier) requests
- * @sp: The sequence that the RRQ is on
- * @fp: The RRQ frame
+ * @fp: The RRQ frame, not freed here.
  */
-static void fc_exch_els_rrq(struct fc_seq *sp, struct fc_frame *fp)
+static void fc_exch_els_rrq(struct fc_frame *fp)
 {
+	struct fc_lport *lport;
 	struct fc_exch *ep = NULL;	/* request or subject exchange */
 	struct fc_els_rrq *rp;
 	u32 sid;
 	u16 xid;
 	enum fc_els_rjt_explan explan;
 
+	lport = fr_dev(fp);
 	rp = fc_frame_payload_get(fp, sizeof(*rp));
 	explan = ELS_EXPL_INV_LEN;
 	if (!rp)
@@ -1993,11 +2007,10 @@ static void fc_exch_els_rrq(struct fc_seq *sp, struct fc_frame *fp)
 	/*
 	 * lookup subject exchange.
 	 */
-	ep = fc_seq_exch(sp);
 	sid = ntoh24(rp->rrq_s_id);		/* subject source */
-	xid = ep->did == sid ? ntohs(rp->rrq_ox_id) : ntohs(rp->rrq_rx_id);
-	ep = fc_exch_find(ep->em, xid);
-
+	xid = fc_host_port_id(lport->host) == sid ?
+			ntohs(rp->rrq_ox_id) : ntohs(rp->rrq_rx_id);
+	ep = fc_exch_lookup(lport, xid);
 	explan = ELS_EXPL_OXID_RXID;
 	if (!ep)
 		goto reject;
@@ -2028,15 +2041,14 @@ static void fc_exch_els_rrq(struct fc_seq *sp, struct fc_frame *fp)
 	/*
 	 * Send LS_ACC.
 	 */
-	fc_seq_ls_acc(sp);
+	fc_seq_ls_acc(fp);
 	goto out;
 
 unlock_reject:
 	spin_unlock_bh(&ep->ex_lock);
 reject:
-	fc_seq_ls_rjt(sp, ELS_RJT_LOGIC, explan);
+	fc_seq_ls_rjt(fp, ELS_RJT_LOGIC, explan);
 out:
-	fc_frame_free(fp);
 	if (ep)
 		fc_exch_release(ep);	/* drop hold from fc_exch_find */
 }
@@ -2267,7 +2279,7 @@ void fc_exch_recv(struct fc_lport *lport, struct fc_frame *fp)
 			fc_exch_recv_seq_resp(ema->mp, fp);
 		else if (f_ctl & FC_FC_SEQ_CTX)
 			fc_exch_recv_resp(ema->mp, fp);
-		else
+		else	/* no EX_CTX and no SEQ_CTX */
 			fc_exch_recv_req(lport, ema->mp, fp);
 		break;
 	default:
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index e50a6606d4b..1998c03634d 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -375,34 +375,31 @@ static void fc_lport_add_fc4_type(struct fc_lport *lport, enum fc_fh_type type)
 
 /**
  * fc_lport_recv_rlir_req() - Handle received Registered Link Incident Report.
- * @sp:	   The sequence in the RLIR exchange
- * @fp:	   The RLIR request frame
  * @lport: Fibre Channel local port recieving the RLIR
+ * @fp:	   The RLIR request frame
  *
  * Locking Note: The lport lock is expected to be held before calling
  * this function.
  */
-static void fc_lport_recv_rlir_req(struct fc_seq *sp, struct fc_frame *fp,
-				   struct fc_lport *lport)
+static void fc_lport_recv_rlir_req(struct fc_lport *lport, struct fc_frame *fp)
 {
 	FC_LPORT_DBG(lport, "Received RLIR request while in state %s\n",
 		     fc_lport_state(lport));
 
-	lport->tt.seq_els_rsp_send(sp, ELS_LS_ACC, NULL);
+	lport->tt.seq_els_rsp_send(fp, ELS_LS_ACC, NULL);
 	fc_frame_free(fp);
 }
 
 /**
  * fc_lport_recv_echo_req() - Handle received ECHO request
- * @sp:	   The sequence in the ECHO exchange
- * @fp:	   ECHO request frame
  * @lport: The local port recieving the ECHO
+ * @fp:	   ECHO request frame
  *
  * Locking Note: The lport lock is expected to be held before calling
  * this function.
  */
-static void fc_lport_recv_echo_req(struct fc_seq *sp, struct fc_frame *in_fp,
-				   struct fc_lport *lport)
+static void fc_lport_recv_echo_req(struct fc_lport *lport,
+				   struct fc_frame *in_fp)
 {
 	struct fc_frame *fp;
 	unsigned int len;
@@ -431,15 +428,14 @@ static void fc_lport_recv_echo_req(struct fc_seq *sp, struct fc_frame *in_fp,
 
 /**
  * fc_lport_recv_rnid_req() - Handle received Request Node ID data request
- * @sp:	   The sequence in the RNID exchange
- * @fp:	   The RNID request frame
  * @lport: The local port recieving the RNID
+ * @fp:	   The RNID request frame
  *
  * Locking Note: The lport lock is expected to be held before calling
  * this function.
  */
-static void fc_lport_recv_rnid_req(struct fc_seq *sp, struct fc_frame *in_fp,
-				   struct fc_lport *lport)
+static void fc_lport_recv_rnid_req(struct fc_lport *lport,
+				   struct fc_frame *in_fp)
 {
 	struct fc_frame *fp;
 	struct fc_els_rnid *req;
@@ -457,10 +453,9 @@ static void fc_lport_recv_rnid_req(struct fc_seq *sp, struct fc_frame *in_fp,
 
 	req = fc_frame_payload_get(in_fp, sizeof(*req));
 	if (!req) {
-		rjt_data.fp = NULL;
 		rjt_data.reason = ELS_RJT_LOGIC;
 		rjt_data.explan = ELS_EXPL_NONE;
-		lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+		lport->tt.seq_els_rsp_send(in_fp, ELS_LS_RJT, &rjt_data);
 	} else {
 		fmt = req->rnid_fmt;
 		len = sizeof(*rp);
@@ -492,17 +487,15 @@ static void fc_lport_recv_rnid_req(struct fc_seq *sp, struct fc_frame *in_fp,
 
 /**
  * fc_lport_recv_logo_req() - Handle received fabric LOGO request
- * @sp:	   The sequence in the LOGO exchange
- * @fp:	   The LOGO request frame
  * @lport: The local port recieving the LOGO
+ * @fp:	   The LOGO request frame
  *
  * Locking Note: The lport lock is exected to be held before calling
  * this function.
  */
-static void fc_lport_recv_logo_req(struct fc_seq *sp, struct fc_frame *fp,
-				   struct fc_lport *lport)
+static void fc_lport_recv_logo_req(struct fc_lport *lport, struct fc_frame *fp)
 {
-	lport->tt.seq_els_rsp_send(sp, ELS_LS_ACC, NULL);
+	lport->tt.seq_els_rsp_send(fp, ELS_LS_ACC, NULL);
 	fc_lport_enter_reset(lport);
 	fc_frame_free(fp);
 }
@@ -773,9 +766,8 @@ EXPORT_SYMBOL(fc_lport_set_local_id);
 
 /**
  * fc_lport_recv_flogi_req() - Receive a FLOGI request
- * @sp_in: The sequence the FLOGI is on
- * @rx_fp: The FLOGI frame
  * @lport: The local port that recieved the request
+ * @rx_fp: The FLOGI frame
  *
  * A received FLOGI request indicates a point-to-point connection.
  * Accept it with the common service parameters indicating our N port.
@@ -784,13 +776,11 @@ EXPORT_SYMBOL(fc_lport_set_local_id);
  * Locking Note: The lport lock is expected to be held before calling
  * this function.
  */
-static void fc_lport_recv_flogi_req(struct fc_seq *sp_in,
-				    struct fc_frame *rx_fp,
-				    struct fc_lport *lport)
+static void fc_lport_recv_flogi_req(struct fc_lport *lport,
+				    struct fc_frame *rx_fp)
 {
 	struct fc_frame *fp;
 	struct fc_frame_header *fh;
-	struct fc_seq *sp;
 	struct fc_els_flogi *flp;
 	struct fc_els_flogi *new_flp;
 	u64 remote_wwpn;
@@ -850,16 +840,13 @@ static void fc_lport_recv_flogi_req(struct fc_seq *sp_in,
 	}
 	fc_lport_ptp_setup(lport, remote_fid, remote_wwpn,
 			   get_unaligned_be64(&flp->fl_wwnn));
-
 out:
-	sp = fr_seq(rx_fp);
 	fc_frame_free(rx_fp);
 }
 
 /**
  * fc_lport_recv_req() - The generic lport request handler
  * @lport: The local port that received the request
- * @sp:	   The sequence the request is on
  * @fp:	   The request frame
  *
  * This function will see if the lport handles the request or
@@ -868,11 +855,10 @@ out:
  * Locking Note: This function should not be called with the lport
  *		 lock held becuase it will grab the lock.
  */
-static void fc_lport_recv_req(struct fc_lport *lport, struct fc_seq *sp,
-			      struct fc_frame *fp)
+static void fc_lport_recv_req(struct fc_lport *lport, struct fc_frame *fp)
 {
 	struct fc_frame_header *fh = fc_frame_header_get(fp);
-	void (*recv) (struct fc_seq *, struct fc_frame *, struct fc_lport *);
+	void (*recv)(struct fc_lport *, struct fc_frame *);
 
 	mutex_lock(&lport->lp_mutex);
 
@@ -912,19 +898,13 @@ static void fc_lport_recv_req(struct fc_lport *lport, struct fc_seq *sp,
 			break;
 		}
 
-		recv(sp, fp, lport);
+		recv(lport, fp);
 	} else {
 		FC_LPORT_DBG(lport, "dropping invalid frame (eof %x)\n",
 			     fr_eof(fp));
 		fc_frame_free(fp);
 	}
 	mutex_unlock(&lport->lp_mutex);
-
-	/*
-	 *  The common exch_done for all request may not be good
-	 *  if any request requires longer hold on exhange. XXX
-	 */
-	lport->tt.exch_done(sp);
 }
 
 /**
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 59879512321..25479cc7f17 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -68,14 +68,10 @@ static void fc_rport_enter_ready(struct fc_rport_priv *);
 static void fc_rport_enter_logo(struct fc_rport_priv *);
 static void fc_rport_enter_adisc(struct fc_rport_priv *);
 
-static void fc_rport_recv_plogi_req(struct fc_lport *,
-				    struct fc_seq *, struct fc_frame *);
-static void fc_rport_recv_prli_req(struct fc_rport_priv *,
-				   struct fc_seq *, struct fc_frame *);
-static void fc_rport_recv_prlo_req(struct fc_rport_priv *,
-				   struct fc_seq *, struct fc_frame *);
-static void fc_rport_recv_logo_req(struct fc_lport *,
-				   struct fc_seq *, struct fc_frame *);
+static void fc_rport_recv_plogi_req(struct fc_lport *, struct fc_frame *);
+static void fc_rport_recv_prli_req(struct fc_rport_priv *, struct fc_frame *);
+static void fc_rport_recv_prlo_req(struct fc_rport_priv *, struct fc_frame *);
+static void fc_rport_recv_logo_req(struct fc_lport *, struct fc_frame *);
 static void fc_rport_timeout(struct work_struct *);
 static void fc_rport_error(struct fc_rport_priv *, struct fc_frame *);
 static void fc_rport_error_retry(struct fc_rport_priv *, struct fc_frame *);
@@ -736,11 +732,10 @@ static void fc_rport_enter_flogi(struct fc_rport_priv *rdata)
 /**
  * fc_rport_recv_flogi_req() - Handle Fabric Login (FLOGI) request in p-mp mode
  * @lport: The local port that received the PLOGI request
- * @sp:	   The sequence that the PLOGI request was on
  * @rx_fp: The PLOGI request frame
  */
 static void fc_rport_recv_flogi_req(struct fc_lport *lport,
-				    struct fc_seq *sp, struct fc_frame *rx_fp)
+				    struct fc_frame *rx_fp)
 {
 	struct fc_disc *disc;
 	struct fc_els_flogi *flp;
@@ -749,7 +744,6 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
 	struct fc_seq_els_data rjt_data;
 	u32 sid;
 
-	rjt_data.fp = NULL;
 	sid = fc_frame_sid(fp);
 
 	FC_RPORT_ID_DBG(lport, sid, "Received FLOGI request\n");
@@ -817,7 +811,6 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
 	if (!fp)
 		goto out;
 
-	sp = lport->tt.seq_start_next(sp);
 	fc_flogi_fill(lport, fp);
 	flp = fc_frame_payload_get(fp, sizeof(*flp));
 	flp->fl_cmd = ELS_LS_ACC;
@@ -837,7 +830,7 @@ out:
 
 reject:
 	mutex_unlock(&disc->disc_mutex);
-	lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+	lport->tt.seq_els_rsp_send(rx_fp, ELS_LS_RJT, &rjt_data);
 	fc_frame_free(rx_fp);
 }
 
@@ -1296,13 +1289,12 @@ static void fc_rport_enter_adisc(struct fc_rport_priv *rdata)
 /**
  * fc_rport_recv_adisc_req() - Handler for Address Discovery (ADISC) requests
  * @rdata: The remote port that sent the ADISC request
- * @sp:	   The sequence the ADISC request was on
  * @in_fp: The ADISC request frame
  *
  * Locking Note:  Called with the lport and rport locks held.
  */
 static void fc_rport_recv_adisc_req(struct fc_rport_priv *rdata,
-				    struct fc_seq *sp, struct fc_frame *in_fp)
+				    struct fc_frame *in_fp)
 {
 	struct fc_lport *lport = rdata->local_port;
 	struct fc_frame *fp;
@@ -1313,10 +1305,9 @@ static void fc_rport_recv_adisc_req(struct fc_rport_priv *rdata,
 
 	adisc = fc_frame_payload_get(in_fp, sizeof(*adisc));
 	if (!adisc) {
-		rjt_data.fp = NULL;
 		rjt_data.reason = ELS_RJT_PROT;
 		rjt_data.explan = ELS_EXPL_INV_LEN;
-		lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+		lport->tt.seq_els_rsp_send(in_fp, ELS_LS_RJT, &rjt_data);
 		goto drop;
 	}
 
@@ -1335,14 +1326,13 @@ drop:
 /**
  * fc_rport_recv_rls_req() - Handle received Read Link Status request
  * @rdata: The remote port that sent the RLS request
- * @sp:	The sequence that the RLS was on
  * @rx_fp: The PRLI request frame
  *
  * Locking Note: The rport lock is expected to be held before calling
  * this function.
  */
 static void fc_rport_recv_rls_req(struct fc_rport_priv *rdata,
-				  struct fc_seq *sp, struct fc_frame *rx_fp)
+				  struct fc_frame *rx_fp)
 
 {
 	struct fc_lport *lport = rdata->local_port;
@@ -1393,8 +1383,7 @@ static void fc_rport_recv_rls_req(struct fc_rport_priv *rdata,
 	goto out;
 
 out_rjt:
-	rjt_data.fp = NULL;
-	lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+	lport->tt.seq_els_rsp_send(rx_fp, ELS_LS_RJT, &rjt_data);
 out:
 	fc_frame_free(rx_fp);
 }
@@ -1402,7 +1391,6 @@ out:
 /**
  * fc_rport_recv_els_req() - Handler for validated ELS requests
  * @lport: The local port that received the ELS request
- * @sp:	   The sequence that the ELS request was on
  * @fp:	   The ELS request frame
  *
  * Handle incoming ELS requests that require port login.
@@ -1410,16 +1398,11 @@ out:
  *
  * Locking Note: Called with the lport lock held.
  */
-static void fc_rport_recv_els_req(struct fc_lport *lport,
-				  struct fc_seq *sp, struct fc_frame *fp)
+static void fc_rport_recv_els_req(struct fc_lport *lport, struct fc_frame *fp)
 {
 	struct fc_rport_priv *rdata;
 	struct fc_seq_els_data els_data;
 
-	els_data.fp = NULL;
-	els_data.reason = ELS_RJT_UNAB;
-	els_data.explan = ELS_EXPL_PLOGI_REQD;
-
 	mutex_lock(&lport->disc.disc_mutex);
 	rdata = lport->tt.rport_lookup(lport, fc_frame_sid(fp));
 	if (!rdata) {
@@ -1442,24 +1425,24 @@ static void fc_rport_recv_els_req(struct fc_lport *lport,
 
 	switch (fc_frame_payload_op(fp)) {
 	case ELS_PRLI:
-		fc_rport_recv_prli_req(rdata, sp, fp);
+		fc_rport_recv_prli_req(rdata, fp);
 		break;
 	case ELS_PRLO:
-		fc_rport_recv_prlo_req(rdata, sp, fp);
+		fc_rport_recv_prlo_req(rdata, fp);
 		break;
 	case ELS_ADISC:
-		fc_rport_recv_adisc_req(rdata, sp, fp);
+		fc_rport_recv_adisc_req(rdata, fp);
 		break;
 	case ELS_RRQ:
-		els_data.fp = fp;
-		lport->tt.seq_els_rsp_send(sp, ELS_RRQ, &els_data);
+		lport->tt.seq_els_rsp_send(fp, ELS_RRQ, NULL);
+		fc_frame_free(fp);
 		break;
 	case ELS_REC:
-		els_data.fp = fp;
-		lport->tt.seq_els_rsp_send(sp, ELS_REC, &els_data);
+		lport->tt.seq_els_rsp_send(fp, ELS_REC, NULL);
+		fc_frame_free(fp);
 		break;
 	case ELS_RLS:
-		fc_rport_recv_rls_req(rdata, sp, fp);
+		fc_rport_recv_rls_req(rdata, fp);
 		break;
 	default:
 		fc_frame_free(fp);	/* can't happen */
@@ -1470,20 +1453,20 @@ static void fc_rport_recv_els_req(struct fc_lport *lport,
 	return;
 
 reject:
-	lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &els_data);
+	els_data.reason = ELS_RJT_UNAB;
+	els_data.explan = ELS_EXPL_PLOGI_REQD;
+	lport->tt.seq_els_rsp_send(fp, ELS_LS_RJT, &els_data);
 	fc_frame_free(fp);
 }
 
 /**
  * fc_rport_recv_req() - Handler for requests
- * @sp:	   The sequence the request was on
- * @fp:	   The request frame
  * @lport: The local port that received the request
+ * @fp:	   The request frame
  *
  * Locking Note: Called with the lport lock held.
  */
-void fc_rport_recv_req(struct fc_seq *sp, struct fc_frame *fp,
-		       struct fc_lport *lport)
+void fc_rport_recv_req(struct fc_lport *lport, struct fc_frame *fp)
 {
 	struct fc_seq_els_data els_data;
 
@@ -1495,13 +1478,13 @@ void fc_rport_recv_req(struct fc_seq *sp, struct fc_frame *fp,
 	 */
 	switch (fc_frame_payload_op(fp)) {
 	case ELS_FLOGI:
-		fc_rport_recv_flogi_req(lport, sp, fp);
+		fc_rport_recv_flogi_req(lport, fp);
 		break;
 	case ELS_PLOGI:
-		fc_rport_recv_plogi_req(lport, sp, fp);
+		fc_rport_recv_plogi_req(lport, fp);
 		break;
 	case ELS_LOGO:
-		fc_rport_recv_logo_req(lport, sp, fp);
+		fc_rport_recv_logo_req(lport, fp);
 		break;
 	case ELS_PRLI:
 	case ELS_PRLO:
@@ -1509,14 +1492,13 @@ void fc_rport_recv_req(struct fc_seq *sp, struct fc_frame *fp,
 	case ELS_RRQ:
 	case ELS_REC:
 	case ELS_RLS:
-		fc_rport_recv_els_req(lport, sp, fp);
+		fc_rport_recv_els_req(lport, fp);
 		break;
 	default:
-		fc_frame_free(fp);
-		els_data.fp = NULL;
 		els_data.reason = ELS_RJT_UNSUP;
 		els_data.explan = ELS_EXPL_NONE;
-		lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &els_data);
+		lport->tt.seq_els_rsp_send(fp, ELS_LS_RJT, &els_data);
+		fc_frame_free(fp);
 		break;
 	}
 }
@@ -1524,13 +1506,12 @@ void fc_rport_recv_req(struct fc_seq *sp, struct fc_frame *fp,
 /**
  * fc_rport_recv_plogi_req() - Handler for Port Login (PLOGI) requests
  * @lport: The local port that received the PLOGI request
- * @sp:	   The sequence that the PLOGI request was on
  * @rx_fp: The PLOGI request frame
  *
  * Locking Note: The rport lock is held before calling this function.
  */
 static void fc_rport_recv_plogi_req(struct fc_lport *lport,
-				    struct fc_seq *sp, struct fc_frame *rx_fp)
+				    struct fc_frame *rx_fp)
 {
 	struct fc_disc *disc;
 	struct fc_rport_priv *rdata;
@@ -1539,7 +1520,6 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
 	struct fc_seq_els_data rjt_data;
 	u32 sid;
 
-	rjt_data.fp = NULL;
 	sid = fc_frame_sid(fp);
 
 	FC_RPORT_ID_DBG(lport, sid, "Received PLOGI request\n");
@@ -1635,21 +1615,20 @@ out:
 	return;
 
 reject:
-	lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+	lport->tt.seq_els_rsp_send(fp, ELS_LS_RJT, &rjt_data);
 	fc_frame_free(fp);
 }
 
 /**
  * fc_rport_recv_prli_req() - Handler for process login (PRLI) requests
  * @rdata: The remote port that sent the PRLI request
- * @sp:	   The sequence that the PRLI was on
  * @rx_fp: The PRLI request frame
  *
  * Locking Note: The rport lock is exected to be held before calling
  * this function.
  */
 static void fc_rport_recv_prli_req(struct fc_rport_priv *rdata,
-				   struct fc_seq *sp, struct fc_frame *rx_fp)
+				   struct fc_frame *rx_fp)
 {
 	struct fc_lport *lport = rdata->local_port;
 	struct fc_frame *fp;
@@ -1666,7 +1645,6 @@ static void fc_rport_recv_prli_req(struct fc_rport_priv *rdata,
 	u32 fcp_parm;
 	u32 roles = FC_RPORT_ROLE_UNKNOWN;
 
-	rjt_data.fp = NULL;
 	FC_RPORT_DBG(rdata, "Received PRLI request while in state %s\n",
 		     fc_rport_state(rdata));
 
@@ -1759,7 +1737,7 @@ reject_len:
 	rjt_data.reason = ELS_RJT_PROT;
 	rjt_data.explan = ELS_EXPL_INV_LEN;
 reject:
-	lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+	lport->tt.seq_els_rsp_send(rx_fp, ELS_LS_RJT, &rjt_data);
 drop:
 	fc_frame_free(rx_fp);
 }
@@ -1767,18 +1745,15 @@ drop:
 /**
  * fc_rport_recv_prlo_req() - Handler for process logout (PRLO) requests
  * @rdata: The remote port that sent the PRLO request
- * @sp:	   The sequence that the PRLO was on
  * @rx_fp: The PRLO request frame
  *
  * Locking Note: The rport lock is exected to be held before calling
  * this function.
  */
 static void fc_rport_recv_prlo_req(struct fc_rport_priv *rdata,
-				   struct fc_seq *sp,
 				   struct fc_frame *rx_fp)
 {
 	struct fc_lport *lport = rdata->local_port;
-	struct fc_exch *ep;
 	struct fc_frame *fp;
 	struct {
 		struct fc_els_prlo prlo;
@@ -1790,8 +1765,6 @@ static void fc_rport_recv_prlo_req(struct fc_rport_priv *rdata,
 	unsigned int plen;
 	struct fc_seq_els_data rjt_data;
 
-	rjt_data.fp = NULL;
-
 	FC_RPORT_DBG(rdata, "Received PRLO request while in state %s\n",
 		     fc_rport_state(rdata));
 
@@ -1814,8 +1787,6 @@ static void fc_rport_recv_prlo_req(struct fc_rport_priv *rdata,
 		goto reject;
 	}
 
-	sp = lport->tt.seq_start_next(sp);
-	WARN_ON(!sp);
 	pp = fc_frame_payload_get(fp, len);
 	WARN_ON(!pp);
 	memset(pp, 0, len);
@@ -1829,17 +1800,15 @@ static void fc_rport_recv_prlo_req(struct fc_rport_priv *rdata,
 
 	fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
 
-	ep = fc_seq_exch(sp);
-	fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
-		       FC_TYPE_ELS, FC_FCTL_RESP, 0);
-	lport->tt.seq_send(lport, sp, fp);
+	fc_fill_reply_hdr(fp, rx_fp, FC_RCTL_ELS_REP, 0);
+	lport->tt.frame_send(lport, fp);
 	goto drop;
 
 reject_len:
 	rjt_data.reason = ELS_RJT_PROT;
 	rjt_data.explan = ELS_EXPL_INV_LEN;
 reject:
-	lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+	lport->tt.seq_els_rsp_send(rx_fp, ELS_LS_RJT, &rjt_data);
 drop:
 	fc_frame_free(rx_fp);
 }
@@ -1847,20 +1816,17 @@ drop:
 /**
  * fc_rport_recv_logo_req() - Handler for logout (LOGO) requests
  * @lport: The local port that received the LOGO request
- * @sp:	   The sequence that the LOGO request was on
  * @fp:	   The LOGO request frame
  *
  * Locking Note: The rport lock is exected to be held before calling
  * this function.
  */
-static void fc_rport_recv_logo_req(struct fc_lport *lport,
-				   struct fc_seq *sp,
-				   struct fc_frame *fp)
+static void fc_rport_recv_logo_req(struct fc_lport *lport, struct fc_frame *fp)
 {
 	struct fc_rport_priv *rdata;
 	u32 sid;
 
-	lport->tt.seq_els_rsp_send(sp, ELS_LS_ACC, NULL);
+	lport->tt.seq_els_rsp_send(fp, ELS_LS_ACC, NULL);
 
 	sid = fc_frame_sid(fp);
 
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 605f1d7861a..14be49b44e8 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -249,14 +249,12 @@ struct fcoe_dev_stats {
 
 /**
  * struct fc_seq_els_data - ELS data used for passing ELS specific responses
- * @fp:     The ELS frame
  * @reason: The reason for rejection
  * @explan: The explaination of the rejection
  *
  * Mainly used by the exchange manager layer.
  */
 struct fc_seq_els_data {
-	struct fc_frame *fp;
 	enum fc_els_rjt_reason reason;
 	enum fc_els_rjt_explan explan;
 };
@@ -519,12 +517,11 @@ struct libfc_function_template {
 			struct fc_frame *);
 
 	/*
-	 * Send an ELS response using infomation from a previous
-	 * exchange and sequence.
+	 * Send an ELS response using information from the received frame.
 	 *
 	 * STATUS: OPTIONAL
 	 */
-	void (*seq_els_rsp_send)(struct fc_seq *, enum fc_els_cmd,
+	void (*seq_els_rsp_send)(struct fc_frame *, enum fc_els_cmd,
 				 struct fc_seq_els_data *);
 
 	/*
@@ -583,8 +580,7 @@ struct libfc_function_template {
 	 *
 	 * STATUS: OPTIONAL
 	 */
-	void (*lport_recv)(struct fc_lport *, struct fc_seq *,
-			   struct fc_frame *);
+	void (*lport_recv)(struct fc_lport *, struct fc_frame *);
 
 	/*
 	 * Reset the local port.
@@ -646,8 +642,7 @@ struct libfc_function_template {
 	 *
 	 * STATUS: OPTIONAL
 	 */
-	void (*rport_recv_req)(struct fc_seq *, struct fc_frame *,
-			       struct fc_lport *);
+	void (*rport_recv_req)(struct fc_lport *, struct fc_frame *);
 
 	/*
 	 * lookup an rport by it's port ID.
@@ -693,8 +688,7 @@ struct libfc_function_template {
 	 *
 	 * STATUS: OPTIONAL
 	 */
-	void (*disc_recv_req)(struct fc_seq *, struct fc_frame *,
-			      struct fc_lport *);
+	void (*disc_recv_req)(struct fc_lport *, struct fc_frame *);
 
 	/*
 	 * Start discovery for a local port.
-- 
cgit v1.2.3-70-g09d2


From c01be6dcb2b5cce4feaf48035be6395e5cd7d47c Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Thu, 22 Jul 2010 16:59:49 +0530
Subject: [SCSI] iscsi_transport: wait on session in error handler path

Wait for the session to come online in eh_device_reset_handler
and eh_target_reset_handler.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: Vikas Chaudhary <vikas.chaudhary@qlogic.com>
Signed-off-by: Ravi Anand <ravi.anand@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/qla4xxx/ql4_os.c       | 11 ++++++++++-
 drivers/scsi/scsi_transport_iscsi.c | 32 ++++++++++++++++++++++++++++++++
 include/scsi/scsi_transport_iscsi.h |  2 ++
 3 files changed, 44 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 821384147a4..5529b2a3974 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -2020,6 +2020,11 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd)
 	if (!ddb_entry)
 		return ret;
 
+	ret = iscsi_block_scsi_eh(cmd);
+	if (ret)
+		return ret;
+	ret = FAILED;
+
 	ql4_printk(KERN_INFO, ha,
 		   "scsi%ld:%d:%d:%d: DEVICE RESET ISSUED.\n", ha->host_no,
 		   cmd->device->channel, cmd->device->id, cmd->device->lun);
@@ -2072,11 +2077,15 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd)
 {
 	struct scsi_qla_host *ha = to_qla_host(cmd->device->host);
 	struct ddb_entry *ddb_entry = cmd->device->hostdata;
-	int stat;
+	int stat, ret;
 
 	if (!ddb_entry)
 		return FAILED;
 
+	ret = iscsi_block_scsi_eh(cmd);
+	if (ret)
+		return ret;
+
 	starget_printk(KERN_INFO, scsi_target(cmd->device),
 		       "WARM TARGET RESET ISSUED.\n");
 
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index d4b96623aa5..e84026def1f 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -30,6 +30,7 @@
 #include <scsi/scsi_transport.h>
 #include <scsi/scsi_transport_iscsi.h>
 #include <scsi/iscsi_if.h>
+#include <scsi/scsi_cmnd.h>
 
 #define ISCSI_SESSION_ATTRS 23
 #define ISCSI_CONN_ATTRS 13
@@ -534,6 +535,37 @@ static void iscsi_scan_session(struct work_struct *work)
 	atomic_dec(&ihost->nr_scans);
 }
 
+/**
+ * iscsi_block_scsi_eh - block scsi eh until session state has transitioned
+ * @cmd: scsi cmd passed to scsi eh handler
+ *
+ * If the session is down this function will wait for the recovery
+ * timer to fire or for the session to be logged back in. If the
+ * recovery timer fires then FAST_IO_FAIL is returned. The caller
+ * should pass this error value to the scsi eh.
+ */
+int iscsi_block_scsi_eh(struct scsi_cmnd *cmd)
+{
+	struct iscsi_cls_session *session =
+			starget_to_session(scsi_target(cmd->device));
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&session->lock, flags);
+	while (session->state != ISCSI_SESSION_LOGGED_IN) {
+		if (session->state == ISCSI_SESSION_FREE) {
+			ret = FAST_IO_FAIL;
+			break;
+		}
+		spin_unlock_irqrestore(&session->lock, flags);
+		msleep(1000);
+		spin_lock_irqsave(&session->lock, flags);
+	}
+	spin_unlock_irqrestore(&session->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iscsi_block_scsi_eh);
+
 static void session_recovery_timedout(struct work_struct *work)
 {
 	struct iscsi_cls_session *session =
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 349c7f30720..7fff94b3b2a 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -32,6 +32,7 @@ struct scsi_transport_template;
 struct iscsi_transport;
 struct iscsi_endpoint;
 struct Scsi_Host;
+struct scsi_cmnd;
 struct iscsi_cls_conn;
 struct iscsi_conn;
 struct iscsi_task;
@@ -255,5 +256,6 @@ extern int iscsi_scan_finished(struct Scsi_Host *shost, unsigned long time);
 extern struct iscsi_endpoint *iscsi_create_endpoint(int dd_size);
 extern void iscsi_destroy_endpoint(struct iscsi_endpoint *ep);
 extern struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle);
+extern int iscsi_block_scsi_eh(struct scsi_cmnd *cmd);
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From df64d3caab8db6ae17dacd229a03d7689a10c432 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@suse.de>
Date: Tue, 27 Jul 2010 15:51:13 -0500
Subject: [SCSI] Unify SAM_ and SAM_STAT_ macros

We have two separate definitions for identical constants with nearly the
same name.  One comes from the generic headers in scsi.h; the other is
an enum in libsas.h ... it's causing confusion about which one is
correct (fortunately they both are).

Fix this by eliminating the libsas.h duplicate.

Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/aic94xx/aic94xx_task.c |  2 +-
 drivers/scsi/libsas/sas_ata.c       | 12 ++++++------
 drivers/scsi/libsas/sas_expander.c  |  2 +-
 drivers/scsi/libsas/sas_scsi_host.c |  4 ++--
 drivers/scsi/libsas/sas_task.c      |  6 +++---
 drivers/scsi/mvsas/mv_sas.c         | 16 ++++++++--------
 drivers/scsi/pm8001/pm8001_hwi.c    | 14 +++++++-------
 drivers/scsi/pm8001/pm8001_sas.c    |  4 ++--
 include/scsi/libsas.h               | 11 +----------
 9 files changed, 31 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/aic94xx/aic94xx_task.c b/drivers/scsi/aic94xx/aic94xx_task.c
index 75d20f72501..532d212b6b2 100644
--- a/drivers/scsi/aic94xx/aic94xx_task.c
+++ b/drivers/scsi/aic94xx/aic94xx_task.c
@@ -223,7 +223,7 @@ Again:
 	switch (opcode) {
 	case TC_NO_ERROR:
 		ts->resp = SAS_TASK_COMPLETE;
-		ts->stat = SAM_GOOD;
+		ts->stat = SAM_STAT_GOOD;
 		break;
 	case TC_UNDERRUN:
 		ts->resp = SAS_TASK_COMPLETE;
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 8c496b56556..042153cbbde 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -71,7 +71,7 @@ static enum ata_completion_errors sas_to_ata_err(struct task_status_struct *ts)
 		case SAS_SG_ERR:
 			return AC_ERR_INVALID;
 
-		case SAM_CHECK_COND:
+		case SAM_STAT_CHECK_CONDITION:
 		case SAS_OPEN_TO:
 		case SAS_OPEN_REJECT:
 			SAS_DPRINTK("%s: Saw error %d.  What to do?\n",
@@ -107,7 +107,7 @@ static void sas_ata_task_done(struct sas_task *task)
 	sas_ha = dev->port->ha;
 
 	spin_lock_irqsave(dev->sata_dev.ap->lock, flags);
-	if (stat->stat == SAS_PROTO_RESPONSE || stat->stat == SAM_GOOD) {
+	if (stat->stat == SAS_PROTO_RESPONSE || stat->stat == SAM_STAT_GOOD) {
 		ata_tf_from_fis(resp->ending_fis, &dev->sata_dev.tf);
 		qc->err_mask |= ac_err_mask(dev->sata_dev.tf.command);
 		dev->sata_dev.sstatus = resp->sstatus;
@@ -511,12 +511,12 @@ static int sas_execute_task(struct sas_task *task, void *buffer, int size,
 					goto ex_err;
 			}
 		}
-		if (task->task_status.stat == SAM_BUSY ||
-			   task->task_status.stat == SAM_TASK_SET_FULL ||
+		if (task->task_status.stat == SAM_STAT_BUSY ||
+			   task->task_status.stat == SAM_STAT_TASK_SET_FULL ||
 			   task->task_status.stat == SAS_QUEUE_FULL) {
 			SAS_DPRINTK("task: q busy, sleeping...\n");
 			schedule_timeout_interruptible(HZ);
-		} else if (task->task_status.stat == SAM_CHECK_COND) {
+		} else if (task->task_status.stat == SAM_STAT_CHECK_CONDITION) {
 			struct scsi_sense_hdr shdr;
 
 			if (!scsi_normalize_sense(ts->buf, ts->buf_valid_size,
@@ -549,7 +549,7 @@ static int sas_execute_task(struct sas_task *task, void *buffer, int size,
 					    shdr.asc, shdr.ascq);
 			}
 		} else if (task->task_status.resp != SAS_TASK_COMPLETE ||
-			   task->task_status.stat != SAM_GOOD) {
+			   task->task_status.stat != SAM_STAT_GOOD) {
 			SAS_DPRINTK("task finished with resp:0x%x, "
 				    "stat:0x%x\n",
 				    task->task_status.resp,
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index c65af02dcfe..83dd5070a15 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -107,7 +107,7 @@ static int smp_execute_task(struct domain_device *dev, void *req, int req_size,
 			}
 		}
 		if (task->task_status.resp == SAS_TASK_COMPLETE &&
-		    task->task_status.stat == SAM_GOOD) {
+		    task->task_status.stat == SAM_STAT_GOOD) {
 			res = 0;
 			break;
 		} if (task->task_status.resp == SAS_TASK_COMPLETE &&
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index a7890c6d878..f0cfba9a1fc 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -113,10 +113,10 @@ static void sas_scsi_task_done(struct sas_task *task)
 		case SAS_ABORTED_TASK:
 			hs = DID_ABORT;
 			break;
-		case SAM_CHECK_COND:
+		case SAM_STAT_CHECK_CONDITION:
 			memcpy(sc->sense_buffer, ts->buf,
 			       min(SCSI_SENSE_BUFFERSIZE, ts->buf_valid_size));
-			stat = SAM_CHECK_COND;
+			stat = SAM_STAT_CHECK_CONDITION;
 			break;
 		default:
 			stat = ts->stat;
diff --git a/drivers/scsi/libsas/sas_task.c b/drivers/scsi/libsas/sas_task.c
index 594524d5bfa..b13a3346894 100644
--- a/drivers/scsi/libsas/sas_task.c
+++ b/drivers/scsi/libsas/sas_task.c
@@ -15,13 +15,13 @@ void sas_ssp_task_response(struct device *dev, struct sas_task *task,
 	else if (iu->datapres == 1)
 		tstat->stat = iu->resp_data[3];
 	else if (iu->datapres == 2) {
-		tstat->stat = SAM_CHECK_COND;
+		tstat->stat = SAM_STAT_CHECK_CONDITION;
 		tstat->buf_valid_size =
 			min_t(int, SAS_STATUS_BUF_SIZE,
 			      be32_to_cpu(iu->sense_data_len));
 		memcpy(tstat->buf, iu->sense_data, tstat->buf_valid_size);
 
-		if (iu->status != SAM_CHECK_COND)
+		if (iu->status != SAM_STAT_CHECK_CONDITION)
 			dev_printk(KERN_WARNING, dev,
 				   "dev %llx sent sense data, but "
 				   "stat(%x) is not CHECK CONDITION\n",
@@ -30,7 +30,7 @@ void sas_ssp_task_response(struct device *dev, struct sas_task *task,
 	}
 	else
 		/* when datapres contains corrupt/unknown value... */
-		tstat->stat = SAM_CHECK_COND;
+		tstat->stat = SAM_STAT_CHECK_CONDITION;
 }
 EXPORT_SYMBOL_GPL(sas_ssp_task_response);
 
diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index cab92423986..adedaa916ec 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -1483,7 +1483,7 @@ static int mvs_exec_internal_tmf_task(struct domain_device *dev,
 		}
 
 		if (task->task_status.resp == SAS_TASK_COMPLETE &&
-		    task->task_status.stat == SAM_GOOD) {
+		    task->task_status.stat == SAM_STAT_GOOD) {
 			res = TMF_RESP_FUNC_COMPLETE;
 			break;
 		}
@@ -1758,7 +1758,7 @@ static int mvs_sata_done(struct mvs_info *mvi, struct sas_task *task,
 	struct mvs_device *mvi_dev = task->dev->lldd_dev;
 	struct task_status_struct *tstat = &task->task_status;
 	struct ata_task_resp *resp = (struct ata_task_resp *)tstat->buf;
-	int stat = SAM_GOOD;
+	int stat = SAM_STAT_GOOD;
 
 
 	resp->frame_len = sizeof(struct dev_to_host_fis);
@@ -1790,13 +1790,13 @@ static int mvs_slot_err(struct mvs_info *mvi, struct sas_task *task,
 
 	MVS_CHIP_DISP->command_active(mvi, slot_idx);
 
-	stat = SAM_CHECK_COND;
+	stat = SAM_STAT_CHECK_CONDITION;
 	switch (task->task_proto) {
 	case SAS_PROTOCOL_SSP:
 		stat = SAS_ABORTED_TASK;
 		break;
 	case SAS_PROTOCOL_SMP:
-		stat = SAM_CHECK_COND;
+		stat = SAM_STAT_CHECK_CONDITION;
 		break;
 
 	case SAS_PROTOCOL_SATA:
@@ -1881,7 +1881,7 @@ int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags)
 	case SAS_PROTOCOL_SSP:
 		/* hw says status == 0, datapres == 0 */
 		if (rx_desc & RXQ_GOOD) {
-			tstat->stat = SAM_GOOD;
+			tstat->stat = SAM_STAT_GOOD;
 			tstat->resp = SAS_TASK_COMPLETE;
 		}
 		/* response frame present */
@@ -1890,12 +1890,12 @@ int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags)
 						sizeof(struct mvs_err_info);
 			sas_ssp_task_response(mvi->dev, task, iu);
 		} else
-			tstat->stat = SAM_CHECK_COND;
+			tstat->stat = SAM_STAT_CHECK_CONDITION;
 		break;
 
 	case SAS_PROTOCOL_SMP: {
 			struct scatterlist *sg_resp = &task->smp_task.smp_resp;
-			tstat->stat = SAM_GOOD;
+			tstat->stat = SAM_STAT_GOOD;
 			to = kmap_atomic(sg_page(sg_resp), KM_IRQ0);
 			memcpy(to + sg_resp->offset,
 				slot->response + sizeof(struct mvs_err_info),
@@ -1912,7 +1912,7 @@ int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags)
 		}
 
 	default:
-		tstat->stat = SAM_CHECK_COND;
+		tstat->stat = SAM_STAT_CHECK_CONDITION;
 		break;
 	}
 	if (!slot->port->port_attached) {
diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
index 5ff8261c5d6..356ad268de6 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.c
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -1480,7 +1480,7 @@ mpi_ssp_completion(struct pm8001_hba_info *pm8001_ha , void *piomb)
 			",param = %d \n", param));
 		if (param == 0) {
 			ts->resp = SAS_TASK_COMPLETE;
-			ts->stat = SAM_GOOD;
+			ts->stat = SAM_STAT_GOOD;
 		} else {
 			ts->resp = SAS_TASK_COMPLETE;
 			ts->stat = SAS_PROTO_RESPONSE;
@@ -1909,7 +1909,7 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
 		PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_SUCCESS\n"));
 		if (param == 0) {
 			ts->resp = SAS_TASK_COMPLETE;
-			ts->stat = SAM_GOOD;
+			ts->stat = SAM_STAT_GOOD;
 		} else {
 			u8 len;
 			ts->resp = SAS_TASK_COMPLETE;
@@ -2450,7 +2450,7 @@ mpi_smp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	case IO_SUCCESS:
 		PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_SUCCESS\n"));
 		ts->resp = SAS_TASK_COMPLETE;
-		ts->stat = SAM_GOOD;
+		ts->stat = SAM_STAT_GOOD;
 	if (pm8001_dev)
 			pm8001_dev->running_req--;
 		break;
@@ -2479,19 +2479,19 @@ mpi_smp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
 		PM8001_IO_DBG(pm8001_ha,
 			pm8001_printk("IO_ERROR_HW_TIMEOUT\n"));
 		ts->resp = SAS_TASK_COMPLETE;
-		ts->stat = SAM_BUSY;
+		ts->stat = SAM_STAT_BUSY;
 		break;
 	case IO_XFER_ERROR_BREAK:
 		PM8001_IO_DBG(pm8001_ha,
 			pm8001_printk("IO_XFER_ERROR_BREAK\n"));
 		ts->resp = SAS_TASK_COMPLETE;
-		ts->stat = SAM_BUSY;
+		ts->stat = SAM_STAT_BUSY;
 		break;
 	case IO_XFER_ERROR_PHY_NOT_READY:
 		PM8001_IO_DBG(pm8001_ha,
 			pm8001_printk("IO_XFER_ERROR_PHY_NOT_READY\n"));
 		ts->resp = SAS_TASK_COMPLETE;
-		ts->stat = SAM_BUSY;
+		ts->stat = SAM_STAT_BUSY;
 		break;
 	case IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED:
 		PM8001_IO_DBG(pm8001_ha,
@@ -3260,7 +3260,7 @@ mpi_task_abort_resp(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	case IO_SUCCESS:
 		PM8001_EH_DBG(pm8001_ha, pm8001_printk("IO_SUCCESS\n"));
 		ts->resp = SAS_TASK_COMPLETE;
-		ts->stat = SAM_GOOD;
+		ts->stat = SAM_STAT_GOOD;
 		break;
 	case IO_NOT_VALID:
 		PM8001_EH_DBG(pm8001_ha, pm8001_printk("IO_NOT_VALID\n"));
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index cd02ceaf67f..6ae059ebb4b 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -763,7 +763,7 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev,
 		}
 
 		if (task->task_status.resp == SAS_TASK_COMPLETE &&
-			task->task_status.stat == SAM_GOOD) {
+			task->task_status.stat == SAM_STAT_GOOD) {
 			res = TMF_RESP_FUNC_COMPLETE;
 			break;
 		}
@@ -853,7 +853,7 @@ pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha,
 		}
 
 		if (task->task_status.resp == SAS_TASK_COMPLETE &&
-			task->task_status.stat == SAM_GOOD) {
+			task->task_status.stat == SAM_STAT_GOOD) {
 			res = TMF_RESP_FUNC_COMPLETE;
 			break;
 
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 3b586859669..d06e13be717 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -422,16 +422,7 @@ enum service_response {
 };
 
 enum exec_status {
-	SAM_GOOD         = 0,
-	SAM_CHECK_COND   = 2,
-	SAM_COND_MET     = 4,
-	SAM_BUSY         = 8,
-	SAM_INTERMEDIATE = 0x10,
-	SAM_IM_COND_MET  = 0x12,
-	SAM_RESV_CONFLICT= 0x14,
-	SAM_TASK_SET_FULL= 0x28,
-	SAM_ACA_ACTIVE   = 0x30,
-	SAM_TASK_ABORTED = 0x40,
+	/* The SAM_STAT_.. codes fit in the lower 6 bits */
 
 	SAS_DEV_NO_RESPONSE = 0x80,
 	SAS_DATA_UNDERRUN,
-- 
cgit v1.2.3-70-g09d2


From bc4f24014de58f045f169742701a6598884d93db Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 17 Jun 2010 10:41:42 -0400
Subject: [SCSI] implement runtime Power Management

This patch (as1398b) adds runtime PM support to the SCSI layer.  Only
the mechanism is provided; use of it is up to the various high-level
drivers, and the patch doesn't change any of them.  Except for sg --
the patch explicitly prevents a device from being runtime-suspended
while its sg device file is open.

The implementation is simplistic.  In general, hosts and targets are
automatically suspended when all their children are asleep, but for
them the runtime-suspend code doesn't actually do anything.  (A host's
runtime PM status is propagated up the device tree, though, so a
runtime-PM-aware lower-level driver could power down the host adapter
hardware at the appropriate times.)  There are comments indicating
where a transport class might be notified or some other hooks added.

LUNs are runtime-suspended by calling the drivers' existing suspend
handlers (and likewise for runtime-resume).  Somewhat arbitrarily, the
implementation delays for 100 ms before suspending an eligible LUN.
This is because there typically are occasions during bootup when the
same device file is opened and closed several times in quick
succession.

The way this all works is that the SCSI core increments a device's
PM-usage count when it is registered.  If a high-level driver does
nothing then the device will not be eligible for runtime-suspend
because of the elevated usage count.  If a high-level driver wants to
use runtime PM then it can call scsi_autopm_put_device() in its probe
routine to decrement the usage count and scsi_autopm_get_device() in
its remove routine to restore the original count.

Hosts, targets, and LUNs are not suspended while they are being probed
or removed, or while the error handler is running.  In fact, a fairly
large part of the patch consists of code to make sure that things
aren't suspended at such times.

[jejb: fix up compile issues in PM config variations]
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/hosts.c       |  10 ++++-
 drivers/scsi/scsi_error.c  |  16 ++++++-
 drivers/scsi/scsi_pm.c     | 110 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/scsi_priv.h   |  14 +++++-
 drivers/scsi/scsi_scan.c   |  24 ++++++++--
 drivers/scsi/scsi_sysfs.c  |  20 ++++++++-
 drivers/scsi/sg.c          |  10 ++++-
 include/scsi/scsi_device.h |   8 ++++
 8 files changed, 201 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index a2b1414da28..8a8f803439e 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -32,6 +32,7 @@
 #include <linux/completion.h>
 #include <linux/transport_class.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
@@ -156,6 +157,7 @@ EXPORT_SYMBOL(scsi_host_set_state);
 void scsi_remove_host(struct Scsi_Host *shost)
 {
 	unsigned long flags;
+
 	mutex_lock(&shost->scan_mutex);
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (scsi_host_set_state(shost, SHOST_CANCEL))
@@ -165,6 +167,8 @@ void scsi_remove_host(struct Scsi_Host *shost)
 			return;
 		}
 	spin_unlock_irqrestore(shost->host_lock, flags);
+
+	scsi_autopm_get_host(shost);
 	scsi_forget_host(shost);
 	mutex_unlock(&shost->scan_mutex);
 	scsi_proc_host_rm(shost);
@@ -216,12 +220,14 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
 		shost->shost_gendev.parent = dev ? dev : &platform_bus;
 	shost->dma_dev = dma_dev;
 
-	device_enable_async_suspend(&shost->shost_gendev);
-
 	error = device_add(&shost->shost_gendev);
 	if (error)
 		goto out;
 
+	pm_runtime_set_active(&shost->shost_gendev);
+	pm_runtime_enable(&shost->shost_gendev);
+	device_enable_async_suspend(&shost->shost_gendev);
+
 	scsi_host_set_state(shost, SHOST_RUNNING);
 	get_device(shost->shost_gendev.parent);
 
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index c60cffbefa3..2bf98469dc4 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1775,6 +1775,14 @@ int scsi_error_handler(void *data)
 		 * what we need to do to get it up and online again (if we can).
 		 * If we fail, we end up taking the thing offline.
 		 */
+		if (scsi_autopm_get_host(shost) != 0) {
+			SCSI_LOG_ERROR_RECOVERY(1,
+				printk(KERN_ERR "Error handler scsi_eh_%d "
+						"unable to autoresume\n",
+						shost->host_no));
+			continue;
+		}
+
 		if (shost->transportt->eh_strategy_handler)
 			shost->transportt->eh_strategy_handler(shost);
 		else
@@ -1788,6 +1796,7 @@ int scsi_error_handler(void *data)
 		 * which are still online.
 		 */
 		scsi_restart_operations(shost);
+		scsi_autopm_put_host(shost);
 		set_current_state(TASK_INTERRUPTIBLE);
 	}
 	__set_current_state(TASK_RUNNING);
@@ -1885,12 +1894,16 @@ scsi_reset_provider_done_command(struct scsi_cmnd *scmd)
 int
 scsi_reset_provider(struct scsi_device *dev, int flag)
 {
-	struct scsi_cmnd *scmd = scsi_get_command(dev, GFP_KERNEL);
+	struct scsi_cmnd *scmd;
 	struct Scsi_Host *shost = dev->host;
 	struct request req;
 	unsigned long flags;
 	int rtn;
 
+	if (scsi_autopm_get_host(shost) < 0)
+		return FAILED;
+
+	scmd = scsi_get_command(dev, GFP_KERNEL);
 	blk_rq_init(NULL, &req);
 	scmd->request = &req;
 
@@ -1947,6 +1960,7 @@ scsi_reset_provider(struct scsi_device *dev, int flag)
 	scsi_run_host_queues(shost);
 
 	scsi_next_command(scmd);
+	scsi_autopm_put_host(shost);
 	return rtn;
 }
 EXPORT_SYMBOL(scsi_reset_provider);
diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c
index cd83758ce0a..d70e91ae60a 100644
--- a/drivers/scsi/scsi_pm.c
+++ b/drivers/scsi/scsi_pm.c
@@ -59,6 +59,12 @@ static int scsi_bus_resume_common(struct device *dev)
 
 	if (scsi_is_sdev_device(dev))
 		err = scsi_dev_type_resume(dev);
+
+	if (err == 0) {
+		pm_runtime_disable(dev);
+		pm_runtime_set_active(dev);
+		pm_runtime_enable(dev);
+	}
 	return err;
 }
 
@@ -86,6 +92,107 @@ static int scsi_bus_poweroff(struct device *dev)
 
 #endif /* CONFIG_PM_SLEEP */
 
+#ifdef CONFIG_PM_RUNTIME
+
+static int scsi_runtime_suspend(struct device *dev)
+{
+	int err = 0;
+
+	dev_dbg(dev, "scsi_runtime_suspend\n");
+	if (scsi_is_sdev_device(dev)) {
+		err = scsi_dev_type_suspend(dev, PMSG_AUTO_SUSPEND);
+		if (err == -EAGAIN)
+			pm_schedule_suspend(dev, jiffies_to_msecs(
+				round_jiffies_up_relative(HZ/10)));
+	}
+
+	/* Insert hooks here for targets, hosts, and transport classes */
+
+	return err;
+}
+
+static int scsi_runtime_resume(struct device *dev)
+{
+	int err = 0;
+
+	dev_dbg(dev, "scsi_runtime_resume\n");
+	if (scsi_is_sdev_device(dev))
+		err = scsi_dev_type_resume(dev);
+
+	/* Insert hooks here for targets, hosts, and transport classes */
+
+	return err;
+}
+
+static int scsi_runtime_idle(struct device *dev)
+{
+	int err;
+
+	dev_dbg(dev, "scsi_runtime_idle\n");
+
+	/* Insert hooks here for targets, hosts, and transport classes */
+
+	if (scsi_is_sdev_device(dev))
+		err = pm_schedule_suspend(dev, 100);
+	else
+		err = pm_runtime_suspend(dev);
+	return err;
+}
+
+int scsi_autopm_get_device(struct scsi_device *sdev)
+{
+	int	err;
+
+	err = pm_runtime_get_sync(&sdev->sdev_gendev);
+	if (err < 0)
+		pm_runtime_put_sync(&sdev->sdev_gendev);
+	else if (err > 0)
+		err = 0;
+	return err;
+}
+EXPORT_SYMBOL_GPL(scsi_autopm_get_device);
+
+void scsi_autopm_put_device(struct scsi_device *sdev)
+{
+	pm_runtime_put_sync(&sdev->sdev_gendev);
+}
+EXPORT_SYMBOL_GPL(scsi_autopm_put_device);
+
+void scsi_autopm_get_target(struct scsi_target *starget)
+{
+	pm_runtime_get_sync(&starget->dev);
+}
+
+void scsi_autopm_put_target(struct scsi_target *starget)
+{
+	pm_runtime_put_sync(&starget->dev);
+}
+
+int scsi_autopm_get_host(struct Scsi_Host *shost)
+{
+	int	err;
+
+	err = pm_runtime_get_sync(&shost->shost_gendev);
+	if (err < 0)
+		pm_runtime_put_sync(&shost->shost_gendev);
+	else if (err > 0)
+		err = 0;
+	return err;
+}
+
+void scsi_autopm_put_host(struct Scsi_Host *shost)
+{
+	pm_runtime_put_sync(&shost->shost_gendev);
+}
+
+#else
+
+#define scsi_runtime_suspend	NULL
+#define scsi_runtime_resume	NULL
+#define scsi_runtime_idle	NULL
+
+#endif /* CONFIG_PM_RUNTIME */
+
 const struct dev_pm_ops scsi_bus_pm_ops = {
 	.suspend =		scsi_bus_suspend,
 	.resume =		scsi_bus_resume_common,
@@ -93,4 +200,7 @@ const struct dev_pm_ops scsi_bus_pm_ops = {
 	.thaw =			scsi_bus_resume_common,
 	.poweroff =		scsi_bus_poweroff,
 	.restore =		scsi_bus_resume_common,
+	.runtime_suspend =	scsi_runtime_suspend,
+	.runtime_resume =	scsi_runtime_resume,
+	.runtime_idle =		scsi_runtime_idle,
 };
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index dddacc73255..026295e2c53 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -7,6 +7,7 @@ struct request_queue;
 struct request;
 struct scsi_cmnd;
 struct scsi_device;
+struct scsi_target;
 struct scsi_host_template;
 struct Scsi_Host;
 struct scsi_nl_hdr;
@@ -147,9 +148,20 @@ static inline void scsi_netlink_exit(void) {}
 /* scsi_pm.c */
 #ifdef CONFIG_PM_OPS
 extern const struct dev_pm_ops scsi_bus_pm_ops;
-#else
+#else /* CONFIG_PM_OPS */
 #define scsi_bus_pm_ops		(*NULL)
 #endif
+#ifdef CONFIG_PM_RUNTIME
+extern void scsi_autopm_get_target(struct scsi_target *);
+extern void scsi_autopm_put_target(struct scsi_target *);
+extern int scsi_autopm_get_host(struct Scsi_Host *);
+extern void scsi_autopm_put_host(struct Scsi_Host *);
+#else
+static inline void scsi_autopm_get_target(struct scsi_target *t) {}
+static inline void scsi_autopm_put_target(struct scsi_target *t) {}
+static inline int scsi_autopm_get_host(struct Scsi_Host *h) { return 0; }
+static inline void scsi_autopm_put_host(struct Scsi_Host *h) {}
+#endif /* CONFIG_PM_RUNTIME */
 
 /* 
  * internal scsi timeout functions: for use by mid-layer and transport
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 1c027a97d8b..3d0a1e6e9c4 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1513,14 +1513,18 @@ struct scsi_device *__scsi_add_device(struct Scsi_Host *shost, uint channel,
 	starget = scsi_alloc_target(parent, channel, id);
 	if (!starget)
 		return ERR_PTR(-ENOMEM);
+	scsi_autopm_get_target(starget);
 
 	mutex_lock(&shost->scan_mutex);
 	if (!shost->async_scan)
 		scsi_complete_async_scans();
 
-	if (scsi_host_scan_allowed(shost))
+	if (scsi_host_scan_allowed(shost) && scsi_autopm_get_host(shost) == 0) {
 		scsi_probe_and_add_lun(starget, lun, NULL, &sdev, 1, hostdata);
+		scsi_autopm_put_host(shost);
+	}
 	mutex_unlock(&shost->scan_mutex);
+	scsi_autopm_put_target(starget);
 	scsi_target_reap(starget);
 	put_device(&starget->dev);
 
@@ -1574,6 +1578,7 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel,
 	starget = scsi_alloc_target(parent, channel, id);
 	if (!starget)
 		return;
+	scsi_autopm_get_target(starget);
 
 	if (lun != SCAN_WILD_CARD) {
 		/*
@@ -1599,6 +1604,7 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel,
 	}
 
  out_reap:
+	scsi_autopm_put_target(starget);
 	/* now determine if the target has any children at all
 	 * and if not, nuke it */
 	scsi_target_reap(starget);
@@ -1633,8 +1639,10 @@ void scsi_scan_target(struct device *parent, unsigned int channel,
 	if (!shost->async_scan)
 		scsi_complete_async_scans();
 
-	if (scsi_host_scan_allowed(shost))
+	if (scsi_host_scan_allowed(shost) && scsi_autopm_get_host(shost) == 0) {
 		__scsi_scan_target(parent, channel, id, lun, rescan);
+		scsi_autopm_put_host(shost);
+	}
 	mutex_unlock(&shost->scan_mutex);
 }
 EXPORT_SYMBOL(scsi_scan_target);
@@ -1686,7 +1694,7 @@ int scsi_scan_host_selected(struct Scsi_Host *shost, unsigned int channel,
 	if (!shost->async_scan)
 		scsi_complete_async_scans();
 
-	if (scsi_host_scan_allowed(shost)) {
+	if (scsi_host_scan_allowed(shost) && scsi_autopm_get_host(shost) == 0) {
 		if (channel == SCAN_WILD_CARD)
 			for (channel = 0; channel <= shost->max_channel;
 			     channel++)
@@ -1694,6 +1702,7 @@ int scsi_scan_host_selected(struct Scsi_Host *shost, unsigned int channel,
 						  rescan);
 		else
 			scsi_scan_channel(shost, channel, id, lun, rescan);
+		scsi_autopm_put_host(shost);
 	}
 	mutex_unlock(&shost->scan_mutex);
 
@@ -1831,8 +1840,11 @@ static void do_scsi_scan_host(struct Scsi_Host *shost)
 static int do_scan_async(void *_data)
 {
 	struct async_scan_data *data = _data;
-	do_scsi_scan_host(data->shost);
+	struct Scsi_Host *shost = data->shost;
+
+	do_scsi_scan_host(shost);
 	scsi_finish_async_scan(data);
+	scsi_autopm_put_host(shost);
 	return 0;
 }
 
@@ -1847,16 +1859,20 @@ void scsi_scan_host(struct Scsi_Host *shost)
 
 	if (strncmp(scsi_scan_type, "none", 4) == 0)
 		return;
+	if (scsi_autopm_get_host(shost) < 0)
+		return;
 
 	data = scsi_prep_async_scan(shost);
 	if (!data) {
 		do_scsi_scan_host(shost);
+		scsi_autopm_put_host(shost);
 		return;
 	}
 
 	p = kthread_run(do_scan_async, data, "scsi_scan_%d", shost->host_no);
 	if (IS_ERR(p))
 		do_scan_async(data);
+	/* scsi_autopm_put_host(shost) is called in do_scan_async() */
 }
 EXPORT_SYMBOL(scsi_scan_host);
 
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 5f85f8e831f..562fb3bce26 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/blkdev.h>
 #include <linux/device.h>
+#include <linux/pm_runtime.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_device.h>
@@ -802,8 +803,6 @@ static int scsi_target_add(struct scsi_target *starget)
 	if (starget->state != STARGET_CREATED)
 		return 0;
 
-	device_enable_async_suspend(&starget->dev);
-
 	error = device_add(&starget->dev);
 	if (error) {
 		dev_err(&starget->dev, "target device_add failed, error %d\n", error);
@@ -812,6 +811,10 @@ static int scsi_target_add(struct scsi_target *starget)
 	transport_add_device(&starget->dev);
 	starget->state = STARGET_RUNNING;
 
+	pm_runtime_set_active(&starget->dev);
+	pm_runtime_enable(&starget->dev);
+	device_enable_async_suspend(&starget->dev);
+
 	return 0;
 }
 
@@ -841,7 +844,20 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
 		return error;
 
 	transport_configure_device(&starget->dev);
+
 	device_enable_async_suspend(&sdev->sdev_gendev);
+	scsi_autopm_get_target(starget);
+	pm_runtime_set_active(&sdev->sdev_gendev);
+	pm_runtime_forbid(&sdev->sdev_gendev);
+	pm_runtime_enable(&sdev->sdev_gendev);
+	scsi_autopm_put_target(starget);
+
+	/* The following call will keep sdev active indefinitely, until
+	 * its driver does a corresponding scsi_autopm_put_device().  Only
+	 * drivers supporting autosuspend will do this.
+	 */
+	scsi_autopm_get_device(sdev);
+
 	error = device_add(&sdev->sdev_gendev);
 	if (error) {
 		printk(KERN_INFO "error 1\n");
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index d4549092400..2968c6b83dd 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -245,6 +245,10 @@ sg_open(struct inode *inode, struct file *filp)
 	if (retval)
 		goto sg_put;
 
+	retval = scsi_autopm_get_device(sdp->device);
+	if (retval)
+		goto sdp_put;
+
 	if (!((flags & O_NONBLOCK) ||
 	      scsi_block_when_processing_errors(sdp->device))) {
 		retval = -ENXIO;
@@ -302,8 +306,11 @@ sg_open(struct inode *inode, struct file *filp)
 	}
 	retval = 0;
 error_out:
-	if (retval)
+	if (retval) {
+		scsi_autopm_put_device(sdp->device);
+sdp_put:
 		scsi_device_put(sdp->device);
+	}
 sg_put:
 	if (sdp)
 		sg_put_dev(sdp);
@@ -327,6 +334,7 @@ sg_release(struct inode *inode, struct file *filp)
 	sdp->exclude = 0;
 	wake_up_interruptible(&sdp->o_excl_wait);
 
+	scsi_autopm_put_device(sdp->device);
 	kref_put(&sfp->f_ref, sg_remove_sfp);
 	return 0;
 }
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index d80b6dbed1c..50cb34ffef1 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -381,6 +381,14 @@ extern int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
 			    struct scsi_sense_hdr *, int timeout, int retries,
 			    int *resid);
 
+#ifdef CONFIG_PM_RUNTIME
+extern int scsi_autopm_get_device(struct scsi_device *);
+extern void scsi_autopm_put_device(struct scsi_device *);
+#else
+static inline int scsi_autopm_get_device(struct scsi_device *d) { return 0; }
+static inline void scsi_autopm_put_device(struct scsi_device *d) {}
+#endif /* CONFIG_PM_RUNTIME */
+
 static inline int __must_check scsi_device_reprobe(struct scsi_device *sdev)
 {
 	return device_reprobe(&sdev->sdev_gendev);
-- 
cgit v1.2.3-70-g09d2


From 7d14831e21060fbfbfe8453460ac19205f4ce1c2 Mon Sep 17 00:00:00 2001
From: Anuj Aggarwal <anuj.aggarwal@ti.com>
Date: Mon, 12 Jul 2010 17:54:06 +0530
Subject: regulator: tps6507x: allow driver to use DEFDCDC{2,3}_HIGH register

Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

In TPS6507x, depending on the status of DEFDCDC{2,3} pin either
DEFDCDC{2,3}_LOW or DEFDCDC{2,3}_HIGH register needs to be read or
programmed to change the output voltage.

The current driver assumes DEFDCDC{2,3} pins are always tied low
and thus operates only on DEFDCDC{2,3}_LOW register. This need
not always be the case (as is found on OMAP-L138 EVM).

Unfortunately, software cannot read the status of DEFDCDC{2,3} pins.
So, this information is passed through platform data depending on
how the board is wired.

Signed-off-by: Anuj Aggarwal <anuj.aggarwal@ti.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/tps6507x-regulator.c | 36 +++++++++++++++++++++++++++-------
 include/linux/regulator/tps6507x.h     | 32 ++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 7 deletions(-)
 create mode 100644 include/linux/regulator/tps6507x.h

(limited to 'include')

diff --git a/drivers/regulator/tps6507x-regulator.c b/drivers/regulator/tps6507x-regulator.c
index 14b4576281c..8152d65220f 100644
--- a/drivers/regulator/tps6507x-regulator.c
+++ b/drivers/regulator/tps6507x-regulator.c
@@ -22,6 +22,7 @@
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
+#include <linux/regulator/tps6507x.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/mfd/tps6507x.h>
@@ -101,9 +102,12 @@ struct tps_info {
 	unsigned max_uV;
 	u8 table_len;
 	const u16 *table;
+
+	/* Does the DCDC high or the low register define the output voltage? */
+	bool defdcdc_default;
 };
 
-static const struct tps_info tps6507x_pmic_regs[] = {
+static struct tps_info tps6507x_pmic_regs[] = {
 	{
 		.name = "VDCDC1",
 		.min_uV = 725000,
@@ -145,7 +149,7 @@ struct tps6507x_pmic {
 	struct regulator_desc desc[TPS6507X_NUM_REGULATOR];
 	struct tps6507x_dev *mfd;
 	struct regulator_dev *rdev[TPS6507X_NUM_REGULATOR];
-	const struct tps_info *info[TPS6507X_NUM_REGULATOR];
+	struct tps_info *info[TPS6507X_NUM_REGULATOR];
 	struct mutex io_lock;
 };
 static inline int tps6507x_pmic_read(struct tps6507x_pmic *tps, u8 reg)
@@ -341,10 +345,16 @@ static int tps6507x_pmic_dcdc_get_voltage(struct regulator_dev *dev)
 		reg = TPS6507X_REG_DEFDCDC1;
 		break;
 	case TPS6507X_DCDC_2:
-		reg = TPS6507X_REG_DEFDCDC2_LOW;
+		if (tps->info[dcdc]->defdcdc_default)
+			reg = TPS6507X_REG_DEFDCDC2_HIGH;
+		else
+			reg = TPS6507X_REG_DEFDCDC2_LOW;
 		break;
 	case TPS6507X_DCDC_3:
-		reg = TPS6507X_REG_DEFDCDC3_LOW;
+		if (tps->info[dcdc]->defdcdc_default)
+			reg = TPS6507X_REG_DEFDCDC3_HIGH;
+		else
+			reg = TPS6507X_REG_DEFDCDC3_LOW;
 		break;
 	default:
 		return -EINVAL;
@@ -370,10 +380,16 @@ static int tps6507x_pmic_dcdc_set_voltage(struct regulator_dev *dev,
 		reg = TPS6507X_REG_DEFDCDC1;
 		break;
 	case TPS6507X_DCDC_2:
-		reg = TPS6507X_REG_DEFDCDC2_LOW;
+		if (tps->info[dcdc]->defdcdc_default)
+			reg = TPS6507X_REG_DEFDCDC2_HIGH;
+		else
+			reg = TPS6507X_REG_DEFDCDC2_LOW;
 		break;
 	case TPS6507X_DCDC_3:
-		reg = TPS6507X_REG_DEFDCDC3_LOW;
+		if (tps->info[dcdc]->defdcdc_default)
+			reg = TPS6507X_REG_DEFDCDC3_HIGH;
+		else
+			reg = TPS6507X_REG_DEFDCDC3_LOW;
 		break;
 	default:
 		return -EINVAL;
@@ -532,7 +548,7 @@ int tps6507x_pmic_probe(struct platform_device *pdev)
 {
 	struct tps6507x_dev *tps6507x_dev = dev_get_drvdata(pdev->dev.parent);
 	static int desc_id;
-	const struct tps_info *info = &tps6507x_pmic_regs[0];
+	struct tps_info *info = &tps6507x_pmic_regs[0];
 	struct regulator_init_data *init_data;
 	struct regulator_dev *rdev;
 	struct tps6507x_pmic *tps;
@@ -569,6 +585,12 @@ int tps6507x_pmic_probe(struct platform_device *pdev)
 	for (i = 0; i < TPS6507X_NUM_REGULATOR; i++, info++, init_data++) {
 		/* Register the regulators */
 		tps->info[i] = info;
+		if (init_data->driver_data) {
+			struct tps6507x_reg_platform_data *data =
+							init_data->driver_data;
+			tps->info[i]->defdcdc_default = data->defdcdc_default;
+		}
+
 		tps->desc[i].name = info->name;
 		tps->desc[i].id = desc_id++;
 		tps->desc[i].n_voltages = num_voltages[i];
diff --git a/include/linux/regulator/tps6507x.h b/include/linux/regulator/tps6507x.h
new file mode 100644
index 00000000000..4892f591bab
--- /dev/null
+++ b/include/linux/regulator/tps6507x.h
@@ -0,0 +1,32 @@
+/*
+ * tps6507x.h  --  Voltage regulation for the Texas Instruments TPS6507X
+ *
+ * Copyright (C) 2010 Texas Instruments, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef REGULATOR_TPS6507X
+#define REGULATOR_TPS6507X
+
+/**
+ * tps6507x_reg_platform_data - platform data for tps6507x
+ * @defdcdc_default: Defines whether DCDC high or the low register controls
+ *	output voltage by default. Valid for DCDC2 and DCDC3 outputs only.
+ */
+struct tps6507x_reg_platform_data {
+	bool defdcdc_default;
+};
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From 4552124543141debf40a94b67155e57aa6bb34d6 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Wed, 28 Jul 2010 02:40:49 +0200
Subject: mac80211: inform drivers about the off-channel status on channel
 changes

For some drivers it can be useful to know whether the channel they're
supposed to switch to is going to be used for short off-channel work or
scanning, or whether the hardware is expected to stay on it for a while
longer. This is important for various kinds of calibration work, which
takes longer to complete and should keep some persistent state, even if
the channel temporarily changes.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 3 +++
 net/mac80211/main.c    | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 20d372edec2..c7027ef51c7 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -625,11 +625,14 @@ struct ieee80211_rx_status {
  *	may turn the device off as much as possible. Typically, this flag will
  *	be set when an interface is set UP but not associated or scanning, but
  *	it can also be unset in that case when monitor interfaces are active.
+ * @IEEE80211_CONF_OFFCHANNEL: The device is currently not on its main
+ *	operating channel.
  */
 enum ieee80211_conf_flags {
 	IEEE80211_CONF_MONITOR		= (1<<0),
 	IEEE80211_CONF_PS		= (1<<1),
 	IEEE80211_CONF_IDLE		= (1<<2),
+	IEEE80211_CONF_OFFCHANNEL	= (1<<3),
 };
 
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 0e95c750ded..7cc4f913a43 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -107,12 +107,15 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
 	if (scan_chan) {
 		chan = scan_chan;
 		channel_type = NL80211_CHAN_NO_HT;
+		local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
 	} else if (local->tmp_channel) {
 		chan = scan_chan = local->tmp_channel;
 		channel_type = local->tmp_channel_type;
+		local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
 	} else {
 		chan = local->oper_channel;
 		channel_type = local->_oper_channel_type;
+		local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
 	}
 
 	if (chan != local->hw.conf.channel ||
-- 
cgit v1.2.3-70-g09d2


From f430a27f05d42d26d3e438aa262a92565170573f Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Wed, 28 Jul 2010 15:26:54 +0300
Subject: ASoC: tlv320dac33: Revisit the FIFO Mode1 handling

Replace the hardwired latency definition with platform data
parameter, and simplify the nSample parameter calculation.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 include/sound/tlv320dac33-plat.h |  1 +
 sound/soc/codecs/tlv320dac33.c   | 71 +++++++++++++++++++---------------------
 2 files changed, 35 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/sound/tlv320dac33-plat.h b/include/sound/tlv320dac33-plat.h
index 3f428d53195..1aa7bdbc208 100644
--- a/include/sound/tlv320dac33-plat.h
+++ b/include/sound/tlv320dac33-plat.h
@@ -15,6 +15,7 @@
 
 struct tlv320dac33_platform_data {
 	int power_gpio;
+	int mode1_latency; /* latency caused by the i2c writes in us */
 	int keep_bclk;	/* Keep the BCLK running in FIFO modes */
 	u8 burst_bclkdiv;
 };
diff --git a/sound/soc/codecs/tlv320dac33.c b/sound/soc/codecs/tlv320dac33.c
index 2fa946ce23a..ced6fbbc9d9 100644
--- a/sound/soc/codecs/tlv320dac33.c
+++ b/sound/soc/codecs/tlv320dac33.c
@@ -49,8 +49,6 @@
 
 #define NSAMPLE_MAX		5700
 
-#define LATENCY_TIME_MS		20
-
 #define MODE7_LTHR		10
 #define MODE7_UTHR		(DAC33_BUFFER_SIZE_SAMPLES - 10)
 
@@ -107,6 +105,8 @@ struct tlv320dac33_priv {
 					 * this */
 	enum dac33_fifo_modes fifo_mode;/* FIFO mode selection */
 	unsigned int nsample;		/* burst read amount from host */
+	int mode1_latency;		/* latency caused by the i2c writes in
+					 * us */
 	u8 burst_bclkdiv;		/* BCLK divider value in burst mode */
 	unsigned int burst_rate;	/* Interface speed in Burst modes */
 
@@ -649,7 +649,7 @@ static inline void dac33_prefill_handler(struct tlv320dac33_priv *dac33)
 	switch (dac33->fifo_mode) {
 	case DAC33_FIFO_MODE1:
 		dac33_write16(codec, DAC33_NSAMPLE_MSB,
-			DAC33_THRREG(dac33->nsample + dac33->alarm_threshold));
+			DAC33_THRREG(dac33->nsample));
 
 		/* Take the timestamps */
 		spin_lock_irq(&dac33->lock);
@@ -798,6 +798,10 @@ static void dac33_shutdown(struct snd_pcm_substream *substream,
 	struct tlv320dac33_priv *dac33 = snd_soc_codec_get_drvdata(codec);
 
 	dac33->substream = NULL;
+
+	/* Reset the nSample restrictions */
+	dac33->nsample_min = 0;
+	dac33->nsample_max = NSAMPLE_MAX;
 }
 
 static int dac33_hw_params(struct snd_pcm_substream *substream,
@@ -1040,48 +1044,38 @@ static void dac33_calculate_times(struct snd_pcm_substream *substream)
 	struct snd_soc_device *socdev = rtd->socdev;
 	struct snd_soc_codec *codec = socdev->card->codec;
 	struct tlv320dac33_priv *dac33 = snd_soc_codec_get_drvdata(codec);
+	unsigned int period_size = substream->runtime->period_size;
+	unsigned int rate = substream->runtime->rate;
 	unsigned int nsample_limit;
 
 	/* In bypass mode we don't need to calculate */
 	if (!dac33->fifo_mode)
 		return;
 
-	/* Number of samples (16bit, stereo) in one period */
-	dac33->nsample_min = snd_pcm_lib_period_bytes(substream) / 4;
-
-	/* Number of samples (16bit, stereo) in ALSA buffer */
-	dac33->nsample_max = snd_pcm_lib_buffer_bytes(substream) / 4;
-	/* Subtract one period from the total */
-	dac33->nsample_max -= dac33->nsample_min;
-
-	/* Number of samples for LATENCY_TIME_MS / 2 */
-	dac33->alarm_threshold = substream->runtime->rate /
-				 (1000 / (LATENCY_TIME_MS / 2));
-
-	/* Find and fix up the lowest nsmaple limit */
-	nsample_limit = substream->runtime->rate / (1000 / LATENCY_TIME_MS);
-
-	if (dac33->nsample_min < nsample_limit)
-		dac33->nsample_min = nsample_limit;
-
-	if (dac33->nsample < dac33->nsample_min)
-		dac33->nsample = dac33->nsample_min;
-
-	/*
-	 * Find and fix up the highest nsmaple limit
-	 * In order to not overflow the DAC33 buffer substract the
-	 * alarm_threshold value from the size of the DAC33 buffer
-	 */
-	nsample_limit = DAC33_BUFFER_SIZE_SAMPLES - dac33->alarm_threshold;
-
-	if (dac33->nsample_max > nsample_limit)
-		dac33->nsample_max = nsample_limit;
-
-	if (dac33->nsample > dac33->nsample_max)
-		dac33->nsample = dac33->nsample_max;
-
 	switch (dac33->fifo_mode) {
 	case DAC33_FIFO_MODE1:
+		/* Number of samples under i2c latency */
+		dac33->alarm_threshold = US_TO_SAMPLES(rate,
+						dac33->mode1_latency);
+		/* nSample time shall not be shorter than i2c latency */
+		dac33->nsample_min = dac33->alarm_threshold;
+		/*
+		 * nSample should not be bigger than alsa buffer minus
+		 * size of one period to avoid overruns
+		 */
+		dac33->nsample_max = substream->runtime->buffer_size -
+					period_size;
+		nsample_limit = DAC33_BUFFER_SIZE_SAMPLES -
+				dac33->alarm_threshold;
+		if (dac33->nsample_max > nsample_limit)
+			dac33->nsample_max = nsample_limit;
+
+		/* Correct the nSample if it is outside of the ranges */
+		if (dac33->nsample < dac33->nsample_min)
+			dac33->nsample = dac33->nsample_min;
+		if (dac33->nsample > dac33->nsample_max)
+			dac33->nsample = dac33->nsample_max;
+
 		dac33->mode1_us_burst = SAMPLES_TO_US(dac33->burst_rate,
 						      dac33->nsample);
 		dac33->t_stamp1 = 0;
@@ -1519,6 +1513,9 @@ static int __devinit dac33_i2c_probe(struct i2c_client *client,
 	/* Pre calculate the burst rate */
 	dac33->burst_rate = BURST_BASEFREQ_HZ / dac33->burst_bclkdiv / 32;
 	dac33->keep_bclk = pdata->keep_bclk;
+	dac33->mode1_latency = pdata->mode1_latency;
+	if (!dac33->mode1_latency)
+		dac33->mode1_latency = 10000; /* 10ms */
 	dac33->irq = client->irq;
 	dac33->nsample = NSAMPLE_MAX;
 	dac33->nsample_max = NSAMPLE_MAX;
-- 
cgit v1.2.3-70-g09d2


From a577b318fc7cb0c46f9f0cdefb5b267490ff8ce5 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Wed, 28 Jul 2010 15:26:55 +0300
Subject: ASoC: tlv320dac33: Add support for automatic FIFO configuration

Platform parameter to enable automatic FIFO configuration when
the codec is in Mode1 or Mode7 FIFO mode.
When this mode is selected, the controls for changing
nSample (in Mode1), and UTHR (in Mode7) are not added.
The driver configures the FIFO configuration based on
the stream's period size in a way, that every burst will
read period size of data from the host.
In Mode7 we need to use a formula which gives a close enough
approximation of the burst length from the host's point
of view.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 include/sound/tlv320dac33-plat.h |  1 +
 sound/soc/codecs/tlv320dac33.c   | 90 ++++++++++++++++++++++++++++------------
 2 files changed, 65 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/sound/tlv320dac33-plat.h b/include/sound/tlv320dac33-plat.h
index 1aa7bdbc208..6c664965679 100644
--- a/include/sound/tlv320dac33-plat.h
+++ b/include/sound/tlv320dac33-plat.h
@@ -16,6 +16,7 @@
 struct tlv320dac33_platform_data {
 	int power_gpio;
 	int mode1_latency; /* latency caused by the i2c writes in us */
+	int auto_fifo_config; /* FIFO config based on the period size */
 	int keep_bclk;	/* Keep the BCLK running in FIFO modes */
 	u8 burst_bclkdiv;
 };
diff --git a/sound/soc/codecs/tlv320dac33.c b/sound/soc/codecs/tlv320dac33.c
index ced6fbbc9d9..8651b01ed22 100644
--- a/sound/soc/codecs/tlv320dac33.c
+++ b/sound/soc/codecs/tlv320dac33.c
@@ -60,6 +60,9 @@
 #define US_TO_SAMPLES(rate, us) \
 	(rate / (1000000 / us))
 
+#define UTHR_FROM_PERIOD_SIZE(samples, playrate, burstrate) \
+	((samples * 5000) / ((burstrate * 5000) / (burstrate - playrate)))
+
 static void dac33_calculate_times(struct snd_pcm_substream *substream);
 static int dac33_prepare_chip(struct snd_pcm_substream *substream);
 
@@ -107,6 +110,8 @@ struct tlv320dac33_priv {
 	unsigned int nsample;		/* burst read amount from host */
 	int mode1_latency;		/* latency caused by the i2c writes in
 					 * us */
+	int auto_fifo_config; 		/* Configure the FIFO based on the
+					 * period size */
 	u8 burst_bclkdiv;		/* BCLK divider value in burst mode */
 	unsigned int burst_rate;	/* Interface speed in Burst modes */
 
@@ -538,13 +543,16 @@ static const struct snd_kcontrol_new dac33_snd_controls[] = {
 		 DAC33_LINEL_TO_LLO_VOL, DAC33_LINER_TO_RLO_VOL, 0, 127, 1),
 };
 
-static const struct snd_kcontrol_new dac33_nsample_snd_controls[] = {
+static const struct snd_kcontrol_new dac33_mode_snd_controls[] = {
+	SOC_ENUM_EXT("FIFO Mode", dac33_fifo_mode_enum,
+		 dac33_get_fifo_mode, dac33_set_fifo_mode),
+};
+
+static const struct snd_kcontrol_new dac33_fifo_snd_controls[] = {
 	SOC_SINGLE_EXT("nSample", 0, 0, 5900, 0,
-		 dac33_get_nsample, dac33_set_nsample),
+		dac33_get_nsample, dac33_set_nsample),
 	SOC_SINGLE_EXT("UTHR", 0, 0, MODE7_UTHR, 0,
 		 dac33_get_uthr, dac33_set_uthr),
-	SOC_ENUM_EXT("FIFO Mode", dac33_fifo_mode_enum,
-		 dac33_get_fifo_mode, dac33_set_fifo_mode),
 };
 
 /* Analog bypass */
@@ -1057,24 +1065,38 @@ static void dac33_calculate_times(struct snd_pcm_substream *substream)
 		/* Number of samples under i2c latency */
 		dac33->alarm_threshold = US_TO_SAMPLES(rate,
 						dac33->mode1_latency);
-		/* nSample time shall not be shorter than i2c latency */
-		dac33->nsample_min = dac33->alarm_threshold;
-		/*
-		 * nSample should not be bigger than alsa buffer minus
-		 * size of one period to avoid overruns
-		 */
-		dac33->nsample_max = substream->runtime->buffer_size -
-					period_size;
-		nsample_limit = DAC33_BUFFER_SIZE_SAMPLES -
-				dac33->alarm_threshold;
-		if (dac33->nsample_max > nsample_limit)
-			dac33->nsample_max = nsample_limit;
-
-		/* Correct the nSample if it is outside of the ranges */
-		if (dac33->nsample < dac33->nsample_min)
-			dac33->nsample = dac33->nsample_min;
-		if (dac33->nsample > dac33->nsample_max)
-			dac33->nsample = dac33->nsample_max;
+		if (dac33->auto_fifo_config) {
+			if (period_size <= dac33->alarm_threshold)
+				/*
+				 * Set nSample to a whole number of periods
+				 * which covers the latency requirement.
+				 */
+				dac33->nsample = period_size *
+				       ((dac33->alarm_threshold / period_size) +
+				       (dac33->alarm_threshold % period_size ?
+				       1 : 0));
+			else
+				dac33->nsample = period_size;
+		} else {
+			/* nSample time shall not be shorter than i2c latency */
+			dac33->nsample_min = dac33->alarm_threshold;
+			/*
+			 * nSample should not be bigger than alsa buffer minus
+			 * size of one period to avoid overruns
+			 */
+			dac33->nsample_max = substream->runtime->buffer_size -
+						period_size;
+			nsample_limit = DAC33_BUFFER_SIZE_SAMPLES -
+					dac33->alarm_threshold;
+			if (dac33->nsample_max > nsample_limit)
+				dac33->nsample_max = nsample_limit;
+
+			/* Correct the nSample if it is outside of the ranges */
+			if (dac33->nsample < dac33->nsample_min)
+				dac33->nsample = dac33->nsample_min;
+			if (dac33->nsample > dac33->nsample_max)
+				dac33->nsample = dac33->nsample_max;
+		}
 
 		dac33->mode1_us_burst = SAMPLES_TO_US(dac33->burst_rate,
 						      dac33->nsample);
@@ -1082,6 +1104,16 @@ static void dac33_calculate_times(struct snd_pcm_substream *substream)
 		dac33->t_stamp2 = 0;
 		break;
 	case DAC33_FIFO_MODE7:
+		if (dac33->auto_fifo_config) {
+			dac33->uthr = UTHR_FROM_PERIOD_SIZE(
+					period_size,
+					rate,
+					dac33->burst_rate) + 9;
+			if (dac33->uthr > MODE7_UTHR)
+				dac33->uthr = MODE7_UTHR;
+			if (dac33->uthr < (MODE7_LTHR + 10))
+				dac33->uthr = (MODE7_LTHR + 10);
+		}
 		dac33->mode7_us_to_lthr =
 				SAMPLES_TO_US(substream->runtime->rate,
 					dac33->uthr - MODE7_LTHR + 1);
@@ -1379,10 +1411,15 @@ static int dac33_soc_probe(struct platform_device *pdev)
 
 	snd_soc_add_controls(codec, dac33_snd_controls,
 			     ARRAY_SIZE(dac33_snd_controls));
-	/* Only add the nSample controls, if we have valid IRQ number */
-	if (dac33->irq >= 0)
-		snd_soc_add_controls(codec, dac33_nsample_snd_controls,
-				     ARRAY_SIZE(dac33_nsample_snd_controls));
+	/* Only add the FIFO controls, if we have valid IRQ number */
+	if (dac33->irq >= 0) {
+		snd_soc_add_controls(codec, dac33_mode_snd_controls,
+				     ARRAY_SIZE(dac33_mode_snd_controls));
+		/* Add FIFO controls only if auto_fifo_config is not selected */
+		if (!dac33->auto_fifo_config)
+			snd_soc_add_controls(codec, dac33_fifo_snd_controls,
+					ARRAY_SIZE(dac33_fifo_snd_controls));
+	}
 
 	dac33_add_widgets(codec);
 
@@ -1513,6 +1550,7 @@ static int __devinit dac33_i2c_probe(struct i2c_client *client,
 	/* Pre calculate the burst rate */
 	dac33->burst_rate = BURST_BASEFREQ_HZ / dac33->burst_bclkdiv / 32;
 	dac33->keep_bclk = pdata->keep_bclk;
+	dac33->auto_fifo_config = pdata->auto_fifo_config;
 	dac33->mode1_latency = pdata->mode1_latency;
 	if (!dac33->mode1_latency)
 		dac33->mode1_latency = 10000; /* 10ms */
-- 
cgit v1.2.3-70-g09d2


From 685fd0b4ea3f0f1d5385610b0d5b57775a8d5842 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Thu, 29 Jul 2010 11:16:32 +0100
Subject: irq: Add new IRQ flag IRQF_NO_SUSPEND

A small number of users of IRQF_TIMER are using it for the implied no
suspend behaviour on interrupts which are not timer interrupts.

Therefore add a new IRQF_NO_SUSPEND flag, rename IRQF_TIMER to
__IRQF_TIMER and redefine IRQF_TIMER in terms of these new flags.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: xen-devel@lists.xensource.com
Cc: linux-input@vger.kernel.org
Cc: linuxppc-dev@ozlabs.org
Cc: devicetree-discuss@lists.ozlabs.org
LKML-Reference: <1280398595-29708-1-git-send-email-ian.campbell@citrix.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/interrupt.h | 7 ++++++-
 kernel/irq/manage.c       | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index c2331138ca1..a0384a4d1e6 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -53,16 +53,21 @@
  * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished.
  *                Used by threaded interrupts which need to keep the
  *                irq line disabled until the threaded handler has been run.
+ * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend
+ *
  */
 #define IRQF_DISABLED		0x00000020
 #define IRQF_SAMPLE_RANDOM	0x00000040
 #define IRQF_SHARED		0x00000080
 #define IRQF_PROBE_SHARED	0x00000100
-#define IRQF_TIMER		0x00000200
+#define __IRQF_TIMER		0x00000200
 #define IRQF_PERCPU		0x00000400
 #define IRQF_NOBALANCING	0x00000800
 #define IRQF_IRQPOLL		0x00001000
 #define IRQF_ONESHOT		0x00002000
+#define IRQF_NO_SUSPEND		0x00004000
+
+#define IRQF_TIMER		(__IRQF_TIMER | IRQF_NO_SUSPEND)
 
 /*
  * Bits used by threaded handlers:
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index e1497481fe8..c3003e9d91a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -216,7 +216,7 @@ static inline int setup_affinity(unsigned int irq, struct irq_desc *desc)
 void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
 {
 	if (suspend) {
-		if (!desc->action || (desc->action->flags & IRQF_TIMER))
+		if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND))
 			return;
 		desc->status |= IRQ_SUSPENDED;
 	}
-- 
cgit v1.2.3-70-g09d2


From bb8f563c848faa113059973f68c24a3bb6a9585e Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin.vincent@stericsson.com>
Date: Wed, 21 Jul 2010 12:53:57 +0100
Subject: ARM: 6243/1: mmci: pass power_mode to the translate_vdd callback

Platforms may have some external power control which need to be
controlled from board specific code.  Rename the translate_vdd()
callback to vdd_handler() and pass it the power mode.

Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Rabin Vincent <rabin.vincent@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/mmc/host/mmci.c   | 13 +++----------
 include/linux/amba/mmci.h | 10 ++++++----
 2 files changed, 9 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 3eaa0e9373c..7ae3eeeefc2 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -493,16 +493,9 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 			/* This implicitly enables the regulator */
 			mmc_regulator_set_ocr(host->vcc, ios->vdd);
 #endif
-		/*
-		 * The translate_vdd function is not used if you have
-		 * an external regulator, or your design is really weird.
-		 * Using it would mean sending in power control BOTH using
-		 * a regulator AND the 4 MMCIPWR bits. If we don't have
-		 * a regulator, we might have some other platform specific
-		 * power control behind this translate function.
-		 */
-		if (!host->vcc && host->plat->translate_vdd)
-			pwr |= host->plat->translate_vdd(mmc_dev(mmc), ios->vdd);
+		if (host->plat->vdd_handler)
+			pwr |= host->plat->vdd_handler(mmc_dev(mmc), ios->vdd,
+						       ios->power_mode);
 		/* The ST version does not have this, fall through to POWER_ON */
 		if (host->hw_designer != AMBA_VENDOR_ST) {
 			pwr |= MCI_PWR_UP;
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index 7e466fe7202..ca84ce70d5d 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -15,9 +15,10 @@
  * @ocr_mask: available voltages on the 4 pins from the block, this
  * is ignored if a regulator is used, see the MMC_VDD_* masks in
  * mmc/host.h
- * @translate_vdd: a callback function to translate a MMC_VDD_*
- * mask into a value to be binary or:ed and written into the
- * MMCIPWR register of the block
+ * @vdd_handler: a callback function to translate a MMC_VDD_*
+ * mask into a value (or to set some other custom bits in MMCIPWR)
+ * to be binary or:ed and written into the MMCIPWR register of the
+ * block.  May also control external power based on the power_mode.
  * @status: if no GPIO read function was given to the block in
  * gpio_wp (below) this function will be called to determine
  * whether a card is present in the MMC slot or not
@@ -29,7 +30,8 @@
 struct mmci_platform_data {
 	unsigned int f_max;
 	unsigned int ocr_mask;
-	u32 (*translate_vdd)(struct device *, unsigned int);
+	u32 (*vdd_handler)(struct device *, unsigned int vdd,
+			   unsigned char power_mode);
 	unsigned int (*status)(struct device *);
 	int	gpio_wp;
 	int	gpio_cd;
-- 
cgit v1.2.3-70-g09d2


From e5b900d228b76d445a4240d9aeb3cd8f79205a91 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 29 Jul 2010 16:08:55 +0200
Subject: mac80211: allow drivers to request DTIM period

Some features require knowing the DTIM period
before associating. This implements the ability
to wait for a beacon in mac80211 before assoc
to provide this value. It is optional since
most likely not all drivers will need this.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h     |  9 +++++++--
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/mlme.c        | 32 +++++++++++++++++++++++++++++---
 net/mac80211/scan.c        |  4 ++++
 net/mac80211/work.c        | 43 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 84 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index c7027ef51c7..f85fc8a140d 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -194,7 +194,9 @@ enum ieee80211_bss_change {
  *	if the hardware cannot handle this it must set the
  *	IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE hardware flag
  * @dtim_period: num of beacons before the next DTIM, for beaconing,
- *	not valid in station mode (cf. hw conf ps_dtim_period)
+ *	valid in station mode only while @assoc is true and if also
+ *	requested by %IEEE80211_HW_NEED_DTIM_PERIOD (cf. also hw conf
+ *	@ps_dtim_period)
  * @timestamp: beacon timestamp
  * @beacon_int: beacon interval
  * @assoc_capability: capabilities taken from assoc resp
@@ -1027,6 +1029,9 @@ enum ieee80211_tkip_key_type {
  *	connection quality related parameters, such as the RSSI level and
  *	provide notifications if configured trigger levels are reached.
  *
+ * @IEEE80211_HW_NEED_DTIM_PERIOD:
+ *	This device needs to know the DTIM period for the BSS before
+ *	associating.
  */
 enum ieee80211_hw_flags {
 	IEEE80211_HW_HAS_RATE_CONTROL			= 1<<0,
@@ -1036,7 +1041,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE	= 1<<4,
 	IEEE80211_HW_SIGNAL_UNSPEC			= 1<<5,
 	IEEE80211_HW_SIGNAL_DBM				= 1<<6,
-	/* use this hole */
+	IEEE80211_HW_NEED_DTIM_PERIOD			= 1<<7,
 	IEEE80211_HW_SPECTRUM_MGMT			= 1<<8,
 	IEEE80211_HW_AMPDU_AGGREGATION			= 1<<9,
 	IEEE80211_HW_SUPPORTS_PS			= 1<<10,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ef470064b15..65e0ed6c297 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -238,6 +238,7 @@ enum ieee80211_work_type {
 	IEEE80211_WORK_ABORT,
 	IEEE80211_WORK_DIRECT_PROBE,
 	IEEE80211_WORK_AUTH,
+	IEEE80211_WORK_ASSOC_BEACON_WAIT,
 	IEEE80211_WORK_ASSOC,
 	IEEE80211_WORK_REMAIN_ON_CHANNEL,
 };
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index cf8d72196c6..b6c163ac22d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -870,6 +870,11 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_led_assoc(local, 1);
 
+	if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD)
+		bss_conf->dtim_period = bss->dtim_period;
+	else
+		bss_conf->dtim_period = 0;
+
 	bss_conf->assoc = 1;
 	/*
 	 * For now just always ask the driver to update the basic rateset
@@ -1751,7 +1756,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 			if (wk->sdata != sdata)
 				continue;
 
-			if (wk->type != IEEE80211_WORK_ASSOC)
+			if (wk->type != IEEE80211_WORK_ASSOC &&
+			    wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT)
 				continue;
 
 			if (memcmp(mgmt->bssid, wk->filter_ta, ETH_ALEN))
@@ -2086,6 +2092,8 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk,
 						  struct sk_buff *skb)
 {
 	struct ieee80211_mgmt *mgmt;
+	struct ieee80211_rx_status *rx_status;
+	struct ieee802_11_elems elems;
 	u16 status;
 
 	if (!skb) {
@@ -2093,6 +2101,19 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk,
 		return WORK_DONE_DESTROY;
 	}
 
+	if (wk->type == IEEE80211_WORK_ASSOC_BEACON_WAIT) {
+		mutex_lock(&wk->sdata->u.mgd.mtx);
+		rx_status = (void *) skb->cb;
+		ieee802_11_parse_elems(skb->data + 24 + 12, skb->len - 24 - 12, &elems);
+		ieee80211_rx_bss_info(wk->sdata, (void *)skb->data, skb->len, rx_status,
+				      &elems, true);
+		mutex_unlock(&wk->sdata->u.mgd.mtx);
+
+		wk->type = IEEE80211_WORK_ASSOC;
+		/* not really done yet */
+		return WORK_DONE_REQUEUE;
+	}
+
 	mgmt = (void *)skb->data;
 	status = le16_to_cpu(mgmt->u.assoc_resp.status_code);
 
@@ -2206,10 +2227,14 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 	if (req->prev_bssid)
 		memcpy(wk->assoc.prev_bssid, req->prev_bssid, ETH_ALEN);
 
-	wk->type = IEEE80211_WORK_ASSOC;
 	wk->chan = req->bss->channel;
 	wk->sdata = sdata;
 	wk->done = ieee80211_assoc_done;
+	if (!bss->dtim_period &&
+	    sdata->local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD)
+		wk->type = IEEE80211_WORK_ASSOC_BEACON_WAIT;
+	else
+		wk->type = IEEE80211_WORK_ASSOC;
 
 	if (req->use_mfp) {
 		ifmgd->mfp = IEEE80211_MFP_REQUIRED;
@@ -2257,7 +2282,8 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
 
 			if (wk->type != IEEE80211_WORK_DIRECT_PROBE &&
 			    wk->type != IEEE80211_WORK_AUTH &&
-			    wk->type != IEEE80211_WORK_ASSOC)
+			    wk->type != IEEE80211_WORK_ASSOC &&
+			    wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT)
 				continue;
 
 			if (memcmp(req->bss->bssid, wk->filter_ta, ETH_ALEN))
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 41635b2c91b..41f20fb7e67 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -114,6 +114,10 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 		bss->dtim_period = tim_ie->dtim_period;
 	}
 
+	/* If the beacon had no TIM IE, or it was invalid, use 1 */
+	if (beacon && !bss->dtim_period)
+		bss->dtim_period = 1;
+
 	/* replace old supported rates if we get new values */
 	srlen = 0;
 	if (elems->supp_rates) {
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index c22a71c5cb4..81d4ad64184 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -560,6 +560,22 @@ ieee80211_remain_on_channel_timeout(struct ieee80211_work *wk)
 	return WORK_ACT_TIMEOUT;
 }
 
+static enum work_action __must_check
+ieee80211_assoc_beacon_wait(struct ieee80211_work *wk)
+{
+	if (wk->started)
+		return WORK_ACT_TIMEOUT;
+
+	/*
+	 * Wait up to one beacon interval ...
+	 * should this be more if we miss one?
+	 */
+	printk(KERN_DEBUG "%s: waiting for beacon from %pM\n",
+	       wk->sdata->name, wk->filter_ta);
+	wk->timeout = TU_TO_EXP_TIME(wk->assoc.bss->beacon_interval);
+	return WORK_ACT_NONE;
+}
+
 static void ieee80211_auth_challenge(struct ieee80211_work *wk,
 				     struct ieee80211_mgmt *mgmt,
 				     size_t len)
@@ -709,6 +725,25 @@ ieee80211_rx_mgmt_probe_resp(struct ieee80211_work *wk,
 	return WORK_ACT_DONE;
 }
 
+static enum work_action __must_check
+ieee80211_rx_mgmt_beacon(struct ieee80211_work *wk,
+			 struct ieee80211_mgmt *mgmt, size_t len)
+{
+	struct ieee80211_sub_if_data *sdata = wk->sdata;
+	struct ieee80211_local *local = sdata->local;
+
+	ASSERT_WORK_MTX(local);
+
+	if (wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT)
+		return WORK_ACT_MISMATCH;
+
+	if (len < 24 + 12)
+		return WORK_ACT_NONE;
+
+	printk(KERN_DEBUG "%s: beacon received\n", sdata->name);
+	return WORK_ACT_DONE;
+}
+
 static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
 					  struct sk_buff *skb)
 {
@@ -731,6 +766,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
 		case IEEE80211_WORK_DIRECT_PROBE:
 		case IEEE80211_WORK_AUTH:
 		case IEEE80211_WORK_ASSOC:
+		case IEEE80211_WORK_ASSOC_BEACON_WAIT:
 			bssid = wk->filter_ta;
 			break;
 		default:
@@ -745,6 +781,9 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
 			continue;
 
 		switch (fc & IEEE80211_FCTL_STYPE) {
+		case IEEE80211_STYPE_BEACON:
+			rma = ieee80211_rx_mgmt_beacon(wk, mgmt, skb->len);
+			break;
 		case IEEE80211_STYPE_PROBE_RESP:
 			rma = ieee80211_rx_mgmt_probe_resp(wk, mgmt, skb->len,
 							   rx_status);
@@ -916,6 +955,9 @@ static void ieee80211_work_work(struct work_struct *work)
 		case IEEE80211_WORK_REMAIN_ON_CHANNEL:
 			rma = ieee80211_remain_on_channel_timeout(wk);
 			break;
+		case IEEE80211_WORK_ASSOC_BEACON_WAIT:
+			rma = ieee80211_assoc_beacon_wait(wk);
+			break;
 		}
 
 		wk->started = started;
@@ -1065,6 +1107,7 @@ ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata,
 		case IEEE80211_STYPE_PROBE_RESP:
 		case IEEE80211_STYPE_ASSOC_RESP:
 		case IEEE80211_STYPE_REASSOC_RESP:
+		case IEEE80211_STYPE_BEACON:
 			skb_queue_tail(&local->work_skb_queue, skb);
 			ieee80211_queue_work(&local->hw, &local->work_work);
 			return RX_QUEUED;
-- 
cgit v1.2.3-70-g09d2


From b7753c8cd51dce67a0b152efb456a21ff1cc241b Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@googlemail.com>
Date: Thu, 29 Jul 2010 01:28:46 +0200
Subject: cfg80211: fix dev <-> wiphy typo

Cc: Joe Perches <joe@perches.com>
Signed-off-by: Christian Lamparter <chunkeey@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index ae80f8fb17f..2fd06c60ffb 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2451,7 +2451,7 @@ int wiphy_debug(const struct wiphy *wiphy, const char *format, ...)
 	wiphy_printk(KERN_DEBUG, wiphy, format, ##args)
 #elif defined(CONFIG_DYNAMIC_DEBUG)
 #define wiphy_dbg(wiphy, format, args...)			\
-	dynamic_pr_debug("%s: " format,	wiphy_name(dev), ##args)
+	dynamic_pr_debug("%s: " format,	wiphy_name(wiphy), ##args)
 #else
 #define wiphy_dbg(wiphy, format, args...)				\
 ({									\
-- 
cgit v1.2.3-70-g09d2


From 3bc280708e7b9a84cc6307c1f9acca57e0fafaac Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Thu, 29 Jul 2010 16:48:32 +0900
Subject: ASoC: fsi: Add new function for SPDIF

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Acked-by: Liam Girdwood <lrg@slimlogic.co.uk>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/sh_fsi.h |  2 ++
 sound/soc/sh/fsi.c     | 60 +++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 57 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/sh_fsi.h b/include/sound/sh_fsi.h
index 6ac71863c70..9d51d6f3589 100644
--- a/include/sound/sh_fsi.h
+++ b/include/sound/sh_fsi.h
@@ -64,6 +64,8 @@
 #define SH_FSI_FMT_I2S		3
 #define SH_FSI_FMT_TDM		4
 #define SH_FSI_FMT_TDM_DELAY	5
+#define SH_FSI_FMT_SPDIF	6
+
 
 #define SH_FSI_IFMT_TDM_CH(x) \
 	(SH_FSI_IFMT(TDM)	| SH_FSI_SET_CH_I(x))
diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c
index 4b09b3dfcc0..58c6bec642d 100644
--- a/sound/soc/sh/fsi.c
+++ b/sound/soc/sh/fsi.c
@@ -30,9 +30,11 @@
 #define DIDT		0x0020
 #define DODT		0x0024
 #define MUTE_ST		0x0028
-#define REG_END		MUTE_ST
-
+#define OUT_SEL		0x0030
+#define REG_END		OUT_SEL
 
+#define A_MST_CTLR	0x0180
+#define B_MST_CTLR	0x01A0
 #define CPU_INT_ST	0x01F4
 #define CPU_IEMSK	0x01F8
 #define CPU_IMSK	0x01FC
@@ -43,7 +45,7 @@
 #define CLK_RST		0x0210
 #define SOFT_RST	0x0214
 #define FIFO_SZ		0x0218
-#define MREG_START	CPU_INT_ST
+#define MREG_START	A_MST_CTLR
 #define MREG_END	FIFO_SZ
 
 /* DO_FMT */
@@ -54,6 +56,7 @@
 #define CR_I2S		(0x3 << 4)
 #define CR_TDM		(0x4 << 4)
 #define CR_TDM_D	(0x5 << 4)
+#define CR_SPDIF	0x00100120
 
 /* DOFF_CTL */
 /* DIFF_CTL */
@@ -69,6 +72,10 @@
 #define ACKMD_MASK	0x00007000
 #define BPFMD_MASK	0x00000700
 
+/* A/B MST_CTLR */
+#define BP	(1 << 4)	/* Fix the signal of Biphase output */
+#define SE	(1 << 0)	/* Fix the master clock */
+
 /* CLK_RST */
 #define B_CLK		0x00000010
 #define A_CLK		0x00000001
@@ -113,6 +120,8 @@ struct fsi_priv {
 	int period_len;
 	int buffer_len;
 	int periods;
+
+	u32 mst_ctrl;
 };
 
 struct fsi_core {
@@ -392,6 +401,29 @@ static void fsi_irq_clear_status(struct fsi_priv *fsi)
 	fsi_master_mask_set(master, master->core->int_st, data, 0);
 }
 
+/************************************************************************
+
+
+		SPDIF master clock function
+
+These functions are only usable on FSI2 or later
+************************************************************************/
+static void fsi_spdif_clk_ctrl(struct fsi_priv *fsi, int enable)
+{
+	struct fsi_master *master = fsi_get_master(fsi);
+	u32 val = BP | SE;
+
+	if (master->core->ver < 2) {
+		pr_err("fsi: register access err (%s)\n", __func__);
+		return;
+	}
+
+	if (enable)
+		fsi_master_mask_set(master, fsi->mst_ctrl, val, val);
+	else
+		fsi_master_mask_set(master, fsi->mst_ctrl, val, 0);
+}
+
 /************************************************************************
 
 
@@ -671,6 +703,7 @@ static int fsi_dai_startup(struct snd_pcm_substream *substream,
 {
 	struct fsi_priv *fsi = fsi_get_priv(substream);
 	u32 flags = fsi_get_info_flags(fsi);
+	struct fsi_master *master = fsi_get_master(fsi);
 	u32 fmt;
 	u32 reg;
 	u32 data;
@@ -732,6 +765,16 @@ static int fsi_dai_startup(struct snd_pcm_substream *substream,
 			SH_FSI_GET_CH_O(flags) : SH_FSI_GET_CH_I(flags);
 		data = CR_TDM_D | (fsi->chan - 1);
 		break;
+	case SH_FSI_FMT_SPDIF:
+		if (master->core->ver < 2) {
+			dev_err(dai->dev, "This FSI can not use SPDIF\n");
+			return -EINVAL;
+		}
+		data = CR_SPDIF;
+		fsi->chan = 2;
+		fsi_spdif_clk_ctrl(fsi, 1);
+		fsi_reg_mask_set(fsi, OUT_SEL, 0x0010, 0x0010);
+		break;
 	default:
 		dev_err(dai->dev, "unknown format.\n");
 		return -EINVAL;
@@ -1071,14 +1114,21 @@ static int fsi_probe(struct platform_device *pdev)
 		goto exit_kfree;
 	}
 
+	/* master setting */
 	master->irq		= irq;
 	master->info		= pdev->dev.platform_data;
+	master->core		= (struct fsi_core *)id_entry->driver_data;
+	spin_lock_init(&master->lock);
+
+	/* FSI A setting */
 	master->fsia.base	= master->base;
 	master->fsia.master	= master;
+	master->fsia.mst_ctrl	= A_MST_CTLR;
+
+	/* FSI B setting */
 	master->fsib.base	= master->base + 0x40;
 	master->fsib.master	= master;
-	master->core		= (struct fsi_core *)id_entry->driver_data;
-	spin_lock_init(&master->lock);
+	master->fsib.mst_ctrl	= B_MST_CTLR;
 
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_resume(&pdev->dev);
-- 
cgit v1.2.3-70-g09d2


From 872e330e38806d835bd6c311c93ab998e2fb9058 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Thu, 29 Jul 2010 18:19:22 +0200
Subject: firewire: add isochronous multichannel reception

This adds the DMA context programming and userspace ABI for multichannel
reception, i.e. for listening on multiple channel numbers by means of a
single DMA context.

The use case is reception of more streams than there are IR DMA units
offered by the link layer.  This is already implemented by the older
ohci1394 + ieee1394 + raw1394 stack.  And as discussed recently on
linux1394-devel, this feature is occasionally used in practice.

The big drawbacks of this mode are that buffer layout and interrupt
generation necessarily differ from single-channel reception:  Headers
and trailers are not stripped from packets, packets are not aligned with
buffer chunks, interrupts are per buffer chunk, not per packet.

These drawbacks also cause a rather hefty code footprint to support this
rarely used OHCI-1394 feature.  (367 lines added, among them 94 lines of
added userspace ABI documentation.)

This implementation enforces that a multichannel reception context may
only listen to channels to which no single-channel context on the same
link layer is presently listening.  OHCI-1394 would allow overlaying
single-channel contexts with the multi-channel context, but this would be
a departure from the present first-come-first-served policy of IR
context creation.

The implementation is heavily based on an earlier one by Jay Fenlason.
Thanks Jay.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-cdev.c  |  93 ++++++++++---
 drivers/firewire/core-iso.c   |  32 ++++-
 drivers/firewire/core.h       |   2 +
 drivers/firewire/ohci.c       | 316 +++++++++++++++++++++++++++++++++---------
 include/linux/firewire-cdev.h | 281 +++++++++++++++++++++++++------------
 include/linux/firewire.h      |  29 ++--
 6 files changed, 560 insertions(+), 193 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index cf989e1635e..ba23646bb10 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -193,6 +193,11 @@ struct iso_interrupt_event {
 	struct fw_cdev_event_iso_interrupt interrupt;
 };
 
+struct iso_interrupt_mc_event {
+	struct event event;
+	struct fw_cdev_event_iso_interrupt_mc interrupt;
+};
+
 struct iso_resource_event {
 	struct event event;
 	struct fw_cdev_event_iso_resource iso_resource;
@@ -415,6 +420,7 @@ union ioctl_arg {
 	struct fw_cdev_get_cycle_timer2		get_cycle_timer2;
 	struct fw_cdev_send_phy_packet		send_phy_packet;
 	struct fw_cdev_receive_phy_packets	receive_phy_packets;
+	struct fw_cdev_set_iso_channels		set_iso_channels;
 };
 
 static int ioctl_get_info(struct client *client, union ioctl_arg *arg)
@@ -932,26 +938,54 @@ static void iso_callback(struct fw_iso_context *context, u32 cycle,
 		    sizeof(e->interrupt) + header_length, NULL, 0);
 }
 
+static void iso_mc_callback(struct fw_iso_context *context,
+			    dma_addr_t completed, void *data)
+{
+	struct client *client = data;
+	struct iso_interrupt_mc_event *e;
+
+	e = kmalloc(sizeof(*e), GFP_ATOMIC);
+	if (e == NULL) {
+		fw_notify("Out of memory when allocating event\n");
+		return;
+	}
+	e->interrupt.type      = FW_CDEV_EVENT_ISO_INTERRUPT_MULTICHANNEL;
+	e->interrupt.closure   = client->iso_closure;
+	e->interrupt.completed = fw_iso_buffer_lookup(&client->buffer,
+						      completed);
+	queue_event(client, &e->event, &e->interrupt,
+		    sizeof(e->interrupt), NULL, 0);
+}
+
 static int ioctl_create_iso_context(struct client *client, union ioctl_arg *arg)
 {
 	struct fw_cdev_create_iso_context *a = &arg->create_iso_context;
 	struct fw_iso_context *context;
+	fw_iso_callback_t cb;
 
 	BUILD_BUG_ON(FW_CDEV_ISO_CONTEXT_TRANSMIT != FW_ISO_CONTEXT_TRANSMIT ||
-		     FW_CDEV_ISO_CONTEXT_RECEIVE  != FW_ISO_CONTEXT_RECEIVE);
-
-	if (a->channel > 63)
-		return -EINVAL;
+		     FW_CDEV_ISO_CONTEXT_RECEIVE  != FW_ISO_CONTEXT_RECEIVE  ||
+		     FW_CDEV_ISO_CONTEXT_RECEIVE_MULTICHANNEL !=
+					FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL);
 
 	switch (a->type) {
-	case FW_ISO_CONTEXT_RECEIVE:
-		if (a->header_size < 4 || (a->header_size & 3))
+	case FW_ISO_CONTEXT_TRANSMIT:
+		if (a->speed > SCODE_3200 || a->channel > 63)
 			return -EINVAL;
+
+		cb = iso_callback;
 		break;
 
-	case FW_ISO_CONTEXT_TRANSMIT:
-		if (a->speed > SCODE_3200)
+	case FW_ISO_CONTEXT_RECEIVE:
+		if (a->header_size < 4 || (a->header_size & 3) ||
+		    a->channel > 63)
 			return -EINVAL;
+
+		cb = iso_callback;
+		break;
+
+	case FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL:
+		cb = (fw_iso_callback_t)iso_mc_callback;
 		break;
 
 	default:
@@ -959,8 +993,7 @@ static int ioctl_create_iso_context(struct client *client, union ioctl_arg *arg)
 	}
 
 	context = fw_iso_context_create(client->device->card, a->type,
-					a->channel, a->speed, a->header_size,
-					iso_callback, client);
+			a->channel, a->speed, a->header_size, cb, client);
 	if (IS_ERR(context))
 		return PTR_ERR(context);
 
@@ -980,6 +1013,17 @@ static int ioctl_create_iso_context(struct client *client, union ioctl_arg *arg)
 	return 0;
 }
 
+static int ioctl_set_iso_channels(struct client *client, union ioctl_arg *arg)
+{
+	struct fw_cdev_set_iso_channels *a = &arg->set_iso_channels;
+	struct fw_iso_context *ctx = client->iso_context;
+
+	if (ctx == NULL || a->handle != 0)
+		return -EINVAL;
+
+	return fw_iso_context_set_channels(ctx, &a->channels);
+}
+
 /* Macros for decoding the iso packet control header. */
 #define GET_PAYLOAD_LENGTH(v)	((v) & 0xffff)
 #define GET_INTERRUPT(v)	(((v) >> 16) & 0x01)
@@ -993,7 +1037,7 @@ static int ioctl_queue_iso(struct client *client, union ioctl_arg *arg)
 	struct fw_cdev_queue_iso *a = &arg->queue_iso;
 	struct fw_cdev_iso_packet __user *p, *end, *next;
 	struct fw_iso_context *ctx = client->iso_context;
-	unsigned long payload, buffer_end, transmit_header_bytes;
+	unsigned long payload, buffer_end, transmit_header_bytes = 0;
 	u32 control;
 	int count;
 	struct {
@@ -1013,7 +1057,6 @@ static int ioctl_queue_iso(struct client *client, union ioctl_arg *arg)
 	 * use the indirect payload, the iso buffer need not be mapped
 	 * and the a->data pointer is ignored.
 	 */
-
 	payload = (unsigned long)a->data - client->vm_start;
 	buffer_end = client->buffer.page_count << PAGE_SHIFT;
 	if (a->data == 0 || client->buffer.pages == NULL ||
@@ -1022,8 +1065,10 @@ static int ioctl_queue_iso(struct client *client, union ioctl_arg *arg)
 		buffer_end = 0;
 	}
 
-	p = (struct fw_cdev_iso_packet __user *)u64_to_uptr(a->packets);
+	if (ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL && payload & 3)
+		return -EINVAL;
 
+	p = (struct fw_cdev_iso_packet __user *)u64_to_uptr(a->packets);
 	if (!access_ok(VERIFY_READ, p, a->size))
 		return -EFAULT;
 
@@ -1039,19 +1084,24 @@ static int ioctl_queue_iso(struct client *client, union ioctl_arg *arg)
 		u.packet.sy = GET_SY(control);
 		u.packet.header_length = GET_HEADER_LENGTH(control);
 
-		if (ctx->type == FW_ISO_CONTEXT_TRANSMIT) {
-			if (u.packet.header_length % 4 != 0)
+		switch (ctx->type) {
+		case FW_ISO_CONTEXT_TRANSMIT:
+			if (u.packet.header_length & 3)
 				return -EINVAL;
 			transmit_header_bytes = u.packet.header_length;
-		} else {
-			/*
-			 * We require that header_length is a multiple of
-			 * the fixed header size, ctx->header_size.
-			 */
+			break;
+
+		case FW_ISO_CONTEXT_RECEIVE:
 			if (u.packet.header_length == 0 ||
 			    u.packet.header_length % ctx->header_size != 0)
 				return -EINVAL;
-			transmit_header_bytes = 0;
+			break;
+
+		case FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL:
+			if (u.packet.payload_length == 0 ||
+			    u.packet.payload_length & 3)
+				return -EINVAL;
+			break;
 		}
 
 		next = (struct fw_cdev_iso_packet __user *)
@@ -1534,6 +1584,7 @@ static int (* const ioctl_handlers[])(struct client *, union ioctl_arg *) = {
 	[0x14] = ioctl_get_cycle_timer2,
 	[0x15] = ioctl_send_phy_packet,
 	[0x16] = ioctl_receive_phy_packets,
+	[0x17] = ioctl_set_iso_channels,
 };
 
 static int dispatch_ioctl(struct client *client,
diff --git a/drivers/firewire/core-iso.c b/drivers/firewire/core-iso.c
index 4fe932e60fb..0c8e662a5da 100644
--- a/drivers/firewire/core-iso.c
+++ b/drivers/firewire/core-iso.c
@@ -117,6 +117,23 @@ void fw_iso_buffer_destroy(struct fw_iso_buffer *buffer,
 }
 EXPORT_SYMBOL(fw_iso_buffer_destroy);
 
+/* Convert DMA address to offset into virtually contiguous buffer. */
+size_t fw_iso_buffer_lookup(struct fw_iso_buffer *buffer, dma_addr_t completed)
+{
+	int i;
+	dma_addr_t address;
+	ssize_t offset;
+
+	for (i = 0; i < buffer->page_count; i++) {
+		address = page_private(buffer->pages[i]);
+		offset = (ssize_t)completed - (ssize_t)address;
+		if (offset > 0 && offset <= PAGE_SIZE)
+			return (i << PAGE_SHIFT) + offset;
+	}
+
+	return 0;
+}
+
 struct fw_iso_context *fw_iso_context_create(struct fw_card *card,
 		int type, int channel, int speed, size_t header_size,
 		fw_iso_callback_t callback, void *callback_data)
@@ -133,7 +150,7 @@ struct fw_iso_context *fw_iso_context_create(struct fw_card *card,
 	ctx->channel = channel;
 	ctx->speed = speed;
 	ctx->header_size = header_size;
-	ctx->callback = callback;
+	ctx->callback.sc = callback;
 	ctx->callback_data = callback_data;
 
 	return ctx;
@@ -142,9 +159,7 @@ EXPORT_SYMBOL(fw_iso_context_create);
 
 void fw_iso_context_destroy(struct fw_iso_context *ctx)
 {
-	struct fw_card *card = ctx->card;
-
-	card->driver->free_iso_context(ctx);
+	ctx->card->driver->free_iso_context(ctx);
 }
 EXPORT_SYMBOL(fw_iso_context_destroy);
 
@@ -155,14 +170,17 @@ int fw_iso_context_start(struct fw_iso_context *ctx,
 }
 EXPORT_SYMBOL(fw_iso_context_start);
 
+int fw_iso_context_set_channels(struct fw_iso_context *ctx, u64 *channels)
+{
+	return ctx->card->driver->set_iso_channels(ctx, channels);
+}
+
 int fw_iso_context_queue(struct fw_iso_context *ctx,
 			 struct fw_iso_packet *packet,
 			 struct fw_iso_buffer *buffer,
 			 unsigned long payload)
 {
-	struct fw_card *card = ctx->card;
-
-	return card->driver->queue_iso(ctx, packet, buffer, payload);
+	return ctx->card->driver->queue_iso(ctx, packet, buffer, payload);
 }
 EXPORT_SYMBOL(fw_iso_context_queue);
 
diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
index 28621e44b11..e6239f971be 100644
--- a/drivers/firewire/core.h
+++ b/drivers/firewire/core.h
@@ -90,6 +90,8 @@ struct fw_card_driver {
 	int (*start_iso)(struct fw_iso_context *ctx,
 			 s32 cycle, u32 sync, u32 tags);
 
+	int (*set_iso_channels)(struct fw_iso_context *ctx, u64 *channels);
+
 	int (*queue_iso)(struct fw_iso_context *ctx,
 			 struct fw_iso_packet *packet,
 			 struct fw_iso_buffer *buffer,
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 2e4b425847a..4bda1c1b74b 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -190,11 +190,13 @@ struct fw_ohci {
 	struct context at_request_ctx;
 	struct context at_response_ctx;
 
-	u32 it_context_mask;
+	u32 it_context_mask;     /* unoccupied IT contexts */
 	struct iso_context *it_context_list;
-	u64 ir_context_channels;
-	u32 ir_context_mask;
+	u64 ir_context_channels; /* unoccupied channels */
+	u32 ir_context_mask;     /* unoccupied IR contexts */
 	struct iso_context *ir_context_list;
+	u64 mc_channels; /* channels in use by the multichannel IR context */
+	bool mc_allocated;
 
 	__be32    *config_rom;
 	dma_addr_t config_rom_bus;
@@ -2197,10 +2199,9 @@ static int handle_ir_packet_per_buffer(struct context *context,
 	__le32 *ir_header;
 	void *p;
 
-	for (pd = d; pd <= last; pd++) {
+	for (pd = d; pd <= last; pd++)
 		if (pd->transfer_status)
 			break;
-	}
 	if (pd > last)
 		/* Descriptor(s) not done yet, stop iteration */
 		return 0;
@@ -2210,16 +2211,38 @@ static int handle_ir_packet_per_buffer(struct context *context,
 
 	if (le16_to_cpu(last->control) & DESCRIPTOR_IRQ_ALWAYS) {
 		ir_header = (__le32 *) p;
-		ctx->base.callback(&ctx->base,
-				   le32_to_cpu(ir_header[0]) & 0xffff,
-				   ctx->header_length, ctx->header,
-				   ctx->base.callback_data);
+		ctx->base.callback.sc(&ctx->base,
+				      le32_to_cpu(ir_header[0]) & 0xffff,
+				      ctx->header_length, ctx->header,
+				      ctx->base.callback_data);
 		ctx->header_length = 0;
 	}
 
 	return 1;
 }
 
+/* d == last because each descriptor block is only a single descriptor. */
+static int handle_ir_buffer_fill(struct context *context,
+				 struct descriptor *d,
+				 struct descriptor *last)
+{
+	struct iso_context *ctx =
+		container_of(context, struct iso_context, context);
+
+	if (!last->transfer_status)
+		/* Descriptor(s) not done yet, stop iteration */
+		return 0;
+
+	if (le16_to_cpu(last->control) & DESCRIPTOR_IRQ_ALWAYS)
+		ctx->base.callback.mc(&ctx->base,
+				      le32_to_cpu(last->data_address) +
+				      le16_to_cpu(last->req_count) -
+				      le16_to_cpu(last->res_count),
+				      ctx->base.callback_data);
+
+	return 1;
+}
+
 static int handle_it_packet(struct context *context,
 			    struct descriptor *d,
 			    struct descriptor *last)
@@ -2245,72 +2268,118 @@ static int handle_it_packet(struct context *context,
 		ctx->header_length += 4;
 	}
 	if (le16_to_cpu(last->control) & DESCRIPTOR_IRQ_ALWAYS) {
-		ctx->base.callback(&ctx->base, le16_to_cpu(last->res_count),
-				   ctx->header_length, ctx->header,
-				   ctx->base.callback_data);
+		ctx->base.callback.sc(&ctx->base, le16_to_cpu(last->res_count),
+				      ctx->header_length, ctx->header,
+				      ctx->base.callback_data);
 		ctx->header_length = 0;
 	}
 	return 1;
 }
 
+static void set_multichannel_mask(struct fw_ohci *ohci, u64 channels)
+{
+	u32 hi = channels >> 32, lo = channels;
+
+	reg_write(ohci, OHCI1394_IRMultiChanMaskHiClear, ~hi);
+	reg_write(ohci, OHCI1394_IRMultiChanMaskLoClear, ~lo);
+	reg_write(ohci, OHCI1394_IRMultiChanMaskHiSet, hi);
+	reg_write(ohci, OHCI1394_IRMultiChanMaskLoSet, lo);
+	mmiowb();
+	ohci->mc_channels = channels;
+}
+
 static struct fw_iso_context *ohci_allocate_iso_context(struct fw_card *card,
 				int type, int channel, size_t header_size)
 {
 	struct fw_ohci *ohci = fw_ohci(card);
-	struct iso_context *ctx, *list;
-	descriptor_callback_t callback;
-	u64 *channels, dont_care = ~0ULL;
-	u32 *mask, regs;
+	struct iso_context *uninitialized_var(ctx);
+	descriptor_callback_t uninitialized_var(callback);
+	u64 *uninitialized_var(channels);
+	u32 *uninitialized_var(mask), uninitialized_var(regs);
 	unsigned long flags;
-	int index, ret = -ENOMEM;
+	int index, ret = -EBUSY;
 
-	if (type == FW_ISO_CONTEXT_TRANSMIT) {
-		channels = &dont_care;
-		mask = &ohci->it_context_mask;
-		list = ohci->it_context_list;
+	spin_lock_irqsave(&ohci->lock, flags);
+
+	switch (type) {
+	case FW_ISO_CONTEXT_TRANSMIT:
+		mask     = &ohci->it_context_mask;
 		callback = handle_it_packet;
-	} else {
+		index    = ffs(*mask) - 1;
+		if (index >= 0) {
+			*mask &= ~(1 << index);
+			regs = OHCI1394_IsoXmitContextBase(index);
+			ctx  = &ohci->it_context_list[index];
+		}
+		break;
+
+	case FW_ISO_CONTEXT_RECEIVE:
 		channels = &ohci->ir_context_channels;
-		mask = &ohci->ir_context_mask;
-		list = ohci->ir_context_list;
+		mask     = &ohci->ir_context_mask;
 		callback = handle_ir_packet_per_buffer;
-	}
+		index    = *channels & 1ULL << channel ? ffs(*mask) - 1 : -1;
+		if (index >= 0) {
+			*channels &= ~(1ULL << channel);
+			*mask     &= ~(1 << index);
+			regs = OHCI1394_IsoRcvContextBase(index);
+			ctx  = &ohci->ir_context_list[index];
+		}
+		break;
 
-	spin_lock_irqsave(&ohci->lock, flags);
-	index = *channels & 1ULL << channel ? ffs(*mask) - 1 : -1;
-	if (index >= 0) {
-		*channels &= ~(1ULL << channel);
-		*mask &= ~(1 << index);
+	case FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL:
+		mask     = &ohci->ir_context_mask;
+		callback = handle_ir_buffer_fill;
+		index    = !ohci->mc_allocated ? ffs(*mask) - 1 : -1;
+		if (index >= 0) {
+			ohci->mc_allocated = true;
+			*mask &= ~(1 << index);
+			regs = OHCI1394_IsoRcvContextBase(index);
+			ctx  = &ohci->ir_context_list[index];
+		}
+		break;
+
+	default:
+		index = -1;
+		ret = -ENOSYS;
 	}
+
 	spin_unlock_irqrestore(&ohci->lock, flags);
 
 	if (index < 0)
-		return ERR_PTR(-EBUSY);
-
-	if (type == FW_ISO_CONTEXT_TRANSMIT)
-		regs = OHCI1394_IsoXmitContextBase(index);
-	else
-		regs = OHCI1394_IsoRcvContextBase(index);
+		return ERR_PTR(ret);
 
-	ctx = &list[index];
 	memset(ctx, 0, sizeof(*ctx));
 	ctx->header_length = 0;
 	ctx->header = (void *) __get_free_page(GFP_KERNEL);
-	if (ctx->header == NULL)
+	if (ctx->header == NULL) {
+		ret = -ENOMEM;
 		goto out;
-
+	}
 	ret = context_init(&ctx->context, ohci, regs, callback);
 	if (ret < 0)
 		goto out_with_header;
 
+	if (type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
+		set_multichannel_mask(ohci, 0);
+
 	return &ctx->base;
 
  out_with_header:
 	free_page((unsigned long)ctx->header);
  out:
 	spin_lock_irqsave(&ohci->lock, flags);
-	*channels |= 1ULL << channel;
+
+	switch (type) {
+	case FW_ISO_CONTEXT_RECEIVE:
+		*channels |= 1ULL << channel;
+		break;
+
+	case FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL:
+		ohci->mc_allocated = false;
+		break;
+	}
 	*mask |= 1 << index;
+
 	spin_unlock_irqrestore(&ohci->lock, flags);
 
 	return ERR_PTR(ret);
@@ -2321,10 +2390,11 @@ static int ohci_start_iso(struct fw_iso_context *base,
 {
 	struct iso_context *ctx = container_of(base, struct iso_context, base);
 	struct fw_ohci *ohci = ctx->context.ohci;
-	u32 control, match;
+	u32 control = IR_CONTEXT_ISOCH_HEADER, match;
 	int index;
 
-	if (ctx->base.type == FW_ISO_CONTEXT_TRANSMIT) {
+	switch (ctx->base.type) {
+	case FW_ISO_CONTEXT_TRANSMIT:
 		index = ctx - ohci->it_context_list;
 		match = 0;
 		if (cycle >= 0)
@@ -2334,9 +2404,13 @@ static int ohci_start_iso(struct fw_iso_context *base,
 		reg_write(ohci, OHCI1394_IsoXmitIntEventClear, 1 << index);
 		reg_write(ohci, OHCI1394_IsoXmitIntMaskSet, 1 << index);
 		context_run(&ctx->context, match);
-	} else {
+		break;
+
+	case FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL:
+		control |= IR_CONTEXT_BUFFER_FILL|IR_CONTEXT_MULTI_CHANNEL_MODE;
+		/* fall through */
+	case FW_ISO_CONTEXT_RECEIVE:
 		index = ctx - ohci->ir_context_list;
-		control = IR_CONTEXT_ISOCH_HEADER;
 		match = (tags << 28) | (sync << 8) | ctx->base.channel;
 		if (cycle >= 0) {
 			match |= (cycle & 0x07fff) << 12;
@@ -2347,6 +2421,7 @@ static int ohci_start_iso(struct fw_iso_context *base,
 		reg_write(ohci, OHCI1394_IsoRecvIntMaskSet, 1 << index);
 		reg_write(ohci, CONTEXT_MATCH(ctx->context.regs), match);
 		context_run(&ctx->context, control);
+		break;
 	}
 
 	return 0;
@@ -2358,12 +2433,17 @@ static int ohci_stop_iso(struct fw_iso_context *base)
 	struct iso_context *ctx = container_of(base, struct iso_context, base);
 	int index;
 
-	if (ctx->base.type == FW_ISO_CONTEXT_TRANSMIT) {
+	switch (ctx->base.type) {
+	case FW_ISO_CONTEXT_TRANSMIT:
 		index = ctx - ohci->it_context_list;
 		reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, 1 << index);
-	} else {
+		break;
+
+	case FW_ISO_CONTEXT_RECEIVE:
+	case FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL:
 		index = ctx - ohci->ir_context_list;
 		reg_write(ohci, OHCI1394_IsoRecvIntMaskClear, 1 << index);
+		break;
 	}
 	flush_writes(ohci);
 	context_stop(&ctx->context);
@@ -2384,24 +2464,65 @@ static void ohci_free_iso_context(struct fw_iso_context *base)
 
 	spin_lock_irqsave(&ohci->lock, flags);
 
-	if (ctx->base.type == FW_ISO_CONTEXT_TRANSMIT) {
+	switch (base->type) {
+	case FW_ISO_CONTEXT_TRANSMIT:
 		index = ctx - ohci->it_context_list;
 		ohci->it_context_mask |= 1 << index;
-	} else {
+		break;
+
+	case FW_ISO_CONTEXT_RECEIVE:
 		index = ctx - ohci->ir_context_list;
 		ohci->ir_context_mask |= 1 << index;
 		ohci->ir_context_channels |= 1ULL << base->channel;
+		break;
+
+	case FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL:
+		index = ctx - ohci->ir_context_list;
+		ohci->ir_context_mask |= 1 << index;
+		ohci->ir_context_channels |= ohci->mc_channels;
+		ohci->mc_channels = 0;
+		ohci->mc_allocated = false;
+		break;
 	}
 
 	spin_unlock_irqrestore(&ohci->lock, flags);
 }
 
-static int ohci_queue_iso_transmit(struct fw_iso_context *base,
-				   struct fw_iso_packet *packet,
-				   struct fw_iso_buffer *buffer,
-				   unsigned long payload)
+static int ohci_set_iso_channels(struct fw_iso_context *base, u64 *channels)
+{
+	struct fw_ohci *ohci = fw_ohci(base->card);
+	unsigned long flags;
+	int ret;
+
+	switch (base->type) {
+	case FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL:
+
+		spin_lock_irqsave(&ohci->lock, flags);
+
+		/* Don't allow multichannel to grab other contexts' channels. */
+		if (~ohci->ir_context_channels & ~ohci->mc_channels & *channels) {
+			*channels = ohci->ir_context_channels;
+			ret = -EBUSY;
+		} else {
+			set_multichannel_mask(ohci, *channels);
+			ret = 0;
+		}
+
+		spin_unlock_irqrestore(&ohci->lock, flags);
+
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int queue_iso_transmit(struct iso_context *ctx,
+			      struct fw_iso_packet *packet,
+			      struct fw_iso_buffer *buffer,
+			      unsigned long payload)
 {
-	struct iso_context *ctx = container_of(base, struct iso_context, base);
 	struct descriptor *d, *last, *pd;
 	struct fw_iso_packet *p;
 	__le32 *header;
@@ -2497,14 +2618,12 @@ static int ohci_queue_iso_transmit(struct fw_iso_context *base,
 	return 0;
 }
 
-static int ohci_queue_iso_receive_packet_per_buffer(struct fw_iso_context *base,
-					struct fw_iso_packet *packet,
-					struct fw_iso_buffer *buffer,
-					unsigned long payload)
+static int queue_iso_packet_per_buffer(struct iso_context *ctx,
+				       struct fw_iso_packet *packet,
+				       struct fw_iso_buffer *buffer,
+				       unsigned long payload)
 {
-	struct iso_context *ctx = container_of(base, struct iso_context, base);
 	struct descriptor *d, *pd;
-	struct fw_iso_packet *p = packet;
 	dma_addr_t d_bus, page_bus;
 	u32 z, header_z, rest;
 	int i, j, length;
@@ -2514,14 +2633,14 @@ static int ohci_queue_iso_receive_packet_per_buffer(struct fw_iso_context *base,
 	 * The OHCI controller puts the isochronous header and trailer in the
 	 * buffer, so we need at least 8 bytes.
 	 */
-	packet_count = p->header_length / ctx->base.header_size;
+	packet_count = packet->header_length / ctx->base.header_size;
 	header_size  = max(ctx->base.header_size, (size_t)8);
 
 	/* Get header size in number of descriptors. */
 	header_z = DIV_ROUND_UP(header_size, sizeof(*d));
 	page     = payload >> PAGE_SHIFT;
 	offset   = payload & ~PAGE_MASK;
-	payload_per_buffer = p->payload_length / packet_count;
+	payload_per_buffer = packet->payload_length / packet_count;
 
 	for (i = 0; i < packet_count; i++) {
 		/* d points to the header descriptor */
@@ -2533,7 +2652,7 @@ static int ohci_queue_iso_receive_packet_per_buffer(struct fw_iso_context *base,
 
 		d->control      = cpu_to_le16(DESCRIPTOR_STATUS |
 					      DESCRIPTOR_INPUT_MORE);
-		if (p->skip && i == 0)
+		if (packet->skip && i == 0)
 			d->control |= cpu_to_le16(DESCRIPTOR_WAIT);
 		d->req_count    = cpu_to_le16(header_size);
 		d->res_count    = d->req_count;
@@ -2566,7 +2685,7 @@ static int ohci_queue_iso_receive_packet_per_buffer(struct fw_iso_context *base,
 		pd->control = cpu_to_le16(DESCRIPTOR_STATUS |
 					  DESCRIPTOR_INPUT_LAST |
 					  DESCRIPTOR_BRANCH_ALWAYS);
-		if (p->interrupt && i == packet_count - 1)
+		if (packet->interrupt && i == packet_count - 1)
 			pd->control |= cpu_to_le16(DESCRIPTOR_IRQ_ALWAYS);
 
 		context_append(&ctx->context, d, z, header_z);
@@ -2575,6 +2694,58 @@ static int ohci_queue_iso_receive_packet_per_buffer(struct fw_iso_context *base,
 	return 0;
 }
 
+static int queue_iso_buffer_fill(struct iso_context *ctx,
+				 struct fw_iso_packet *packet,
+				 struct fw_iso_buffer *buffer,
+				 unsigned long payload)
+{
+	struct descriptor *d;
+	dma_addr_t d_bus, page_bus;
+	int page, offset, rest, z, i, length;
+
+	page   = payload >> PAGE_SHIFT;
+	offset = payload & ~PAGE_MASK;
+	rest   = packet->payload_length;
+
+	/* We need one descriptor for each page in the buffer. */
+	z = DIV_ROUND_UP(offset + rest, PAGE_SIZE);
+
+	if (WARN_ON(offset & 3 || rest & 3 || page + z > buffer->page_count))
+		return -EFAULT;
+
+	for (i = 0; i < z; i++) {
+		d = context_get_descriptors(&ctx->context, 1, &d_bus);
+		if (d == NULL)
+			return -ENOMEM;
+
+		d->control = cpu_to_le16(DESCRIPTOR_INPUT_MORE |
+					 DESCRIPTOR_BRANCH_ALWAYS);
+		if (packet->skip && i == 0)
+			d->control |= cpu_to_le16(DESCRIPTOR_WAIT);
+		if (packet->interrupt && i == z - 1)
+			d->control |= cpu_to_le16(DESCRIPTOR_IRQ_ALWAYS);
+
+		if (offset + rest < PAGE_SIZE)
+			length = rest;
+		else
+			length = PAGE_SIZE - offset;
+		d->req_count = cpu_to_le16(length);
+		d->res_count = d->req_count;
+		d->transfer_status = 0;
+
+		page_bus = page_private(buffer->pages[page]);
+		d->data_address = cpu_to_le32(page_bus + offset);
+
+		rest -= length;
+		offset = 0;
+		page++;
+
+		context_append(&ctx->context, d, 1, 0);
+	}
+
+	return 0;
+}
+
 static int ohci_queue_iso(struct fw_iso_context *base,
 			  struct fw_iso_packet *packet,
 			  struct fw_iso_buffer *buffer,
@@ -2582,14 +2753,20 @@ static int ohci_queue_iso(struct fw_iso_context *base,
 {
 	struct iso_context *ctx = container_of(base, struct iso_context, base);
 	unsigned long flags;
-	int ret;
+	int ret = -ENOSYS;
 
 	spin_lock_irqsave(&ctx->context.ohci->lock, flags);
-	if (base->type == FW_ISO_CONTEXT_TRANSMIT)
-		ret = ohci_queue_iso_transmit(base, packet, buffer, payload);
-	else
-		ret = ohci_queue_iso_receive_packet_per_buffer(base, packet,
-							buffer, payload);
+	switch (base->type) {
+	case FW_ISO_CONTEXT_TRANSMIT:
+		ret = queue_iso_transmit(ctx, packet, buffer, payload);
+		break;
+	case FW_ISO_CONTEXT_RECEIVE:
+		ret = queue_iso_packet_per_buffer(ctx, packet, buffer, payload);
+		break;
+	case FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL:
+		ret = queue_iso_buffer_fill(ctx, packet, buffer, payload);
+		break;
+	}
 	spin_unlock_irqrestore(&ctx->context.ohci->lock, flags);
 
 	return ret;
@@ -2609,6 +2786,7 @@ static const struct fw_card_driver ohci_driver = {
 
 	.allocate_iso_context	= ohci_allocate_iso_context,
 	.free_iso_context	= ohci_free_iso_context,
+	.set_iso_channels	= ohci_set_iso_channels,
 	.queue_iso		= ohci_queue_iso,
 	.start_iso		= ohci_start_iso,
 	.stop_iso		= ohci_stop_iso,
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 14831119ff7..bc5c26fc1c6 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -25,17 +25,18 @@
 #include <linux/types.h>
 #include <linux/firewire-constants.h>
 
-#define FW_CDEV_EVENT_BUS_RESET			0x00
-#define FW_CDEV_EVENT_RESPONSE			0x01
-#define FW_CDEV_EVENT_REQUEST			0x02
-#define FW_CDEV_EVENT_ISO_INTERRUPT		0x03
-#define FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED	0x04
-#define FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED	0x05
+#define FW_CDEV_EVENT_BUS_RESET				0x00
+#define FW_CDEV_EVENT_RESPONSE				0x01
+#define FW_CDEV_EVENT_REQUEST				0x02
+#define FW_CDEV_EVENT_ISO_INTERRUPT			0x03
+#define FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED		0x04
+#define FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED		0x05
 
 /* available since kernel version 2.6.36 */
-#define FW_CDEV_EVENT_REQUEST2			0x06
-#define FW_CDEV_EVENT_PHY_PACKET_SENT		0x07
-#define FW_CDEV_EVENT_PHY_PACKET_RECEIVED	0x08
+#define FW_CDEV_EVENT_REQUEST2				0x06
+#define FW_CDEV_EVENT_PHY_PACKET_SENT			0x07
+#define FW_CDEV_EVENT_PHY_PACKET_RECEIVED		0x08
+#define FW_CDEV_EVENT_ISO_INTERRUPT_MULTICHANNEL	0x09
 
 /**
  * struct fw_cdev_event_common - Common part of all fw_cdev_event_ types
@@ -218,35 +219,41 @@ struct fw_cdev_event_request2 {
  * This event is sent when the controller has completed an &fw_cdev_iso_packet
  * with the %FW_CDEV_ISO_INTERRUPT bit set.
  *
- * Isochronous transmit events:
+ * Isochronous transmit events (context type %FW_CDEV_ISO_CONTEXT_TRANSMIT):
  *
- * In version 1 of the ABI, &header_length is 0.  In version 3 and some
- * implementations of version 2 of the ABI, &header_length is a multiple of 4
- * and &header contains timestamps of all packets up until the interrupt packet.
- * The format of the timestamps is as described below for isochronous reception.
+ * In version 3 and some implementations of version 2 of the ABI, &header_length
+ * is a multiple of 4 and &header contains timestamps of all packets up until
+ * the interrupt packet.  The format of the timestamps is as described below for
+ * isochronous reception.  In version 1 of the ABI, &header_length was 0.
  *
- * Isochronous receive events:
+ * Isochronous receive events (context type %FW_CDEV_ISO_CONTEXT_RECEIVE):
  *
  * The headers stripped of all packets up until and including the interrupt
  * packet are returned in the @header field.  The amount of header data per
  * packet is as specified at iso context creation by
  * &fw_cdev_create_iso_context.header_size.
  *
- * In version 1 of this ABI, header data consisted of the 1394 isochronous
- * packet header, followed by quadlets from the packet payload if
- * &fw_cdev_create_iso_context.header_size > 4.
+ * Hence, _interrupt.header_length / _context.header_size is the number of
+ * packets received in this interrupt event.  The client can now iterate
+ * through the mmap()'ed DMA buffer according to this number of packets and
+ * to the buffer sizes as the client specified in &fw_cdev_queue_iso.
  *
- * In version 2 of this ABI, header data consist of the 1394 isochronous
- * packet header, followed by a timestamp quadlet if
- * &fw_cdev_create_iso_context.header_size > 4, followed by quadlets from the
- * packet payload if &fw_cdev_create_iso_context.header_size > 8.
+ * Since version 2 of this ABI, the portion for each packet in _interrupt.header
+ * consists of the 1394 isochronous packet header, followed by a timestamp
+ * quadlet if &fw_cdev_create_iso_context.header_size > 4, followed by quadlets
+ * from the packet payload if &fw_cdev_create_iso_context.header_size > 8.
  *
- * Behaviour of ver. 1 of this ABI is no longer available since ABI ver. 2.
+ * Format of 1394 iso packet header:  16 bits data_length, 2 bits tag, 6 bits
+ * channel, 4 bits tcode, 4 bits sy, in big endian byte order.
+ * data_length is the actual received size of the packet without the four
+ * 1394 iso packet header bytes.
+ *
+ * Format of timestamp:  16 bits invalid, 3 bits cycleSeconds, 13 bits
+ * cycleCount, in big endian byte order.
  *
- * Format of 1394 iso packet header: 16 bits len, 2 bits tag, 6 bits channel,
- * 4 bits tcode, 4 bits sy, in big endian byte order.  Format of timestamp:
- * 16 bits invalid, 3 bits cycleSeconds, 13 bits cycleCount, in big endian byte
- * order.
+ * In version 1 of the ABI, no timestamp quadlet was inserted; instead, payload
+ * data followed directly after the 1394 iso header if header_size > 4.
+ * Behaviour of ver. 1 of this ABI is no longer available since ABI ver. 2.
  */
 struct fw_cdev_event_iso_interrupt {
 	__u64 closure;
@@ -256,6 +263,43 @@ struct fw_cdev_event_iso_interrupt {
 	__u32 header[0];
 };
 
+/**
+ * struct fw_cdev_event_iso_interrupt_mc - An iso buffer chunk was completed
+ * @closure:	See &fw_cdev_event_common;
+ *		set by %FW_CDEV_CREATE_ISO_CONTEXT ioctl
+ * @type:	%FW_CDEV_EVENT_ISO_INTERRUPT_MULTICHANNEL
+ * @completed:	Offset into the receive buffer; data before this offset is valid
+ *
+ * This event is sent in multichannel contexts (context type
+ * %FW_CDEV_ISO_CONTEXT_RECEIVE_MULTICHANNEL) for &fw_cdev_iso_packet buffer
+ * chunks that have the %FW_CDEV_ISO_INTERRUPT bit set.  Whether this happens
+ * when a packet is completed and/or when a buffer chunk is completed depends
+ * on the hardware implementation.
+ *
+ * The buffer is continuously filled with the following data, per packet:
+ *  - the 1394 iso packet header as described at &fw_cdev_event_iso_interrupt,
+ *    but in little endian byte order,
+ *  - packet payload (as many bytes as specified in the data_length field of
+ *    the 1394 iso packet header) in big endian byte order,
+ *  - 0...3 padding bytes as needed to align the following trailer quadlet,
+ *  - trailer quadlet, containing the reception timestamp as described at
+ *    &fw_cdev_event_iso_interrupt, but in little endian byte order.
+ *
+ * Hence the per-packet size is data_length (rounded up to a multiple of 4) + 8.
+ * When processing the data, stop before a packet that would cross the
+ * @completed offset.
+ *
+ * A packet near the end of a buffer chunk will typically spill over into the
+ * next queued buffer chunk.  It is the responsibility of the client to check
+ * for this condition, assemble a broken-up packet from its parts, and not to
+ * re-queue any buffer chunks in which as yet unread packet parts reside.
+ */
+struct fw_cdev_event_iso_interrupt_mc {
+	__u64 closure;
+	__u32 type;
+	__u32 completed;
+};
+
 /**
  * struct fw_cdev_event_iso_resource - Iso resources were allocated or freed
  * @closure:	See &fw_cdev_event_common;
@@ -311,16 +355,18 @@ struct fw_cdev_event_phy_packet {
 
 /**
  * union fw_cdev_event - Convenience union of fw_cdev_event_ types
- * @common:        Valid for all types
- * @bus_reset:     Valid if @common.type == %FW_CDEV_EVENT_BUS_RESET
- * @response:      Valid if @common.type == %FW_CDEV_EVENT_RESPONSE
- * @request:       Valid if @common.type == %FW_CDEV_EVENT_REQUEST
- * @request2:      Valid if @common.type == %FW_CDEV_EVENT_REQUEST2
- * @iso_interrupt: Valid if @common.type == %FW_CDEV_EVENT_ISO_INTERRUPT
- * @iso_resource:  Valid if @common.type ==
+ * @common:		Valid for all types
+ * @bus_reset:		Valid if @common.type == %FW_CDEV_EVENT_BUS_RESET
+ * @response:		Valid if @common.type == %FW_CDEV_EVENT_RESPONSE
+ * @request:		Valid if @common.type == %FW_CDEV_EVENT_REQUEST
+ * @request2:		Valid if @common.type == %FW_CDEV_EVENT_REQUEST2
+ * @iso_interrupt:	Valid if @common.type == %FW_CDEV_EVENT_ISO_INTERRUPT
+ * @iso_interrupt_mc:	Valid if @common.type ==
+ *				%FW_CDEV_EVENT_ISO_INTERRUPT_MULTICHANNEL
+ * @iso_resource:	Valid if @common.type ==
  *				%FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED or
  *				%FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED
- * @phy_packet:    Valid if @common.type ==
+ * @phy_packet:		Valid if @common.type ==
  *				%FW_CDEV_EVENT_PHY_PACKET_SENT or
  *				%FW_CDEV_EVENT_PHY_PACKET_RECEIVED
  *
@@ -337,10 +383,11 @@ union fw_cdev_event {
 	struct fw_cdev_event_bus_reset		bus_reset;
 	struct fw_cdev_event_response		response;
 	struct fw_cdev_event_request		request;
-	struct fw_cdev_event_request2		request2;     /* added in 2.6.36 */
+	struct fw_cdev_event_request2		request2;		/* added in 2.6.36 */
 	struct fw_cdev_event_iso_interrupt	iso_interrupt;
-	struct fw_cdev_event_iso_resource	iso_resource; /* added in 2.6.30 */
-	struct fw_cdev_event_phy_packet		phy_packet;   /* added in 2.6.36 */
+	struct fw_cdev_event_iso_interrupt_mc	iso_interrupt_mc;	/* added in 2.6.36 */
+	struct fw_cdev_event_iso_resource	iso_resource;		/* added in 2.6.30 */
+	struct fw_cdev_event_phy_packet		phy_packet;		/* added in 2.6.36 */
 };
 
 /* available since kernel version 2.6.22 */
@@ -375,6 +422,7 @@ union fw_cdev_event {
 /* available since kernel version 2.6.36 */
 #define FW_CDEV_IOC_SEND_PHY_PACKET    _IOWR('#', 0x15, struct fw_cdev_send_phy_packet)
 #define FW_CDEV_IOC_RECEIVE_PHY_PACKETS _IOW('#', 0x16, struct fw_cdev_receive_phy_packets)
+#define FW_CDEV_IOC_SET_ISO_CHANNELS    _IOW('#', 0x17, struct fw_cdev_set_iso_channels)
 
 /*
  * ABI version history
@@ -391,10 +439,13 @@ union fw_cdev_event {
  *               - shared use and auto-response for FCP registers
  *  3  (2.6.34)  - made &fw_cdev_get_cycle_timer reliable
  *               - added %FW_CDEV_IOC_GET_CYCLE_TIMER2
- *  4  (2.6.36)  - added %FW_CDEV_EVENT_REQUEST2, %FW_CDEV_EVENT_PHY_PACKET_*
+ *  4  (2.6.36)  - added %FW_CDEV_EVENT_REQUEST2, %FW_CDEV_EVENT_PHY_PACKET_*,
+ *                 and &fw_cdev_allocate.region_end
  *               - implemented &fw_cdev_event_bus_reset.bm_node_id
  *               - added %FW_CDEV_IOC_SEND_PHY_PACKET, _RECEIVE_PHY_PACKETS
- *               - added &fw_cdev_allocate.region_end
+ *               - added %FW_CDEV_EVENT_ISO_INTERRUPT_MULTICHANNEL,
+ *                 %FW_CDEV_ISO_CONTEXT_RECEIVE_MULTICHANNEL, and
+ *                 %FW_CDEV_IOC_SET_ISO_CHANNELS
  */
 #define FW_CDEV_VERSION 3 /* Meaningless; don't use this macro. */
 
@@ -597,34 +648,43 @@ struct fw_cdev_remove_descriptor {
 	__u32 handle;
 };
 
-#define FW_CDEV_ISO_CONTEXT_TRANSMIT	0
-#define FW_CDEV_ISO_CONTEXT_RECEIVE	1
+#define FW_CDEV_ISO_CONTEXT_TRANSMIT			0
+#define FW_CDEV_ISO_CONTEXT_RECEIVE			1
+#define FW_CDEV_ISO_CONTEXT_RECEIVE_MULTICHANNEL	2 /* added in 2.6.36 */
 
 /**
- * struct fw_cdev_create_iso_context - Create a context for isochronous IO
- * @type:	%FW_CDEV_ISO_CONTEXT_TRANSMIT or %FW_CDEV_ISO_CONTEXT_RECEIVE
- * @header_size: Header size to strip for receive contexts
- * @channel:	Channel to bind to
- * @speed:	Speed for transmit contexts
- * @closure:	To be returned in &fw_cdev_event_iso_interrupt
+ * struct fw_cdev_create_iso_context - Create a context for isochronous I/O
+ * @type:	%FW_CDEV_ISO_CONTEXT_TRANSMIT or %FW_CDEV_ISO_CONTEXT_RECEIVE or
+ *		%FW_CDEV_ISO_CONTEXT_RECEIVE_MULTICHANNEL
+ * @header_size: Header size to strip in single-channel reception
+ * @channel:	Channel to bind to in single-channel reception or transmission
+ * @speed:	Transmission speed
+ * @closure:	To be returned in &fw_cdev_event_iso_interrupt or
+ *		&fw_cdev_event_iso_interrupt_mc
  * @handle:	Handle to context, written back by kernel
  *
  * Prior to sending or receiving isochronous I/O, a context must be created.
  * The context records information about the transmit or receive configuration
  * and typically maps to an underlying hardware resource.  A context is set up
  * for either sending or receiving.  It is bound to a specific isochronous
- * channel.
+ * @channel.
  *
- * If a context was successfully created, the kernel writes back a handle to the
- * context, which must be passed in for subsequent operations on that context.
+ * In case of multichannel reception, @header_size and @channel are ignored
+ * and the channels are selected by %FW_CDEV_IOC_SET_ISO_CHANNELS.
+ *
+ * For %FW_CDEV_ISO_CONTEXT_RECEIVE contexts, @header_size must be at least 4
+ * and must be a multiple of 4.  It is ignored in other context types.
  *
- * For receive contexts, @header_size must be at least 4 and must be a multiple
- * of 4.
+ * @speed is ignored in receive context types.
  *
- * Note that the effect of a @header_size > 4 depends on
- * &fw_cdev_get_info.version, as documented at &fw_cdev_event_iso_interrupt.
+ * If a context was successfully created, the kernel writes back a handle to the
+ * context, which must be passed in for subsequent operations on that context.
  *
+ * Limitations:
  * No more than one iso context can be created per fd.
+ * The total number of contexts that all userspace and kernelspace drivers can
+ * create on a card at a time is a hardware limit, typically 4 or 8 contexts per
+ * direction, and of them at most one multichannel receive context.
  */
 struct fw_cdev_create_iso_context {
 	__u32 type;
@@ -635,6 +695,22 @@ struct fw_cdev_create_iso_context {
 	__u32 handle;
 };
 
+/**
+ * struct fw_cdev_set_iso_channels - Select channels in multichannel reception
+ * @channels:	Bitmask of channels to listen to
+ * @handle:	Handle of the multichannel receive context
+ *
+ * @channels is the bitwise or of 1ULL << n for each channel n to listen to.
+ *
+ * The ioctl fails with errno %EBUSY if there is already another receive context
+ * on a channel in @channels.  In that case, the bitmask of all unoccupied
+ * channels is returned in @channels.
+ */
+struct fw_cdev_set_iso_channels {
+	__u64 channels;
+	__u32 handle;
+};
+
 #define FW_CDEV_ISO_PAYLOAD_LENGTH(v)	(v)
 #define FW_CDEV_ISO_INTERRUPT		(1 << 16)
 #define FW_CDEV_ISO_SKIP		(1 << 17)
@@ -645,42 +721,72 @@ struct fw_cdev_create_iso_context {
 
 /**
  * struct fw_cdev_iso_packet - Isochronous packet
- * @control:	Contains the header length (8 uppermost bits), the sy field
- *		(4 bits), the tag field (2 bits), a sync flag (1 bit),
- *		a skip flag (1 bit), an interrupt flag (1 bit), and the
+ * @control:	Contains the header length (8 uppermost bits),
+ *		the sy field (4 bits), the tag field (2 bits), a sync flag
+ *		or a skip flag (1 bit), an interrupt flag (1 bit), and the
  *		payload length (16 lowermost bits)
- * @header:	Header and payload
+ * @header:	Header and payload in case of a transmit context.
  *
  * &struct fw_cdev_iso_packet is used to describe isochronous packet queues.
- *
  * Use the FW_CDEV_ISO_ macros to fill in @control.
+ * The @header array is empty in case of receive contexts.
+ *
+ * Context type %FW_CDEV_ISO_CONTEXT_TRANSMIT:
+ *
+ * @control.HEADER_LENGTH must be a multiple of 4.  It specifies the number of
+ * bytes in @header that will be prepended to the packet's payload.  These bytes
+ * are copied into the kernel and will not be accessed after the ioctl has
+ * returned.
+ *
+ * The @control.SY and TAG fields are copied to the iso packet header.  These
+ * fields are specified by IEEE 1394a and IEC 61883-1.
+ *
+ * The @control.SKIP flag specifies that no packet is to be sent in a frame.
+ * When using this, all other fields except @control.INTERRUPT must be zero.
+ *
+ * When a packet with the @control.INTERRUPT flag set has been completed, an
+ * &fw_cdev_event_iso_interrupt event will be sent.
+ *
+ * Context type %FW_CDEV_ISO_CONTEXT_RECEIVE:
  *
- * For transmit packets, the header length must be a multiple of 4 and specifies
- * the numbers of bytes in @header that will be prepended to the packet's
- * payload; these bytes are copied into the kernel and will not be accessed
- * after the ioctl has returned.  The sy and tag fields are copied to the iso
- * packet header (these fields are specified by IEEE 1394a and IEC 61883-1).
- * The skip flag specifies that no packet is to be sent in a frame; when using
- * this, all other fields except the interrupt flag must be zero.
- *
- * For receive packets, the header length must be a multiple of the context's
- * header size; if the header length is larger than the context's header size,
- * multiple packets are queued for this entry.  The sy and tag fields are
- * ignored.  If the sync flag is set, the context drops all packets until
- * a packet with a matching sy field is received (the sync value to wait for is
- * specified in the &fw_cdev_start_iso structure).  The payload length defines
- * how many payload bytes can be received for one packet (in addition to payload
- * quadlets that have been defined as headers and are stripped and returned in
- * the &fw_cdev_event_iso_interrupt structure).  If more bytes are received, the
- * additional bytes are dropped.  If less bytes are received, the remaining
- * bytes in this part of the payload buffer will not be written to, not even by
- * the next packet, i.e., packets received in consecutive frames will not
- * necessarily be consecutive in memory.  If an entry has queued multiple
- * packets, the payload length is divided equally among them.
- *
- * When a packet with the interrupt flag set has been completed, the
+ * @control.HEADER_LENGTH must be a multiple of the context's header_size.
+ * If the HEADER_LENGTH is larger than the context's header_size, multiple
+ * packets are queued for this entry.
+ *
+ * The @control.SY and TAG fields are ignored.
+ *
+ * If the @control.SYNC flag is set, the context drops all packets until a
+ * packet with a sy field is received which matches &fw_cdev_start_iso.sync.
+ *
+ * @control.PAYLOAD_LENGTH defines how many payload bytes can be received for
+ * one packet (in addition to payload quadlets that have been defined as headers
+ * and are stripped and returned in the &fw_cdev_event_iso_interrupt structure).
+ * If more bytes are received, the additional bytes are dropped.  If less bytes
+ * are received, the remaining bytes in this part of the payload buffer will not
+ * be written to, not even by the next packet.  I.e., packets received in
+ * consecutive frames will not necessarily be consecutive in memory.  If an
+ * entry has queued multiple packets, the PAYLOAD_LENGTH is divided equally
+ * among them.
+ *
+ * When a packet with the @control.INTERRUPT flag set has been completed, an
  * &fw_cdev_event_iso_interrupt event will be sent.  An entry that has queued
  * multiple receive packets is completed when its last packet is completed.
+ *
+ * Context type %FW_CDEV_ISO_CONTEXT_RECEIVE_MULTICHANNEL:
+ *
+ * Here, &fw_cdev_iso_packet would be more aptly named _iso_buffer_chunk since
+ * it specifies a chunk of the mmap()'ed buffer, while the number and alignment
+ * of packets to be placed into the buffer chunk is not known beforehand.
+ *
+ * @control.PAYLOAD_LENGTH is the size of the buffer chunk and specifies room
+ * for header, payload, padding, and trailer bytes of one or more packets.
+ * It must be a multiple of 4.
+ *
+ * @control.HEADER_LENGTH, TAG and SY are ignored.  SYNC is treated as described
+ * for single-channel reception.
+ *
+ * When a buffer chunk with the @control.INTERRUPT flag set has been filled
+ * entirely, an &fw_cdev_event_iso_interrupt_mc event will be sent.
  */
 struct fw_cdev_iso_packet {
 	__u32 control;
@@ -689,9 +795,9 @@ struct fw_cdev_iso_packet {
 
 /**
  * struct fw_cdev_queue_iso - Queue isochronous packets for I/O
- * @packets:	Userspace pointer to packet data
+ * @packets:	Userspace pointer to an array of &fw_cdev_iso_packet
  * @data:	Pointer into mmap()'ed payload buffer
- * @size:	Size of packet data in bytes
+ * @size:	Size of the @packets array, in bytes
  * @handle:	Isochronous context handle
  *
  * Queue a number of isochronous packets for reception or transmission.
@@ -704,6 +810,9 @@ struct fw_cdev_iso_packet {
  * The kernel may or may not queue all packets, but will write back updated
  * values of the @packets, @data and @size fields, so the ioctl can be
  * resubmitted easily.
+ *
+ * In case of a multichannel receive context, @data must be quadlet-aligned
+ * relative to the buffer start.
  */
 struct fw_cdev_queue_iso {
 	__u64 packets;
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index d974aa4a24c..1cd637ef62d 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -372,17 +372,19 @@ void fw_core_remove_descriptor(struct fw_descriptor *desc);
  * scatter-gather streaming (e.g. assembling video frame automatically).
  */
 struct fw_iso_packet {
-	u16 payload_length;	/* Length of indirect payload. */
-	u32 interrupt:1;	/* Generate interrupt on this packet */
-	u32 skip:1;		/* Set to not send packet at all. */
-	u32 tag:2;
-	u32 sy:4;
-	u32 header_length:8;	/* Length of immediate header. */
-	u32 header[0];
+	u16 payload_length;	/* Length of indirect payload		*/
+	u32 interrupt:1;	/* Generate interrupt on this packet	*/
+	u32 skip:1;		/* tx: Set to not send packet at all	*/
+				/* rx: Sync bit, wait for matching sy	*/
+	u32 tag:2;		/* tx: Tag in packet header		*/
+	u32 sy:4;		/* tx: Sy in packet header		*/
+	u32 header_length:8;	/* Length of immediate header		*/
+	u32 header[0];		/* tx: Top of 1394 isoch. data_block	*/
 };
 
-#define FW_ISO_CONTEXT_TRANSMIT	0
-#define FW_ISO_CONTEXT_RECEIVE	1
+#define FW_ISO_CONTEXT_TRANSMIT			0
+#define FW_ISO_CONTEXT_RECEIVE			1
+#define FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL	2
 
 #define FW_ISO_CONTEXT_MATCH_TAG0	 1
 #define FW_ISO_CONTEXT_MATCH_TAG1	 2
@@ -406,24 +408,31 @@ struct fw_iso_buffer {
 int fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card,
 		       int page_count, enum dma_data_direction direction);
 void fw_iso_buffer_destroy(struct fw_iso_buffer *buffer, struct fw_card *card);
+size_t fw_iso_buffer_lookup(struct fw_iso_buffer *buffer, dma_addr_t completed);
 
 struct fw_iso_context;
 typedef void (*fw_iso_callback_t)(struct fw_iso_context *context,
 				  u32 cycle, size_t header_length,
 				  void *header, void *data);
+typedef void (*fw_iso_mc_callback_t)(struct fw_iso_context *context,
+				     dma_addr_t completed, void *data);
 struct fw_iso_context {
 	struct fw_card *card;
 	int type;
 	int channel;
 	int speed;
 	size_t header_size;
-	fw_iso_callback_t callback;
+	union {
+		fw_iso_callback_t sc;
+		fw_iso_mc_callback_t mc;
+	} callback;
 	void *callback_data;
 };
 
 struct fw_iso_context *fw_iso_context_create(struct fw_card *card,
 		int type, int channel, int speed, size_t header_size,
 		fw_iso_callback_t callback, void *callback_data);
+int fw_iso_context_set_channels(struct fw_iso_context *ctx, u64 *channels);
 int fw_iso_context_queue(struct fw_iso_context *ctx,
 			 struct fw_iso_packet *packet,
 			 struct fw_iso_buffer *buffer,
-- 
cgit v1.2.3-70-g09d2


From de09a9771a5346029f4d11e4ac886be7f9bfdd75 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 29 Jul 2010 12:45:49 +0100
Subject: CRED: Fix get_task_cred() and task_state() to not resurrect dead
 credentials

It's possible for get_task_cred() as it currently stands to 'corrupt' a set of
credentials by incrementing their usage count after their replacement by the
task being accessed.

What happens is that get_task_cred() can race with commit_creds():

	TASK_1			TASK_2			RCU_CLEANER
	-->get_task_cred(TASK_2)
	rcu_read_lock()
	__cred = __task_cred(TASK_2)
				-->commit_creds()
				old_cred = TASK_2->real_cred
				TASK_2->real_cred = ...
				put_cred(old_cred)
				  call_rcu(old_cred)
		[__cred->usage == 0]
	get_cred(__cred)
		[__cred->usage == 1]
	rcu_read_unlock()
							-->put_cred_rcu()
							[__cred->usage == 1]
							panic()

However, since a task's credentials are generally not changed very often, we can
reasonably make use of a loop involving reading the creds pointer and using
atomic_inc_not_zero() to attempt to increment it if it hasn't already hit zero.

If successful, we can safely return the credentials in the knowledge that, even
if the task we're accessing has released them, they haven't gone to the RCU
cleanup code.

We then change task_state() in procfs to use get_task_cred() rather than
calling get_cred() on the result of __task_cred(), as that suffers from the
same problem.

Without this change, a BUG_ON in __put_cred() or in put_cred_rcu() can be
tripped when it is noticed that the usage count is not zero as it ought to be,
for example:

kernel BUG at kernel/cred.c:168!
invalid opcode: 0000 [#1] SMP
last sysfs file: /sys/kernel/mm/ksm/run
CPU 0
Pid: 2436, comm: master Not tainted 2.6.33.3-85.fc13.x86_64 #1 0HR330/OptiPlex
745
RIP: 0010:[<ffffffff81069881>]  [<ffffffff81069881>] __put_cred+0xc/0x45
RSP: 0018:ffff88019e7e9eb8  EFLAGS: 00010202
RAX: 0000000000000001 RBX: ffff880161514480 RCX: 00000000ffffffff
RDX: 00000000ffffffff RSI: ffff880140c690c0 RDI: ffff880140c690c0
RBP: ffff88019e7e9eb8 R08: 00000000000000d0 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000040 R12: ffff880140c690c0
R13: ffff88019e77aea0 R14: 00007fff336b0a5c R15: 0000000000000001
FS:  00007f12f50d97c0(0000) GS:ffff880007400000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f8f461bc000 CR3: 00000001b26ce000 CR4: 00000000000006f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process master (pid: 2436, threadinfo ffff88019e7e8000, task ffff88019e77aea0)
Stack:
 ffff88019e7e9ec8 ffffffff810698cd ffff88019e7e9ef8 ffffffff81069b45
<0> ffff880161514180 ffff880161514480 ffff880161514180 0000000000000000
<0> ffff88019e7e9f28 ffffffff8106aace 0000000000000001 0000000000000246
Call Trace:
 [<ffffffff810698cd>] put_cred+0x13/0x15
 [<ffffffff81069b45>] commit_creds+0x16b/0x175
 [<ffffffff8106aace>] set_current_groups+0x47/0x4e
 [<ffffffff8106ac89>] sys_setgroups+0xf6/0x105
 [<ffffffff81009b02>] system_call_fastpath+0x16/0x1b
Code: 48 8d 71 ff e8 7e 4e 15 00 85 c0 78 0b 8b 75 ec 48 89 df e8 ef 4a 15 00
48 83 c4 18 5b c9 c3 55 8b 07 8b 07 48 89 e5 85 c0 74 04 <0f> 0b eb fe 65 48 8b
04 25 00 cc 00 00 48 3b b8 58 04 00 00 75
RIP  [<ffffffff81069881>] __put_cred+0xc/0x45
 RSP <ffff88019e7e9eb8>
---[ end trace df391256a100ebdd ]---

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/array.c      |  2 +-
 include/linux/cred.h | 21 +--------------------
 kernel/cred.c        | 25 +++++++++++++++++++++++++
 3 files changed, 27 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 9b58d38bc91..fff6572676a 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -176,7 +176,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		if (tracer)
 			tpid = task_pid_nr_ns(tracer, ns);
 	}
-	cred = get_cred((struct cred *) __task_cred(p));
+	cred = get_task_cred(p);
 	seq_printf(m,
 		"State:\t%s\n"
 		"Tgid:\t%d\n"
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 75c0fa88130..ce40cbc791e 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -153,6 +153,7 @@ struct cred {
 extern void __put_cred(struct cred *);
 extern void exit_creds(struct task_struct *);
 extern int copy_creds(struct task_struct *, unsigned long);
+extern const struct cred *get_task_cred(struct task_struct *);
 extern struct cred *cred_alloc_blank(void);
 extern struct cred *prepare_creds(void);
 extern struct cred *prepare_exec_creds(void);
@@ -281,26 +282,6 @@ static inline void put_cred(const struct cred *_cred)
 #define __task_cred(task) \
 	((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_tasklist_lock_is_held())))
 
-/**
- * get_task_cred - Get another task's objective credentials
- * @task: The task to query
- *
- * Get the objective credentials of a task, pinning them so that they can't go
- * away.  Accessing a task's credentials directly is not permitted.
- *
- * The caller must make sure task doesn't go away, either by holding a ref on
- * task or by holding tasklist_lock to prevent it from being unlinked.
- */
-#define get_task_cred(task)				\
-({							\
-	struct cred *__cred;				\
-	rcu_read_lock();				\
-	__cred = (struct cred *) __task_cred((task));	\
-	get_cred(__cred);				\
-	rcu_read_unlock();				\
-	__cred;						\
-})
-
 /**
  * get_current_cred - Get the current task's subjective credentials
  *
diff --git a/kernel/cred.c b/kernel/cred.c
index a2d5504fbcc..60bc8b1e32e 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -209,6 +209,31 @@ void exit_creds(struct task_struct *tsk)
 	}
 }
 
+/**
+ * get_task_cred - Get another task's objective credentials
+ * @task: The task to query
+ *
+ * Get the objective credentials of a task, pinning them so that they can't go
+ * away.  Accessing a task's credentials directly is not permitted.
+ *
+ * The caller must also make sure task doesn't get deleted, either by holding a
+ * ref on task or by holding tasklist_lock to prevent it from being unlinked.
+ */
+const struct cred *get_task_cred(struct task_struct *task)
+{
+	const struct cred *cred;
+
+	rcu_read_lock();
+
+	do {
+		cred = __task_cred((task));
+		BUG_ON(!cred);
+	} while (!atomic_inc_not_zero(&((struct cred *)cred)->usage));
+
+	rcu_read_unlock();
+	return cred;
+}
+
 /*
  * Allocate blank credentials, such that the credentials can be filled in at a
  * later date without risk of ENOMEM.
-- 
cgit v1.2.3-70-g09d2


From 8f92054e7ca1d3a3ae50fb42d2253ac8730d9b2a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 29 Jul 2010 12:45:55 +0100
Subject: CRED: Fix __task_cred()'s lockdep check and banner comment

Fix __task_cred()'s lockdep check by removing the following validation
condition:

	lockdep_tasklist_lock_is_held()

as commit_creds() does not take the tasklist_lock, and nor do most of the
functions that call it, so this check is pointless and it can prevent
detection of the RCU lock not being held if the tasklist_lock is held.

Instead, add the following validation condition:

	task->exit_state != 0

to permit the access if the target task is dead and therefore unable to change
its own credentials.

Fix __task_cred()'s comment to:

 (1) discard the bit that says that the caller must prevent the target task
     from being deleted.  That shouldn't need saying.

 (2) Add a comment indicating the result of __task_cred() should not be passed
     directly to get_cred(), but rather that get_task_cred() should be used
     instead.

Also put a note into the documentation to enforce this point there too.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/credentials.txt |  3 +++
 include/linux/cred.h          | 15 ++++++++++-----
 include/linux/sched.h         |  1 +
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/Documentation/credentials.txt b/Documentation/credentials.txt
index a2db3528700..995baf379c0 100644
--- a/Documentation/credentials.txt
+++ b/Documentation/credentials.txt
@@ -417,6 +417,9 @@ reference on them using:
 This does all the RCU magic inside of it.  The caller must call put_cred() on
 the credentials so obtained when they're finished with.
 
+ [*] Note: The result of __task_cred() should not be passed directly to
+     get_cred() as this may race with commit_creds().
+
 There are a couple of convenience functions to access bits of another task's
 credentials, hiding the RCU magic from the caller:
 
diff --git a/include/linux/cred.h b/include/linux/cred.h
index ce40cbc791e..4d2c39573f3 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -274,13 +274,18 @@ static inline void put_cred(const struct cred *_cred)
  * @task: The task to query
  *
  * Access the objective credentials of a task.  The caller must hold the RCU
- * readlock.
+ * readlock or the task must be dead and unable to change its own credentials.
  *
- * The caller must make sure task doesn't go away, either by holding a ref on
- * task or by holding tasklist_lock to prevent it from being unlinked.
+ * The result of this function should not be passed directly to get_cred();
+ * rather get_task_cred() should be used instead.
  */
-#define __task_cred(task) \
-	((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_tasklist_lock_is_held())))
+#define __task_cred(task)						\
+	({								\
+		const struct task_struct *__t = (task);			\
+		rcu_dereference_check(__t->real_cred,			\
+				      rcu_read_lock_held() ||		\
+				      task_is_dead(__t));		\
+	})
 
 /**
  * get_current_cred - Get the current task's subjective credentials
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 747fcaedddb..0478888c689 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -214,6 +214,7 @@ extern char ___assert_task_state[1 - 2*!!(
 
 #define task_is_traced(task)	((task->state & __TASK_TRACED) != 0)
 #define task_is_stopped(task)	((task->state & __TASK_STOPPED) != 0)
+#define task_is_dead(task)	((task)->exit_state != 0)
 #define task_is_stopped_or_traced(task)	\
 			((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
 #define task_contributes_to_load(task)	\
-- 
cgit v1.2.3-70-g09d2


From c6601225380088018ae93df2ba7f0bb65334d63b Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Fri, 23 Jul 2010 15:04:01 -0600
Subject: of/device: Make of_device_make_bus_id() usable by other code.

The AMBA bus should also use of_device_make_bus_id() when populating device
out of device tree data.  This patch makes the function non-static, and
adds a suitable prototype in of_device.h

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/of/platform.c     | 2 +-
 include/linux/of_device.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 033a224a9fd..30a4641e798 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -508,7 +508,7 @@ EXPORT_SYMBOL(of_unregister_driver);
  * value to derive a unique name.  As a last resort it will use the node
  * name followed by a unique number.
  */
-static void of_device_make_bus_id(struct device *dev)
+void of_device_make_bus_id(struct device *dev)
 {
 	static atomic_t bus_no_reg_magic;
 	struct device_node *node = dev->of_node;
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index e11a0be7893..35aa44ad9f2 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -27,6 +27,7 @@
 
 extern const struct of_device_id *of_match_device(
 	const struct of_device_id *matches, const struct device *dev);
+extern void of_device_make_bus_id(struct device *dev);
 
 /**
  * of_driver_match_device - Tell if a driver's of_match_table matches a device.
-- 
cgit v1.2.3-70-g09d2


From 559e2b7ee7a1c7753d534abcb2742a4775339293 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Fri, 23 Jul 2010 20:11:18 -0600
Subject: of: Provide default of_node_to_nid() implementation.

of_node_to_nid() is only relevant in a few architectures.  Don't force
everyone to implement it anyway.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 arch/microblaze/include/asm/topology.h | 10 ----------
 arch/powerpc/include/asm/prom.h        |  7 +++++++
 arch/powerpc/include/asm/topology.h    |  7 -------
 arch/sparc/include/asm/prom.h          |  3 +--
 include/linux/of.h                     |  5 +++++
 5 files changed, 13 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/arch/microblaze/include/asm/topology.h b/arch/microblaze/include/asm/topology.h
index 96bcea5a992..5428f333a02 100644
--- a/arch/microblaze/include/asm/topology.h
+++ b/arch/microblaze/include/asm/topology.h
@@ -1,11 +1 @@
 #include <asm-generic/topology.h>
-
-#ifndef _ASM_MICROBLAZE_TOPOLOGY_H
-#define _ASM_MICROBLAZE_TOPOLOGY_H
-
-struct device_node;
-static inline int of_node_to_nid(struct device_node *device)
-{
-	return 0;
-}
-#endif /* _ASM_MICROBLAZE_TOPOLOGY_H */
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index da7dd634e7c..55bccc0a21c 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -103,6 +103,13 @@ struct device_node *of_find_next_cache_node(struct device_node *np);
 /* Get the MAC address */
 extern const void *of_get_mac_address(struct device_node *np);
 
+#ifdef CONFIG_NUMA
+extern int of_node_to_nid(struct device_node *device);
+#else
+static inline int of_node_to_nid(struct device_node *device) { return 0; }
+#endif
+#define of_node_to_nid of_node_to_nid
+
 /**
  * of_irq_map_pci - Resolve the interrupt for a PCI device
  * @pdev:	the device whose interrupt is to be resolved
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 32adf728072..09dd38c8882 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -41,8 +41,6 @@ static inline int cpu_to_node(int cpu)
 			       cpu_all_mask :				\
 			       node_to_cpumask_map[node])
 
-int of_node_to_nid(struct device_node *device);
-
 struct pci_bus;
 #ifdef CONFIG_PCI
 extern int pcibus_to_node(struct pci_bus *bus);
@@ -94,11 +92,6 @@ extern void sysfs_remove_device_from_node(struct sys_device *dev, int nid);
 
 #else
 
-static inline int of_node_to_nid(struct device_node *device)
-{
-	return 0;
-}
-
 static inline void dump_numa_cpu_topology(void) {}
 
 static inline int sysfs_add_device_to_node(struct sys_device *dev, int nid)
diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h
index c82a7da25f9..291f12575ed 100644
--- a/arch/sparc/include/asm/prom.h
+++ b/arch/sparc/include/asm/prom.h
@@ -43,8 +43,7 @@ extern int of_getintprop_default(struct device_node *np,
 extern int of_find_in_proplist(const char *list, const char *match, int len);
 #ifdef CONFIG_NUMA
 extern int of_node_to_nid(struct device_node *dp);
-#else
-#define of_node_to_nid(dp)	(-1)
+#define of_node_to_nid of_node_to_nid
 #endif
 
 extern void prom_build_devicetree(void);
diff --git a/include/linux/of.h b/include/linux/of.h
index b0756f33249..cad7cf0ab27 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -146,6 +146,11 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
 
 #define OF_BAD_ADDR	((u64)-1)
 
+#ifndef of_node_to_nid
+static inline int of_node_to_nid(struct device_node *np) { return -1; }
+#define of_node_to_nid of_node_to_nid
+#endif
+
 extern struct device_node *of_find_node_by_name(struct device_node *from,
 	const char *name);
 #define for_each_node_by_name(dn, name) \
-- 
cgit v1.2.3-70-g09d2


From 12b15e83289bc7cf2ec9a342412e0c955beeb395 Mon Sep 17 00:00:00 2001
From: Anatolij Gustschin <agust@denx.de>
Date: Tue, 27 Jul 2010 22:35:58 +0200
Subject: of/spi: call of_register_spi_devices() from spi core code

Move of_register_spi_devices() call from drivers to
spi_register_master(). Also change the function to use
the struct device_node pointer from master spi device
instead of passing it as function argument.

Signed-off-by: Anatolij Gustschin <agust@denx.de>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/of/of_spi.c           | 10 ++++++----
 drivers/spi/mpc512x_psc_spi.c |  1 +
 drivers/spi/mpc52xx_psc_spi.c | 10 ++--------
 drivers/spi/mpc52xx_spi.c     |  3 +--
 drivers/spi/spi.c             |  4 ++++
 drivers/spi/spi_mpc8xxx.c     |  4 +---
 drivers/spi/spi_ppc4xx.c      |  2 +-
 drivers/spi/xilinx_spi.c      |  3 +++
 drivers/spi/xilinx_spi_of.c   |  3 ---
 include/linux/of_spi.h        | 11 ++++++++---
 10 files changed, 27 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/drivers/of/of_spi.c b/drivers/of/of_spi.c
index d504f1d1324..1dbce58a58b 100644
--- a/drivers/of/of_spi.c
+++ b/drivers/of/of_spi.c
@@ -15,12 +15,11 @@
 /**
  * of_register_spi_devices - Register child devices onto the SPI bus
  * @master:	Pointer to spi_master device
- * @np:		parent node of SPI device nodes
  *
- * Registers an spi_device for each child node of 'np' which has a 'reg'
+ * Registers an spi_device for each child node of master node which has a 'reg'
  * property.
  */
-void of_register_spi_devices(struct spi_master *master, struct device_node *np)
+void of_register_spi_devices(struct spi_master *master)
 {
 	struct spi_device *spi;
 	struct device_node *nc;
@@ -28,7 +27,10 @@ void of_register_spi_devices(struct spi_master *master, struct device_node *np)
 	int rc;
 	int len;
 
-	for_each_child_of_node(np, nc) {
+	if (!master->dev.of_node)
+		return;
+
+	for_each_child_of_node(master->dev.of_node, nc) {
 		/* Alloc an spi_device */
 		spi = spi_alloc_device(master);
 		if (!spi) {
diff --git a/drivers/spi/mpc512x_psc_spi.c b/drivers/spi/mpc512x_psc_spi.c
index 2534b1ec3ed..1bb4315f5f8 100644
--- a/drivers/spi/mpc512x_psc_spi.c
+++ b/drivers/spi/mpc512x_psc_spi.c
@@ -440,6 +440,7 @@ static int __init mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr,
 	master->setup = mpc512x_psc_spi_setup;
 	master->transfer = mpc512x_psc_spi_transfer;
 	master->cleanup = mpc512x_psc_spi_cleanup;
+	master->dev.of_node = dev->of_node;
 
 	tempp = ioremap(regaddr, size);
 	if (!tempp) {
diff --git a/drivers/spi/mpc52xx_psc_spi.c b/drivers/spi/mpc52xx_psc_spi.c
index 7104cb739da..bd81ff90cfb 100644
--- a/drivers/spi/mpc52xx_psc_spi.c
+++ b/drivers/spi/mpc52xx_psc_spi.c
@@ -17,7 +17,6 @@
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/of_platform.h>
-#include <linux/of_spi.h>
 #include <linux/workqueue.h>
 #include <linux/completion.h>
 #include <linux/io.h>
@@ -398,6 +397,7 @@ static int __init mpc52xx_psc_spi_do_probe(struct device *dev, u32 regaddr,
 	master->setup = mpc52xx_psc_spi_setup;
 	master->transfer = mpc52xx_psc_spi_transfer;
 	master->cleanup = mpc52xx_psc_spi_cleanup;
+	master->dev.of_node = dev->of_node;
 
 	mps->psc = ioremap(regaddr, size);
 	if (!mps->psc) {
@@ -470,7 +470,6 @@ static int __init mpc52xx_psc_spi_of_probe(struct of_device *op,
 	const u32 *regaddr_p;
 	u64 regaddr64, size64;
 	s16 id = -1;
-	int rc;
 
 	regaddr_p = of_get_address(op->dev.of_node, 0, &size64, NULL);
 	if (!regaddr_p) {
@@ -491,13 +490,8 @@ static int __init mpc52xx_psc_spi_of_probe(struct of_device *op,
 		id = *psc_nump + 1;
 	}
 
-	rc = mpc52xx_psc_spi_do_probe(&op->dev, (u32)regaddr64, (u32)size64,
+	return mpc52xx_psc_spi_do_probe(&op->dev, (u32)regaddr64, (u32)size64,
 				irq_of_parse_and_map(op->dev.of_node, 0), id);
-	if (rc == 0)
-		of_register_spi_devices(dev_get_drvdata(&op->dev),
-					op->dev.of_node);
-
-	return rc;
 }
 
 static int __exit mpc52xx_psc_spi_of_remove(struct of_device *op)
diff --git a/drivers/spi/mpc52xx_spi.c b/drivers/spi/mpc52xx_spi.c
index b1a76bff775..56136ff00e0 100644
--- a/drivers/spi/mpc52xx_spi.c
+++ b/drivers/spi/mpc52xx_spi.c
@@ -18,7 +18,6 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/spi/spi.h>
-#include <linux/of_spi.h>
 #include <linux/io.h>
 #include <linux/of_gpio.h>
 #include <linux/slab.h>
@@ -439,6 +438,7 @@ static int __devinit mpc52xx_spi_probe(struct of_device *op,
 	master->setup = mpc52xx_spi_setup;
 	master->transfer = mpc52xx_spi_transfer;
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST;
+	master->dev.of_node = op->dev.of_node;
 
 	dev_set_drvdata(&op->dev, master);
 
@@ -512,7 +512,6 @@ static int __devinit mpc52xx_spi_probe(struct of_device *op,
 	if (rc)
 		goto err_register;
 
-	of_register_spi_devices(master, op->dev.of_node);
 	dev_info(&ms->master->dev, "registered MPC5200 SPI bus\n");
 
 	return rc;
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index b3a1f9259b6..1bb1b88780c 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include <linux/mod_devicetable.h>
 #include <linux/spi/spi.h>
+#include <linux/of_spi.h>
 
 
 /* SPI bustype and spi_master class are registered after board init code
@@ -540,6 +541,9 @@ int spi_register_master(struct spi_master *master)
 	/* populate children from any spi device tables */
 	scan_boardinfo(master);
 	status = 0;
+
+	/* Register devices from the device tree */
+	of_register_spi_devices(master);
 done:
 	return status;
 }
diff --git a/drivers/spi/spi_mpc8xxx.c b/drivers/spi/spi_mpc8xxx.c
index 97ab0a81338..aad9ae1b9c6 100644
--- a/drivers/spi/spi_mpc8xxx.c
+++ b/drivers/spi/spi_mpc8xxx.c
@@ -38,7 +38,6 @@
 #include <linux/of_platform.h>
 #include <linux/gpio.h>
 #include <linux/of_gpio.h>
-#include <linux/of_spi.h>
 #include <linux/slab.h>
 
 #include <sysdev/fsl_soc.h>
@@ -1009,6 +1008,7 @@ mpc8xxx_spi_probe(struct device *dev, struct resource *mem, unsigned int irq)
 	master->setup = mpc8xxx_spi_setup;
 	master->transfer = mpc8xxx_spi_transfer;
 	master->cleanup = mpc8xxx_spi_cleanup;
+	master->dev.of_node = dev->of_node;
 
 	mpc8xxx_spi = spi_master_get_devdata(master);
 	mpc8xxx_spi->dev = dev;
@@ -1299,8 +1299,6 @@ static int __devinit of_mpc8xxx_spi_probe(struct of_device *ofdev,
 		goto err;
 	}
 
-	of_register_spi_devices(master, np);
-
 	return 0;
 
 err:
diff --git a/drivers/spi/spi_ppc4xx.c b/drivers/spi/spi_ppc4xx.c
index d53466a249d..0f5fa7e2a55 100644
--- a/drivers/spi/spi_ppc4xx.c
+++ b/drivers/spi/spi_ppc4xx.c
@@ -407,6 +407,7 @@ static int __init spi_ppc4xx_of_probe(struct of_device *op,
 	master = spi_alloc_master(dev, sizeof *hw);
 	if (master == NULL)
 		return -ENOMEM;
+	master->dev.of_node = np;
 	dev_set_drvdata(dev, master);
 	hw = spi_master_get_devdata(master);
 	hw->master = spi_master_get(master);
@@ -545,7 +546,6 @@ static int __init spi_ppc4xx_of_probe(struct of_device *op,
 	}
 
 	dev_info(dev, "driver initialized\n");
-	of_register_spi_devices(master, np);
 
 	return 0;
 
diff --git a/drivers/spi/xilinx_spi.c b/drivers/spi/xilinx_spi.c
index 1b47363cb73..80f2db5bcfd 100644
--- a/drivers/spi/xilinx_spi.c
+++ b/drivers/spi/xilinx_spi.c
@@ -390,6 +390,9 @@ struct spi_master *xilinx_spi_init(struct device *dev, struct resource *mem,
 
 	master->bus_num = bus_num;
 	master->num_chipselect = pdata->num_chipselect;
+#ifdef CONFIG_OF
+	master->dev.of_node = dev->of_node;
+#endif
 
 	xspi->mem = *mem;
 	xspi->irq = irq;
diff --git a/drivers/spi/xilinx_spi_of.c b/drivers/spi/xilinx_spi_of.c
index 4654805b08d..87cda0956a8 100644
--- a/drivers/spi/xilinx_spi_of.c
+++ b/drivers/spi/xilinx_spi_of.c
@@ -80,9 +80,6 @@ static int __devinit xilinx_spi_of_probe(struct of_device *ofdev,
 
 	dev_set_drvdata(&ofdev->dev, master);
 
-	/* Add any subnodes on the SPI bus */
-	of_register_spi_devices(master, ofdev->dev.of_node);
-
 	return 0;
 }
 
diff --git a/include/linux/of_spi.h b/include/linux/of_spi.h
index 5f71ee8c086..9e3e70f78ae 100644
--- a/include/linux/of_spi.h
+++ b/include/linux/of_spi.h
@@ -9,10 +9,15 @@
 #ifndef __LINUX_OF_SPI_H
 #define __LINUX_OF_SPI_H
 
-#include <linux/of.h>
 #include <linux/spi/spi.h>
 
-extern void of_register_spi_devices(struct spi_master *master,
-				    struct device_node *np);
+#if defined(CONFIG_OF_SPI) || defined(CONFIG_OF_SPI_MODULE)
+extern void of_register_spi_devices(struct spi_master *master);
+#else
+static inline void of_register_spi_devices(struct spi_master *master)
+{
+	return;
+}
+#endif /* CONFIG_OF_SPI */
 
 #endif /* __LINUX_OF_SPI */
-- 
cgit v1.2.3-70-g09d2


From 253d2e549818f5a4a52e2db0aba3dacee21e5b38 Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan@linux.intel.com>
Date: Fri, 16 Jul 2010 10:19:22 -0700
Subject: PCI: disable mmio during bar sizing

It is a known issue that mmio decoding shall be disabled while doing PCI
bar sizing. Host bridge and other devices (PCI PIC) shall be excluded for
certain platforms. This patch mainly comes from Matthew Wilcox's
patch in http://kerneltrap.org/mailarchive/linux-kernel/2007/9/13/258969.

A new flag bit "mmio_always_on" is added to pci_dev with the intention that
devices whose mmio decoding cannot be disabled during BAR sizing shall
have this bit set, preferably in their quirks.

Without this patch, Intel Moorestown platform graphics unit will be
corrupted during bar sizing activities.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c  | 10 ++++++++++
 drivers/pci/quirks.c | 13 +++++++++++++
 include/linux/pci.h  |  2 ++
 3 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index f4adba2d1dd..12625d90f8b 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -163,9 +163,16 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 			struct resource *res, unsigned int pos)
 {
 	u32 l, sz, mask;
+	u16 orig_cmd;
 
 	mask = type ? PCI_ROM_ADDRESS_MASK : ~0;
 
+	if (!dev->mmio_always_on) {
+		pci_read_config_word(dev, PCI_COMMAND, &orig_cmd);
+		pci_write_config_word(dev, PCI_COMMAND,
+			orig_cmd & ~(PCI_COMMAND_MEMORY | PCI_COMMAND_IO));
+	}
+
 	res->name = pci_name(dev);
 
 	pci_read_config_dword(dev, pos, &l);
@@ -173,6 +180,9 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 	pci_read_config_dword(dev, pos, &sz);
 	pci_write_config_dword(dev, pos, l);
 
+	if (!dev->mmio_always_on)
+		pci_write_config_word(dev, PCI_COMMAND, orig_cmd);
+
 	/*
 	 * All bits set in sz means the device isn't working properly.
 	 * If the BAR isn't implemented, all bits must be 0.  If it's a
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 3a81d9d4401..202efa6f57c 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -91,6 +91,19 @@ static void __devinit quirk_resource_alignment(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_resource_alignment);
 
+/*
+ * Decoding should be disabled for a PCI device during BAR sizing to avoid
+ * conflict. But doing so may cause problems on host bridge and perhaps other
+ * key system devices. For devices that need to have mmio decoding always-on,
+ * we need to set the dev->mmio_always_on bit.
+ */
+static void __devinit quirk_mmio_always_on(struct pci_dev *dev)
+{
+	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+		dev->mmio_always_on = 1;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, quirk_mmio_always_on);
+
 /* The Mellanox Tavor device gives false positive parity errors
  * Mark this device with a broken_parity_status, to allow
  * PCI scanning code to "skip" this now blacklisted device.
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f26fda76b87..b1d17956a15 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -270,6 +270,8 @@ struct pci_dev {
 	unsigned int	d1_support:1;	/* Low power state D1 is supported */
 	unsigned int	d2_support:1;	/* Low power state D2 is supported */
 	unsigned int	no_d1d2:1;	/* Only allow D0 and D3 */
+	unsigned int	mmio_always_on:1;	/* disallow turning off io/mem
+						   decoding during bar sizing */
 	unsigned int	wakeup_prepared:1;
 	unsigned int	d3_delay;	/* D3->D0 transition time in ms */
 
-- 
cgit v1.2.3-70-g09d2


From 911e1c9b05a8e3559a7aa89083930700a0b9e7ee Mon Sep 17 00:00:00 2001
From: Narendra K <Narendra_K@dell.com>
Date: Mon, 26 Jul 2010 05:56:50 -0500
Subject: PCI: export SMBIOS provided firmware instance and label to sysfs

This patch exports SMBIOS provided firmware instance and label of
onboard PCI devices to sysfs.  New files are:
  /sys/bus/pci/devices/.../label which contains the firmware name for
the device in question, and
  /sys/bus/pci/devices/.../index which contains the firmware device type
instance for the given device.

Signed-off-by: Jordan Hargrave <jordan_hargrave@dell.com>
Signed-off-by: Narendra K <narendra_k@dell.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/ABI/testing/sysfs-bus-pci |  27 ++++++
 drivers/firmware/dmi_scan.c             |  25 ++++++
 drivers/pci/Makefile                    |   3 +
 drivers/pci/pci-label.c                 | 143 ++++++++++++++++++++++++++++++++
 drivers/pci/pci-sysfs.c                 |   5 ++
 drivers/pci/pci.h                       |   9 ++
 include/linux/dmi.h                     |   9 ++
 7 files changed, 221 insertions(+)
 create mode 100644 drivers/pci/pci-label.c

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 25be3250f7d..f979d825d11 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -139,3 +139,30 @@ Contact:	linux-pci@vger.kernel.org
 Description:
 		This symbolic link points to the PCI hotplug controller driver
 		module that manages the hotplug slot.
+
+What:		/sys/bus/pci/devices/.../label
+Date:		July 2010
+Contact:	Narendra K <narendra_k@dell.com>, linux-bugs@dell.com
+Description:
+		Reading this attribute will provide the firmware
+		given name(SMBIOS type 41 string) of the PCI device.
+		The attribute will be created only if the firmware
+		has given a name to the PCI device.
+Users:
+		Userspace applications interested in knowing the
+		firmware assigned name of the PCI device.
+
+What:		/sys/bus/pci/devices/.../index
+Date:		July 2010
+Contact:	Narendra K <narendra_k@dell.com>, linux-bugs@dell.com
+Description:
+		Reading this attribute will provide the firmware
+		given instance(SMBIOS type 41 device type instance)
+		of the PCI device. The attribute will be created
+		only if the firmware has given a device type instance
+		to the PCI device.
+Users:
+		Userspace applications interested in knowing the
+		firmware assigned device type instance of the PCI
+		device that can help in understanding the firmware
+		intended order of the PCI device.
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index d4646727134..b3d22d65999 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -277,6 +277,29 @@ static void __init dmi_save_ipmi_device(const struct dmi_header *dm)
 	list_add_tail(&dev->list, &dmi_devices);
 }
 
+static void __init dmi_save_dev_onboard(int instance, int segment, int bus,
+					int devfn, const char *name)
+{
+	struct dmi_dev_onboard *onboard_dev;
+
+	onboard_dev = dmi_alloc(sizeof(*onboard_dev) + strlen(name) + 1);
+	if (!onboard_dev) {
+		printk(KERN_ERR "dmi_save_dev_onboard: out of memory.\n");
+		return;
+	}
+	onboard_dev->instance = instance;
+	onboard_dev->segment = segment;
+	onboard_dev->bus = bus;
+	onboard_dev->devfn = devfn;
+
+	strcpy((char *)&onboard_dev[1], name);
+	onboard_dev->dev.type = DMI_DEV_TYPE_DEV_ONBOARD;
+	onboard_dev->dev.name = (char *)&onboard_dev[1];
+	onboard_dev->dev.device_data = onboard_dev;
+
+	list_add(&onboard_dev->dev.list, &dmi_devices);
+}
+
 static void __init dmi_save_extended_devices(const struct dmi_header *dm)
 {
 	const u8 *d = (u8*) dm + 5;
@@ -285,6 +308,8 @@ static void __init dmi_save_extended_devices(const struct dmi_header *dm)
 	if ((*d & 0x80) == 0)
 		return;
 
+	dmi_save_dev_onboard(*(d+1), *(u16 *)(d+2), *(d+4), *(d+5),
+			     dmi_string_nosave(dm, *(d-1)));
 	dmi_save_one_device(*d & 0x7f, dmi_string_nosave(dm, *(d - 1)));
 }
 
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 0b51857fbaf..dc1aa092286 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -55,6 +55,9 @@ obj-$(CONFIG_MICROBLAZE) += setup-bus.o
 #
 obj-$(CONFIG_ACPI)    += pci-acpi.o
 
+# SMBIOS provided firmware instance and labels
+obj-$(CONFIG_DMI)    += pci-label.o
+
 # Cardbus & CompactPCI use setup-bus
 obj-$(CONFIG_HOTPLUG) += setup-bus.o
 
diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c
new file mode 100644
index 00000000000..111500e86f9
--- /dev/null
+++ b/drivers/pci/pci-label.c
@@ -0,0 +1,143 @@
+/*
+ * Purpose: Export the firmware instance and label associated with
+ * a pci device to sysfs
+ * Copyright (C) 2010 Dell Inc.
+ * by Narendra K <Narendra_K@dell.com>,
+ * Jordan Hargrave <Jordan_Hargrave@dell.com>
+ *
+ * SMBIOS defines type 41 for onboard pci devices. This code retrieves
+ * the instance number and string from the type 41 record and exports
+ * it to sysfs.
+ *
+ * Please see http://linux.dell.com/wiki/index.php/Oss/libnetdevname for more
+ * information.
+ */
+
+#include <linux/dmi.h>
+#include <linux/sysfs.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include "pci.h"
+
+enum smbios_attr_enum {
+	SMBIOS_ATTR_NONE = 0,
+	SMBIOS_ATTR_LABEL_SHOW,
+	SMBIOS_ATTR_INSTANCE_SHOW,
+};
+
+static size_t	/* a length, not a file mode; 0 means no matching entry */
+find_smbios_instance_string(struct pci_dev *pdev, char *buf,
+			    enum smbios_attr_enum attribute)
+{
+	const struct dmi_device *dmi;
+	struct dmi_dev_onboard *donboard;
+	int bus;
+	int devfn;
+
+	bus = pdev->bus->number;
+	devfn = pdev->devfn;
+
+	dmi = NULL;	/* NULL starts dmi_find_device() from the list head */
+	while ((dmi = dmi_find_device(DMI_DEV_TYPE_DEV_ONBOARD,
+				      NULL, dmi)) != NULL) {
+		donboard = dmi->device_data;
+		if (donboard && donboard->bus == bus &&
+					donboard->devfn == devfn) {
+			if (buf) {
+				if (attribute == SMBIOS_ATTR_INSTANCE_SHOW)
+					return scnprintf(buf, PAGE_SIZE,
+							 "%d\n",
+							 donboard->instance);
+				else if (attribute == SMBIOS_ATTR_LABEL_SHOW)
+					return scnprintf(buf, PAGE_SIZE,
+							 "%s\n",
+							 dmi->name);
+			}
+			return strlen(dmi->name); /* nothing formatted: label length */
+		}
+	}
+	return 0;
+}
+
+static mode_t
+smbios_instance_string_exist(struct kobject *kobj, struct attribute *attr,
+			     int n)
+{
+	struct device *dev;
+	struct pci_dev *pdev;
+
+	dev = container_of(kobj, struct device, kobj);
+	pdev = to_pci_dev(dev);
+
+	return find_smbios_instance_string(pdev, NULL, SMBIOS_ATTR_NONE) ?
+					   S_IRUGO : 0;
+}
+
+static ssize_t
+smbioslabel_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev;
+	pdev = to_pci_dev(dev);
+
+	return find_smbios_instance_string(pdev, buf,
+					   SMBIOS_ATTR_LABEL_SHOW);
+}
+
+static ssize_t
+smbiosinstance_show(struct device *dev,
+		    struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev;
+	pdev = to_pci_dev(dev);
+
+	return find_smbios_instance_string(pdev, buf,
+					   SMBIOS_ATTR_INSTANCE_SHOW);
+}
+
+static struct device_attribute smbios_attr_label = {
+	.attr = {.name = "label", .mode = 0444, .owner = THIS_MODULE},
+	.show = smbioslabel_show,
+};
+
+static struct device_attribute smbios_attr_instance = {
+	.attr = {.name = "index", .mode = 0444, .owner = THIS_MODULE},
+	.show = smbiosinstance_show,
+};
+
+static struct attribute *smbios_attributes[] = {
+	&smbios_attr_label.attr,
+	&smbios_attr_instance.attr,
+	NULL,
+};
+
+static struct attribute_group smbios_attr_group = {
+	.attrs = smbios_attributes,
+	.is_visible = smbios_instance_string_exist,
+};
+
+static int
+pci_create_smbiosname_file(struct pci_dev *pdev)
+{
+	if (!sysfs_create_group(&pdev->dev.kobj, &smbios_attr_group))
+		return 0;
+	return -ENODEV;
+}
+
+static void
+pci_remove_smbiosname_file(struct pci_dev *pdev)
+{
+	sysfs_remove_group(&pdev->dev.kobj, &smbios_attr_group);
+}
+
+void pci_create_firmware_label_files(struct pci_dev *pdev)
+{
+	/* Best effort: a failure to create the label/index group is ignored. */
+	pci_create_smbiosname_file(pdev);
+}
+
+void pci_remove_firmware_label_files(struct pci_dev *pdev)
+{
+	pci_remove_smbiosname_file(pdev);
+}
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index f7692dc531e..b5a7d9bfcb2 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1165,6 +1165,8 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
 	if (retval)
 		goto err_vga_file;
 
+	pci_create_firmware_label_files(pdev);
+
 	return 0;
 
 err_vga_file:
@@ -1232,6 +1234,9 @@ void pci_remove_sysfs_dev_files(struct pci_dev *pdev)
 		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
 		kfree(pdev->rom_attr);
 	}
+
+	pci_remove_firmware_label_files(pdev);
+
 }
 
 static int __init pci_sysfs_init(void)
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index f8077b3c8c8..d930338e092 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -11,6 +11,15 @@
 extern int pci_uevent(struct device *dev, struct kobj_uevent_env *env);
 extern int pci_create_sysfs_dev_files(struct pci_dev *pdev);
 extern void pci_remove_sysfs_dev_files(struct pci_dev *pdev);
+#ifndef CONFIG_DMI
+static inline void pci_create_firmware_label_files(struct pci_dev *pdev)
+{ }	/* !CONFIG_DMI stub: no-op; a void function must not return a value */
+static inline void pci_remove_firmware_label_files(struct pci_dev *pdev)
+{ }	/* !CONFIG_DMI stub: no-op; a void function must not return a value */
+#else
+extern void pci_create_firmware_label_files(struct pci_dev *pdev);
+extern void pci_remove_firmware_label_files(struct pci_dev *pdev);
+#endif
 extern void pci_cleanup_rom(struct pci_dev *dev);
 #ifdef HAVE_PCI_MMAP
 extern int pci_mmap_fits(struct pci_dev *pdev, int resno,
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index a8a3e1ac281..90e087f8d95 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -20,6 +20,7 @@ enum dmi_device_type {
 	DMI_DEV_TYPE_SAS,
 	DMI_DEV_TYPE_IPMI = -1,
 	DMI_DEV_TYPE_OEM_STRING = -2,
+	DMI_DEV_TYPE_DEV_ONBOARD = -3,
 };
 
 struct dmi_header {
@@ -37,6 +38,14 @@ struct dmi_device {
 
 #ifdef CONFIG_DMI
 
+struct dmi_dev_onboard {
+	struct dmi_device dev;
+	int instance;
+	int segment;
+	int bus;
+	int devfn;
+};
+
 extern int dmi_check_system(const struct dmi_system_id *list);
 const struct dmi_system_id *dmi_first_match(const struct dmi_system_id *list);
 extern const char * dmi_get_system_info(int field);
-- 
cgit v1.2.3-70-g09d2


From 30da55242818a8ca08583188ebcbaccd283ad4d9 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 23 Jul 2010 14:56:28 +0100
Subject: PCI: MSI: Restore read_msi_msg_desc(); add get_cached_msi_msg_desc()

commit 2ca1af9aa3285c6a5f103ed31ad09f7399fc65d7 "PCI: MSI: Remove
unsafe and unnecessary hardware access" changed read_msi_msg_desc() to
return the last MSI message written instead of reading it from the
device, since it may be called while the device is in a reduced
power state.

However, the pSeries platform code really does need to read messages
from the device, since they are initially written by firmware.
Therefore:
- Restore the previous behaviour of read_msi_msg_desc()
- Add new functions get_cached_msi_msg{,_desc}() which return the
  last MSI message written
- Use the new functions where appropriate

Acked-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/ia64/kernel/msi_ia64.c    |  2 +-
 arch/ia64/sn/kernel/msi_sn.c   |  2 +-
 arch/x86/kernel/apic/io_apic.c |  2 +-
 drivers/pci/msi.c              | 47 +++++++++++++++++++++++++++++++++++++-----
 include/linux/msi.h            |  2 ++
 5 files changed, 47 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 6c892285604..4a746ea838f 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -25,7 +25,7 @@ static int ia64_set_msi_irq_affinity(unsigned int irq,
 	if (irq_prepare_move(irq, cpu))
 		return -1;
 
-	read_msi_msg(irq, &msg);
+	get_cached_msi_msg(irq, &msg);
 
 	addr = msg.address_lo;
 	addr &= MSI_ADDR_DEST_ID_MASK;
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
index ebfdd6a9ae1..0c72dd46383 100644
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -175,7 +175,7 @@ static int sn_set_msi_irq_affinity(unsigned int irq,
 	 * Release XIO resources for the old MSI PCI address
 	 */
 
-	read_msi_msg(irq, &msg);
+	get_cached_msi_msg(irq, &msg);
         sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
 	pdev = sn_pdev->pdi_linux_pcidev;
 	provider = SN_PCIDEV_BUSPROVIDER(pdev);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e41ed24ab26..4dc0084ec1b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3397,7 +3397,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 
 	cfg = desc->chip_data;
 
-	read_msi_msg_desc(desc, &msg);
+	get_cached_msi_msg_desc(desc, &msg);
 
 	msg.data &= ~MSI_DATA_VECTOR_MASK;
 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 4c14f31f2b4..69b7be33b3a 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -197,9 +197,46 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
 {
 	struct msi_desc *entry = get_irq_desc_msi(desc);
 
-	/* We do not touch the hardware (which may not even be
-	 * accessible at the moment) but return the last message
-	 * written.  Assert that this is valid, assuming that
+	BUG_ON(entry->dev->current_state != PCI_D0);
+
+	if (entry->msi_attrib.is_msix) {
+		void __iomem *base = entry->mask_base +
+			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+
+		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
+		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
+		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
+	} else {
+		struct pci_dev *dev = entry->dev;
+		int pos = entry->msi_attrib.pos;
+		u16 data;
+
+		pci_read_config_dword(dev, msi_lower_address_reg(pos),
+					&msg->address_lo);
+		if (entry->msi_attrib.is_64) {
+			pci_read_config_dword(dev, msi_upper_address_reg(pos),
+						&msg->address_hi);
+			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
+		} else {
+			msg->address_hi = 0;
+			pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
+		}
+		msg->data = data;
+	}
+}
+
+void read_msi_msg(unsigned int irq, struct msi_msg *msg)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	read_msi_msg_desc(desc, msg);
+}
+
+void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
+{
+	struct msi_desc *entry = get_irq_desc_msi(desc);
+
+	/* Assert that the cache is valid, assuming that
 	 * valid messages are not all-zeroes. */
 	BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
 		 entry->msg.data));
@@ -207,11 +244,11 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
 	*msg = entry->msg;
 }
 
-void read_msi_msg(unsigned int irq, struct msi_msg *msg)
+void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
-	read_msi_msg_desc(desc, msg);
+	get_cached_msi_msg_desc(desc, msg);
 }
 
 void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 6991ab5b24d..91b05c17185 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -14,8 +14,10 @@ struct irq_desc;
 extern void mask_msi_irq(unsigned int irq);
 extern void unmask_msi_irq(unsigned int irq);
 extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
+extern void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
 extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
 extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
+extern void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg);
 extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
 
 struct msi_desc {
-- 
cgit v1.2.3-70-g09d2


From f11ac8db5d07b6e99d41ff4aa39d878ee5cef1c5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 25 Jun 2010 16:35:53 -0400
Subject: NFSv4: Ensure that we track the NFSv4 lock state in read/write
 requests.

This patch fixes bugzilla entry 14501:
  https://bugzilla.kernel.org/show_bug.cgi?id=14501

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c          | 29 +++++++++++++++-----
 fs/nfs/inode.c           | 70 +++++++++++++++++++++++++++++++++++++++++++++---
 fs/nfs/nfs4xdr.c         |  8 +++---
 fs/nfs/pagelist.c        |  8 +++++-
 fs/nfs/read.c            |  1 +
 fs/nfs/write.c           |  5 +++-
 include/linux/nfs_fs.h   | 13 +++++++--
 include/linux/nfs_page.h |  1 +
 include/linux/nfs_xdr.h  |  2 ++
 9 files changed, 118 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index ad4cd31d605..064a8096167 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -69,6 +69,7 @@ struct nfs_direct_req {
 
 	/* I/O parameters */
 	struct nfs_open_context	*ctx;		/* file open context info */
+	struct nfs_lock_context *l_ctx;		/* Lock context info */
 	struct kiocb *		iocb;		/* controlling i/o request */
 	struct inode *		inode;		/* target file of i/o */
 
@@ -160,6 +161,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 	INIT_LIST_HEAD(&dreq->rewrite_list);
 	dreq->iocb = NULL;
 	dreq->ctx = NULL;
+	dreq->l_ctx = NULL;
 	spin_lock_init(&dreq->lock);
 	atomic_set(&dreq->io_count, 0);
 	dreq->count = 0;
@@ -173,6 +175,8 @@ static void nfs_direct_req_free(struct kref *kref)
 {
 	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
 
+	if (dreq->l_ctx != NULL)
+		nfs_put_lock_context(dreq->l_ctx);
 	if (dreq->ctx != NULL)
 		put_nfs_open_context(dreq->ctx);
 	kmem_cache_free(nfs_direct_cachep, dreq);
@@ -336,6 +340,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		data->cred = msg.rpc_cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
+		data->args.lock_context = dreq->l_ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
 		data->args.pages = data->pagevec;
@@ -416,24 +421,28 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
 			       unsigned long nr_segs, loff_t pos)
 {
-	ssize_t result = 0;
+	ssize_t result = -ENOMEM;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct nfs_direct_req *dreq;
 
 	dreq = nfs_direct_req_alloc();
-	if (!dreq)
-		return -ENOMEM;
+	if (dreq == NULL)
+		goto out;
 
 	dreq->inode = inode;
 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
+	dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
+	if (dreq->l_ctx == NULL)
+		goto out_release;
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
 	result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
 	if (!result)
 		result = nfs_direct_wait(dreq);
+out_release:
 	nfs_direct_req_release(dreq);
-
+out:
 	return result;
 }
 
@@ -574,6 +583,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	data->args.offset = 0;
 	data->args.count = 0;
 	data->args.context = dreq->ctx;
+	data->args.lock_context = dreq->l_ctx;
 	data->res.count = 0;
 	data->res.fattr = &data->fattr;
 	data->res.verf = &data->verf;
@@ -761,6 +771,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		data->cred = msg.rpc_cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
+		data->args.lock_context = dreq->l_ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
 		data->args.pages = data->pagevec;
@@ -845,7 +856,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
 				unsigned long nr_segs, loff_t pos,
 				size_t count)
 {
-	ssize_t result = 0;
+	ssize_t result = -ENOMEM;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct nfs_direct_req *dreq;
 	size_t wsize = NFS_SERVER(inode)->wsize;
@@ -853,7 +864,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
 
 	dreq = nfs_direct_req_alloc();
 	if (!dreq)
-		return -ENOMEM;
+		goto out;
 	nfs_alloc_commit_data(dreq);
 
 	if (dreq->commit_data == NULL || count < wsize)
@@ -861,14 +872,18 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
 
 	dreq->inode = inode;
 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
+	dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
+	if (dreq->l_ctx == NULL)
+		goto out_release;
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
 	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync);
 	if (!result)
 		result = nfs_direct_wait(dreq);
+out_release:
 	nfs_direct_req_release(dreq);
-
+out:
 	return result;
 }
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 099b3518fee..ec7a8f96a2c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -530,6 +530,68 @@ out:
 	return err;
 }
 
+static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
+{
+	atomic_set(&l_ctx->count, 1);
+	l_ctx->lockowner = current->files;
+	l_ctx->pid = current->tgid;
+	INIT_LIST_HEAD(&l_ctx->list);
+}
+
+static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
+{
+	struct nfs_lock_context *pos;
+
+	list_for_each_entry(pos, &ctx->lock_context.list, list) {
+		if (pos->lockowner != current->files)
+			continue;
+		if (pos->pid != current->tgid)
+			continue;
+		atomic_inc(&pos->count);
+		return pos;
+	}
+	return NULL;
+}
+
+struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
+{
+	struct nfs_lock_context *res, *new = NULL;
+	struct inode *inode = ctx->path.dentry->d_inode;
+
+	spin_lock(&inode->i_lock);
+	res = __nfs_find_lock_context(ctx);
+	if (res == NULL) {
+		spin_unlock(&inode->i_lock);
+		new = kmalloc(sizeof(*new), GFP_KERNEL);
+		if (new == NULL)
+			return NULL;
+		nfs_init_lock_context(new);
+		spin_lock(&inode->i_lock);
+		res = __nfs_find_lock_context(ctx);
+		if (res == NULL) {
+			list_add_tail(&new->list, &ctx->lock_context.list);
+			new->open_context = ctx;
+			res = new;
+			new = NULL;
+		}
+	}
+	spin_unlock(&inode->i_lock);
+	kfree(new);
+	return res;
+}
+
+void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
+{
+	struct nfs_open_context *ctx = l_ctx->open_context;
+	struct inode *inode = ctx->path.dentry->d_inode;
+
+	if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
+		return;
+	list_del(&l_ctx->list);
+	spin_unlock(&inode->i_lock);
+	kfree(l_ctx);
+}
+
 /**
  * nfs_close_context - Common close_context() routine NFSv2/v3
  * @ctx: pointer to context
@@ -566,11 +628,11 @@ static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct
 		path_get(&ctx->path);
 		ctx->cred = get_rpccred(cred);
 		ctx->state = NULL;
-		ctx->lockowner = current->files;
 		ctx->flags = 0;
 		ctx->error = 0;
 		ctx->dir_cookie = 0;
-		atomic_set(&ctx->count, 1);
+		nfs_init_lock_context(&ctx->lock_context);
+		ctx->lock_context.open_context = ctx;
 	}
 	return ctx;
 }
@@ -578,7 +640,7 @@ static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct
 struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
 {
 	if (ctx != NULL)
-		atomic_inc(&ctx->count);
+		atomic_inc(&ctx->lock_context.count);
 	return ctx;
 }
 
@@ -586,7 +648,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
 {
 	struct inode *inode = ctx->path.dentry->d_inode;
 
-	if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
+	if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
 		return;
 	list_del(&ctx->list);
 	spin_unlock(&inode->i_lock);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1f7781d636a..873b62f209e 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1324,14 +1324,14 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 	hdr->replen += decode_putrootfh_maxsz;
 }
 
-static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
+static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx)
 {
 	nfs4_stateid stateid;
 	__be32 *p;
 
 	p = reserve_space(xdr, NFS4_STATEID_SIZE);
 	if (ctx->state != NULL) {
-		nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
+		nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner);
 		xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
 	} else
 		xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
@@ -1344,7 +1344,7 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
 	p = reserve_space(xdr, 4);
 	*p = cpu_to_be32(OP_READ);
 
-	encode_stateid(xdr, args->context);
+	encode_stateid(xdr, args->context, args->lock_context);
 
 	p = reserve_space(xdr, 12);
 	p = xdr_encode_hyper(p, args->offset);
@@ -1523,7 +1523,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
 	p = reserve_space(xdr, 4);
 	*p = cpu_to_be32(OP_WRITE);
 
-	encode_stateid(xdr, args->context);
+	encode_stateid(xdr, args->context, args->lock_context);
 
 	p = reserve_space(xdr, 16);
 	p = xdr_encode_hyper(p, args->offset);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a3654e57b58..919490232e1 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -79,6 +79,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 	req->wb_pgbase	= offset;
 	req->wb_bytes   = count;
 	req->wb_context = get_nfs_open_context(ctx);
+	req->wb_lock_context = nfs_get_lock_context(ctx);
 	kref_init(&req->wb_kref);
 	return req;
 }
@@ -141,11 +142,16 @@ void nfs_clear_request(struct nfs_page *req)
 {
 	struct page *page = req->wb_page;
 	struct nfs_open_context *ctx = req->wb_context;
+	struct nfs_lock_context *l_ctx = req->wb_lock_context;
 
 	if (page != NULL) {
 		page_cache_release(page);
 		req->wb_page = NULL;
 	}
+	if (l_ctx != NULL) {
+		nfs_put_lock_context(l_ctx);
+		req->wb_lock_context = NULL;
+	}
 	if (ctx != NULL) {
 		put_nfs_open_context(ctx);
 		req->wb_context = NULL;
@@ -235,7 +241,7 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev,
 {
 	if (req->wb_context->cred != prev->wb_context->cred)
 		return 0;
-	if (req->wb_context->lockowner != prev->wb_context->lockowner)
+	if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
 		return 0;
 	if (req->wb_context->state != prev->wb_context->state)
 		return 0;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 5a33a92e816..87adc274424 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -190,6 +190,7 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	data->args.pages  = data->pagevec;
 	data->args.count  = count;
 	data->args.context = get_nfs_open_context(req->wb_context);
+	data->args.lock_context = req->wb_lock_context;
 
 	data->res.fattr   = &data->fattr;
 	data->res.count   = count;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 03df22822c4..5eccea127ca 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -689,7 +689,9 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
 		req = nfs_page_find_request(page);
 		if (req == NULL)
 			return 0;
-		do_flush = req->wb_page != page || req->wb_context != ctx;
+		do_flush = req->wb_page != page || req->wb_context != ctx ||
+			req->wb_lock_context->lockowner != current->files ||
+			req->wb_lock_context->pid != current->tgid;
 		nfs_release_request(req);
 		if (!do_flush)
 			return 0;
@@ -813,6 +815,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
 	data->args.pages  = data->pagevec;
 	data->args.count  = count;
 	data->args.context = get_nfs_open_context(req->wb_context);
+	data->args.lock_context = req->wb_lock_context;
 	data->args.stable  = NFS_UNSTABLE;
 	if (how & FLUSH_STABLE) {
 		data->args.stable = NFS_DATA_SYNC;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 77c2ae53431..a9d80261508 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -72,13 +72,20 @@ struct nfs_access_entry {
 	int			mask;
 };
 
+struct nfs_lock_context {
+	atomic_t count;
+	struct list_head list;
+	struct nfs_open_context *open_context;
+	fl_owner_t lockowner;
+	pid_t pid;
+};
+
 struct nfs4_state;
 struct nfs_open_context {
-	atomic_t count;
+	struct nfs_lock_context lock_context;
 	struct path path;
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
-	fl_owner_t lockowner;
 	fmode_t mode;
 
 	unsigned long flags;
@@ -353,6 +360,8 @@ extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
 extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode);
+extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx);
+extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx);
 extern u64 nfs_compat_user_ino64(u64 fileid);
 extern void nfs_fattr_init(struct nfs_fattr *fattr);
 
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 3c60685d972..f8b60e7f4c4 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -39,6 +39,7 @@ struct nfs_page {
 	struct list_head	wb_list;	/* Defines state of page: */
 	struct page		*wb_page;	/* page to read in/write out */
 	struct nfs_open_context	*wb_context;	/* File state context info */
+	struct nfs_lock_context	*wb_lock_context;	/* lock context info */
 	atomic_t		wb_complete;	/* i/os we're waiting for */
 	pgoff_t			wb_index;	/* Offset >> PAGE_CACHE_SHIFT */
 	unsigned int		wb_offset,	/* Offset & ~PAGE_CACHE_MASK */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a319cb926ab..87202c7026e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -334,6 +334,7 @@ struct nfs4_delegreturnres {
 struct nfs_readargs {
 	struct nfs_fh *		fh;
 	struct nfs_open_context *context;
+	struct nfs_lock_context *lock_context;
 	__u64			offset;
 	__u32			count;
 	unsigned int		pgbase;
@@ -354,6 +355,7 @@ struct nfs_readres {
 struct nfs_writeargs {
 	struct nfs_fh *		fh;
 	struct nfs_open_context *context;
+	struct nfs_lock_context *lock_context;
 	__u64			offset;
 	__u32			count;
 	enum nfs3_stable_how	stable;
-- 
cgit v1.2.3-70-g09d2


From d3c7b7ccc199ee564177ee914c04771d6bc00295 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 1 Jul 2010 12:49:01 -0400
Subject: NFSv4: Add support for the RELEASE_LOCKOWNER operation

This is needed by NFSv4.0 servers in order to keep the number of locking
stateids at a manageable level.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h        |  1 +
 fs/nfs/nfs4proc.c       | 28 +++++++++++++++++++++++++
 fs/nfs/nfs4state.c      |  2 ++
 fs/nfs/nfs4xdr.c        | 55 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs4.h    |  1 +
 include/linux/nfs_xdr.h |  4 ++++
 6 files changed, 91 insertions(+)

(limited to 'include')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index cee871471e8..deaf37f5a7a 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -236,6 +236,7 @@ extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nam
 extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
 extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
 		struct nfs4_fs_locations *fs_locations, struct page *page);
+extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
 
 #if defined(CONFIG_NFS_V4_1)
 static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index de9ff1505a2..5d3e8a2db99 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4414,6 +4414,34 @@ out:
 	return err;
 }
 
+static void nfs4_release_lockowner_release(void *calldata)
+{
+	kfree(calldata);
+}
+
+const struct rpc_call_ops nfs4_release_lockowner_ops = {
+	.rpc_release = nfs4_release_lockowner_release,
+};
+
+void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
+{
+	struct nfs_server *server = lsp->ls_state->owner->so_server;
+	struct nfs_release_lockowner_args *args;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER],
+	};
+
+	if (server->nfs_client->cl_mvops->minor_version != 0)
+		return;
+	args = kmalloc(sizeof(*args), GFP_NOFS);
+	if (!args)
+		return;
+	args->lock_owner.clientid = server->nfs_client->cl_clientid;
+	args->lock_owner.id = lsp->ls_id.id;
+	msg.rpc_argp = args;
+	rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
+}
+
 #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
 
 int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 13e17e32e3e..13a4f27e727 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -701,6 +701,8 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
 	if (list_empty(&state->lock_states))
 		clear_bit(LK_STATE_IN_USE, &state->flags);
 	spin_unlock(&state->state_lock);
+	if (lsp->ls_flags & NFS_LOCK_INITIALIZED)
+		nfs4_release_lockowner(lsp);
 	nfs4_free_lock_state(lsp);
 }
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 49df05afdc6..15185c2abd1 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -220,6 +220,11 @@ static int nfs4_stat_to_errno(int);
 				 4)
 #define decode_locku_maxsz	(op_decode_hdr_maxsz + \
 				 decode_stateid_maxsz)
+#define encode_release_lockowner_maxsz \
+				(op_encode_hdr_maxsz + \
+				 encode_lockowner_maxsz)
+#define decode_release_lockowner_maxsz \
+				(op_decode_hdr_maxsz)
 #define encode_access_maxsz	(op_encode_hdr_maxsz + 1)
 #define decode_access_maxsz	(op_decode_hdr_maxsz + 2)
 #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
@@ -474,6 +479,12 @@ static int nfs4_stat_to_errno(int);
 				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_locku_maxsz)
+#define NFS4_enc_release_lockowner_sz \
+				(compound_encode_hdr_maxsz + \
+				 encode_lockowner_maxsz)
+#define NFS4_dec_release_lockowner_sz \
+				(compound_decode_hdr_maxsz + \
+				 decode_lockowner_maxsz)
 #define NFS4_enc_access_sz	(compound_encode_hdr_maxsz + \
 				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
@@ -1116,6 +1127,17 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar
 	hdr->replen += decode_locku_maxsz;
 }
 
+static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr)
+{
+	__be32 *p;
+
+	p = reserve_space(xdr, 4);
+	*p = cpu_to_be32(OP_RELEASE_LOCKOWNER);
+	encode_lockowner(xdr, lowner);
+	hdr->nops++;
+	hdr->replen += decode_release_lockowner_maxsz;
+}
+
 static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
 {
 	int len = name->len;
@@ -2056,6 +2078,20 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_
 	return 0;
 }
 
+static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.minorversion = 0,
+	};
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_release_lockowner(&xdr, &args->lock_owner, &hdr);
+	encode_nops(&hdr);
+	return 0;
+}
+
 /*
  * Encode a READLINK request
  */
@@ -3981,6 +4017,11 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
 	return status;
 }
 
+static int decode_release_lockowner(struct xdr_stream *xdr)
+{
+	return decode_op_hdr(xdr, OP_RELEASE_LOCKOWNER);
+}
+
 static int decode_lookup(struct xdr_stream *xdr)
 {
 	return decode_op_hdr(xdr, OP_LOOKUP);
@@ -5267,6 +5308,19 @@ out:
 	return status;
 }
 
+static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_release_lockowner(&xdr);
+	return status;
+}
+
 /*
  * Decode READLINK response
  */
@@ -5874,6 +5928,7 @@ struct rpc_procinfo	nfs4_procedures[] = {
   PROC(GETACL,		enc_getacl,	dec_getacl),
   PROC(SETACL,		enc_setacl,	dec_setacl),
   PROC(FS_LOCATIONS,	enc_fs_locations, dec_fs_locations),
+  PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
 #if defined(CONFIG_NFS_V4_1)
   PROC(EXCHANGE_ID,	enc_exchange_id,	dec_exchange_id),
   PROC(CREATE_SESSION,	enc_create_session,	dec_create_session),
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 9b8299af374..07e40c62597 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -523,6 +523,7 @@ enum {
 	NFSPROC4_CLNT_GETACL,
 	NFSPROC4_CLNT_SETACL,
 	NFSPROC4_CLNT_FS_LOCATIONS,
+	NFSPROC4_CLNT_RELEASE_LOCKOWNER,
 
 	/* nfs41 */
 	NFSPROC4_CLNT_EXCHANGE_ID,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 87202c7026e..fc461926c41 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -315,6 +315,10 @@ struct nfs_lockt_res {
 	struct nfs4_sequence_res	seq_res;
 };
 
+struct nfs_release_lockowner_args {
+	struct nfs_lowner	lock_owner;
+};
+
 struct nfs4_delegreturnargs {
 	const struct nfs_fh *fhandle;
 	const nfs4_stateid *stateid;
-- 
cgit v1.2.3-70-g09d2


From 5716d415f8c5a17d44f6e1d5a1e4998f7306a93b Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 11 Jul 2010 09:51:14 +0200
Subject: pcmcia: remove obsolete ioctl

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 Documentation/feature-removal-schedule.txt |   23 -
 drivers/pcmcia/Makefile                    |    1 -
 drivers/pcmcia/cs_internal.h               |   40 +-
 drivers/pcmcia/ds.c                        |   26 +-
 drivers/pcmcia/pcmcia_ioctl.c              | 1077 ----------------------------
 drivers/pcmcia/rsrc_iodyn.c                |    2 -
 drivers/pcmcia/rsrc_mgr.c                  |    2 -
 drivers/pcmcia/rsrc_nonstatic.c            |    2 -
 include/pcmcia/ds.h                        |  209 ------
 include/pcmcia/ss.h                        |    8 +-
 10 files changed, 7 insertions(+), 1383 deletions(-)
 delete mode 100644 drivers/pcmcia/pcmcia_ioctl.c

(limited to 'include')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index c268783bc4e..27ed68d95ad 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -116,29 +116,6 @@ Who:	Mauro Carvalho Chehab <mchehab@infradead.org>
 
 ---------------------------
 
-What:	PCMCIA control ioctl (needed for pcmcia-cs [cardmgr, cardctl])
-When:	2.6.35/2.6.36
-Files:	drivers/pcmcia/: pcmcia_ioctl.c
-Why:	With the 16-bit PCMCIA subsystem now behaving (almost) like a
-	normal hotpluggable bus, and with it using the default kernel
-	infrastructure (hotplug, driver core, sysfs) keeping the PCMCIA
-	control ioctl needed by cardmgr and cardctl from pcmcia-cs is
-	unnecessary and potentially harmful (it does not provide for
-	proper locking), and makes further cleanups and integration of the
-	PCMCIA subsystem into the Linux kernel device driver model more
-	difficult. The features provided by cardmgr and cardctl are either
-	handled by the kernel itself now or are available in the new
-	pcmciautils package available at
-	http://kernel.org/pub/linux/utils/kernel/pcmcia/
-
-	For all architectures except ARM, the associated config symbol
-	has been removed from kernel 2.6.34; for ARM, it will be likely
-	be removed from kernel 2.6.35. The actual code will then likely
-	be removed from kernel 2.6.36.
-Who:	Dominik Brodowski <linux@dominikbrodowski.net>
-
----------------------------
-
 What:	sys_sysctl
 When:	September 2010
 Option: CONFIG_SYSCTL_SYSCALL
diff --git a/drivers/pcmcia/Makefile b/drivers/pcmcia/Makefile
index d006e8beab9..7a2b1604bf1 100644
--- a/drivers/pcmcia/Makefile
+++ b/drivers/pcmcia/Makefile
@@ -7,7 +7,6 @@ pcmcia_core-$(CONFIG_CARDBUS)			+= cardbus.o
 obj-$(CONFIG_PCCARD)				+= pcmcia_core.o
 
 pcmcia-y					+= ds.o pcmcia_resource.o cistpl.o pcmcia_cis.o
-pcmcia-$(CONFIG_PCMCIA_IOCTL)			+= pcmcia_ioctl.o
 obj-$(CONFIG_PCMCIA)				+= pcmcia.o
 
 pcmcia_rsrc-y					+= rsrc_mgr.o
diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h
index 4126a75445e..a6cc63db8c8 100644
--- a/drivers/pcmcia/cs_internal.h
+++ b/drivers/pcmcia/cs_internal.h
@@ -60,14 +60,6 @@ struct pccard_resource_ops {
 	struct resource* (*find_mem)	(unsigned long base, unsigned long num,
 					 unsigned long align, int low,
 					 struct pcmcia_socket *s);
-	int	(*add_io)		(struct pcmcia_socket *s,
-					 unsigned int action,
-					 unsigned long r_start,
-					 unsigned long r_end);
-	int	(*add_mem)		(struct pcmcia_socket *s,
-					 unsigned int action,
-					 unsigned long r_start,
-					 unsigned long r_end);
 	int	(*init)			(struct pcmcia_socket *s);
 	void	(*exit)			(struct pcmcia_socket *s);
 };
@@ -146,6 +138,8 @@ void pcmcia_put_socket(struct pcmcia_socket *skt);
 /* ds.c */
 extern struct bus_type pcmcia_bus_type;
 
+struct pcmcia_device;
+
 /* pcmcia_resource.c */
 extern int pcmcia_release_configuration(struct pcmcia_device *p_dev);
 extern int pcmcia_validate_mem(struct pcmcia_socket *s);
@@ -188,34 +182,4 @@ int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function,
 
 int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple);
 
-
-#ifdef CONFIG_PCMCIA_IOCTL
-/* ds.c */
-extern struct pcmcia_device *pcmcia_get_dev(struct pcmcia_device *p_dev);
-extern void pcmcia_put_dev(struct pcmcia_device *p_dev);
-
-struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s,
-					unsigned int function);
-
-/* pcmcia_ioctl.c */
-extern void __init pcmcia_setup_ioctl(void);
-extern void __exit pcmcia_cleanup_ioctl(void);
-extern void handle_event(struct pcmcia_socket *s, event_t event);
-extern int handle_request(struct pcmcia_socket *s, event_t event);
-
-#else /* CONFIG_PCMCIA_IOCTL */
-
-static inline void __init pcmcia_setup_ioctl(void) { return; }
-static inline void __exit pcmcia_cleanup_ioctl(void) { return; }
-static inline void handle_event(struct pcmcia_socket *s, event_t event)
-{
-	return;
-}
-static inline int handle_request(struct pcmcia_socket *s, event_t event)
-{
-	return 0;
-}
-
-#endif /* CONFIG_PCMCIA_IOCTL */
-
 #endif /* _LINUX_CS_INTERNAL_H */
diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index eac961463be..d2ec4584841 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -213,7 +213,7 @@ EXPORT_SYMBOL(pcmcia_unregister_driver);
 
 /* pcmcia_device handling */
 
-struct pcmcia_device *pcmcia_get_dev(struct pcmcia_device *p_dev)
+static struct pcmcia_device *pcmcia_get_dev(struct pcmcia_device *p_dev)
 {
 	struct device *tmp_dev;
 	tmp_dev = get_device(&p_dev->dev);
@@ -222,7 +222,7 @@ struct pcmcia_device *pcmcia_get_dev(struct pcmcia_device *p_dev)
 	return to_pcmcia_dev(tmp_dev);
 }
 
-void pcmcia_put_dev(struct pcmcia_device *p_dev)
+static void pcmcia_put_dev(struct pcmcia_device *p_dev)
 {
 	if (p_dev)
 		put_device(&p_dev->dev);
@@ -477,7 +477,8 @@ static int pcmcia_device_query(struct pcmcia_device *p_dev)
 }
 
 
-struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s, unsigned int function)
+static struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s,
+					       unsigned int function)
 {
 	struct pcmcia_device *p_dev, *tmp_dev;
 	int i;
@@ -885,14 +886,6 @@ static int pcmcia_bus_match(struct device *dev, struct device_driver *drv)
 	}
 	mutex_unlock(&p_drv->dynids.lock);
 
-#ifdef CONFIG_PCMCIA_IOCTL
-	/* matching by cardmgr */
-	if (p_dev->cardmgr == p_drv) {
-		dev_dbg(dev, "cardmgr matched to %s\n", drv->name);
-		return 1;
-	}
-#endif
-
 	while (did && did->match_flags) {
 		dev_dbg(dev, "trying to match to %s\n", drv->name);
 		if (pcmcia_devmatch(p_dev, did)) {
@@ -1245,7 +1238,6 @@ static int ds_event(struct pcmcia_socket *skt, event_t event, int priority)
 	case CS_EVENT_CARD_REMOVAL:
 		atomic_set(&skt->present, 0);
 		pcmcia_card_remove(skt, NULL);
-		handle_event(skt, event);
 		mutex_lock(&s->ops_mutex);
 		destroy_cis_cache(s);
 		pcmcia_cleanup_irq(s);
@@ -1259,7 +1251,6 @@ static int ds_event(struct pcmcia_socket *skt, event_t event, int priority)
 		destroy_cis_cache(s); /* to be on the safe side... */
 		mutex_unlock(&s->ops_mutex);
 		pcmcia_card_add(skt);
-		handle_event(skt, event);
 		break;
 
 	case CS_EVENT_EJECTION_REQUEST:
@@ -1280,14 +1271,12 @@ static int ds_event(struct pcmcia_socket *skt, event_t event, int priority)
 			ds_event(skt, CS_EVENT_CARD_INSERTION,
 				 CS_EVENT_PRI_LOW);
 		}
-		handle_event(skt, event);
 		break;
 
 	case CS_EVENT_PM_SUSPEND:
 	case CS_EVENT_RESET_PHYSICAL:
 	case CS_EVENT_CARD_RESET:
 	default:
-		handle_event(skt, event);
 		break;
     }
 
@@ -1350,9 +1339,6 @@ static int __devinit pcmcia_bus_add_socket(struct device *dev,
 		return ret;
 	}
 
-#ifdef CONFIG_PCMCIA_IOCTL
-	init_waitqueue_head(&socket->queue);
-#endif
 	INIT_LIST_HEAD(&socket->devices_list);
 	memset(&socket->pcmcia_state, 0, sizeof(u8));
 	socket->device_count = 0;
@@ -1429,8 +1415,6 @@ static int __init init_pcmcia_bus(void)
 		return ret;
 	}
 
-	pcmcia_setup_ioctl();
-
 	return 0;
 }
 fs_initcall(init_pcmcia_bus); /* one level after subsys_initcall so that
@@ -1439,8 +1423,6 @@ fs_initcall(init_pcmcia_bus); /* one level after subsys_initcall so that
 
 static void __exit exit_pcmcia_bus(void)
 {
-	pcmcia_cleanup_ioctl();
-
 	class_interface_unregister(&pcmcia_bus_interface);
 
 	bus_unregister(&pcmcia_bus_type);
diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c
deleted file mode 100644
index d007a2a0383..00000000000
--- a/drivers/pcmcia/pcmcia_ioctl.c
+++ /dev/null
@@ -1,1077 +0,0 @@
-/*
- * pcmcia_ioctl.c -- ioctl interface for cardmgr and cardctl
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * The initial developer of the original code is David A. Hinds
- * <dahinds@users.sourceforge.net>.  Portions created by David A. Hinds
- * are Copyright (C) 1999 David A. Hinds.  All Rights Reserved.
- *
- * (C) 1999		David A. Hinds
- * (C) 2003 - 2004	Dominik Brodowski
- */
-
-/*
- * This file will go away soon.
- */
-
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/major.h>
-#include <linux/errno.h>
-#include <linux/ioctl.h>
-#include <linux/proc_fs.h>
-#include <linux/poll.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <linux/seq_file.h>
-#include <linux/smp_lock.h>
-#include <linux/workqueue.h>
-
-#include <pcmcia/cs_types.h>
-#include <pcmcia/cs.h>
-#include <pcmcia/cistpl.h>
-#include <pcmcia/cisreg.h>
-#include <pcmcia/ds.h>
-#include <pcmcia/ss.h>
-
-#include "cs_internal.h"
-
-static int major_dev = -1;
-
-
-/* Device user information */
-#define MAX_EVENTS	32
-#define USER_MAGIC	0x7ea4
-#define CHECK_USER(u) \
-    (((u) == NULL) || ((u)->user_magic != USER_MAGIC))
-
-typedef struct user_info_t {
-	u_int			user_magic;
-	int			event_head, event_tail;
-	event_t			event[MAX_EVENTS];
-	struct user_info_t	*next;
-	struct pcmcia_socket	*socket;
-} user_info_t;
-
-
-static struct pcmcia_device *get_pcmcia_device(struct pcmcia_socket *s,
-						unsigned int function)
-{
-	struct pcmcia_device *p_dev = NULL;
-
-	mutex_lock(&s->ops_mutex);
-	list_for_each_entry(p_dev, &s->devices_list, socket_device_list) {
-		if (p_dev->func == function) {
-			mutex_unlock(&s->ops_mutex);
-			return pcmcia_get_dev(p_dev);
-		}
-	}
-	mutex_unlock(&s->ops_mutex);
-	return NULL;
-}
-
-/* backwards-compatible accessing of driver --- by name! */
-
-static struct pcmcia_driver *get_pcmcia_driver(dev_info_t *dev_info)
-{
-	struct device_driver *drv;
-	struct pcmcia_driver *p_drv;
-
-	drv = driver_find((char *) dev_info, &pcmcia_bus_type);
-	if (!drv)
-		return NULL;
-
-	p_drv = container_of(drv, struct pcmcia_driver, drv);
-
-	return p_drv;
-}
-
-
-#ifdef CONFIG_PROC_FS
-static struct proc_dir_entry *proc_pccard;
-
-static int proc_read_drivers_callback(struct device_driver *driver, void *_m)
-{
-	struct seq_file *m = _m;
-	struct pcmcia_driver *p_drv = container_of(driver,
-						   struct pcmcia_driver, drv);
-
-	seq_printf(m, "%-24.24s 1 %d\n", p_drv->drv.name,
-#ifdef CONFIG_MODULE_UNLOAD
-		      (p_drv->owner) ? module_refcount(p_drv->owner) : 1
-#else
-		      1
-#endif
-	);
-	return 0;
-}
-
-static int pccard_drivers_proc_show(struct seq_file *m, void *v)
-{
-	return bus_for_each_drv(&pcmcia_bus_type, NULL,
-				m, proc_read_drivers_callback);
-}
-
-static int pccard_drivers_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, pccard_drivers_proc_show, NULL);
-}
-
-static const struct file_operations pccard_drivers_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= pccard_drivers_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-#endif
-
-
-#ifdef CONFIG_PCMCIA_PROBE
-
-static int adjust_irq(struct pcmcia_socket *s, adjust_t *adj)
-{
-	int irq;
-	u32 mask;
-
-	irq = adj->resource.irq.IRQ;
-	if ((irq < 0) || (irq > 15))
-		return -EINVAL;
-
-	if (adj->Action != REMOVE_MANAGED_RESOURCE)
-		return 0;
-
-	mask = 1 << irq;
-
-	if (!(s->irq_mask & mask))
-		return 0;
-
-	s->irq_mask &= ~mask;
-
-	return 0;
-}
-
-#else
-
-static inline int adjust_irq(struct pcmcia_socket *s, adjust_t *adj)
-{
-	return 0;
-}
-
-#endif
-
-static int pcmcia_adjust_resource_info(adjust_t *adj)
-{
-	struct pcmcia_socket *s;
-	int ret = -ENOSYS;
-
-	down_read(&pcmcia_socket_list_rwsem);
-	list_for_each_entry(s, &pcmcia_socket_list, socket_list) {
-
-		if (adj->Resource == RES_IRQ)
-			ret = adjust_irq(s, adj);
-
-		else if (s->resource_ops->add_io) {
-			unsigned long begin, end;
-
-			/* you can't use the old interface if the new
-			 * one was used before */
-			mutex_lock(&s->ops_mutex);
-			if ((s->resource_setup_new) &&
-			    !(s->resource_setup_old)) {
-				mutex_unlock(&s->ops_mutex);
-				continue;
-			} else if (!(s->resource_setup_old))
-				s->resource_setup_old = 1;
-
-			switch (adj->Resource) {
-			case RES_MEMORY_RANGE:
-				begin = adj->resource.memory.Base;
-				end = adj->resource.memory.Base + adj->resource.memory.Size - 1;
-				if (s->resource_ops->add_mem)
-					ret = s->resource_ops->add_mem(s, adj->Action, begin, end);
-			case RES_IO_RANGE:
-				begin = adj->resource.io.BasePort;
-				end = adj->resource.io.BasePort + adj->resource.io.NumPorts - 1;
-				if (s->resource_ops->add_io)
-					ret = s->resource_ops->add_io(s, adj->Action, begin, end);
-			}
-			if (!ret) {
-				/* as there's no way we know this is the
-				 * last call to adjust_resource_info, we
-				 * always need to assume this is the latest
-				 * one... */
-				s->resource_setup_done = 1;
-			}
-			mutex_unlock(&s->ops_mutex);
-		}
-	}
-	up_read(&pcmcia_socket_list_rwsem);
-
-	return ret;
-}
-
-
-/** pcmcia_get_window
- */
-static int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *wh_out,
-			window_handle_t wh, win_req_t *req)
-{
-	pccard_mem_map *win;
-	window_handle_t w;
-
-	wh--;
-	if (!s || !(s->state & SOCKET_PRESENT))
-		return -ENODEV;
-	if (wh >= MAX_WIN)
-		return -EINVAL;
-	for (w = wh; w < MAX_WIN; w++)
-		if (s->state & SOCKET_WIN_REQ(w))
-			break;
-	if (w == MAX_WIN)
-		return -EINVAL;
-	win = &s->win[w];
-	req->Base = win->res->start;
-	req->Size = win->res->end - win->res->start + 1;
-	req->AccessSpeed = win->speed;
-	req->Attributes = 0;
-	if (win->flags & MAP_ATTRIB)
-		req->Attributes |= WIN_MEMORY_TYPE_AM;
-	if (win->flags & MAP_ACTIVE)
-		req->Attributes |= WIN_ENABLE;
-	if (win->flags & MAP_16BIT)
-		req->Attributes |= WIN_DATA_WIDTH_16;
-	if (win->flags & MAP_USE_WAIT)
-		req->Attributes |= WIN_USE_WAIT;
-
-	*wh_out = w + 1;
-	return 0;
-} /* pcmcia_get_window */
-
-
-/** pcmcia_get_mem_page
- *
- * Change the card address of an already open memory window.
- */
-static int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t wh,
-			memreq_t *req)
-{
-	wh--;
-	if (wh >= MAX_WIN)
-		return -EINVAL;
-
-	req->Page = 0;
-	req->CardOffset = skt->win[wh].card_start;
-	return 0;
-} /* pcmcia_get_mem_page */
-
-
-/** pccard_get_status
- *
- * Get the current socket state bits.  We don't support the latched
- * SocketState yet: I haven't seen any point for it.
- */
-
-static int pccard_get_status(struct pcmcia_socket *s,
-			     struct pcmcia_device *p_dev,
-			     cs_status_t *status)
-{
-	config_t *c;
-	int val;
-
-	s->ops->get_status(s, &val);
-	status->CardState = status->SocketState = 0;
-	status->CardState |= (val & SS_DETECT) ? CS_EVENT_CARD_DETECT : 0;
-	status->CardState |= (val & SS_CARDBUS) ? CS_EVENT_CB_DETECT : 0;
-	status->CardState |= (val & SS_3VCARD) ? CS_EVENT_3VCARD : 0;
-	status->CardState |= (val & SS_XVCARD) ? CS_EVENT_XVCARD : 0;
-	if (s->state & SOCKET_SUSPEND)
-		status->CardState |= CS_EVENT_PM_SUSPEND;
-	if (!(s->state & SOCKET_PRESENT))
-		return -ENODEV;
-
-	c = (p_dev) ? p_dev->function_config : NULL;
-
-	if ((c != NULL) && (c->state & CONFIG_LOCKED) &&
-	    (c->IntType & (INT_MEMORY_AND_IO | INT_ZOOMED_VIDEO))) {
-		u_char reg;
-		if (c->CardValues & PRESENT_PIN_REPLACE) {
-			mutex_lock(&s->ops_mutex);
-			pcmcia_read_cis_mem(s, 1, (c->ConfigBase+CISREG_PRR)>>1, 1, &reg);
-			mutex_unlock(&s->ops_mutex);
-			status->CardState |=
-				(reg & PRR_WP_STATUS) ? CS_EVENT_WRITE_PROTECT : 0;
-			status->CardState |=
-				(reg & PRR_READY_STATUS) ? CS_EVENT_READY_CHANGE : 0;
-			status->CardState |=
-				(reg & PRR_BVD2_STATUS) ? CS_EVENT_BATTERY_LOW : 0;
-			status->CardState |=
-				(reg & PRR_BVD1_STATUS) ? CS_EVENT_BATTERY_DEAD : 0;
-		} else {
-			/* No PRR?  Then assume we're always ready */
-			status->CardState |= CS_EVENT_READY_CHANGE;
-		}
-		if (c->CardValues & PRESENT_EXT_STATUS) {
-			mutex_lock(&s->ops_mutex);
-			pcmcia_read_cis_mem(s, 1, (c->ConfigBase+CISREG_ESR)>>1, 1, &reg);
-			mutex_unlock(&s->ops_mutex);
-			status->CardState |=
-				(reg & ESR_REQ_ATTN) ? CS_EVENT_REQUEST_ATTENTION : 0;
-		}
-		return 0;
-	}
-	status->CardState |=
-		(val & SS_WRPROT) ? CS_EVENT_WRITE_PROTECT : 0;
-	status->CardState |=
-		(val & SS_BATDEAD) ? CS_EVENT_BATTERY_DEAD : 0;
-	status->CardState |=
-		(val & SS_BATWARN) ? CS_EVENT_BATTERY_LOW : 0;
-	status->CardState |=
-		(val & SS_READY) ? CS_EVENT_READY_CHANGE : 0;
-	return 0;
-} /* pccard_get_status */
-
-static int pccard_get_configuration_info(struct pcmcia_socket *s,
-				  struct pcmcia_device *p_dev,
-				  config_info_t *config)
-{
-	config_t *c;
-
-	if (!(s->state & SOCKET_PRESENT))
-		return -ENODEV;
-
-
-#ifdef CONFIG_CARDBUS
-	if (s->state & SOCKET_CARDBUS) {
-		memset(config, 0, sizeof(config_info_t));
-		config->Vcc = s->socket.Vcc;
-		config->Vpp1 = config->Vpp2 = s->socket.Vpp;
-		config->Option = s->cb_dev->subordinate->number;
-		if (s->state & SOCKET_CARDBUS_CONFIG) {
-			config->Attributes = CONF_VALID_CLIENT;
-			config->IntType = INT_CARDBUS;
-			config->AssignedIRQ = s->pcmcia_irq;
-			if (config->AssignedIRQ)
-				config->Attributes |= CONF_ENABLE_IRQ;
-			if (s->io[0].res) {
-				config->BasePort1 = s->io[0].res->start;
-				config->NumPorts1 = s->io[0].res->end -
-					config->BasePort1 + 1;
-			}
-		}
-		return 0;
-	}
-#endif
-
-	if (p_dev) {
-		c = p_dev->function_config;
-		config->Function = p_dev->func;
-	} else {
-		c = NULL;
-		config->Function = 0;
-	}
-
-	if ((c == NULL) || !(c->state & CONFIG_LOCKED)) {
-		config->Attributes = 0;
-		config->Vcc = s->socket.Vcc;
-		config->Vpp1 = config->Vpp2 = s->socket.Vpp;
-		return 0;
-	}
-
-	config->Attributes = c->Attributes | CONF_VALID_CLIENT;
-	config->Vcc = s->socket.Vcc;
-	config->Vpp1 = config->Vpp2 = s->socket.Vpp;
-	config->IntType = c->IntType;
-	config->ConfigBase = c->ConfigBase;
-	config->Status = c->Status;
-	config->Pin = c->Pin;
-	config->Copy = c->Copy;
-	config->Option = c->Option;
-	config->ExtStatus = c->ExtStatus;
-	config->Present = config->CardValues = c->CardValues;
-	config->IRQAttributes = c->irq.Attributes;
-	config->AssignedIRQ = s->pcmcia_irq;
-	config->BasePort1 = c->io.BasePort1;
-	config->NumPorts1 = c->io.NumPorts1;
-	config->Attributes1 = c->io.Attributes1;
-	config->BasePort2 = c->io.BasePort2;
-	config->NumPorts2 = c->io.NumPorts2;
-	config->Attributes2 = c->io.Attributes2;
-	config->IOAddrLines = c->io.IOAddrLines;
-
-	return 0;
-} /* pccard_get_configuration_info */
-
-
-/*======================================================================
-
-    These manage a ring buffer of events pending for one user process
-
-======================================================================*/
-
-
-static int queue_empty(user_info_t *user)
-{
-    return (user->event_head == user->event_tail);
-}
-
-static event_t get_queued_event(user_info_t *user)
-{
-    user->event_tail = (user->event_tail+1) % MAX_EVENTS;
-    return user->event[user->event_tail];
-}
-
-static void queue_event(user_info_t *user, event_t event)
-{
-    user->event_head = (user->event_head+1) % MAX_EVENTS;
-    if (user->event_head == user->event_tail)
-	user->event_tail = (user->event_tail+1) % MAX_EVENTS;
-    user->event[user->event_head] = event;
-}
-
-void handle_event(struct pcmcia_socket *s, event_t event)
-{
-    user_info_t *user;
-    for (user = s->user; user; user = user->next)
-	queue_event(user, event);
-    wake_up_interruptible(&s->queue);
-}
-
-
-/*======================================================================
-
-    bind_request() and bind_device() are merged by now. Register_client()
-    is called right at the end of bind_request(), during the driver's
-    ->attach() call. Individual descriptions:
-
-    bind_request() connects a socket to a particular client driver.
-    It looks up the specified device ID in the list of registered
-    drivers, binds it to the socket, and tries to create an instance
-    of the device.  unbind_request() deletes a driver instance.
-
-    Bind_device() associates a device driver with a particular socket.
-    It is normally called by Driver Services after it has identified
-    a newly inserted card.  An instance of that driver will then be
-    eligible to register as a client of this socket.
-
-    Register_client() uses the dev_info_t handle to match the
-    caller with a socket.  The driver must have already been bound
-    to a socket with bind_device() -- in fact, bind_device()
-    allocates the client structure that will be used.
-
-======================================================================*/
-
-static int bind_request(struct pcmcia_socket *s, bind_info_t *bind_info)
-{
-	struct pcmcia_driver *p_drv;
-	struct pcmcia_device *p_dev;
-	int ret = 0;
-
-	s = pcmcia_get_socket(s);
-	if (!s)
-		return -EINVAL;
-
-	pr_debug("bind_request(%d, '%s')\n", s->sock,
-	       (char *)bind_info->dev_info);
-
-	p_drv = get_pcmcia_driver(&bind_info->dev_info);
-	if (!p_drv) {
-		ret = -EINVAL;
-		goto err_put;
-	}
-
-	if (!try_module_get(p_drv->owner)) {
-		ret = -EINVAL;
-		goto err_put_driver;
-	}
-
-	mutex_lock(&s->ops_mutex);
-	list_for_each_entry(p_dev, &s->devices_list, socket_device_list) {
-		if (p_dev->func == bind_info->function) {
-			if ((p_dev->dev.driver == &p_drv->drv)) {
-				if (p_dev->cardmgr) {
-					/* if there's already a device
-					 * registered, and it was registered
-					 * by userspace before, we need to
-					 * return the "instance". */
-					mutex_unlock(&s->ops_mutex);
-					bind_info->instance = p_dev;
-					ret = -EBUSY;
-					goto err_put_module;
-				} else {
-					/* the correct driver managed to bind
-					 * itself magically to the correct
-					 * device. */
-					mutex_unlock(&s->ops_mutex);
-					p_dev->cardmgr = p_drv;
-					ret = 0;
-					goto err_put_module;
-				}
-			} else if (!p_dev->dev.driver) {
-				/* there's already a device available where
-				 * no device has been bound to yet. So we don't
-				 * need to register a device! */
-				mutex_unlock(&s->ops_mutex);
-				goto rescan;
-			}
-		}
-	}
-	mutex_unlock(&s->ops_mutex);
-
-	p_dev = pcmcia_device_add(s, bind_info->function);
-	if (!p_dev) {
-		ret = -EIO;
-		goto err_put_module;
-	}
-
-rescan:
-	p_dev->cardmgr = p_drv;
-
-	/* if a driver is already running, we can abort */
-	if (p_dev->dev.driver)
-		goto err_put_module;
-
-	/*
-	 * Prevent this racing with a card insertion.
-	 */
-	mutex_lock(&s->skt_mutex);
-	ret = bus_rescan_devices(&pcmcia_bus_type);
-	mutex_unlock(&s->skt_mutex);
-	if (ret)
-		goto err_put_module;
-
-	/* check whether the driver indeed matched. I don't care if this
-	 * is racy or not, because it can only happen on cardmgr access
-	 * paths...
-	 */
-	if (!(p_dev->dev.driver == &p_drv->drv))
-		p_dev->cardmgr = NULL;
-
- err_put_module:
-	module_put(p_drv->owner);
- err_put_driver:
-	put_driver(&p_drv->drv);
- err_put:
-	pcmcia_put_socket(s);
-
-	return ret;
-} /* bind_request */
-
-#ifdef CONFIG_CARDBUS
-
-static struct pci_bus *pcmcia_lookup_bus(struct pcmcia_socket *s)
-{
-	if (!s || !(s->state & SOCKET_CARDBUS))
-		return NULL;
-
-	return s->cb_dev->subordinate;
-}
-#endif
-
-static int get_device_info(struct pcmcia_socket *s, bind_info_t *bind_info, int first)
-{
-	struct pcmcia_device *p_dev;
-	struct pcmcia_driver *p_drv;
-	int ret = 0;
-
-#ifdef CONFIG_CARDBUS
-	/*
-	 * Some unbelievably ugly code to associate the PCI cardbus
-	 * device and its driver with the PCMCIA "bind" information.
-	 */
-	{
-		struct pci_bus *bus;
-
-		bus = pcmcia_lookup_bus(s);
-		if (bus) {
-			struct list_head *list;
-			struct pci_dev *dev = NULL;
-
-			list = bus->devices.next;
-			while (list != &bus->devices) {
-				struct pci_dev *pdev = pci_dev_b(list);
-				list = list->next;
-
-				if (first) {
-					dev = pdev;
-					break;
-				}
-
-				/* Try to handle "next" here some way? */
-			}
-			if (dev && dev->driver) {
-				strlcpy(bind_info->name, dev->driver->name, DEV_NAME_LEN);
-				bind_info->major = 0;
-				bind_info->minor = 0;
-				bind_info->next = NULL;
-				return 0;
-			}
-		}
-	}
-#endif
-
-	mutex_lock(&s->ops_mutex);
-	list_for_each_entry(p_dev, &s->devices_list, socket_device_list) {
-		if (p_dev->func == bind_info->function) {
-			p_dev = pcmcia_get_dev(p_dev);
-			if (!p_dev)
-				continue;
-			goto found;
-		}
-	}
-	mutex_unlock(&s->ops_mutex);
-	return -ENODEV;
-
- found:
-	mutex_unlock(&s->ops_mutex);
-
-	p_drv = to_pcmcia_drv(p_dev->dev.driver);
-	if (p_drv && !p_dev->_locked) {
-		ret = -EAGAIN;
-		goto err_put;
-	}
-
-	if (!first) {
-		ret = -ENODEV;
-		goto err_put;
-	}
-
-	strlcpy(bind_info->name, dev_name(&p_dev->dev), DEV_NAME_LEN);
-	bind_info->next = NULL;
-
- err_put:
-	pcmcia_put_dev(p_dev);
-	return ret;
-} /* get_device_info */
-
-
-static int ds_open(struct inode *inode, struct file *file)
-{
-    socket_t i = iminor(inode);
-    struct pcmcia_socket *s;
-    user_info_t *user;
-    static int warning_printed;
-    int ret = 0;
-
-    pr_debug("ds_open(socket %d)\n", i);
-
-    lock_kernel();
-    s = pcmcia_get_socket_by_nr(i);
-    if (!s) {
-	    ret = -ENODEV;
-	    goto out;
-    }
-    s = pcmcia_get_socket(s);
-    if (!s) {
-	    ret = -ENODEV;
-	    goto out;
-    }
-
-    if ((file->f_flags & O_ACCMODE) != O_RDONLY) {
-	    if (s->pcmcia_state.busy) {
-		    pcmcia_put_socket(s);
-		    ret = -EBUSY;
-		    goto out;
-	    }
-	else
-	    s->pcmcia_state.busy = 1;
-    }
-
-    user = kmalloc(sizeof(user_info_t), GFP_KERNEL);
-    if (!user) {
-	    pcmcia_put_socket(s);
-	    ret = -ENOMEM;
-	    goto out;
-    }
-    user->event_tail = user->event_head = 0;
-    user->next = s->user;
-    user->user_magic = USER_MAGIC;
-    user->socket = s;
-    s->user = user;
-    file->private_data = user;
-
-    if (!warning_printed) {
-	    printk(KERN_INFO "pcmcia: Detected deprecated PCMCIA ioctl "
-			"usage from process: %s.\n", current->comm);
-	    printk(KERN_INFO "pcmcia: This interface will soon be removed from "
-			"the kernel; please expect breakage unless you upgrade "
-			"to new tools.\n");
-	    printk(KERN_INFO "pcmcia: see http://www.kernel.org/pub/linux/"
-			"utils/kernel/pcmcia/pcmcia.html for details.\n");
-	    warning_printed = 1;
-    }
-
-    if (atomic_read(&s->present))
-	queue_event(user, CS_EVENT_CARD_INSERTION);
-out:
-    unlock_kernel();
-    return ret;
-} /* ds_open */
-
-/*====================================================================*/
-
-static int ds_release(struct inode *inode, struct file *file)
-{
-    struct pcmcia_socket *s;
-    user_info_t *user, **link;
-
-    pr_debug("ds_release(socket %d)\n", iminor(inode));
-
-    user = file->private_data;
-    if (CHECK_USER(user))
-	goto out;
-
-    s = user->socket;
-
-    /* Unlink user data structure */
-    if ((file->f_flags & O_ACCMODE) != O_RDONLY)
-	s->pcmcia_state.busy = 0;
-
-    file->private_data = NULL;
-    for (link = &s->user; *link; link = &(*link)->next)
-	if (*link == user)
-		break;
-    if (link == NULL)
-	goto out;
-    *link = user->next;
-    user->user_magic = 0;
-    kfree(user);
-    pcmcia_put_socket(s);
-out:
-    return 0;
-} /* ds_release */
-
-/*====================================================================*/
-
-static ssize_t ds_read(struct file *file, char __user *buf,
-		       size_t count, loff_t *ppos)
-{
-    struct pcmcia_socket *s;
-    user_info_t *user;
-    int ret;
-
-    pr_debug("ds_read(socket %d)\n", iminor(file->f_path.dentry->d_inode));
-
-    if (count < 4)
-	return -EINVAL;
-
-    user = file->private_data;
-    if (CHECK_USER(user))
-	return -EIO;
-
-    s = user->socket;
-    ret = wait_event_interruptible(s->queue, !queue_empty(user));
-    if (ret == 0)
-	ret = put_user(get_queued_event(user), (int __user *)buf) ? -EFAULT : 4;
-
-    return ret;
-} /* ds_read */
-
-/*====================================================================*/
-
-static ssize_t ds_write(struct file *file, const char __user *buf,
-			size_t count, loff_t *ppos)
-{
-    pr_debug("ds_write(socket %d)\n", iminor(file->f_path.dentry->d_inode));
-
-    if (count != 4)
-	return -EINVAL;
-    if ((file->f_flags & O_ACCMODE) == O_RDONLY)
-	return -EBADF;
-
-    return -EIO;
-} /* ds_write */
-
-/*====================================================================*/
-
-/* No kernel lock - fine */
-static u_int ds_poll(struct file *file, poll_table *wait)
-{
-    struct pcmcia_socket *s;
-    user_info_t *user;
-
-    pr_debug("ds_poll(socket %d)\n", iminor(file->f_path.dentry->d_inode));
-
-    user = file->private_data;
-    if (CHECK_USER(user))
-	return POLLERR;
-    s = user->socket;
-    /*
-     * We don't check for a dead socket here since that
-     * will send cardmgr into an endless spin.
-     */
-    poll_wait(file, &s->queue, wait);
-    if (!queue_empty(user))
-	return POLLIN | POLLRDNORM;
-    return 0;
-} /* ds_poll */
-
-/*====================================================================*/
-
-static int ds_ioctl(struct file *file, u_int cmd, u_long arg)
-{
-    struct pcmcia_socket *s;
-    void __user *uarg = (char __user *)arg;
-    u_int size;
-    int ret, err;
-    ds_ioctl_arg_t *buf;
-    user_info_t *user;
-
-    pr_debug("ds_ioctl(socket %d, %#x, %#lx)\n", iminor(inode), cmd, arg);
-
-    user = file->private_data;
-    if (CHECK_USER(user))
-	return -EIO;
-
-    s = user->socket;
-
-    size = (cmd & IOCSIZE_MASK) >> IOCSIZE_SHIFT;
-    if (size > sizeof(ds_ioctl_arg_t))
-	return -EINVAL;
-
-    /* Permission check */
-    if (!(cmd & IOC_OUT) && !capable(CAP_SYS_ADMIN))
-	return -EPERM;
-
-    if (cmd & IOC_IN) {
-	if (!access_ok(VERIFY_READ, uarg, size)) {
-	    pr_debug("ds_ioctl(): verify_read = %d\n", -EFAULT);
-	    return -EFAULT;
-	}
-    }
-    if (cmd & IOC_OUT) {
-	if (!access_ok(VERIFY_WRITE, uarg, size)) {
-	    pr_debug("ds_ioctl(): verify_write = %d\n", -EFAULT);
-	    return -EFAULT;
-	}
-    }
-    buf = kmalloc(sizeof(ds_ioctl_arg_t), GFP_KERNEL);
-    if (!buf)
-	return -ENOMEM;
-
-    err = ret = 0;
-
-    if (cmd & IOC_IN) {
-	if (__copy_from_user((char *)buf, uarg, size)) {
-	    err = -EFAULT;
-	    goto free_out;
-	}
-    }
-
-    switch (cmd) {
-    case DS_ADJUST_RESOURCE_INFO:
-	ret = pcmcia_adjust_resource_info(&buf->adjust);
-	break;
-    case DS_GET_CONFIGURATION_INFO:
-	if (buf->config.Function &&
-	   (buf->config.Function >= s->functions))
-	    ret = -EINVAL;
-	else {
-	    struct pcmcia_device *p_dev = get_pcmcia_device(s, buf->config.Function);
-	    ret = pccard_get_configuration_info(s, p_dev, &buf->config);
-	    pcmcia_put_dev(p_dev);
-	}
-	break;
-    case DS_GET_FIRST_TUPLE:
-	mutex_lock(&s->skt_mutex);
-	pcmcia_validate_mem(s);
-	mutex_unlock(&s->skt_mutex);
-	ret = pccard_get_first_tuple(s, BIND_FN_ALL, &buf->tuple);
-	break;
-    case DS_GET_NEXT_TUPLE:
-	ret = pccard_get_next_tuple(s, BIND_FN_ALL, &buf->tuple);
-	break;
-    case DS_GET_TUPLE_DATA:
-	buf->tuple.TupleData = buf->tuple_parse.data;
-	buf->tuple.TupleDataMax = sizeof(buf->tuple_parse.data);
-	ret = pccard_get_tuple_data(s, &buf->tuple);
-	break;
-    case DS_PARSE_TUPLE:
-	buf->tuple.TupleData = buf->tuple_parse.data;
-	ret = pcmcia_parse_tuple(&buf->tuple, &buf->tuple_parse.parse);
-	break;
-    case DS_RESET_CARD:
-	ret = pcmcia_reset_card(s);
-	break;
-    case DS_GET_STATUS:
-	    if (buf->status.Function &&
-		(buf->status.Function >= s->functions))
-		    ret = -EINVAL;
-	    else {
-		    struct pcmcia_device *p_dev = get_pcmcia_device(s, buf->status.Function);
-		    ret = pccard_get_status(s, p_dev, &buf->status);
-		    pcmcia_put_dev(p_dev);
-	    }
-	    break;
-    case DS_VALIDATE_CIS:
-	mutex_lock(&s->skt_mutex);
-	pcmcia_validate_mem(s);
-	mutex_unlock(&s->skt_mutex);
-	ret = pccard_validate_cis(s, &buf->cisinfo.Chains);
-	break;
-    case DS_SUSPEND_CARD:
-	pcmcia_parse_uevents(s, PCMCIA_UEVENT_SUSPEND);
-	break;
-    case DS_RESUME_CARD:
-	pcmcia_parse_uevents(s, PCMCIA_UEVENT_RESUME);
-	break;
-    case DS_EJECT_CARD:
-	pcmcia_parse_uevents(s, PCMCIA_UEVENT_EJECT);
-	break;
-    case DS_INSERT_CARD:
-	pcmcia_parse_uevents(s, PCMCIA_UEVENT_INSERT);
-	break;
-    case DS_ACCESS_CONFIGURATION_REGISTER:
-	if ((buf->conf_reg.Action == CS_WRITE) && !capable(CAP_SYS_ADMIN)) {
-	    err = -EPERM;
-	    goto free_out;
-	}
-
-	ret = -EINVAL;
-
-	if (!(buf->conf_reg.Function &&
-	     (buf->conf_reg.Function >= s->functions))) {
-		struct pcmcia_device *p_dev = get_pcmcia_device(s, buf->conf_reg.Function);
-		if (p_dev) {
-			ret = pcmcia_access_configuration_register(p_dev, &buf->conf_reg);
-			pcmcia_put_dev(p_dev);
-		}
-	}
-	break;
-    case DS_GET_FIRST_REGION:
-    case DS_GET_NEXT_REGION:
-    case DS_BIND_MTD:
-	if (!capable(CAP_SYS_ADMIN)) {
-		err = -EPERM;
-		goto free_out;
-	} else {
-			printk_once(KERN_WARNING
-				"2.6. kernels use pcmciamtd instead of memory_cs.c and do not require special\n");
-			printk_once(KERN_WARNING "MTD handling any more.\n");
-	}
-	err = -EINVAL;
-	goto free_out;
-	break;
-    case DS_GET_FIRST_WINDOW:
-	ret = pcmcia_get_window(s, &buf->win_info.handle, 1,
-			&buf->win_info.window);
-	break;
-    case DS_GET_NEXT_WINDOW:
-	ret = pcmcia_get_window(s, &buf->win_info.handle,
-			buf->win_info.handle + 1, &buf->win_info.window);
-	break;
-    case DS_GET_MEM_PAGE:
-	ret = pcmcia_get_mem_page(s, buf->win_info.handle,
-			   &buf->win_info.map);
-	break;
-    case DS_REPLACE_CIS:
-	ret = pcmcia_replace_cis(s, buf->cisdump.Data, buf->cisdump.Length);
-	break;
-    case DS_BIND_REQUEST:
-	if (!capable(CAP_SYS_ADMIN)) {
-		err = -EPERM;
-		goto free_out;
-	}
-	err = bind_request(s, &buf->bind_info);
-	break;
-    case DS_GET_DEVICE_INFO:
-	err = get_device_info(s, &buf->bind_info, 1);
-	break;
-    case DS_GET_NEXT_DEVICE:
-	err = get_device_info(s, &buf->bind_info, 0);
-	break;
-    case DS_UNBIND_REQUEST:
-	err = 0;
-	break;
-    default:
-	err = -EINVAL;
-    }
-
-    if ((err == 0) && (ret != 0)) {
-	pr_debug("ds_ioctl: ret = %d\n", ret);
-	switch (ret) {
-	case -ENODEV:
-	case -EINVAL:
-	case -EBUSY:
-	case -ENOSYS:
-	    err = ret;
-	    break;
-	case -ENOMEM:
-	    err = -ENOSPC; break;
-	case -ENOSPC:
-	    err = -ENODATA; break;
-	default:
-	    err = -EIO; break;
-	}
-    }
-
-    if (cmd & IOC_OUT) {
-	if (__copy_to_user(uarg, (char *)buf, size))
-		err = -EFAULT;
-    }
-
-free_out:
-    kfree(buf);
-    return err;
-} /* ds_ioctl */
-
-static long ds_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	int ret;
-
-	lock_kernel();
-	ret = ds_ioctl(file, cmd, arg);
-	unlock_kernel();
-
-	return ret;
-}
-
-
-/*====================================================================*/
-
-static const struct file_operations ds_fops = {
-	.owner		= THIS_MODULE,
-	.open		= ds_open,
-	.release	= ds_release,
-	.unlocked_ioctl	= ds_unlocked_ioctl,
-	.read		= ds_read,
-	.write		= ds_write,
-	.poll		= ds_poll,
-};
-
-void __init pcmcia_setup_ioctl(void)
-{
-	int i;
-
-	/* Set up character device for user mode clients */
-	i = register_chrdev(0, "pcmcia", &ds_fops);
-	if (i < 0)
-		printk(KERN_NOTICE "unable to find a free device # for "
-		       "Driver Services (error=%d)\n", i);
-	else
-		major_dev = i;
-
-#ifdef CONFIG_PROC_FS
-	proc_pccard = proc_mkdir("bus/pccard", NULL);
-	if (proc_pccard)
-		proc_create("drivers", 0, proc_pccard, &pccard_drivers_proc_fops);
-#endif
-}
-
-
-void __exit pcmcia_cleanup_ioctl(void)
-{
-#ifdef CONFIG_PROC_FS
-	if (proc_pccard) {
-		remove_proc_entry("drivers", proc_pccard);
-		remove_proc_entry("bus/pccard", NULL);
-	}
-#endif
-	if (major_dev != -1)
-		unregister_chrdev(major_dev, "pcmcia");
-}
diff --git a/drivers/pcmcia/rsrc_iodyn.c b/drivers/pcmcia/rsrc_iodyn.c
index d0bf3502106..6ed7bf171ca 100644
--- a/drivers/pcmcia/rsrc_iodyn.c
+++ b/drivers/pcmcia/rsrc_iodyn.c
@@ -164,8 +164,6 @@ struct pccard_resource_ops pccard_iodyn_ops = {
 	.validate_mem = NULL,
 	.find_io = iodyn_find_io,
 	.find_mem = NULL,
-	.add_io = NULL,
-	.add_mem = NULL,
 	.init = static_init,
 	.exit = NULL,
 };
diff --git a/drivers/pcmcia/rsrc_mgr.c b/drivers/pcmcia/rsrc_mgr.c
index 142efac3c38..b12ecf7c32b 100644
--- a/drivers/pcmcia/rsrc_mgr.c
+++ b/drivers/pcmcia/rsrc_mgr.c
@@ -62,8 +62,6 @@ struct pccard_resource_ops pccard_static_ops = {
 	.validate_mem = NULL,
 	.find_io = static_find_io,
 	.find_mem = NULL,
-	.add_io = NULL,
-	.add_mem = NULL,
 	.init = static_init,
 	.exit = NULL,
 };
diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c
index dcd1a4ad3d6..d217dc1d426 100644
--- a/drivers/pcmcia/rsrc_nonstatic.c
+++ b/drivers/pcmcia/rsrc_nonstatic.c
@@ -1055,8 +1055,6 @@ struct pccard_resource_ops pccard_nonstatic_ops = {
 	.validate_mem = pcmcia_nonstatic_validate_mem,
 	.find_io = nonstatic_find_io,
 	.find_mem = nonstatic_find_mem_region,
-	.add_io = adjust_io,
-	.add_mem = adjust_memory,
 	.init = nonstatic_init,
 	.exit = nonstatic_release_resource_db,
 };
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index c180165fbd3..7d7721e8603 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -117,11 +117,6 @@ struct pcmcia_device {
 	u64			dma_mask;
 	struct device		dev;
 
-#ifdef CONFIG_PCMCIA_IOCTL
-	/* device driver wanted by cardmgr */
-	struct pcmcia_driver	*cardmgr;
-#endif
-
 	/* data private to drivers */
 	void			*priv;
 };
@@ -211,208 +206,4 @@ void pcmcia_disable_device(struct pcmcia_device *p_dev);
 
 #endif /* __KERNEL__ */
 
-
-
-/* Below, there are only definitions which are used by
- * - the PCMCIA ioctl
- * - deprecated PCMCIA userspace tools only
- *
- * here be dragons ... here be dragons ... here be dragons ... here be drag
- */
-
-#if defined(CONFIG_PCMCIA_IOCTL) || !defined(__KERNEL__)
-
-#if defined(__arm__) || defined(__mips__) || defined(__avr32__) || \
-	defined(__bfin__)
-/* This (ioaddr_t) is exposed to userspace & hence cannot be changed. */
-typedef u_int   ioaddr_t;
-#else
-typedef u_short	ioaddr_t;
-#endif
-
-/* for AdjustResourceInfo */
-typedef struct adjust_t {
-	u_int			Action;
-	u_int			Resource;
-	u_int			Attributes;
-	union {
-		struct memory {
-			u_long		Base;
-			u_long		Size;
-		} memory;
-		struct io {
-			ioaddr_t	BasePort;
-			ioaddr_t	NumPorts;
-			u_int		IOAddrLines;
-		} io;
-		struct irq {
-			u_int		IRQ;
-		} irq;
-	} resource;
-} adjust_t;
-
-/* Action field */
-#define REMOVE_MANAGED_RESOURCE		1
-#define ADD_MANAGED_RESOURCE		2
-#define GET_FIRST_MANAGED_RESOURCE	3
-#define GET_NEXT_MANAGED_RESOURCE	4
-/* Resource field */
-#define RES_MEMORY_RANGE		1
-#define RES_IO_RANGE			2
-#define RES_IRQ				3
-/* Attribute field */
-#define RES_IRQ_TYPE			0x03
-#define RES_IRQ_TYPE_EXCLUSIVE		0
-#define RES_IRQ_TYPE_TIME		1
-#define RES_IRQ_TYPE_DYNAMIC		2
-#define RES_IRQ_CSC			0x04
-#define RES_SHARED			0x08
-#define RES_RESERVED			0x10
-#define RES_ALLOCATED			0x20
-#define RES_REMOVED			0x40
-
-
-typedef struct tuple_parse_t {
-	tuple_t			tuple;
-	cisdata_t		data[255];
-	cisparse_t		parse;
-} tuple_parse_t;
-
-typedef struct win_info_t {
-	window_handle_t		handle;
-	win_req_t		window;
-	memreq_t		map;
-} win_info_t;
-
-typedef struct bind_info_t {
-	dev_info_t		dev_info;
-	u_char			function;
-	struct pcmcia_device	*instance;
-	char			name[DEV_NAME_LEN];
-	u_short			major, minor;
-	void			*next;
-} bind_info_t;
-
-typedef struct mtd_info_t {
-	dev_info_t     		dev_info;
-	u_int			Attributes;
-	u_int			CardOffset;
-} mtd_info_t;
-
-typedef struct region_info_t {
-	u_int			Attributes;
-	u_int			CardOffset;
-	u_int			RegionSize;
-	u_int			AccessSpeed;
-	u_int			BlockSize;
-	u_int			PartMultiple;
-	u_char			JedecMfr, JedecInfo;
-	memory_handle_t		next;
-} region_info_t;
-
-#define REGION_TYPE		0x0001
-#define REGION_TYPE_CM		0x0000
-#define REGION_TYPE_AM		0x0001
-#define REGION_PREFETCH		0x0008
-#define REGION_CACHEABLE	0x0010
-#define REGION_BAR_MASK		0xe000
-#define REGION_BAR_SHIFT	13
-
-/* For ReplaceCIS */
-typedef struct cisdump_t {
-	u_int			Length;
-	cisdata_t		Data[CISTPL_MAX_CIS_SIZE];
-} cisdump_t;
-
-/* for GetConfigurationInfo */
-typedef struct config_info_t {
-	u_char			Function;
-	u_int			Attributes;
-	u_int			Vcc, Vpp1, Vpp2;
-	u_int			IntType;
-	u_int			ConfigBase;
-	u_char			Status, Pin, Copy, Option, ExtStatus;
-	u_int			Present;
-	u_int			CardValues;
-	u_int			AssignedIRQ;
-	u_int			IRQAttributes;
-	ioaddr_t		BasePort1;
-	ioaddr_t		NumPorts1;
-	u_int			Attributes1;
-	ioaddr_t		BasePort2;
-	ioaddr_t		NumPorts2;
-	u_int			Attributes2;
-	u_int			IOAddrLines;
-} config_info_t;
-
-/* For ValidateCIS */
-typedef struct cisinfo_t {
-	u_int			Chains;
-} cisinfo_t;
-
-typedef struct cs_status_t {
-	u_char			Function;
-	event_t 		CardState;
-	event_t			SocketState;
-} cs_status_t;
-
-typedef union ds_ioctl_arg_t {
-	adjust_t		adjust;
-	config_info_t		config;
-	tuple_t			tuple;
-	tuple_parse_t		tuple_parse;
-	client_req_t		client_req;
-	cs_status_t		status;
-	conf_reg_t		conf_reg;
-	cisinfo_t		cisinfo;
-	region_info_t		region;
-	bind_info_t		bind_info;
-	mtd_info_t		mtd_info;
-	win_info_t		win_info;
-	cisdump_t		cisdump;
-} ds_ioctl_arg_t;
-
-#define DS_ADJUST_RESOURCE_INFO			_IOWR('d',  2, adjust_t)
-#define DS_GET_CONFIGURATION_INFO		_IOWR('d',  3, config_info_t)
-#define DS_GET_FIRST_TUPLE			_IOWR('d',  4, tuple_t)
-#define DS_GET_NEXT_TUPLE			_IOWR('d',  5, tuple_t)
-#define DS_GET_TUPLE_DATA			_IOWR('d',  6, tuple_parse_t)
-#define DS_PARSE_TUPLE				_IOWR('d',  7, tuple_parse_t)
-#define DS_RESET_CARD				_IO  ('d',  8)
-#define DS_GET_STATUS				_IOWR('d',  9, cs_status_t)
-#define DS_ACCESS_CONFIGURATION_REGISTER	_IOWR('d', 10, conf_reg_t)
-#define DS_VALIDATE_CIS				_IOR ('d', 11, cisinfo_t)
-#define DS_SUSPEND_CARD				_IO  ('d', 12)
-#define DS_RESUME_CARD				_IO  ('d', 13)
-#define DS_EJECT_CARD				_IO  ('d', 14)
-#define DS_INSERT_CARD				_IO  ('d', 15)
-#define DS_GET_FIRST_REGION			_IOWR('d', 16, region_info_t)
-#define DS_GET_NEXT_REGION			_IOWR('d', 17, region_info_t)
-#define DS_REPLACE_CIS				_IOWR('d', 18, cisdump_t)
-#define DS_GET_FIRST_WINDOW			_IOR ('d', 19, win_info_t)
-#define DS_GET_NEXT_WINDOW			_IOWR('d', 20, win_info_t)
-#define DS_GET_MEM_PAGE				_IOWR('d', 21, win_info_t)
-
-#define DS_BIND_REQUEST				_IOWR('d', 60, bind_info_t)
-#define DS_GET_DEVICE_INFO			_IOWR('d', 61, bind_info_t)
-#define DS_GET_NEXT_DEVICE			_IOWR('d', 62, bind_info_t)
-#define DS_UNBIND_REQUEST			_IOW ('d', 63, bind_info_t)
-#define DS_BIND_MTD				_IOWR('d', 64, mtd_info_t)
-
-
-/* used in userspace only */
-#define CS_IN_USE			0x1e
-
-#define INFO_MASTER_CLIENT	0x01
-#define INFO_IO_CLIENT		0x02
-#define INFO_MTD_CLIENT		0x04
-#define INFO_MEM_CLIENT		0x08
-#define MAX_NUM_CLIENTS		3
-
-#define INFO_CARD_SHARE		0x10
-#define INFO_CARD_EXCL		0x20
-
-
-#endif /* !defined(__KERNEL__) || defined(CONFIG_PCMCIA_IOCTL) */
-
 #endif /* _LINUX_DS_H */
diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h
index 764281b2921..66740b764da 100644
--- a/include/pcmcia/ss.h
+++ b/include/pcmcia/ss.h
@@ -220,12 +220,10 @@ struct pcmcia_socket {
 
 	/* 16-bit state: */
 	struct {
-		/* "master" ioctl is used */
-		u8			busy:1;
 		/* the PCMCIA card consists of two pseudo devices */
 		u8			has_pfc:1;
 
-		u8			reserved:6;
+		u8			reserved:7;
 	} pcmcia_state;
 
 	/* non-zero if PCMCIA card is present */
@@ -234,10 +232,6 @@ struct pcmcia_socket {
 	/* IRQ to be used by PCMCIA devices. May not be IRQ 0. */
 	unsigned int			pcmcia_irq;
 
-#ifdef CONFIG_PCMCIA_IOCTL
-	struct user_info_t		*user;
-	wait_queue_head_t		queue;
-#endif /* CONFIG_PCMCIA_IOCTL */
 #endif /* CONFIG_PCMCIA */
 
 	/* socket device */
-- 
cgit v1.2.3-70-g09d2


From 134716f19bc53dc22e8aba34f2af195b805328b5 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 11 Jul 2010 10:45:02 +0200
Subject: pcmcia: remove obsolete CS_EVENT_ definitions

Remove some definitions which became obsolete when the central
event handler got removed.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 include/pcmcia/cs.h | 45 ---------------------------------------------
 1 file changed, 45 deletions(-)

(limited to 'include')

diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h
index 57d8d039356..c943c967ac7 100644
--- a/include/pcmcia/cs.h
+++ b/include/pcmcia/cs.h
@@ -36,12 +36,6 @@ typedef struct conf_reg_t {
 #define REMOVE_MANAGED_RESOURCE		1
 #define ADD_MANAGED_RESOURCE		2
 
-
-typedef struct event_callback_args_t {
-	struct pcmcia_device	*client_handle;
-	void			*client_data;
-} event_callback_args_t;
-
 /* For CardValues field */
 #define CV_OPTION_VALUE		0x01
 #define CV_STATUS_VALUE		0x02
@@ -124,13 +118,6 @@ typedef struct io_req_t {
 #define IRQ_PULSE_ID		0x40
 #define IRQ_SHARE_ID		0x80
 
-typedef struct eventmask_t {
-    u_int	Attributes;
-    u_int	EventMask;
-} eventmask_t;
-
-#define CONF_EVENT_MASK_VALID	0x01
-
 /* Configuration registers present */
 #define PRESENT_OPTION		0x001
 #define PRESENT_STATUS		0x002
@@ -186,39 +173,7 @@ typedef struct win_req_t {
 #define WIN_BAR_MASK		0xe000
 #define WIN_BAR_SHIFT		13
 
-typedef struct error_info_t {
-    int		func;
-    int		retcode;
-} error_info_t;
-
 /* Flag to bind to all functions */
 #define BIND_FN_ALL	0xff
 
-/* Events */
-#define CS_EVENT_PRI_LOW		0
-#define CS_EVENT_PRI_HIGH		1
-
-#define CS_EVENT_WRITE_PROTECT		0x000001
-#define CS_EVENT_CARD_LOCK		0x000002
-#define CS_EVENT_CARD_INSERTION		0x000004
-#define CS_EVENT_CARD_REMOVAL		0x000008
-#define CS_EVENT_BATTERY_DEAD		0x000010
-#define CS_EVENT_BATTERY_LOW		0x000020
-#define CS_EVENT_READY_CHANGE		0x000040
-#define CS_EVENT_CARD_DETECT		0x000080
-#define CS_EVENT_RESET_REQUEST		0x000100
-#define CS_EVENT_RESET_PHYSICAL		0x000200
-#define CS_EVENT_CARD_RESET		0x000400
-#define CS_EVENT_REGISTRATION_COMPLETE	0x000800
-#define CS_EVENT_PM_SUSPEND		0x002000
-#define CS_EVENT_PM_RESUME		0x004000
-#define CS_EVENT_INSERTION_REQUEST	0x008000
-#define CS_EVENT_EJECTION_REQUEST	0x010000
-#define CS_EVENT_MTD_REQUEST		0x020000
-#define CS_EVENT_ERASE_COMPLETE		0x040000
-#define CS_EVENT_REQUEST_ATTENTION	0x080000
-#define CS_EVENT_CB_DETECT		0x100000
-#define CS_EVENT_3VCARD			0x200000
-#define CS_EVENT_XVCARD			0x400000
-
 #endif /* _LINUX_CS_H */
-- 
cgit v1.2.3-70-g09d2


From ce3f9d71bd9c4268698109ad425625a2a8f51e22 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Wed, 21 Jul 2010 14:43:05 +0200
Subject: pcmcia: remove unused flag, simplify headers

As we only provide one way to set up resources now, we can remove
the resource-setup-related bitfield (except resource_setup_done).
In addition, pcmcia_state only consisted of one entry, so remove
this bitfield as well.

Suggested-by: Komuro <komurojun-mbn@nifty.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/pcmcia/ds.c             | 14 +++++++-------
 drivers/pcmcia/rsrc_nonstatic.c |  4 ----
 include/pcmcia/ss.h             | 18 +++---------------
 3 files changed, 10 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index 78b5b65f9f7..08617719d3a 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -294,7 +294,7 @@ static int pcmcia_device_probe(struct device *dev)
 	}
 
 	mutex_lock(&s->ops_mutex);
-	if ((s->pcmcia_state.has_pfc) &&
+	if ((s->pcmcia_pfc) &&
 	    (p_dev->socket->device_count == 1) && (p_dev->device_no == 0))
 		pcmcia_parse_uevents(s, PCMCIA_UEVENT_REQUERY);
 	mutex_unlock(&s->ops_mutex);
@@ -359,7 +359,7 @@ static int pcmcia_device_remove(struct device *dev)
 	 * pseudo multi-function card, we need to unbind
 	 * all devices
 	 */
-	if ((p_dev->socket->pcmcia_state.has_pfc) &&
+	if ((p_dev->socket->pcmcia_pfc) &&
 	    (p_dev->socket->device_count > 0) &&
 	    (p_dev->device_no == 0))
 		pcmcia_card_remove(p_dev->socket, p_dev);
@@ -681,7 +681,7 @@ static void pcmcia_requery(struct pcmcia_socket *s)
 	 * call pcmcia_device_add() -- which will fail if both
 	 * devices are already registered. */
 	mutex_lock(&s->ops_mutex);
-	has_pfc = s->pcmcia_state.has_pfc;
+	has_pfc = s->pcmcia_pfc;
 	mutex_unlock(&s->ops_mutex);
 	if (has_pfc)
 		pcmcia_device_add(s, 0);
@@ -813,7 +813,7 @@ static inline int pcmcia_devmatch(struct pcmcia_device *dev,
 	if (did->match_flags & PCMCIA_DEV_ID_MATCH_DEVICE_NO) {
 		dev_dbg(&dev->dev, "this is a pseudo-multi-function device\n");
 		mutex_lock(&dev->socket->ops_mutex);
-		dev->socket->pcmcia_state.has_pfc = 1;
+		dev->socket->pcmcia_pfc = 1;
 		mutex_unlock(&dev->socket->ops_mutex);
 		if (dev->device_no != did->device_no)
 			return 0;
@@ -827,7 +827,7 @@ static inline int pcmcia_devmatch(struct pcmcia_device *dev,
 
 		/* if this is a pseudo-multi-function device,
 		 * we need explicit matches */
-		if (dev->socket->pcmcia_state.has_pfc)
+		if (dev->socket->pcmcia_pfc)
 			return 0;
 		if (dev->device_no)
 			return 0;
@@ -1226,7 +1226,7 @@ static int pcmcia_bus_add(struct pcmcia_socket *skt)
 	atomic_set(&skt->present, 1);
 
 	mutex_lock(&skt->ops_mutex);
-	skt->pcmcia_state.has_pfc = 0;
+	skt->pcmcia_pfc = 0;
 	destroy_cis_cache(skt); /* to be on the safe side... */
 	mutex_unlock(&skt->ops_mutex);
 
@@ -1317,7 +1317,7 @@ static int __devinit pcmcia_bus_add_socket(struct device *dev,
 	}
 
 	INIT_LIST_HEAD(&socket->devices_list);
-	memset(&socket->pcmcia_state, 0, sizeof(u8));
+	socket->pcmcia_pfc = 0;
 	socket->device_count = 0;
 	atomic_set(&socket->present, 0);
 
diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c
index d217dc1d426..13245a2986f 100644
--- a/drivers/pcmcia/rsrc_nonstatic.c
+++ b/drivers/pcmcia/rsrc_nonstatic.c
@@ -1113,8 +1113,6 @@ static ssize_t store_io_db(struct device *dev,
 
 	mutex_lock(&s->ops_mutex);
 	ret = adjust_io(s, add, start_addr, end_addr);
-	if (!ret)
-		s->resource_setup_new = 1;
 	mutex_unlock(&s->ops_mutex);
 
 	return ret ? ret : count;
@@ -1181,8 +1179,6 @@ static ssize_t store_mem_db(struct device *dev,
 
 	mutex_lock(&s->ops_mutex);
 	ret = adjust_memory(s, add, start_addr, end_addr);
-	if (!ret)
-		s->resource_setup_new = 1;
 	mutex_unlock(&s->ops_mutex);
 
 	return ret ? ret : count;
diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h
index 66740b764da..aeac2710983 100644
--- a/include/pcmcia/ss.h
+++ b/include/pcmcia/ss.h
@@ -162,17 +162,10 @@ struct pcmcia_socket {
 	u_int				pci_irq;
 	struct pci_dev			*cb_dev;
 
-
 	/* socket setup is done so resources should be able to be allocated.
 	 * Only if set to 1, calls to find_{io,mem}_region are handled, and
 	 * insertio events are actually managed by the PCMCIA layer.*/
-	u8				resource_setup_done:1;
-
-	/* It's old if resource setup is done using adjust_resource_info() */
-	u8				resource_setup_old:1;
-	u8				resource_setup_new:1;
-
-	u8				reserved:5;
+	u8				resource_setup_done;
 
 	/* socket operations */
 	struct pccard_operations	*ops;
@@ -218,13 +211,8 @@ struct pcmcia_socket {
 	 * incorrectness and change */
 	u8				device_count;
 
-	/* 16-bit state: */
-	struct {
-		/* the PCMCIA card consists of two pseudo devices */
-		u8			has_pfc:1;
-
-		u8			reserved:7;
-	} pcmcia_state;
+	/* does the PCMCIA card consist of two pseudo devices? */
+	u8				pcmcia_pfc;
 
 	/* non-zero if PCMCIA card is present */
 	atomic_t			present;
-- 
cgit v1.2.3-70-g09d2


From ac8b422838046ffc26be4874a3cbae0d313f4209 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Wed, 21 Jul 2010 22:38:13 +0200
Subject: pcmcia: remove cs_types.h

Remove cs_types.h, which is no longer needed: most definitions aren't
used at all, a few can be done away with, and the two remaining definitions
(typedefs, unfortunately) may be moved to more specific places.

CC: linux-ide@vger.kernel.org
CC: linux-usb@vger.kernel.org
CC: laforge@gnumonks.org
CC: linux-mtd@lists.infradead.org
CC: alsa-devel@alsa-project.org
CC: linux-serial@vger.kernel.org
Acked-by: Marcel Holtmann <marcel@holtmann.org> (for drivers/bluetooth/)
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 Documentation/pcmcia/driver-changes.txt          |  5 +++
 drivers/ata/pata_pcmcia.c                        |  1 -
 drivers/bluetooth/bluecard_cs.c                  |  1 -
 drivers/bluetooth/bt3c_cs.c                      |  1 -
 drivers/bluetooth/btuart_cs.c                    |  1 -
 drivers/bluetooth/dtl1_cs.c                      |  1 -
 drivers/char/pcmcia/cm4000_cs.c                  |  1 -
 drivers/char/pcmcia/cm4040_cs.c                  |  1 -
 drivers/char/pcmcia/ipwireless/main.h            |  1 -
 drivers/char/pcmcia/ipwireless/tty.h             |  1 -
 drivers/char/pcmcia/synclink_cs.c                |  1 -
 drivers/ide/ide-cs.c                             |  1 -
 drivers/isdn/hardware/avm/avm_cs.c               |  1 -
 drivers/isdn/hisax/avma1_cs.c                    |  1 -
 drivers/isdn/hisax/elsa_cs.c                     |  1 -
 drivers/isdn/hisax/sedlbauer_cs.c                |  1 -
 drivers/isdn/hisax/teles_cs.c                    |  1 -
 drivers/mmc/host/sdricoh_cs.c                    |  1 -
 drivers/mtd/maps/pcmciamtd.c                     |  1 -
 drivers/net/pcmcia/3c574_cs.c                    |  1 -
 drivers/net/pcmcia/3c589_cs.c                    |  1 -
 drivers/net/pcmcia/axnet_cs.c                    |  1 -
 drivers/net/pcmcia/com20020_cs.c                 |  1 -
 drivers/net/pcmcia/fmvj18x_cs.c                  |  1 -
 drivers/net/pcmcia/ibmtr_cs.c                    |  1 -
 drivers/net/pcmcia/nmclan_cs.c                   |  1 -
 drivers/net/pcmcia/pcnet_cs.c                    |  5 +--
 drivers/net/pcmcia/smc91c92_cs.c                 |  1 -
 drivers/net/pcmcia/xirc2ps_cs.c                  |  1 -
 drivers/net/wireless/airo_cs.c                   |  1 -
 drivers/net/wireless/atmel_cs.c                  |  1 -
 drivers/net/wireless/b43/pcmcia.c                |  1 -
 drivers/net/wireless/hostap/hostap_cs.c          |  3 +-
 drivers/net/wireless/libertas/if_cs.c            |  1 -
 drivers/net/wireless/orinoco/orinoco_cs.c        |  1 -
 drivers/net/wireless/orinoco/spectrum_cs.c       |  1 -
 drivers/net/wireless/ray_cs.c                    |  1 -
 drivers/net/wireless/wl3501_cs.c                 | 10 +-----
 drivers/parport/parport_cs.c                     |  1 -
 drivers/pcmcia/au1000_generic.h                  |  1 -
 drivers/pcmcia/au1000_pb1x00.c                   |  2 --
 drivers/pcmcia/cistpl.c                          |  1 -
 drivers/pcmcia/cs.c                              |  1 -
 drivers/pcmcia/db1xxx_ss.c                       |  1 -
 drivers/pcmcia/ds.c                              |  1 -
 drivers/pcmcia/i82092.c                          |  1 -
 drivers/pcmcia/i82365.c                          |  1 -
 drivers/pcmcia/m32r_cfc.c                        |  1 -
 drivers/pcmcia/m32r_pcc.c                        |  1 -
 drivers/pcmcia/m8xx_pcmcia.c                     |  1 -
 drivers/pcmcia/pcmcia_cis.c                      |  1 -
 drivers/pcmcia/pcmcia_resource.c                 |  1 -
 drivers/pcmcia/pd6729.c                          |  1 -
 drivers/pcmcia/pxa2xx_base.c                     |  1 -
 drivers/pcmcia/rsrc_iodyn.c                      |  1 -
 drivers/pcmcia/rsrc_mgr.c                        |  1 -
 drivers/pcmcia/rsrc_nonstatic.c                  |  1 -
 drivers/pcmcia/sa1100_generic.c                  |  1 -
 drivers/pcmcia/soc_common.h                      |  1 -
 drivers/pcmcia/socket_sysfs.c                    |  1 -
 drivers/pcmcia/tcic.c                            |  1 -
 drivers/pcmcia/xxs1500_ss.c                      |  1 -
 drivers/pcmcia/yenta_socket.c                    |  1 -
 drivers/scsi/pcmcia/aha152x_stub.c               |  1 -
 drivers/scsi/pcmcia/fdomain_stub.c               |  1 -
 drivers/scsi/pcmcia/nsp_cs.c                     |  1 -
 drivers/scsi/pcmcia/qlogic_stub.c                |  1 -
 drivers/scsi/pcmcia/sym53c500_cs.c               |  1 -
 drivers/serial/serial_cs.c                       |  1 -
 drivers/ssb/main.c                               |  1 -
 drivers/ssb/pcmcia.c                             |  1 -
 drivers/ssb/scan.c                               |  1 -
 drivers/staging/comedi/drivers/cb_das16_cs.c     |  1 -
 drivers/staging/comedi/drivers/das08_cs.c        |  1 -
 drivers/staging/comedi/drivers/ni_daq_700.c      |  1 -
 drivers/staging/comedi/drivers/ni_daq_dio24.c    |  1 -
 drivers/staging/comedi/drivers/ni_labpc_cs.c     |  1 -
 drivers/staging/comedi/drivers/ni_mio_cs.c       |  1 -
 drivers/staging/comedi/drivers/quatech_daqp_cs.c |  1 -
 drivers/staging/wlags49_h2/wl_cs.c               |  1 -
 drivers/staging/wlags49_h2/wl_internal.h         |  1 -
 drivers/telephony/ixj_pcmcia.c                   |  1 -
 drivers/usb/host/sl811_cs.c                      |  5 +--
 include/pcmcia/cistpl.h                          |  2 ++
 include/pcmcia/cs.h                              | 10 +-----
 include/pcmcia/cs_types.h                        | 40 ------------------------
 include/pcmcia/ds.h                              |  3 +-
 include/pcmcia/ss.h                              |  1 -
 sound/pcmcia/pdaudiocf/pdaudiocf.h               |  1 -
 sound/pcmcia/vx/vxpocket.h                       |  1 -
 90 files changed, 14 insertions(+), 151 deletions(-)
 delete mode 100644 include/pcmcia/cs_types.h

(limited to 'include')

diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt
index 61bc4e94311..ff5f0be2470 100644
--- a/Documentation/pcmcia/driver-changes.txt
+++ b/Documentation/pcmcia/driver-changes.txt
@@ -1,4 +1,9 @@
 This file details changes in 2.6 which affect PCMCIA card driver authors:
+* No dev_info_t, no cs_types.h (as of 2.6.36)
+   dev_info_t and a few other typedefs are removed. No longer use them
+   in PCMCIA device drivers. Also, do not include pcmcia/cs_types.h, as
+   this file is gone.
+
 * No dev_node_t (as of 2.6.35)
    There is no more need to fill out a "dev_node_t" structure.
 
diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c
index 118c28e8aba..3dcb2b1b60e 100644
--- a/drivers/ata/pata_pcmcia.c
+++ b/drivers/ata/pata_pcmcia.c
@@ -34,7 +34,6 @@
 #include <linux/ata.h>
 #include <linux/libata.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c
index 6d34f405a2f..eb085de1671 100644
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -39,7 +39,6 @@
 #include <linux/skbuff.h>
 #include <linux/io.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ciscode.h>
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c
index 21e05fdc912..457b603f867 100644
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -45,7 +45,6 @@
 #include <linux/device.h>
 #include <linux/firmware.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ciscode.h>
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c
index 4ed7288f99d..e7e0a17aecc 100644
--- a/drivers/bluetooth/btuart_cs.c
+++ b/drivers/bluetooth/btuart_cs.c
@@ -41,7 +41,6 @@
 #include <asm/system.h>
 #include <asm/io.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ciscode.h>
diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c
index ef044d55cb2..7c94aad0b79 100644
--- a/drivers/bluetooth/dtl1_cs.c
+++ b/drivers/bluetooth/dtl1_cs.c
@@ -41,7 +41,6 @@
 #include <asm/system.h>
 #include <asm/io.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ciscode.h>
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index e7956acf2ad..a8be2a7906e 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -34,7 +34,6 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c
index c0775c844e0..44adae98c57 100644
--- a/drivers/char/pcmcia/cm4040_cs.c
+++ b/drivers/char/pcmcia/cm4040_cs.c
@@ -29,7 +29,6 @@
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/char/pcmcia/ipwireless/main.h b/drivers/char/pcmcia/ipwireless/main.h
index 96d0ef31b17..c207be87b59 100644
--- a/drivers/char/pcmcia/ipwireless/main.h
+++ b/drivers/char/pcmcia/ipwireless/main.h
@@ -21,7 +21,6 @@
 #include <linux/sched.h>
 #include <linux/types.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/char/pcmcia/ipwireless/tty.h b/drivers/char/pcmcia/ipwireless/tty.h
index 4da6c201f72..3e163d4cab1 100644
--- a/drivers/char/pcmcia/ipwireless/tty.h
+++ b/drivers/char/pcmcia/ipwireless/tty.h
@@ -21,7 +21,6 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 308903ec8bf..522992ed6e4 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -70,7 +70,6 @@
 #include <linux/workqueue.h>
 #include <linux/hdlc.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c
index 0b7815d2581..27dbab84142 100644
--- a/drivers/ide/ide-cs.c
+++ b/drivers/ide/ide-cs.c
@@ -43,7 +43,6 @@
 #include <asm/io.h>
 #include <asm/system.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/isdn/hardware/avm/avm_cs.c b/drivers/isdn/hardware/avm/avm_cs.c
index f410d0eb2fe..e804a01ecdf 100644
--- a/drivers/isdn/hardware/avm/avm_cs.c
+++ b/drivers/isdn/hardware/avm/avm_cs.c
@@ -20,7 +20,6 @@
 #include <asm/io.h>
 #include <asm/system.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ciscode.h>
diff --git a/drivers/isdn/hisax/avma1_cs.c b/drivers/isdn/hisax/avma1_cs.c
index a80a7617f16..49e141e49aa 100644
--- a/drivers/isdn/hisax/avma1_cs.c
+++ b/drivers/isdn/hisax/avma1_cs.c
@@ -20,7 +20,6 @@
 #include <asm/io.h>
 #include <asm/system.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/isdn/hisax/elsa_cs.c b/drivers/isdn/hisax/elsa_cs.c
index 218927e3a4e..425deea1dd6 100644
--- a/drivers/isdn/hisax/elsa_cs.c
+++ b/drivers/isdn/hisax/elsa_cs.c
@@ -46,7 +46,6 @@
 #include <asm/io.h>
 #include <asm/system.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/isdn/hisax/sedlbauer_cs.c b/drivers/isdn/hisax/sedlbauer_cs.c
index 1f4feaab21a..5dbad966a29 100644
--- a/drivers/isdn/hisax/sedlbauer_cs.c
+++ b/drivers/isdn/hisax/sedlbauer_cs.c
@@ -46,7 +46,6 @@
 #include <asm/io.h>
 #include <asm/system.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/isdn/hisax/teles_cs.c b/drivers/isdn/hisax/teles_cs.c
index 5771955cc53..d3fb1b71680 100644
--- a/drivers/isdn/hisax/teles_cs.c
+++ b/drivers/isdn/hisax/teles_cs.c
@@ -27,7 +27,6 @@
 #include <asm/io.h>
 #include <asm/system.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/mmc/host/sdricoh_cs.c b/drivers/mmc/host/sdricoh_cs.c
index e7507af3856..7aa65bb2af4 100644
--- a/drivers/mmc/host/sdricoh_cs.c
+++ b/drivers/mmc/host/sdricoh_cs.c
@@ -30,7 +30,6 @@
 #include <linux/ioport.h>
 #include <linux/scatterlist.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c
index e699e6ac23d..79488164e43 100644
--- a/drivers/mtd/maps/pcmciamtd.c
+++ b/drivers/mtd/maps/pcmciamtd.c
@@ -16,7 +16,6 @@
 #include <asm/io.h>
 #include <asm/system.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c
index 10ee106a161..e249b898075 100644
--- a/drivers/net/pcmcia/3c574_cs.c
+++ b/drivers/net/pcmcia/3c574_cs.c
@@ -87,7 +87,6 @@ earlier 3Com products.
 #include <linux/bitops.h>
 #include <linux/mii.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c
index ce63c3773b4..b0772df3105 100644
--- a/drivers/net/pcmcia/3c589_cs.c
+++ b/drivers/net/pcmcia/3c589_cs.c
@@ -41,7 +41,6 @@
 #include <linux/bitops.h>
 #include <linux/jiffies.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
index 33525bf2a3d..467fd4bfb2b 100644
--- a/drivers/net/pcmcia/axnet_cs.c
+++ b/drivers/net/pcmcia/axnet_cs.c
@@ -39,7 +39,6 @@
 #include <linux/mii.h>
 #include "../8390.h"
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ciscode.h>
diff --git a/drivers/net/pcmcia/com20020_cs.c b/drivers/net/pcmcia/com20020_cs.c
index 5643f94541b..99957af4032 100644
--- a/drivers/net/pcmcia/com20020_cs.c
+++ b/drivers/net/pcmcia/com20020_cs.c
@@ -43,7 +43,6 @@
 #include <linux/arcdevice.h>
 #include <linux/com20020.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index 7c27c50211a..95a991beaa3 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -49,7 +49,6 @@
 #include <linux/ioport.h>
 #include <linux/crc32.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ciscode.h>
diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c
index 67ee9851a8e..c36dcd14ec4 100644
--- a/drivers/net/pcmcia/ibmtr_cs.c
+++ b/drivers/net/pcmcia/ibmtr_cs.c
@@ -57,7 +57,6 @@
 #include <linux/trdevice.h>
 #include <linux/ibmtr.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c
index 9b63dec549c..c0eacfae151 100644
--- a/drivers/net/pcmcia/nmclan_cs.c
+++ b/drivers/net/pcmcia/nmclan_cs.c
@@ -146,7 +146,6 @@ Include Files
 #include <linux/ioport.h>
 #include <linux/bitops.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cisreg.h>
 #include <pcmcia/cistpl.h>
diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c
index bfdef72c5d5..db6dbdabb70 100644
--- a/drivers/net/pcmcia/pcnet_cs.c
+++ b/drivers/net/pcmcia/pcnet_cs.c
@@ -42,7 +42,6 @@
 #include <linux/mii.h>
 #include "../8390.h"
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ciscode.h>
@@ -113,8 +112,6 @@ static int setup_dma_config(struct pcmcia_device *link, int start_pg,
 
 static void pcnet_detach(struct pcmcia_device *p_dev);
 
-static dev_info_t dev_info = "pcnet_cs";
-
 /*====================================================================*/
 
 typedef struct hw_info_t {
@@ -956,7 +953,7 @@ static int pcnet_open(struct net_device *dev)
     set_misc_reg(dev);
 
     outb_p(0xFF, nic_base + EN0_ISR); /* Clear bogus intr. */
-    ret = request_irq(dev->irq, ei_irq_wrapper, IRQF_SHARED, dev_info, dev);
+    ret = request_irq(dev->irq, ei_irq_wrapper, IRQF_SHARED, dev->name, dev);
     if (ret)
 	    return ret;
 
diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c
index 307cd1721e9..88f503a80a8 100644
--- a/drivers/net/pcmcia/smc91c92_cs.c
+++ b/drivers/net/pcmcia/smc91c92_cs.c
@@ -44,7 +44,6 @@
 #include <linux/jiffies.h>
 #include <linux/firmware.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index b6c3644888c..a7662f0832e 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -82,7 +82,6 @@
 #include <linux/bitops.h>
 #include <linux/mii.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c
index 33bdc6a84e8..9389ba004fb 100644
--- a/drivers/net/wireless/airo_cs.c
+++ b/drivers/net/wireless/airo_cs.c
@@ -32,7 +32,6 @@
 #include <linux/timer.h>
 #include <linux/netdevice.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/net/wireless/atmel_cs.c b/drivers/net/wireless/atmel_cs.c
index c2746fc7f2b..91ee74a8801 100644
--- a/drivers/net/wireless/atmel_cs.c
+++ b/drivers/net/wireless/atmel_cs.c
@@ -42,7 +42,6 @@
 #include <linux/moduleparam.h>
 #include <linux/device.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c
index 0e99b634267..f71bc782137 100644
--- a/drivers/net/wireless/b43/pcmcia.c
+++ b/drivers/net/wireless/b43/pcmcia.c
@@ -26,7 +26,6 @@
 #include <linux/ssb/ssb.h>
 #include <linux/slab.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ciscode.h>
diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c
index 29b31a694b5..2f4b6d4350a 100644
--- a/drivers/net/wireless/hostap/hostap_cs.c
+++ b/drivers/net/wireless/hostap/hostap_cs.c
@@ -12,7 +12,6 @@
 #include <linux/wireless.h>
 #include <net/iw_handler.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
@@ -23,7 +22,7 @@
 #include "hostap_wlan.h"
 
 
-static dev_info_t dev_info = "hostap_cs";
+static char *dev_info = "hostap_cs";
 
 MODULE_AUTHOR("Jouni Malinen");
 MODULE_DESCRIPTION("Support for Intersil Prism2-based 802.11 wireless LAN "
diff --git a/drivers/net/wireless/libertas/if_cs.c b/drivers/net/wireless/libertas/if_cs.c
index 08e4e390800..1d3a7e0e5f1 100644
--- a/drivers/net/wireless/libertas/if_cs.c
+++ b/drivers/net/wireless/libertas/if_cs.c
@@ -28,7 +28,6 @@
 #include <linux/firmware.h>
 #include <linux/netdevice.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/net/wireless/orinoco/orinoco_cs.c b/drivers/net/wireless/orinoco/orinoco_cs.c
index b16d5db52a4..41ca4f1b395 100644
--- a/drivers/net/wireless/orinoco/orinoco_cs.c
+++ b/drivers/net/wireless/orinoco/orinoco_cs.c
@@ -17,7 +17,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/delay.h>
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/net/wireless/orinoco/spectrum_cs.c b/drivers/net/wireless/orinoco/spectrum_cs.c
index b51a9adc80f..cad30e499db 100644
--- a/drivers/net/wireless/orinoco/spectrum_cs.c
+++ b/drivers/net/wireless/orinoco/spectrum_cs.c
@@ -25,7 +25,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/delay.h>
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index abff8934db1..165beb6af84 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -46,7 +46,6 @@
 #include <linux/ethtool.h>
 #include <linux/ieee80211.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
index 376c6b964a9..35f431bf97d 100644
--- a/drivers/net/wireless/wl3501_cs.c
+++ b/drivers/net/wireless/wl3501_cs.c
@@ -48,7 +48,6 @@
 
 #include <net/iw_handler.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
@@ -89,13 +88,6 @@
 static int wl3501_config(struct pcmcia_device *link);
 static void wl3501_release(struct pcmcia_device *link);
 
-/*
- * The dev_info variable is the "key" that is used to match up this
- * device driver with appropriate cards, through the card configuration
- * database.
- */
-static dev_info_t wl3501_dev_info = "wl3501_cs";
-
 static const struct {
 	int reg_domain;
 	int min, max, deflt;
@@ -1421,7 +1413,7 @@ static struct iw_statistics *wl3501_get_wireless_stats(struct net_device *dev)
 
 static void wl3501_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 {
-	strlcpy(info->driver, wl3501_dev_info, sizeof(info->driver));
+	strlcpy(info->driver, "wl3501_cs", sizeof(info->driver));
 }
 
 static const struct ethtool_ops ops = {
diff --git a/drivers/parport/parport_cs.c b/drivers/parport/parport_cs.c
index fd8cfe95f0a..ee56fd66d5d 100644
--- a/drivers/parport/parport_cs.c
+++ b/drivers/parport/parport_cs.c
@@ -48,7 +48,6 @@
 #include <linux/parport.h>
 #include <linux/parport_pc.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/pcmcia/au1000_generic.h b/drivers/pcmcia/au1000_generic.h
index a324d329dea..67530cefcf3 100644
--- a/drivers/pcmcia/au1000_generic.h
+++ b/drivers/pcmcia/au1000_generic.h
@@ -23,7 +23,6 @@
 
 /* include the world */
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cistpl.h>
diff --git a/drivers/pcmcia/au1000_pb1x00.c b/drivers/pcmcia/au1000_pb1x00.c
index 5a979cb8f3e..807f2d75dad 100644
--- a/drivers/pcmcia/au1000_pb1x00.c
+++ b/drivers/pcmcia/au1000_pb1x00.c
@@ -31,11 +31,9 @@
 #include <linux/proc_fs.h>
 #include <linux/types.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cistpl.h>
-#include <pcmcia/bus_ops.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c
index 8844bc3e311..ba4a5acc2e9 100644
--- a/drivers/pcmcia/cistpl.c
+++ b/drivers/pcmcia/cistpl.c
@@ -27,7 +27,6 @@
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c
index efa30b84a75..2ec8ac97445 100644
--- a/drivers/pcmcia/cs.c
+++ b/drivers/pcmcia/cs.c
@@ -32,7 +32,6 @@
 #include <asm/system.h>
 #include <asm/irq.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
diff --git a/drivers/pcmcia/db1xxx_ss.c b/drivers/pcmcia/db1xxx_ss.c
index 0f4cc3f0002..27575e6378a 100644
--- a/drivers/pcmcia/db1xxx_ss.c
+++ b/drivers/pcmcia/db1xxx_ss.c
@@ -29,7 +29,6 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 
 #include <asm/mach-au1x00/au1000.h>
diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index 08617719d3a..bacfc55f202 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -26,7 +26,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/pcmcia/i82092.c b/drivers/pcmcia/i82092.c
index 3003bb3dfcc..05d0879ce93 100644
--- a/drivers/pcmcia/i82092.c
+++ b/drivers/pcmcia/i82092.c
@@ -15,7 +15,6 @@
 #include <linux/interrupt.h>
 #include <linux/device.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cs.h>
 
diff --git a/drivers/pcmcia/i82365.c b/drivers/pcmcia/i82365.c
index 9e2a15628de..61746bd598b 100644
--- a/drivers/pcmcia/i82365.c
+++ b/drivers/pcmcia/i82365.c
@@ -50,7 +50,6 @@
 #include <asm/io.h>
 #include <asm/system.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cs.h>
 
diff --git a/drivers/pcmcia/m32r_cfc.c b/drivers/pcmcia/m32r_cfc.c
index 7e16ed8eb0a..24de4992586 100644
--- a/drivers/pcmcia/m32r_cfc.c
+++ b/drivers/pcmcia/m32r_cfc.c
@@ -26,7 +26,6 @@
 #include <asm/io.h>
 #include <asm/system.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cs.h>
 
diff --git a/drivers/pcmcia/m32r_pcc.c b/drivers/pcmcia/m32r_pcc.c
index 6c5c3f910d7..8e4723844ad 100644
--- a/drivers/pcmcia/m32r_pcc.c
+++ b/drivers/pcmcia/m32r_pcc.c
@@ -27,7 +27,6 @@
 #include <asm/system.h>
 #include <asm/addrspace.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cs.h>
 
diff --git a/drivers/pcmcia/m8xx_pcmcia.c b/drivers/pcmcia/m8xx_pcmcia.c
index 25e5e30a18a..f2f90a7d3e1 100644
--- a/drivers/pcmcia/m8xx_pcmcia.c
+++ b/drivers/pcmcia/m8xx_pcmcia.c
@@ -59,7 +59,6 @@
 #include <asm/irq.h>
 #include <asm/fs_pd.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/ss.h>
 
diff --git a/drivers/pcmcia/pcmcia_cis.c b/drivers/pcmcia/pcmcia_cis.c
index 4a65eaf96b0..0ac54da1588 100644
--- a/drivers/pcmcia/pcmcia_cis.c
+++ b/drivers/pcmcia/pcmcia_cis.c
@@ -19,7 +19,6 @@
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cisreg.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ss.h>
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index a4cd9adfcbc..2394de46860 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -25,7 +25,6 @@
 
 #include <asm/irq.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
diff --git a/drivers/pcmcia/pd6729.c b/drivers/pcmcia/pd6729.c
index b61a13663a0..b8a869af0f4 100644
--- a/drivers/pcmcia/pd6729.c
+++ b/drivers/pcmcia/pd6729.c
@@ -17,7 +17,6 @@
 #include <linux/device.h>
 #include <linux/io.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cs.h>
 
diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c
index df4532e91b1..66c022579d9 100644
--- a/drivers/pcmcia/pxa2xx_base.c
+++ b/drivers/pcmcia/pxa2xx_base.c
@@ -32,7 +32,6 @@
 #include <mach/pxa2xx-regs.h>
 #include <asm/mach-types.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cistpl.h>
 
diff --git a/drivers/pcmcia/rsrc_iodyn.c b/drivers/pcmcia/rsrc_iodyn.c
index 6ed7bf171ca..3b1dce2df26 100644
--- a/drivers/pcmcia/rsrc_iodyn.c
+++ b/drivers/pcmcia/rsrc_iodyn.c
@@ -16,7 +16,6 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
diff --git a/drivers/pcmcia/rsrc_mgr.c b/drivers/pcmcia/rsrc_mgr.c
index b12ecf7c32b..b433a799565 100644
--- a/drivers/pcmcia/rsrc_mgr.c
+++ b/drivers/pcmcia/rsrc_mgr.c
@@ -16,7 +16,6 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c
index 13245a2986f..0cca08ff65a 100644
--- a/drivers/pcmcia/rsrc_nonstatic.c
+++ b/drivers/pcmcia/rsrc_nonstatic.c
@@ -28,7 +28,6 @@
 
 #include <asm/irq.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
diff --git a/drivers/pcmcia/sa1100_generic.c b/drivers/pcmcia/sa1100_generic.c
index edbd8c47262..e0985148029 100644
--- a/drivers/pcmcia/sa1100_generic.c
+++ b/drivers/pcmcia/sa1100_generic.c
@@ -35,7 +35,6 @@
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/ss.h>
 
diff --git a/drivers/pcmcia/soc_common.h b/drivers/pcmcia/soc_common.h
index e40824ce6b0..3fba3a67912 100644
--- a/drivers/pcmcia/soc_common.h
+++ b/drivers/pcmcia/soc_common.h
@@ -11,7 +11,6 @@
 
 /* include the world */
 #include <linux/cpufreq.h>
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cistpl.h>
diff --git a/drivers/pcmcia/socket_sysfs.c b/drivers/pcmcia/socket_sysfs.c
index 80e36bc407d..cb0d3ace18b 100644
--- a/drivers/pcmcia/socket_sysfs.c
+++ b/drivers/pcmcia/socket_sysfs.c
@@ -26,7 +26,6 @@
 #include <asm/system.h>
 #include <asm/irq.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
diff --git a/drivers/pcmcia/tcic.c b/drivers/pcmcia/tcic.c
index 56004a1b5bb..be0d841c7eb 100644
--- a/drivers/pcmcia/tcic.c
+++ b/drivers/pcmcia/tcic.c
@@ -49,7 +49,6 @@
 #include <asm/io.h>
 #include <asm/system.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/ss.h>
 #include "tcic.h"
diff --git a/drivers/pcmcia/xxs1500_ss.c b/drivers/pcmcia/xxs1500_ss.c
index 201ccfa1e97..fa88c360c37 100644
--- a/drivers/pcmcia/xxs1500_ss.c
+++ b/drivers/pcmcia/xxs1500_ss.c
@@ -17,7 +17,6 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cistpl.h>
diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c
index f1d41374eea..414d9a6f9a3 100644
--- a/drivers/pcmcia/yenta_socket.c
+++ b/drivers/pcmcia/yenta_socket.c
@@ -19,7 +19,6 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cs.h>
 
diff --git a/drivers/scsi/pcmcia/aha152x_stub.c b/drivers/scsi/pcmcia/aha152x_stub.c
index 9d70aef9922..b07b53ef3c0 100644
--- a/drivers/scsi/pcmcia/aha152x_stub.c
+++ b/drivers/scsi/pcmcia/aha152x_stub.c
@@ -49,7 +49,6 @@
 #include <scsi/scsi_host.h>
 #include "aha152x.h"
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/scsi/pcmcia/fdomain_stub.c b/drivers/scsi/pcmcia/fdomain_stub.c
index 21b141151df..ee048976253 100644
--- a/drivers/scsi/pcmcia/fdomain_stub.c
+++ b/drivers/scsi/pcmcia/fdomain_stub.c
@@ -46,7 +46,6 @@
 #include <scsi/scsi_host.h>
 #include "fdomain.h"
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
index 0f0e112c3f8..d4142075be2 100644
--- a/drivers/scsi/pcmcia/nsp_cs.c
+++ b/drivers/scsi/pcmcia/nsp_cs.c
@@ -47,7 +47,6 @@
 #include <scsi/scsi.h>
 #include <scsi/scsi_ioctl.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/scsi/pcmcia/qlogic_stub.c b/drivers/scsi/pcmcia/qlogic_stub.c
index f0fc6baed9f..c1cf7f43313 100644
--- a/drivers/scsi/pcmcia/qlogic_stub.c
+++ b/drivers/scsi/pcmcia/qlogic_stub.c
@@ -48,7 +48,6 @@
 #include <scsi/scsi_host.h>
 #include "../qlogicfas408.h"
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c
index a5116417117..bd79e45ab85 100644
--- a/drivers/scsi/pcmcia/sym53c500_cs.c
+++ b/drivers/scsi/pcmcia/sym53c500_cs.c
@@ -71,7 +71,6 @@
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c
index ab17c08ddc0..2b99c7baf35 100644
--- a/drivers/serial/serial_cs.c
+++ b/drivers/serial/serial_cs.c
@@ -45,7 +45,6 @@
 #include <asm/io.h>
 #include <asm/system.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ciscode.h>
diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index 51275aac5b3..06f04b42cb2 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -20,7 +20,6 @@
 #include <linux/mmc/sdio_func.h>
 #include <linux/slab.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/ssb/pcmcia.c b/drivers/ssb/pcmcia.c
index e72f4046a5e..21520308178 100644
--- a/drivers/ssb/pcmcia.c
+++ b/drivers/ssb/pcmcia.c
@@ -13,7 +13,6 @@
 #include <linux/io.h>
 #include <linux/etherdevice.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ciscode.h>
diff --git a/drivers/ssb/scan.c b/drivers/ssb/scan.c
index 0d6c0280eb3..9738cad4ba1 100644
--- a/drivers/ssb/scan.c
+++ b/drivers/ssb/scan.c
@@ -17,7 +17,6 @@
 #include <linux/pci.h>
 #include <linux/io.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/staging/comedi/drivers/cb_das16_cs.c b/drivers/staging/comedi/drivers/cb_das16_cs.c
index cfeb11f443e..acef29ca212 100644
--- a/drivers/staging/comedi/drivers/cb_das16_cs.c
+++ b/drivers/staging/comedi/drivers/cb_das16_cs.c
@@ -37,7 +37,6 @@ Status: experimental
 #include <linux/delay.h>
 #include <linux/pci.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/staging/comedi/drivers/das08_cs.c b/drivers/staging/comedi/drivers/das08_cs.c
index 8761a6d285d..3eddb7c4b1b 100644
--- a/drivers/staging/comedi/drivers/das08_cs.c
+++ b/drivers/staging/comedi/drivers/das08_cs.c
@@ -48,7 +48,6 @@ Command support does not exist, but could be added for this board.
 #include "das08.h"
 
 /* pcmcia includes */
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c
index 6ec77bf88c6..f3c4d2f929f 100644
--- a/drivers/staging/comedi/drivers/ni_daq_700.c
+++ b/drivers/staging/comedi/drivers/ni_daq_700.c
@@ -47,7 +47,6 @@ IRQ is assigned but not used.
 
 #include <linux/ioport.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/staging/comedi/drivers/ni_daq_dio24.c b/drivers/staging/comedi/drivers/ni_daq_dio24.c
index e4865b1c231..f0c4367dc10 100644
--- a/drivers/staging/comedi/drivers/ni_daq_dio24.c
+++ b/drivers/staging/comedi/drivers/ni_daq_dio24.c
@@ -48,7 +48,6 @@ the PCMCIA interface.
 
 #include "8255.h"
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/staging/comedi/drivers/ni_labpc_cs.c b/drivers/staging/comedi/drivers/ni_labpc_cs.c
index 163245ebb31..1ee78f806ed 100644
--- a/drivers/staging/comedi/drivers/ni_labpc_cs.c
+++ b/drivers/staging/comedi/drivers/ni_labpc_cs.c
@@ -71,7 +71,6 @@ NI manuals:
 #include "comedi_fc.h"
 #include "ni_labpc.h"
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/staging/comedi/drivers/ni_mio_cs.c b/drivers/staging/comedi/drivers/ni_mio_cs.c
index 3a46f0c0bff..0bce220c47b 100644
--- a/drivers/staging/comedi/drivers/ni_mio_cs.c
+++ b/drivers/staging/comedi/drivers/ni_mio_cs.c
@@ -48,7 +48,6 @@ See the notes in the ni_atmio.o driver.
 #include "ni_stc.h"
 #include "8255.h"
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/staging/comedi/drivers/quatech_daqp_cs.c b/drivers/staging/comedi/drivers/quatech_daqp_cs.c
index a91db6c4202..67c0fa6a2b0 100644
--- a/drivers/staging/comedi/drivers/quatech_daqp_cs.c
+++ b/drivers/staging/comedi/drivers/quatech_daqp_cs.c
@@ -50,7 +50,6 @@ Devices: [Quatech] DAQP-208 (daqp), DAQP-308
 #include "../comedidev.h"
 #include <linux/semaphore.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/staging/wlags49_h2/wl_cs.c b/drivers/staging/wlags49_h2/wl_cs.c
index 10abd406b09..afe45754117 100644
--- a/drivers/staging/wlags49_h2/wl_cs.c
+++ b/drivers/staging/wlags49_h2/wl_cs.c
@@ -83,7 +83,6 @@
 #include <linux/if_arp.h>
 #include <linux/ioport.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/staging/wlags49_h2/wl_internal.h b/drivers/staging/wlags49_h2/wl_internal.h
index d9a0ad039c1..02f0a20e178 100644
--- a/drivers/staging/wlags49_h2/wl_internal.h
+++ b/drivers/staging/wlags49_h2/wl_internal.h
@@ -69,7 +69,6 @@
  ******************************************************************************/
 #include <linux/version.h>
 #ifdef BUS_PCMCIA
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
diff --git a/drivers/telephony/ixj_pcmcia.c b/drivers/telephony/ixj_pcmcia.c
index 99cb2246ac7..f6c7e6fd7ee 100644
--- a/drivers/telephony/ixj_pcmcia.c
+++ b/drivers/telephony/ixj_pcmcia.c
@@ -8,7 +8,6 @@
 #include <linux/errno.h>	/* error codes */
 #include <linux/slab.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/drivers/usb/host/sl811_cs.c b/drivers/usb/host/sl811_cs.c
index 58cb73c8420..acb7e255a83 100644
--- a/drivers/usb/host/sl811_cs.c
+++ b/drivers/usb/host/sl811_cs.c
@@ -20,7 +20,6 @@
 #include <linux/ioport.h>
 #include <linux/platform_device.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
@@ -43,8 +42,6 @@ MODULE_LICENSE("GPL");
 /* VARIABLES                                                          */
 /*====================================================================*/
 
-static const char driver_name[DEV_NAME_LEN]  = "sl811_cs";
-
 typedef struct local_info_t {
 	struct pcmcia_device	*p_dev;
 } local_info_t;
@@ -246,7 +243,7 @@ MODULE_DEVICE_TABLE(pcmcia, sl811_ids);
 static struct pcmcia_driver sl811_cs_driver = {
 	.owner		= THIS_MODULE,
 	.drv		= {
-		.name	= (char *)driver_name,
+		.name	= "sl811_cs",
 	},
 	.probe		= sl811_cs_probe,
 	.remove		= sl811_cs_detach,
diff --git a/include/pcmcia/cistpl.h b/include/pcmcia/cistpl.h
index cfdd5af77dc..1c5088c9f7b 100644
--- a/include/pcmcia/cistpl.h
+++ b/include/pcmcia/cistpl.h
@@ -15,6 +15,8 @@
 #ifndef _LINUX_CISTPL_H
 #define _LINUX_CISTPL_H
 
+typedef unsigned char cisdata_t;
+
 #define CISTPL_NULL		0x00
 #define CISTPL_DEVICE		0x01
 #define CISTPL_LONGLINK_CB	0x02
diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h
index c943c967ac7..c78d9b11208 100644
--- a/include/pcmcia/cs.h
+++ b/include/pcmcia/cs.h
@@ -43,14 +43,6 @@ typedef struct conf_reg_t {
 #define CV_COPY_VALUE		0x08
 #define CV_EXT_STATUS		0x10
 
-/* For GetFirst/NextClient */
-typedef struct client_req_t {
-    socket_t	Socket;
-    u_int	Attributes;
-} client_req_t;
-
-#define CLIENT_THIS_SOCKET	0x01
-
 /* ModifyConfiguration */
 typedef struct modconf_t {
     u_int	Attributes;
@@ -133,7 +125,7 @@ typedef struct io_req_t {
 /* For GetMemPage, MapMemPage */
 typedef struct memreq_t {
     u_int	CardOffset;
-    page_t	Page;
+    u_short	Page;
 } memreq_t;
 
 /* For ModifyWindow */
diff --git a/include/pcmcia/cs_types.h b/include/pcmcia/cs_types.h
deleted file mode 100644
index f5e3b8386c8..00000000000
--- a/include/pcmcia/cs_types.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * cs_types.h
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * The initial developer of the original code is David A. Hinds
- * <dahinds@users.sourceforge.net>.  Portions created by David A. Hinds
- * are Copyright (C) 1999 David A. Hinds.  All Rights Reserved.
- *
- * (C) 1999             David A. Hinds
- */
-
-#ifndef _LINUX_CS_TYPES_H
-#define _LINUX_CS_TYPES_H
-
-#ifdef __KERNEL__
-#include <linux/types.h>
-#else
-#include <sys/types.h>
-#endif
-
-typedef u_short	socket_t;
-typedef u_int	event_t;
-typedef u_char	cisdata_t;
-typedef u_short	page_t;
-
-typedef unsigned long window_handle_t;
-
-struct region_t;
-typedef struct region_t *memory_handle_t;
-
-#ifndef DEV_NAME_LEN
-#define DEV_NAME_LEN 32
-#endif
-
-typedef char dev_info_t[DEV_NAME_LEN];
-
-#endif /* _LINUX_CS_TYPES_H */
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index 7d7721e8603..e614aa0ca2a 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -20,7 +20,6 @@
 #include <linux/mod_devicetable.h>
 #endif
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/device_id.h>
 
 #ifdef __KERNEL__
@@ -37,6 +36,8 @@ struct pcmcia_device;
 struct config_t;
 struct net_device;
 
+typedef unsigned long window_handle_t;
+
 /* dynamic device IDs for PCMCIA device drivers. See
  * Documentation/pcmcia/driver.txt for details.
 */
diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h
index aeac2710983..626b63c33d9 100644
--- a/include/pcmcia/ss.h
+++ b/include/pcmcia/ss.h
@@ -19,7 +19,6 @@
 #include <linux/sched.h>	/* task_struct, completion */
 #include <linux/mutex.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #ifdef CONFIG_CARDBUS
 #include <linux/pci.h>
diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf.h b/sound/pcmcia/pdaudiocf/pdaudiocf.h
index a0a7ec64222..5cc3e457307 100644
--- a/sound/pcmcia/pdaudiocf/pdaudiocf.h
+++ b/sound/pcmcia/pdaudiocf/pdaudiocf.h
@@ -24,7 +24,6 @@
 #include <sound/pcm.h>
 #include <asm/io.h>
 #include <linux/interrupt.h>
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
diff --git a/sound/pcmcia/vx/vxpocket.h b/sound/pcmcia/vx/vxpocket.h
index ea4df16a28e..d9110669d04 100644
--- a/sound/pcmcia/vx/vxpocket.h
+++ b/sound/pcmcia/vx/vxpocket.h
@@ -23,7 +23,6 @@
 
 #include <sound/vx_core.h>
 
-#include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
-- 
cgit v1.2.3-70-g09d2


From b608b283a962caaa280756bc8563016a71712acf Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 30 Jul 2010 15:31:54 -0400
Subject: NFS: kswapd must not block in nfs_release_page

See https://bugzilla.kernel.org/show_bug.cgi?id=16056

If other processes are blocked waiting for kswapd to free up some memory so
that they can make progress, then we cannot allow kswapd to block on those
processes.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
 fs/nfs/file.c          | 13 +++++++++++--
 fs/nfs/write.c         |  4 ++--
 include/linux/nfs_fs.h |  1 +
 3 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 36a5e74f51b..f036153d9f5 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -27,6 +27,7 @@
 #include <linux/pagemap.h>
 #include <linux/aio.h>
 #include <linux/gfp.h>
+#include <linux/swap.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -493,11 +494,19 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
  */
 static int nfs_release_page(struct page *page, gfp_t gfp)
 {
+	struct address_space *mapping = page->mapping;
+
 	dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
 
 	/* Only do I/O if gfp is a superset of GFP_KERNEL */
-	if ((gfp & GFP_KERNEL) == GFP_KERNEL)
-		nfs_wb_page(page->mapping->host, page);
+	if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) {
+		int how = FLUSH_SYNC;
+
+		/* Don't let kswapd deadlock waiting for OOM RPC calls */
+		if (current_is_kswapd())
+			how = 0;
+		nfs_commit_inode(mapping->host, how);
+	}
 	/* If PagePrivate() is set, then the page is not freeable */
 	if (PagePrivate(page))
 		return 0;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 91679e2631e..0a6c65a1f9d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1379,7 +1379,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
 	.rpc_release = nfs_commit_release,
 };
 
-static int nfs_commit_inode(struct inode *inode, int how)
+int nfs_commit_inode(struct inode *inode, int how)
 {
 	LIST_HEAD(head);
 	int may_wait = how & FLUSH_SYNC;
@@ -1443,7 +1443,7 @@ out_mark_dirty:
 	return ret;
 }
 #else
-static int nfs_commit_inode(struct inode *inode, int how)
+int nfs_commit_inode(struct inode *inode, int how)
 {
 	return 0;
 }
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 77c2ae53431..f6e2455f13d 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -493,6 +493,7 @@ extern int nfs_wb_all(struct inode *inode);
 extern int nfs_wb_page(struct inode *inode, struct page* page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+extern int  nfs_commit_inode(struct inode *, int);
 extern struct nfs_write_data *nfs_commitdata_alloc(void);
 extern void nfs_commit_free(struct nfs_write_data *wdata);
 #endif
-- 
cgit v1.2.3-70-g09d2


From 60347c194acec7ff1b4291ac8e62a5345244c2ee Mon Sep 17 00:00:00 2001
From: Samuli Konttila <samuli.konttila@aavamobile.com>
Date: Fri, 30 Jul 2010 09:02:43 -0700
Subject: Input: cy8ctmg110 - capacitive touchscreen support

Add support for the cy8ctmg110 capacitive touchscreen used on some
embedded devices.

(Some clean up by Alan Cox)

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/Kconfig         |  14 ++
 drivers/input/touchscreen/Makefile        |   3 +-
 drivers/input/touchscreen/cy8ctmg110_ts.c | 363 ++++++++++++++++++++++++++++++
 include/linux/input/cy8ctmg110_pdata.h    |  10 +
 4 files changed, 389 insertions(+), 1 deletion(-)
 create mode 100644 drivers/input/touchscreen/cy8ctmg110_ts.c
 create mode 100644 include/linux/input/cy8ctmg110_pdata.h

(limited to 'include')

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 7bfcfdff6cf..61f35184f76 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -98,6 +98,20 @@ config TOUCHSCREEN_BITSY
 	  To compile this driver as a module, choose M here: the
 	  module will be called h3600_ts_input.
 
+config TOUCHSCREEN_CY8CTMG110
+	tristate "cy8ctmg110 touchscreen"
+	depends on I2C
+	depends on GPIOLIB
+
+	help
+	  Say Y here if you have a cy8ctmg110 capacitive touchscreen on
+	  an AAVA device.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cy8ctmg110_ts.
+
 config TOUCHSCREEN_DA9034
 	tristate "Touchscreen support for Dialog Semiconductor DA9034"
 	depends on PMIC_DA903X
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 779de0d9d41..bd6f30b4ff7 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -14,6 +14,8 @@ obj-$(CONFIG_TOUCHSCREEN_AD7879_SPI)	+= ad7879-spi.o
 obj-$(CONFIG_TOUCHSCREEN_ADS7846)	+= ads7846.o
 obj-$(CONFIG_TOUCHSCREEN_ATMEL_TSADCC)	+= atmel_tsadcc.o
 obj-$(CONFIG_TOUCHSCREEN_BITSY)		+= h3600_ts_input.o
+obj-$(CONFIG_TOUCHSCREEN_CY8CTMG110)	+= cy8ctmg110_ts.o
+obj-$(CONFIG_TOUCHSCREEN_DA9034)	+= da9034-ts.o
 obj-$(CONFIG_TOUCHSCREEN_DYNAPRO)	+= dynapro.o
 obj-$(CONFIG_TOUCHSCREEN_HAMPSHIRE)	+= hampshire.o
 obj-$(CONFIG_TOUCHSCREEN_GUNZE)		+= gunze.o
@@ -41,7 +43,6 @@ obj-$(CONFIG_TOUCHSCREEN_TSC2007)	+= tsc2007.o
 obj-$(CONFIG_TOUCHSCREEN_UCB1400)	+= ucb1400_ts.o
 obj-$(CONFIG_TOUCHSCREEN_WACOM_W8001)	+= wacom_w8001.o
 obj-$(CONFIG_TOUCHSCREEN_WM97XX)	+= wm97xx-ts.o
-obj-$(CONFIG_TOUCHSCREEN_DA9034)	+= da9034-ts.o
 wm97xx-ts-$(CONFIG_TOUCHSCREEN_WM9705)	+= wm9705.o
 wm97xx-ts-$(CONFIG_TOUCHSCREEN_WM9712)	+= wm9712.o
 wm97xx-ts-$(CONFIG_TOUCHSCREEN_WM9713)	+= wm9713.o
diff --git a/drivers/input/touchscreen/cy8ctmg110_ts.c b/drivers/input/touchscreen/cy8ctmg110_ts.c
new file mode 100644
index 00000000000..4eb7df0b7f8
--- /dev/null
+++ b/drivers/input/touchscreen/cy8ctmg110_ts.c
@@ -0,0 +1,363 @@
+/*
+ * Driver for cypress touch screen controller
+ *
+ * Copyright (c) 2009 Aava Mobile
+ *
+ * Some cleanups by Alan Cox <alan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/input.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+#include <linux/input/cy8ctmg110_pdata.h>
+
+#define CY8CTMG110_DRIVER_NAME      "cy8ctmg110"
+
+/* Touch coordinates */
+#define CY8CTMG110_X_MIN		0
+#define CY8CTMG110_Y_MIN		0
+#define CY8CTMG110_X_MAX		759
+#define CY8CTMG110_Y_MAX		465
+
+
+/* cy8ctmg110 register definitions */
+#define CY8CTMG110_TOUCH_WAKEUP_TIME	0
+#define CY8CTMG110_TOUCH_SLEEP_TIME	2
+#define CY8CTMG110_TOUCH_X1		3
+#define CY8CTMG110_TOUCH_Y1		5
+#define CY8CTMG110_TOUCH_X2		7
+#define CY8CTMG110_TOUCH_Y2		9
+#define CY8CTMG110_FINGERS		11
+#define CY8CTMG110_GESTURE		12
+#define CY8CTMG110_REG_MAX		13
+
+
+/*
+ * Per-device driver state; allocated in probe and freed in remove.
+ */
+struct cy8ctmg110 {
+	struct input_dev *input;	/* input device we report to */
+	char phys[32];			/* "<i2c dev name>/input0" */
+	struct i2c_client *client;	/* I2C client for register access */
+	int reset_pin;			/* reset GPIO, skipped when 0 */
+	int irq_pin;			/* GPIO providing the touch IRQ */
+};
+
+/*
+ * cy8ctmg110_power switches the touch hardware on or off by driving the
+ * reset GPIO to 0 (power on) or 1 (power off); no-op when reset_pin is 0.
+ */
+static void cy8ctmg110_power(struct cy8ctmg110 *ts, bool poweron)
+{
+	if (ts->reset_pin)
+		gpio_direction_output(ts->reset_pin, 1 - poweron);
+}
+
+/* Send reg plus len (<= 5) value bytes in one I2C write; 0 or -errno. */
+static int cy8ctmg110_write_regs(struct cy8ctmg110 *tsc, unsigned char reg,
+		unsigned char len, unsigned char *value)
+{
+	struct i2c_client *client = tsc->client;
+	unsigned char i2c_data[6];
+	int ret;
+
+	BUG_ON(len > 5);
+	i2c_data[0] = reg;
+	memcpy(i2c_data + 1, value, len);
+
+	/* i2c_master_send() returns the byte count sent or a negative errno */
+	ret = i2c_master_send(client, i2c_data, len + 1);
+	if (ret != len + 1) {
+		dev_err(&client->dev, "i2c write data cmd failed\n");
+		return ret < 0 ? ret : -EIO;
+	}
+	return 0;
+}
+
+/* Read len bytes into data starting at register cmd; 0 or -errno. */
+static int cy8ctmg110_read_regs(struct cy8ctmg110 *tsc,
+		unsigned char *data, unsigned char len, unsigned char cmd)
+{
+	struct i2c_client *client = tsc->client;
+	int ret;	/* signed: i2c_transfer() reports errors as -errno */
+	struct i2c_msg msg[2] = {
+		/* First write the register position to the device */
+		{ client->addr, 0, 1, &cmd },
+		/* Then read the data back from that position */
+		{ client->addr, I2C_M_RD, len, data }
+	};
+
+	ret = i2c_transfer(client->adapter, msg, 2);
+	if (ret < 0)
+		return ret;
+	return ret == 2 ? 0 : -EIO;	/* fewer than both messages done */
+}
+
+/* Read the touch registers and report the first contact to input. */
+static int cy8ctmg110_touch_pos(struct cy8ctmg110 *tsc)
+{
+	struct input_dev *idev = tsc->input;
+	unsigned char regs[CY8CTMG110_REG_MAX] = { 0 };
+	int pos_x, pos_y;
+
+	/* 9 bytes from TOUCH_X1 (reg 3) cover X1, Y1, X2, Y2 and FINGERS */
+	if (cy8ctmg110_read_regs(tsc, regs, 9, CY8CTMG110_TOUCH_X1))
+		return -EIO;
+
+	/* Each coordinate is two bytes, high byte first */
+	pos_x = (regs[0] << 8) | regs[1];
+	pos_y = (regs[2] << 8) | regs[3];
+
+	/* regs[8] is the FINGERS register (11): non-zero means touched */
+	if (regs[8]) {
+		input_report_key(idev, BTN_TOUCH, 1);
+		input_report_abs(idev, ABS_X, pos_x);
+		input_report_abs(idev, ABS_Y, pos_y);
+	} else {
+		input_report_key(idev, BTN_TOUCH, 0);
+	}
+
+	input_sync(idev);
+
+	return 0;
+}
+
+/*
+ * Select sleep (true) or wake (false) mode by writing three bytes, in one
+ * transfer, starting at CY8CTMG110_TOUCH_WAKEUP_TIME.
+ */
+static int cy8ctmg110_set_sleepmode(struct cy8ctmg110 *ts, bool sleep)
+{
+	/* Start from the wake values; only bytes 0 and 2 differ for sleep */
+	unsigned char reg_p[3] = { 0x10, 0xff, 0 };
+
+	if (sleep) {
+		reg_p[0] = 0x00;
+		reg_p[2] = 5;
+	}
+
+	return cy8ctmg110_write_regs(ts, CY8CTMG110_TOUCH_WAKEUP_TIME, 3, reg_p);
+}
+
+/* Threaded IRQ handler: report the current touch state, errors ignored. */
+static irqreturn_t cy8ctmg110_irq_thread(int irq, void *dev_id)
+{
+	/* dev_id is the struct cy8ctmg110 passed to request_threaded_irq() */
+	cy8ctmg110_touch_pos(dev_id);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Bind one controller: copy the GPIO numbers from the platform data, wake the
+ * chip, attach the threaded touch IRQ and register the input device.
+ */
+static int __devinit cy8ctmg110_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	const struct cy8ctmg110_pdata *pdata = client->dev.platform_data;
+	struct cy8ctmg110 *ts;
+	struct input_dev *input_dev;
+	int err;
+
+	/* No pdata no way forward */
+	if (pdata == NULL) {
+		dev_err(&client->dev, "no pdata\n");
+		return -ENODEV;
+	}
+
+	if (!i2c_check_functionality(client->adapter,
+					I2C_FUNC_SMBUS_READ_WORD_DATA))
+		return -EIO;
+
+	ts = kzalloc(sizeof(struct cy8ctmg110), GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!ts || !input_dev) {
+		err = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	ts->client = client;
+	ts->input = input_dev;
+	ts->reset_pin = pdata->reset_pin;	/* ts is zeroed: copy from pdata */
+	ts->irq_pin = pdata->irq_pin;
+	snprintf(ts->phys, sizeof(ts->phys),
+		 "%s/input0", dev_name(&client->dev));
+
+	input_dev->name = CY8CTMG110_DRIVER_NAME " Touchscreen";
+	input_dev->phys = ts->phys;
+	input_dev->id.bustype = BUS_I2C;
+	input_dev->dev.parent = &client->dev;
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
+	input_set_abs_params(input_dev, ABS_X,
+			CY8CTMG110_X_MIN, CY8CTMG110_X_MAX, 0, 0);
+	input_set_abs_params(input_dev, ABS_Y,
+			CY8CTMG110_Y_MIN, CY8CTMG110_Y_MAX, 0, 0);
+
+	err = ts->reset_pin ? gpio_request(ts->reset_pin, NULL) : 0;
+	if (err) {
+		dev_err(&client->dev, "Unable to request GPIO pin %d.\n",
+			ts->reset_pin);
+		goto err_free_mem;
+	}
+
+	cy8ctmg110_power(ts, true);
+	cy8ctmg110_set_sleepmode(ts, false);
+
+	err = gpio_request(ts->irq_pin, "touch_irq_key");
+	if (err < 0) {
+		dev_err(&client->dev,
+			"Failed to request GPIO %d, error %d\n",
+			ts->irq_pin, err);
+		goto err_shutoff_device;
+	}
+
+	err = gpio_direction_input(ts->irq_pin);
+	if (err < 0) {
+		dev_err(&client->dev,
+			"Failed to configure input direction for GPIO %d, error %d\n",
+			ts->irq_pin, err);
+		goto err_free_irq_gpio;
+	}
+
+	client->irq = gpio_to_irq(ts->irq_pin);
+	if (client->irq < 0) {
+		err = client->irq;
+		dev_err(&client->dev,
+			"Unable to get irq number for GPIO %d, error %d\n",
+			ts->irq_pin, err);
+		goto err_free_irq_gpio;
+	}
+
+	err = request_threaded_irq(client->irq, NULL, cy8ctmg110_irq_thread,
+				   IRQF_TRIGGER_RISING, "touch_reset_key", ts);
+	if (err < 0) {
+		dev_err(&client->dev,
+			"irq %d busy? error %d\n", client->irq, err);
+		goto err_free_irq_gpio;
+	}
+
+	err = input_register_device(input_dev);
+	if (err)
+		goto err_free_irq;
+
+	i2c_set_clientdata(client, ts);
+	device_init_wakeup(&client->dev, 1);
+	return 0;
+
+err_free_irq:
+	free_irq(client->irq, ts);
+err_free_irq_gpio:
+	gpio_free(ts->irq_pin);
+err_shutoff_device:
+	cy8ctmg110_set_sleepmode(ts, true);
+	cy8ctmg110_power(ts, false);
+	if (ts->reset_pin)
+		gpio_free(ts->reset_pin);
+err_free_mem:
+	input_free_device(input_dev);
+	kfree(ts);
+	return err;
+}
+
+#ifdef CONFIG_PM
+static int cy8ctmg110_suspend(struct i2c_client *client, pm_message_t mesg)
+{
+	struct cy8ctmg110 *ts = i2c_get_clientdata(client);
+
+	if (device_may_wakeup(&client->dev))
+		enable_irq_wake(client->irq);	/* chip is left running */
+	else {
+		cy8ctmg110_set_sleepmode(ts, true);	/* sleep first... */
+		cy8ctmg110_power(ts, false);		/* ...then power off */
+	}
+	return 0;
+}
+
+static int cy8ctmg110_resume(struct i2c_client *client)
+{
+	struct cy8ctmg110 *ts = i2c_get_clientdata(client);
+
+	if (device_may_wakeup(&client->dev))
+		disable_irq_wake(client->irq);	/* undo suspend's wake */
+	else {
+		cy8ctmg110_power(ts, true);	/* reverse of suspend's order */
+		cy8ctmg110_set_sleepmode(ts, false);
+	}
+	return 0;
+}
+#endif
+
+static int __devexit cy8ctmg110_remove(struct i2c_client *client)
+{
+	struct cy8ctmg110 *ts = i2c_get_clientdata(client);
+
+	cy8ctmg110_set_sleepmode(ts, true);
+	cy8ctmg110_power(ts, false);	/* no-op when reset_pin is 0 */
+
+	free_irq(client->irq, ts);	/* ts was the dev_id used in probe */
+	input_unregister_device(ts->input);
+	gpio_free(ts->irq_pin);
+	if (ts->reset_pin)		/* only requested in probe if non-zero */
+		gpio_free(ts->reset_pin);
+	kfree(ts);
+
+	return 0;
+}
+
+static struct i2c_device_id cy8ctmg110_idtable[] = {
+	{ CY8CTMG110_DRIVER_NAME, 1 },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(i2c, cy8ctmg110_idtable);
+
+static struct i2c_driver cy8ctmg110_driver = {
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= CY8CTMG110_DRIVER_NAME,
+	},
+	.id_table	= cy8ctmg110_idtable,
+	.probe		= cy8ctmg110_probe,
+	.remove		= __devexit_p(cy8ctmg110_remove),
+#ifdef CONFIG_PM
+	.suspend	= cy8ctmg110_suspend,
+	.resume		= cy8ctmg110_resume,
+#endif
+};
+
+static int __init cy8ctmg110_init(void)
+{
+	return i2c_add_driver(&cy8ctmg110_driver);
+}
+
+static void __exit cy8ctmg110_exit(void)
+{
+	i2c_del_driver(&cy8ctmg110_driver);
+}
+
+module_init(cy8ctmg110_init);
+module_exit(cy8ctmg110_exit);
+
+MODULE_AUTHOR("Samuli Konttila <samuli.konttila@aavamobile.com>");
+MODULE_DESCRIPTION("cy8ctmg110 TouchScreen Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/input/cy8ctmg110_pdata.h b/include/linux/input/cy8ctmg110_pdata.h
new file mode 100644
index 00000000000..09522cb5991
--- /dev/null
+++ b/include/linux/input/cy8ctmg110_pdata.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_CY8CTMG110_PDATA_H
+#define _LINUX_CY8CTMG110_PDATA_H
+
+struct cy8ctmg110_pdata
+{
+	int reset_pin;		/* Reset pin is wired to this GPIO (optional) */
+	int irq_pin;		/* IRQ pin is wired to this GPIO */
+};
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From e6cc11707661770ca2bd4db4b0256d28f48e7541 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 27 Jul 2010 07:14:28 +0200
Subject: padata: Rename padata_alloc functions

We rename padata_alloc to padata_alloc_possible because this
function allocates a padata_instance and uses the cpu_possible
mask for parallel and serial workers. We also rename __padata_alloc
to padata_alloc to avoid exporting underscore-prefixed functions.
Underscore-prefixed functions are considered to be private to
padata. Users are updated accordingly.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/pcrypt.c        |  2 +-
 include/linux/padata.h |  9 +++++----
 kernel/padata.c        | 24 ++++++++++++------------
 3 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 794c172b99f..55460839624 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -457,7 +457,7 @@ static int __pcrypt_init_instance(struct pcrypt_instance *pcrypt,
 	if (!pcrypt->wq)
 		goto err;
 
-	pcrypt->pinst = padata_alloc(pcrypt->wq);
+	pcrypt->pinst = padata_alloc_possible(pcrypt->wq);
 	if (!pcrypt->pinst)
 		goto err_destroy_workqueue;
 
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 293ad46ffce..71dfc9d1f85 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -165,10 +165,11 @@ struct padata_instance {
 #define	PADATA_INVALID	4
 };
 
-extern struct padata_instance *padata_alloc(struct workqueue_struct *wq);
-extern struct padata_instance *__padata_alloc(struct workqueue_struct *wq,
-					      const struct cpumask *pcpumask,
-					      const struct cpumask *cbcpumask);
+extern struct padata_instance *padata_alloc_possible(
+					struct workqueue_struct *wq);
+extern struct padata_instance *padata_alloc(struct workqueue_struct *wq,
+					    const struct cpumask *pcpumask,
+					    const struct cpumask *cbcpumask);
 extern void padata_free(struct padata_instance *pinst);
 extern int padata_do_parallel(struct padata_instance *pinst,
 			      struct padata_priv *padata, int cb_cpu);
diff --git a/kernel/padata.c b/kernel/padata.c
index 7f895e2b4ef..12860bce6b7 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -1060,29 +1060,29 @@ static struct kobj_type padata_attr_type = {
 };
 
 /**
- * padata_alloc - Allocate and initialize padata instance.
- *                Use default cpumask(cpu_possible_mask)
- *                for serial and parallel workes.
+ * padata_alloc_possible - Allocate and initialize padata instance.
+ *                         Use the cpu_possible_mask for serial and
+ *                         parallel workers.
  *
  * @wq: workqueue to use for the allocated padata instance
  */
-struct padata_instance *padata_alloc(struct workqueue_struct *wq)
+struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
 {
-	return __padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
+	return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
 }
-EXPORT_SYMBOL(padata_alloc);
+EXPORT_SYMBOL(padata_alloc_possible);
 
 /**
- * __padata_alloc - allocate and initialize a padata instance
- *                  and specify cpumasks for serial and parallel workers.
+ * padata_alloc - allocate and initialize a padata instance and specify
+ *                cpumasks for serial and parallel workers.
  *
  * @wq: workqueue to use for the allocated padata instance
  * @pcpumask: cpumask that will be used for padata parallelization
  * @cbcpumask: cpumask that will be used for padata serialization
  */
-struct padata_instance *__padata_alloc(struct workqueue_struct *wq,
-				       const struct cpumask *pcpumask,
-				       const struct cpumask *cbcpumask)
+struct padata_instance *padata_alloc(struct workqueue_struct *wq,
+				     const struct cpumask *pcpumask,
+				     const struct cpumask *cbcpumask)
 {
 	struct padata_instance *pinst;
 	struct parallel_data *pd = NULL;
@@ -1138,7 +1138,7 @@ err_free_inst:
 err:
 	return NULL;
 }
-EXPORT_SYMBOL(__padata_alloc);
+EXPORT_SYMBOL(padata_alloc);
 
 /**
  * padata_free - free a padata instance
-- 
cgit v1.2.3-70-g09d2


From 65ff577e6b6e482ee9de3569e058edebdc02f069 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 27 Jul 2010 07:15:06 +0200
Subject: padata: Rearrange set_cpumask functions

padata_set_cpumask needs to be protected by a lock. We make
__padata_set_cpumasks unlocked and static. So this function
can be used by the exported and locked padata_set_cpumask and
padata_set_cpumasks functions.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/padata.h |   6 +--
 kernel/padata.c        | 117 ++++++++++++++++++++++++++++---------------------
 2 files changed, 70 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/include/linux/padata.h b/include/linux/padata.h
index 71dfc9d1f85..bb0fc5dd0bb 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -178,9 +178,9 @@ extern int padata_get_cpumask(struct padata_instance *pinst,
 			      int cpumask_type, struct cpumask *out_mask);
 extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
 			      cpumask_var_t cpumask);
-extern int __padata_set_cpumasks(struct padata_instance *pinst,
-				 cpumask_var_t pcpumask,
-				 cpumask_var_t cbcpumask);
+extern int padata_set_cpumasks(struct padata_instance *pinst,
+			       cpumask_var_t pcpumask,
+			       cpumask_var_t cbcpumask);
 extern int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask);
 extern int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask);
 extern int padata_start(struct padata_instance *pinst);
diff --git a/kernel/padata.c b/kernel/padata.c
index 12860bce6b7..4987203770b 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -623,6 +623,66 @@ int padata_get_cpumask(struct padata_instance *pinst,
 }
 EXPORT_SYMBOL(padata_get_cpumask);
 
+static int __padata_set_cpumasks(struct padata_instance *pinst,
+				 cpumask_var_t pcpumask,
+				 cpumask_var_t cbcpumask)
+{
+	int valid;
+	struct parallel_data *pd;
+
+	valid = padata_validate_cpumask(pinst, pcpumask);
+	if (!valid) {
+		__padata_stop(pinst);
+		goto out_replace;
+	}
+
+	valid = padata_validate_cpumask(pinst, cbcpumask);
+	if (!valid)
+		__padata_stop(pinst);
+
+out_replace:
+	pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
+	if (!pd)
+		return -ENOMEM;
+
+	cpumask_copy(pinst->cpumask.pcpu, pcpumask);
+	cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
+
+	padata_replace(pinst, pd);
+
+	if (valid)
+		__padata_start(pinst);
+
+	return 0;
+}
+
+/**
+ * padata_set_cpumasks - Set both parallel and serial cpumasks. The first
+ *                       one is used by parallel workers and the second one
+ *                       by the wokers doing serialization.
+ *
+ * @pinst: padata instance
+ * @pcpumask: the cpumask to use for parallel workers
+ * @cbcpumask: the cpumsak to use for serial workers
+ */
+int padata_set_cpumasks(struct padata_instance *pinst, cpumask_var_t pcpumask,
+			cpumask_var_t cbcpumask)
+{
+	int err;
+
+	mutex_lock(&pinst->lock);
+	get_online_cpus();
+
+	err = __padata_set_cpumasks(pinst, pcpumask, cbcpumask);
+
+	put_online_cpus();
+	mutex_unlock(&pinst->lock);
+
+	return err;
+
+}
+EXPORT_SYMBOL(padata_set_cpumasks);
+
 /**
  * padata_set_cpumask: Sets specified by @cpumask_type cpumask to the value
  *                     equivalent to @cpumask.
@@ -636,6 +696,10 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
 		       cpumask_var_t cpumask)
 {
 	struct cpumask *serial_mask, *parallel_mask;
+	int err = -EINVAL;
+
+	mutex_lock(&pinst->lock);
+	get_online_cpus();
 
 	switch (cpumask_type) {
 	case PADATA_CPU_PARALLEL:
@@ -647,65 +711,18 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
 		serial_mask = cpumask;
 		break;
 	default:
-		return -EINVAL;
+		 goto out;
 	}
 
-	return __padata_set_cpumasks(pinst, parallel_mask, serial_mask);
-}
-EXPORT_SYMBOL(padata_set_cpumask);
-
-/**
- * __padata_set_cpumasks - Set both parallel and serial cpumasks. The first
- *                         one is used by parallel workers and the second one
- *                         by the wokers doing serialization.
- *
- * @pinst: padata instance
- * @pcpumask: the cpumask to use for parallel workers
- * @cbcpumask: the cpumsak to use for serial workers
- */
-int __padata_set_cpumasks(struct padata_instance *pinst,
-			  cpumask_var_t pcpumask, cpumask_var_t cbcpumask)
-{
-	int valid;
-	int err = 0;
-	struct parallel_data *pd = NULL;
-
-	mutex_lock(&pinst->lock);
-	get_online_cpus();
-
-	valid = padata_validate_cpumask(pinst, pcpumask);
-	if (!valid) {
-		__padata_stop(pinst);
-		goto out_replace;
-	}
-
-	valid = padata_validate_cpumask(pinst, cbcpumask);
-	if (!valid)
-		__padata_stop(pinst);
-
-out_replace:
-	pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
-	if (!pd) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	cpumask_copy(pinst->cpumask.pcpu, pcpumask);
-	cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
-
-	padata_replace(pinst, pd);
-
-	if (valid)
-		__padata_start(pinst);
+	err =  __padata_set_cpumasks(pinst, parallel_mask, serial_mask);
 
 out:
 	put_online_cpus();
 	mutex_unlock(&pinst->lock);
 
 	return err;
-
 }
-EXPORT_SYMBOL(__padata_set_cpumasks);
+EXPORT_SYMBOL(padata_set_cpumask);
 
 static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
 {
-- 
cgit v1.2.3-70-g09d2


From c635696c7c0fbc720698dbec34bb83e53df6a967 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 27 Jul 2010 07:15:50 +0200
Subject: padata: Pass the padata cpumasks to the cpumask_change_notifier chain

We pass a pointer to the new padata cpumasks to the cpumask_change_notifier
chain. So users can access the cpumasks without the need of an extra
padata_get_cpumask function.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/padata.h | 40 +++++++++++++++++++++-------------------
 kernel/padata.c        |  3 ++-
 2 files changed, 23 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/padata.h b/include/linux/padata.h
index bb0fc5dd0bb..43db792f44d 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -98,6 +98,16 @@ struct padata_parallel_queue {
        int                   cpu_index;
 };
 
+/**
+ * struct padata_cpumask - The cpumasks for the parallel/serial workers
+ *
+ * @pcpu: cpumask for the parallel workers.
+ * @cbcpu: cpumask for the serial (callback) workers.
+ */
+struct padata_cpumask {
+	cpumask_var_t	pcpu;
+	cpumask_var_t	cbcpu;
+};
 
 /**
  * struct parallel_data - Internal control structure, covers everything
@@ -110,8 +120,7 @@ struct padata_parallel_queue {
  * @reorder_objects: Number of objects waiting in the reorder queues.
  * @refcnt: Number of objects holding a reference on this parallel_data.
  * @max_seq_nr:  Maximal used sequence number.
- * @cpumask: Contains two cpumasks: pcpu and cbcpu for
- *           parallel and serial workers respectively.
+ * @cpumask: The cpumasks in use for parallel and serial workers.
  * @lock: Reorder lock.
  * @processed: Number of already processed objects.
  * @timer: Reorder timer.
@@ -120,17 +129,14 @@ struct parallel_data {
 	struct padata_instance		*pinst;
 	struct padata_parallel_queue	*pqueue;
 	struct padata_serial_queue	*squeue;
-	atomic_t			 seq_nr;
-	atomic_t			 reorder_objects;
-	atomic_t			 refcnt;
-	unsigned int			 max_seq_nr;
-	struct {
-		cpumask_var_t		 pcpu;
-		cpumask_var_t		 cbcpu;
-	} cpumask;
-	spinlock_t                       lock ____cacheline_aligned;
-	unsigned int			 processed;
-	struct timer_list		 timer;
+	atomic_t			seq_nr;
+	atomic_t			reorder_objects;
+	atomic_t			refcnt;
+	unsigned int			max_seq_nr;
+	struct padata_cpumask		cpumask;
+	spinlock_t                      lock ____cacheline_aligned;
+	unsigned int			processed;
+	struct timer_list		timer;
 };
 
 /**
@@ -139,8 +145,7 @@ struct parallel_data {
  * @cpu_notifier: cpu hotplug notifier.
  * @wq: The workqueue in use.
  * @pd: The internal control structure.
- * @cpumask: User supplied cpumask. Contains two cpumasks: pcpu and
- *           cbcpu for parallel and serial works respectivly.
+ * @cpumask: User supplied cpumasks for parallel and serial works.
  * @cpumask_change_notifier: Notifiers chain for user-defined notify
  *            callbacks that will be called when either @pcpu or @cbcpu
  *            or both cpumasks change.
@@ -152,10 +157,7 @@ struct padata_instance {
 	struct notifier_block		 cpu_notifier;
 	struct workqueue_struct		*wq;
 	struct parallel_data		*pd;
-	struct {
-		cpumask_var_t		 pcpu;
-		cpumask_var_t		 cbcpu;
-	} cpumask;
+	struct padata_cpumask		cpumask;
 	struct blocking_notifier_head	 cpumask_change_notifier;
 	struct kobject                   kobj;
 	struct mutex			 lock;
diff --git a/kernel/padata.c b/kernel/padata.c
index 4987203770b..1c8c1d1d301 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -538,7 +538,8 @@ static void padata_replace(struct padata_instance *pinst,
 
 	if (notification_mask)
 		blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
-					     notification_mask, pinst);
+					     notification_mask,
+					     &pd_new->cpumask);
 
 	pinst->flags &= ~PADATA_RESET;
 }
-- 
cgit v1.2.3-70-g09d2


From 0500e9b3f11ce84fc6ee48a3e29909145e58ba48 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 27 Jul 2010 07:19:27 +0200
Subject: padata: Remove padata_get_cpumask

A function that copies the padata cpumasks to a user buffer
is a bit error prone. The cpumask can change any time so we
can't be sure to have the right cpumask when using this function.
A user who is interested in the padata cpumasks should register
to the padata cpumask notifier chain instead. Users of
padata_get_cpumask are already updated, so we can remove it.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/padata.h |  2 --
 kernel/padata.c        | 35 -----------------------------------
 2 files changed, 37 deletions(-)

(limited to 'include')

diff --git a/include/linux/padata.h b/include/linux/padata.h
index 43db792f44d..bdcd1e9eace 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -176,8 +176,6 @@ extern void padata_free(struct padata_instance *pinst);
 extern int padata_do_parallel(struct padata_instance *pinst,
 			      struct padata_priv *padata, int cb_cpu);
 extern void padata_do_serial(struct padata_priv *padata);
-extern int padata_get_cpumask(struct padata_instance *pinst,
-			      int cpumask_type, struct cpumask *out_mask);
 extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
 			      cpumask_var_t cpumask);
 extern int padata_set_cpumasks(struct padata_instance *pinst,
diff --git a/kernel/padata.c b/kernel/padata.c
index 1c8c1d1d301..fd4679266ed 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -589,41 +589,6 @@ static bool padata_validate_cpumask(struct padata_instance *pinst,
 	return true;
 }
 
-/**
- * padata_get_cpumask: Fetch serial or parallel cpumask from the
- *                     given padata instance and copy it to @out_mask
- *
- * @pinst: A pointer to padata instance
- * @cpumask_type: Specifies which cpumask will be copied.
- *                Possible values are PADATA_CPU_SERIAL *or* PADATA_CPU_PARALLEL
- *                corresponding to serial and parallel cpumask respectively.
- * @out_mask: A pointer to cpumask structure where selected
- *            cpumask will be copied.
- */
-int padata_get_cpumask(struct padata_instance *pinst,
-		       int cpumask_type, struct cpumask *out_mask)
-{
-	struct parallel_data *pd;
-	int ret = 0;
-
-	rcu_read_lock_bh();
-	pd = rcu_dereference(pinst->pd);
-	switch (cpumask_type) {
-	case PADATA_CPU_SERIAL:
-		cpumask_copy(out_mask, pd->cpumask.cbcpu);
-		break;
-	case PADATA_CPU_PARALLEL:
-		cpumask_copy(out_mask, pd->cpumask.pcpu);
-		break;
-	default:
-		ret = -EINVAL;
-	}
-
-	rcu_read_unlock_bh();
-	return ret;
-}
-EXPORT_SYMBOL(padata_get_cpumask);
-
 static int __padata_set_cpumasks(struct padata_instance *pinst,
 				 cpumask_var_t pcpumask,
 				 cpumask_var_t cbcpumask)
-- 
cgit v1.2.3-70-g09d2


From 7cfe249475fdd82ad3c2767a9b906cc775dab868 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Thu, 15 Jul 2010 10:47:14 +0100
Subject: ARM: AMBA: Add pclk support to AMBA bus infrastructure

Some platforms gate the pclk (APB - the bus - clock) to the peripherals
for power saving, along with the functional clock.  When devices are
accessed without pclk enabled, the kernel will oops.

This gives them two options:

1. Leave all clocks on all the time.
2. Attempt to gate pclk along with the functional clock.

(With some hardware, pclk and the functional clock are gated by a single
bit in a register.)

(1) has the disadvantage that it causes increased power usage, which is
bad news for battery operated devices.  (2) can lead to kernel oops if
registers are accessed without the functional clock being enabled.

So, introduce the apb_pclk signal in such a way existing drivers don't
need to be updated.  Essentially, this means we guarantee that:

1. pclk will be enabled whenever the driver is bound to a device -
   from probe() to remove() time.
2. pclk will also be enabled when reading the primecell IDs from the device.

In order to allow drivers to be incrementally updated to achieve greater
power savings, we provide two additional calls to allow drivers to
manage the pclk - amba_pclk_enable()/amba_pclk_disable().

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/amba/bus.c       | 88 +++++++++++++++++++++++++++++++++++++-----------
 include/linux/amba/bus.h | 11 ++++++
 2 files changed, 80 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index f60b2b6a093..d31590e7011 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -122,6 +122,31 @@ static int __init amba_init(void)
 
 postcore_initcall(amba_init);
 
+static int amba_get_enable_pclk(struct amba_device *pcdev)
+{
+	struct clk *pclk = clk_get(&pcdev->dev, "apb_pclk");
+	int ret;
+
+	pcdev->pclk = pclk;
+
+	if (IS_ERR(pclk))
+		return PTR_ERR(pclk);
+
+	ret = clk_enable(pclk);
+	if (ret)
+		clk_put(pclk);
+
+	return ret;
+}
+
+static void amba_put_disable_pclk(struct amba_device *pcdev)
+{
+	struct clk *pclk = pcdev->pclk;
+
+	clk_disable(pclk);
+	clk_put(pclk);
+}
+
 /*
  * These are the device model conversion veneers; they convert the
  * device model structures to our more specific structures.
@@ -130,17 +155,33 @@ static int amba_probe(struct device *dev)
 {
 	struct amba_device *pcdev = to_amba_device(dev);
 	struct amba_driver *pcdrv = to_amba_driver(dev->driver);
-	struct amba_id *id;
+	struct amba_id *id = amba_lookup(pcdrv->id_table, pcdev);
+	int ret;
 
-	id = amba_lookup(pcdrv->id_table, pcdev);
+	do {
+		ret = amba_get_enable_pclk(pcdev);
+		if (ret)
+			break;
+
+		ret = pcdrv->probe(pcdev, id);
+		if (ret == 0)
+			break;
 
-	return pcdrv->probe(pcdev, id);
+		amba_put_disable_pclk(pcdev);
+	} while (0);
+
+	return ret;
 }
 
 static int amba_remove(struct device *dev)
 {
+	struct amba_device *pcdev = to_amba_device(dev);
 	struct amba_driver *drv = to_amba_driver(dev->driver);
-	return drv->remove(to_amba_device(dev));
+	int ret = drv->remove(pcdev);
+
+	amba_put_disable_pclk(pcdev);
+
+	return ret;
 }
 
 static void amba_shutdown(struct device *dev)
@@ -203,7 +244,6 @@ static void amba_device_release(struct device *dev)
  */
 int amba_device_register(struct amba_device *dev, struct resource *parent)
 {
-	u32 pid, cid;
 	u32 size;
 	void __iomem *tmp;
 	int i, ret;
@@ -241,25 +281,35 @@ int amba_device_register(struct amba_device *dev, struct resource *parent)
 		goto err_release;
 	}
 
-	/*
-	 * Read pid and cid based on size of resource
-	 * they are located at end of region
-	 */
-	for (pid = 0, i = 0; i < 4; i++)
-		pid |= (readl(tmp + size - 0x20 + 4 * i) & 255) << (i * 8);
-	for (cid = 0, i = 0; i < 4; i++)
-		cid |= (readl(tmp + size - 0x10 + 4 * i) & 255) << (i * 8);
+	ret = amba_get_enable_pclk(dev);
+	if (ret == 0) {
+		u32 pid, cid;
 
-	iounmap(tmp);
+		/*
+		 * Read pid and cid based on size of resource
+		 * they are located at end of region
+		 */
+		for (pid = 0, i = 0; i < 4; i++)
+			pid |= (readl(tmp + size - 0x20 + 4 * i) & 255) <<
+				(i * 8);
+		for (cid = 0, i = 0; i < 4; i++)
+			cid |= (readl(tmp + size - 0x10 + 4 * i) & 255) <<
+				(i * 8);
 
-	if (cid == 0xb105f00d)
-		dev->periphid = pid;
+		amba_put_disable_pclk(dev);
 
-	if (!dev->periphid) {
-		ret = -ENODEV;
-		goto err_release;
+		if (cid == 0xb105f00d)
+			dev->periphid = pid;
+
+		if (!dev->periphid)
+			ret = -ENODEV;
 	}
 
+	iounmap(tmp);
+
+	if (ret)
+		goto err_release;
+
 	ret = device_add(&dev->dev);
 	if (ret)
 		goto err_release;
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 8b103860783..b0c17401243 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -14,14 +14,19 @@
 #ifndef ASMARM_AMBA_H
 #define ASMARM_AMBA_H
 
+#include <linux/clk.h>
 #include <linux/device.h>
+#include <linux/err.h>
 #include <linux/resource.h>
 
 #define AMBA_NR_IRQS	2
 
+struct clk;
+
 struct amba_device {
 	struct device		dev;
 	struct resource		res;
+	struct clk		*pclk;
 	u64			dma_mask;
 	unsigned int		periphid;
 	unsigned int		irq[AMBA_NR_IRQS];
@@ -59,6 +64,12 @@ struct amba_device *amba_find_device(const char *, struct device *, unsigned int
 int amba_request_regions(struct amba_device *, const char *);
 void amba_release_regions(struct amba_device *);
 
+#define amba_pclk_enable(d)	\
+	(IS_ERR((d)->pclk) ? 0 : clk_enable((d)->pclk))
+
+#define amba_pclk_disable(d)	\
+	do { if (!IS_ERR((d)->pclk)) clk_disable((d)->pclk); } while (0)
+
 #define amba_config(d)	(((d)->periphid >> 24) & 0xff)
 #define amba_rev(d)	(((d)->periphid >> 20) & 0x0f)
 #define amba_manf(d)	(((d)->periphid >> 12) & 0xff)
-- 
cgit v1.2.3-70-g09d2


From bf998156d24bcb127318ad5bf531ac3bdfcd6449 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Mon, 31 May 2010 14:28:19 +0800
Subject: KVM: Avoid killing userspace through guest SRAO MCE on unmapped pages

In common cases, a guest SRAO MCE will cause the corresponding poisoned
page to be un-mapped and SIGBUS to be sent to QEMU-KVM; QEMU-KVM will
then relay the MCE to the guest OS.

But it is reported that if the poisoned page is accessed in the guest
after unmapping and before the MCE is relayed to the guest OS, userspace
will be killed.

The reason is as follows. Because the poisoned page has been un-mapped,
a guest access will cause a guest exit and kvm_mmu_page_fault will be
called. kvm_mmu_page_fault cannot get the poisoned page for the fault
address, so kernel and user space MMIO processing are tried in turn. In
user MMIO processing, the poisoned page is accessed again, and userspace
is then killed by force_sig_info.

To fix the bug, kvm_mmu_page_fault sends a HWPOISON signal to QEMU-KVM
and does not try kernel and user space MMIO processing for a poisoned
page.

[xiao: fix warning introduced by avi]

Reported-by: Max Asbock <masbock@linux.vnet.ibm.com>
Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c         | 34 ++++++++++++++++++++++++++--------
 arch/x86/kvm/paging_tmpl.h |  7 ++-----
 include/linux/kvm_host.h   |  1 +
 include/linux/mm.h         |  8 ++++++++
 mm/memory-failure.c        | 30 ++++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c        | 30 ++++++++++++++++++++++++++++--
 6 files changed, 95 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b1ed0a1a591..b666d8d106a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -32,6 +32,7 @@
 #include <linux/compiler.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
+#include <linux/uaccess.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -1960,6 +1961,27 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 	return pt_write;
 }
 
+static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn)
+{
+	char buf[1];
+	void __user *hva;
+	int r;
+
+	/* Touch the page, so send SIGBUS */
+	hva = (void __user *)gfn_to_hva(kvm, gfn);
+	r = copy_from_user(buf, hva, 1);
+}
+
+static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn)
+{
+	kvm_release_pfn_clean(pfn);
+	if (is_hwpoison_pfn(pfn)) {
+		kvm_send_hwpoison_signal(kvm, gfn);
+		return 0;
+	}
+	return 1;
+}
+
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 {
 	int r;
@@ -1983,10 +2005,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 
 	/* mmio */
-	if (is_error_pfn(pfn)) {
-		kvm_release_pfn_clean(pfn);
-		return 1;
-	}
+	if (is_error_pfn(pfn))
+		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
@@ -2198,10 +2218,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
-	if (is_error_pfn(pfn)) {
-		kvm_release_pfn_clean(pfn);
-		return 1;
-	}
+	if (is_error_pfn(pfn))
+		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 2331bdc2b54..c7f27779c99 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -431,11 +431,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
 
 	/* mmio */
-	if (is_error_pfn(pfn)) {
-		pgprintk("gfn %lx is mmio\n", walker.gfn);
-		kvm_release_pfn_clean(pfn);
-		return 1;
-	}
+	if (is_error_pfn(pfn))
+		return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7cb116afa1c..a0e019769f5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -266,6 +266,7 @@ extern pfn_t bad_pfn;
 
 int is_error_page(struct page *page);
 int is_error_pfn(pfn_t pfn);
+int is_hwpoison_pfn(pfn_t pfn);
 int kvm_is_error_hva(unsigned long addr);
 int kvm_set_memory_region(struct kvm *kvm,
 			  struct kvm_userspace_memory_region *mem,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a2b48041b91..7a9ab7db197 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1465,6 +1465,14 @@ extern int sysctl_memory_failure_recovery;
 extern void shake_page(struct page *p, int access);
 extern atomic_long_t mce_bad_pages;
 extern int soft_offline_page(struct page *page, int flags);
+#ifdef CONFIG_MEMORY_FAILURE
+int is_hwpoison_address(unsigned long addr);
+#else
+static inline int is_hwpoison_address(unsigned long addr)
+{
+	return 0;
+}
+#endif
 
 extern void dump_page(struct page *page);
 
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 620b0b46159..378b0f61fd3 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -45,6 +45,7 @@
 #include <linux/page-isolation.h>
 #include <linux/suspend.h>
 #include <linux/slab.h>
+#include <linux/swapops.h>
 #include "internal.h"
 
 int sysctl_memory_failure_early_kill __read_mostly = 0;
@@ -1296,3 +1297,32 @@ done:
 	/* keep elevated page count for bad page */
 	return ret;
 }
+
+int is_hwpoison_address(unsigned long addr)
+{
+	pgd_t *pgdp;
+	pud_t pud, *pudp;
+	pmd_t pmd, *pmdp;
+	pte_t pte, *ptep;
+	swp_entry_t entry;
+
+	pgdp = pgd_offset(current->mm, addr);
+	if (!pgd_present(*pgdp))
+		return 0;
+	pudp = pud_offset(pgdp, addr);
+	pud = *pudp;
+	if (!pud_present(pud) || pud_large(pud))
+		return 0;
+	pmdp = pmd_offset(pudp, addr);
+	pmd = *pmdp;
+	if (!pmd_present(pmd) || pmd_large(pmd))
+		return 0;
+	ptep = pte_offset_map(pmdp, addr);
+	pte = *ptep;
+	pte_unmap(ptep);
+	if (!is_swap_pte(pte))
+		return 0;
+	entry = pte_to_swp_entry(pte);
+	return is_hwpoison_entry(entry);
+}
+EXPORT_SYMBOL_GPL(is_hwpoison_address);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f032806a212..187aa8d984a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -92,6 +92,9 @@ static bool kvm_rebooting;
 
 static bool largepages_enabled = true;
 
+struct page *hwpoison_page;
+pfn_t hwpoison_pfn;
+
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn)) {
@@ -810,16 +813,22 @@ EXPORT_SYMBOL_GPL(kvm_disable_largepages);
 
 int is_error_page(struct page *page)
 {
-	return page == bad_page;
+	return page == bad_page || page == hwpoison_page;
 }
 EXPORT_SYMBOL_GPL(is_error_page);
 
 int is_error_pfn(pfn_t pfn)
 {
-	return pfn == bad_pfn;
+	return pfn == bad_pfn || pfn == hwpoison_pfn;
 }
 EXPORT_SYMBOL_GPL(is_error_pfn);
 
+int is_hwpoison_pfn(pfn_t pfn)
+{
+	return pfn == hwpoison_pfn;
+}
+EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
+
 static inline unsigned long bad_hva(void)
 {
 	return PAGE_OFFSET;
@@ -945,6 +954,11 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
 	if (unlikely(npages != 1)) {
 		struct vm_area_struct *vma;
 
+		if (is_hwpoison_address(addr)) {
+			get_page(hwpoison_page);
+			return page_to_pfn(hwpoison_page);
+		}
+
 		down_read(&current->mm->mmap_sem);
 		vma = find_vma(current->mm, addr);
 
@@ -2197,6 +2211,15 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 
 	bad_pfn = page_to_pfn(bad_page);
 
+	hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+	if (hwpoison_page == NULL) {
+		r = -ENOMEM;
+		goto out_free_0;
+	}
+
+	hwpoison_pfn = page_to_pfn(hwpoison_page);
+
 	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
 		r = -ENOMEM;
 		goto out_free_0;
@@ -2269,6 +2292,8 @@ out_free_1:
 out_free_0a:
 	free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
+	if (hwpoison_page)
+		__free_page(hwpoison_page);
 	__free_page(bad_page);
 out:
 	kvm_arch_exit();
@@ -2290,6 +2315,7 @@ void kvm_exit(void)
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();
 	free_cpumask_var(cpus_hardware_enabled);
+	__free_page(hwpoison_page);
 	__free_page(bad_page);
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
-- 
cgit v1.2.3-70-g09d2


From d94e1dc9af60e3431a586c3edfbe42d8a0d3932b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 3 May 2010 16:54:48 +0300
Subject: KVM: Get rid of KVM_REQ_KICK

KVM_REQ_KICK poisons vcpu->requests by having a bit set during normal
operation.  This causes the fast path check for a clear vcpu->requests
to fail all the time, triggering tons of atomic operations.

Fix by replacing KVM_REQ_KICK with a vcpu->guest_mode atomic.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c       | 17 ++++++++++-------
 include/linux/kvm_host.h |  1 +
 2 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 63c87adcec4..fc5611b4007 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4604,13 +4604,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	if (vcpu->fpu_active)
 		kvm_load_guest_fpu(vcpu);
 
-	local_irq_disable();
+	atomic_set(&vcpu->guest_mode, 1);
+	smp_wmb();
 
-	clear_bit(KVM_REQ_KICK, &vcpu->requests);
-	smp_mb__after_clear_bit();
+	local_irq_disable();
 
-	if (vcpu->requests || need_resched() || signal_pending(current)) {
-		set_bit(KVM_REQ_KICK, &vcpu->requests);
+	if (!atomic_read(&vcpu->guest_mode) || vcpu->requests
+	    || need_resched() || signal_pending(current)) {
+		atomic_set(&vcpu->guest_mode, 0);
+		smp_wmb();
 		local_irq_enable();
 		preempt_enable();
 		r = 1;
@@ -4655,7 +4657,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	if (hw_breakpoint_active())
 		hw_breakpoint_restore();
 
-	set_bit(KVM_REQ_KICK, &vcpu->requests);
+	atomic_set(&vcpu->guest_mode, 0);
+	smp_wmb();
 	local_irq_enable();
 
 	++vcpu->stat.exits;
@@ -5580,7 +5583,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 
 	me = get_cpu();
 	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
-		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
+		if (atomic_xchg(&vcpu->guest_mode, 0))
 			smp_send_reschedule(cpu);
 	put_cpu();
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a0e019769f5..2c62319727e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -81,6 +81,7 @@ struct kvm_vcpu {
 	int vcpu_id;
 	struct mutex mutex;
 	int   cpu;
+	atomic_t guest_mode;
 	struct kvm_run *run;
 	unsigned long requests;
 	unsigned long guest_debug;
-- 
cgit v1.2.3-70-g09d2


From 22ae782f86b726f9cea752c0f269ff6dcdf2f6e1 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 29 Jul 2010 11:49:01 -0600
Subject: of/address: Clean up function declarations

This patch moves the declaration of of_get_address(), of_get_pci_address(),
and of_pci_address_to_resource() out of arch code and into the common
linux/of_address header file.

This patch also fixes some of the asm/prom.h ordering issues.  It still
includes some header files that it ideally shouldn't be, but at least the
ordering is consistent now so that of_* overrides work.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 arch/microblaze/include/asm/prom.h        | 33 +++++++--------------
 arch/powerpc/include/asm/prom.h           | 49 +++++++------------------------
 arch/powerpc/kernel/legacy_serial.c       |  1 +
 arch/powerpc/kernel/pci-common.c          |  1 +
 arch/powerpc/platforms/52xx/lite5200.c    |  1 +
 arch/powerpc/platforms/amigaone/setup.c   |  3 +-
 arch/powerpc/platforms/iseries/mf.c       |  1 +
 arch/powerpc/platforms/powermac/feature.c |  2 ++
 arch/powerpc/sysdev/bestcomm/sram.c       |  1 +
 arch/powerpc/sysdev/fsl_gtm.c             |  1 +
 drivers/char/bsr.c                        |  1 +
 drivers/net/fsl_pq_mdio.c                 |  1 +
 drivers/net/xilinx_emaclite.c             |  2 +-
 drivers/serial/uartlite.c                 |  1 +
 drivers/spi/mpc512x_psc_spi.c             |  1 +
 drivers/spi/mpc52xx_psc_spi.c             |  1 +
 drivers/spi/xilinx_spi_of.c               |  1 +
 drivers/usb/gadget/fsl_qe_udc.c           |  1 +
 drivers/video/controlfb.c                 |  2 ++
 drivers/video/offb.c                      |  3 +-
 include/linux/of_address.h                | 32 ++++++++++++++++++++
 21 files changed, 76 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index cb9c3dd9a23..101fa098f62 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -20,11 +20,6 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/of_fdt.h>
-#include <linux/proc_fs.h>
-#include <linux/platform_device.h>
 #include <asm/irq.h>
 #include <asm/atomic.h>
 
@@ -52,25 +47,9 @@ extern void pci_create_OF_bus_map(void);
  * OF address retreival & translation
  */
 
-/* Extract an address from a device, returns the region size and
- * the address space flags too. The PCI version uses a BAR number
- * instead of an absolute index
- */
-extern const u32 *of_get_address(struct device_node *dev, int index,
-			u64 *size, unsigned int *flags);
-extern const u32 *of_get_pci_address(struct device_node *dev, int bar_no,
-			u64 *size, unsigned int *flags);
-
-extern int of_pci_address_to_resource(struct device_node *dev, int bar,
-				struct resource *r);
-
 #ifdef CONFIG_PCI
 extern unsigned long pci_address_to_pio(phys_addr_t address);
-#else
-static inline unsigned long pci_address_to_pio(phys_addr_t address)
-{
-	return (unsigned long)-1;
-}
+#define pci_address_to_pio pci_address_to_pio
 #endif	/* CONFIG_PCI */
 
 /* Parse the ibm,dma-window property of an OF node into the busno, phys and
@@ -99,8 +78,18 @@ extern const void *of_get_mac_address(struct device_node *np);
  * resolving using the OF tree walking.
  */
 struct pci_dev;
+struct of_irq;
 extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
 
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
+
+/* These includes are put at the bottom because they may contain things
+ * that are overridden by this file.  Ideally they shouldn't be included
+ * by this file, but there are a bunch of .c files that currently depend
+ * on it.  Eventually they will be cleaned up. */
+#include <linux/of_fdt.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
 #endif /* _ASM_MICROBLAZE_PROM_H */
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 55bccc0a21c..ae26f2efd08 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -17,11 +17,6 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/types.h>
-#include <linux/of_fdt.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/proc_fs.h>
-#include <linux/platform_device.h>
 #include <asm/irq.h>
 #include <asm/atomic.h>
 
@@ -49,41 +44,9 @@ extern void pci_create_OF_bus_map(void);
 extern u64 of_translate_dma_address(struct device_node *dev,
 				    const u32 *in_addr);
 
-/* Extract an address from a device, returns the region size and
- * the address space flags too. The PCI version uses a BAR number
- * instead of an absolute index
- */
-extern const u32 *of_get_address(struct device_node *dev, int index,
-			   u64 *size, unsigned int *flags);
-#ifdef CONFIG_PCI
-extern const u32 *of_get_pci_address(struct device_node *dev, int bar_no,
-			       u64 *size, unsigned int *flags);
-#else
-static inline const u32 *of_get_pci_address(struct device_node *dev,
-		int bar_no, u64 *size, unsigned int *flags)
-{
-	return NULL;
-}
-#endif /* CONFIG_PCI */
-
-#ifdef CONFIG_PCI
-extern int of_pci_address_to_resource(struct device_node *dev, int bar,
-				      struct resource *r);
-#else
-static inline int of_pci_address_to_resource(struct device_node *dev, int bar,
-		struct resource *r)
-{
-	return -ENOSYS;
-}
-#endif /* CONFIG_PCI */
-
 #ifdef CONFIG_PCI
 extern unsigned long pci_address_to_pio(phys_addr_t address);
-#else
-static inline unsigned long pci_address_to_pio(phys_addr_t address)
-{
-	return (unsigned long)-1;
-}
+#define pci_address_to_pio pci_address_to_pio
 #endif	/* CONFIG_PCI */
 
 /* Parse the ibm,dma-window property of an OF node into the busno, phys and
@@ -122,9 +85,19 @@ static inline int of_node_to_nid(struct device_node *device) { return 0; }
  * resolving using the OF tree walking.
  */
 struct pci_dev;
+struct of_irq;
 extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
 
 extern void of_instantiate_rtc(void);
 
+/* These includes are put at the bottom because they may contain things
+ * that are overridden by this file.  Ideally they shouldn't be included
+ * by this file, but there are a bunch of .c files that currently depend
+ * on it.  Eventually they will be cleaned up. */
+#include <linux/of_fdt.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
 #endif /* __KERNEL__ */
 #endif /* _POWERPC_PROM_H */
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 035ada5443e..c1fd0f9658f 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -4,6 +4,7 @@
 #include <linux/serial_core.h>
 #include <linux/console.h>
 #include <linux/pci.h>
+#include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <asm/io.h>
 #include <asm/mmu.h>
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 5b38f6ae2b2..9021c4ad4bb 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -21,6 +21,7 @@
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
+#include <linux/of_address.h>
 #include <linux/mm.h>
 #include <linux/list.h>
 #include <linux/syscalls.h>
diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c
index 6d584f4e3c9..de55bc0584b 100644
--- a/arch/powerpc/platforms/52xx/lite5200.c
+++ b/arch/powerpc/platforms/52xx/lite5200.c
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/root_dev.h>
 #include <linux/initrd.h>
 #include <asm/time.h>
diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c
index fb4eb0df054..03aabc0e16a 100644
--- a/arch/powerpc/platforms/amigaone/setup.c
+++ b/arch/powerpc/platforms/amigaone/setup.c
@@ -13,12 +13,13 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/seq_file.h>
 #include <generated/utsrelease.h>
 
 #include <asm/machdep.h>
 #include <asm/cputable.h>
-#include <asm/prom.h>
 #include <asm/pci-bridge.h>
 #include <asm/i8259.h>
 #include <asm/time.h>
diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c
index d2c1d497846..33e5fc7334f 100644
--- a/arch/powerpc/platforms/iseries/mf.c
+++ b/arch/powerpc/platforms/iseries/mf.c
@@ -30,6 +30,7 @@
 #include <linux/init.h>
 #include <linux/completion.h>
 #include <linux/delay.h>
+#include <linux/proc_fs.h>
 #include <linux/dma-mapping.h>
 #include <linux/bcd.h>
 #include <linux/rtc.h>
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 9e1b9fd7520..75eec031e7a 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -21,6 +21,8 @@
 #include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/spinlock.h>
 #include <linux/adb.h>
 #include <linux/pmu.h>
diff --git a/arch/powerpc/sysdev/bestcomm/sram.c b/arch/powerpc/sysdev/bestcomm/sram.c
index 5d74ef7a651..1225012a681 100644
--- a/arch/powerpc/sysdev/bestcomm/sram.c
+++ b/arch/powerpc/sysdev/bestcomm/sram.c
@@ -11,6 +11,7 @@
  * kind, whether express or implied.
  */
 
+#include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c
index eca4545dd52..7dd2885321a 100644
--- a/arch/powerpc/sysdev/fsl_gtm.c
+++ b/arch/powerpc/sysdev/fsl_gtm.c
@@ -14,6 +14,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <linux/io.h>
diff --git a/drivers/char/bsr.c b/drivers/char/bsr.c
index 89d871ef8c2..91917133ae0 100644
--- a/drivers/char/bsr.c
+++ b/drivers/char/bsr.c
@@ -23,6 +23,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
+#include <linux/fs.h>
 #include <linux/module.h>
 #include <linux/cdev.h>
 #include <linux/list.h>
diff --git a/drivers/net/fsl_pq_mdio.c b/drivers/net/fsl_pq_mdio.c
index b4c41d72c42..f53f850b641 100644
--- a/drivers/net/fsl_pq_mdio.c
+++ b/drivers/net/fsl_pq_mdio.c
@@ -35,6 +35,7 @@
 #include <linux/mii.h>
 #include <linux/phy.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/of_mdio.h>
 #include <linux/of_platform.h>
 
diff --git a/drivers/net/xilinx_emaclite.c b/drivers/net/xilinx_emaclite.c
index d04c5b26205..b2c2f391b29 100644
--- a/drivers/net/xilinx_emaclite.c
+++ b/drivers/net/xilinx_emaclite.c
@@ -20,7 +20,7 @@
 #include <linux/skbuff.h>
 #include <linux/io.h>
 #include <linux/slab.h>
-
+#include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
 #include <linux/of_mdio.h>
diff --git a/drivers/serial/uartlite.c b/drivers/serial/uartlite.c
index 8acccd56437..caf085d3a76 100644
--- a/drivers/serial/uartlite.c
+++ b/drivers/serial/uartlite.c
@@ -21,6 +21,7 @@
 #include <asm/io.h>
 #if defined(CONFIG_OF) && (defined(CONFIG_PPC32) || defined(CONFIG_MICROBLAZE))
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
 
diff --git a/drivers/spi/mpc512x_psc_spi.c b/drivers/spi/mpc512x_psc_spi.c
index 1bb4315f5f8..10baac3f8ea 100644
--- a/drivers/spi/mpc512x_psc_spi.c
+++ b/drivers/spi/mpc512x_psc_spi.c
@@ -19,6 +19,7 @@
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
+#include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/workqueue.h>
 #include <linux/completion.h>
diff --git a/drivers/spi/mpc52xx_psc_spi.c b/drivers/spi/mpc52xx_psc_spi.c
index bd81ff90cfb..66d170147dc 100644
--- a/drivers/spi/mpc52xx_psc_spi.c
+++ b/drivers/spi/mpc52xx_psc_spi.c
@@ -16,6 +16,7 @@
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
+#include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/workqueue.h>
 #include <linux/completion.h>
diff --git a/drivers/spi/xilinx_spi_of.c b/drivers/spi/xilinx_spi_of.c
index 87cda0956a8..f53d3f6b9f6 100644
--- a/drivers/spi/xilinx_spi_of.c
+++ b/drivers/spi/xilinx_spi_of.c
@@ -29,6 +29,7 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 
+#include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/of_device.h>
 #include <linux/of_spi.h>
diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c
index 82506ca297d..9648b75f028 100644
--- a/drivers/usb/gadget/fsl_qe_udc.c
+++ b/drivers/usb/gadget/fsl_qe_udc.c
@@ -32,6 +32,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/moduleparam.h>
+#include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/dma-mapping.h>
 #include <linux/usb/ch9.h>
diff --git a/drivers/video/controlfb.c b/drivers/video/controlfb.c
index 49fcbe8f18a..c225dcce89e 100644
--- a/drivers/video/controlfb.c
+++ b/drivers/video/controlfb.c
@@ -40,6 +40,8 @@
 #include <linux/vmalloc.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/pci.h>
diff --git a/drivers/video/offb.c b/drivers/video/offb.c
index 46dda7d8aae..cb163a5397b 100644
--- a/drivers/video/offb.c
+++ b/drivers/video/offb.c
@@ -19,13 +19,14 @@
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/interrupt.h>
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/pci.h>
 #include <asm/io.h>
-#include <asm/prom.h>
 
 #ifdef CONFIG_PPC64
 #include <asm/pci-bridge.h>
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index cc567df9a00..8aea06f0564 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -8,5 +8,37 @@ extern int of_address_to_resource(struct device_node *dev, int index,
 				  struct resource *r);
 extern void __iomem *of_iomap(struct device_node *device, int index);
 
+/* Extract an address from a device, returns the region size and
+ * the address space flags too. The PCI version uses a BAR number
+ * instead of an absolute index
+ */
+extern const u32 *of_get_address(struct device_node *dev, int index,
+			   u64 *size, unsigned int *flags);
+
+#ifndef pci_address_to_pio
+static inline unsigned long pci_address_to_pio(phys_addr_t addr) { return -1; }
+#define pci_address_to_pio pci_address_to_pio
+#endif
+
+#ifdef CONFIG_PCI
+extern const u32 *of_get_pci_address(struct device_node *dev, int bar_no,
+			       u64 *size, unsigned int *flags);
+extern int of_pci_address_to_resource(struct device_node *dev, int bar,
+				      struct resource *r);
+#else /* CONFIG_PCI */
+static inline int of_pci_address_to_resource(struct device_node *dev, int bar,
+				             struct resource *r)
+{
+	return -ENOSYS;
+}
+
+static inline const u32 *of_get_pci_address(struct device_node *dev,
+		int bar_no, u64 *size, unsigned int *flags)
+{
+	return NULL;
+}
+#endif /* CONFIG_PCI */
+
+
 #endif /* __OF_ADDRESS_H */
 
-- 
cgit v1.2.3-70-g09d2


From 2acf923e38fb6a4ce0c57115decbb38d334902ac Mon Sep 17 00:00:00 2001
From: Dexuan Cui <dexuan.cui@intel.com>
Date: Thu, 10 Jun 2010 11:27:12 +0800
Subject: KVM: VMX: Enable XSAVE/XRSTOR for guest

This patch enables the guest to use the XSAVE/XRSTOR instructions.

We assume that host_xcr0 uses all possible bits that the host OS supports.

Guest xcr0 is loaded the same way the FPU state is handled: as late as we can.

Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |   2 +
 arch/x86/include/asm/vmx.h      |   1 +
 arch/x86/kvm/kvm_cache_regs.h   |   6 ++
 arch/x86/kvm/vmx.c              |  13 ++++
 arch/x86/kvm/x86.c              | 130 +++++++++++++++++++++++++++++++++++++---
 include/linux/kvm_host.h        |   2 +-
 6 files changed, 146 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0cd0f2923af..91631b8b209 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -302,6 +302,7 @@ struct kvm_vcpu_arch {
 	} update_pte;
 
 	struct fpu guest_fpu;
+	u64 xcr0;
 
 	gva_t mmio_fault_cr2;
 	struct kvm_pio_request pio;
@@ -605,6 +606,7 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
 void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
+int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
 
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 96a5886d384..9f0cbd987d5 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -267,6 +267,7 @@ enum vmcs_field {
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_WBINVD		54
+#define EXIT_REASON_XSETBV		55
 
 /*
  * Interruption-information format
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index d2a98f8f9af..6491ac8e755 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -71,4 +71,10 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
 	return kvm_read_cr4_bits(vcpu, ~0UL);
 }
 
+static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
+{
+	return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u)
+		| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
+}
+
 #endif
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 26ba61d6af8..864a1b6d155 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -37,6 +37,8 @@
 #include <asm/vmx.h>
 #include <asm/virtext.h>
 #include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
 
 #include "trace.h"
 
@@ -3390,6 +3392,16 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_xsetbv(struct kvm_vcpu *vcpu)
+{
+	u64 new_bv = kvm_read_edx_eax(vcpu);
+	u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+
+	if (kvm_set_xcr(vcpu, index, new_bv) == 0)
+		skip_emulated_instruction(vcpu);
+	return 1;
+}
+
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
 	return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
@@ -3668,6 +3680,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
+	[EXIT_REASON_XSETBV]                  = handle_xsetbv,
 	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
 	[EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
 	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b08c0052e33..b5e644701cc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -65,6 +65,7 @@
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE	\
 			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR	\
+			  | X86_CR4_OSXSAVE \
 			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -150,6 +151,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ NULL }
 };
 
+u64 __read_mostly host_xcr0;
+
+static inline u32 bit(int bitno)
+{
+	return 1 << (bitno & 31);
+}
+
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
 	unsigned slot;
@@ -474,6 +482,61 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 }
 EXPORT_SYMBOL_GPL(kvm_lmsw);
 
+int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
+{
+	u64 xcr0;
+
+	/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now  */
+	if (index != XCR_XFEATURE_ENABLED_MASK)
+		return 1;
+	xcr0 = xcr;
+	if (kvm_x86_ops->get_cpl(vcpu) != 0)
+		return 1;
+	if (!(xcr0 & XSTATE_FP))
+		return 1;
+	if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
+		return 1;
+	if (xcr0 & ~host_xcr0)
+		return 1;
+	vcpu->arch.xcr0 = xcr0;
+	vcpu->guest_xcr0_loaded = 0;
+	return 0;
+}
+
+int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
+{
+	if (__kvm_set_xcr(vcpu, index, xcr)) {
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_set_xcr);
+
+static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 1, 0);
+	return best && (best->ecx & bit(X86_FEATURE_XSAVE));
+}
+
+static void update_cpuid(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 1, 0);
+	if (!best)
+		return;
+
+	/* Update OSXSAVE bit */
+	if (cpu_has_xsave && best->function == 0x1) {
+		best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
+		if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
+			best->ecx |= bit(X86_FEATURE_OSXSAVE);
+	}
+}
+
 int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
 	unsigned long old_cr4 = kvm_read_cr4(vcpu);
@@ -482,6 +545,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (cr4 & CR4_RESERVED_BITS)
 		return 1;
 
+	if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
+		return 1;
+
 	if (is_long_mode(vcpu)) {
 		if (!(cr4 & X86_CR4_PAE))
 			return 1;
@@ -498,6 +564,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if ((cr4 ^ old_cr4) & pdptr_bits)
 		kvm_mmu_reset_context(vcpu);
 
+	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
+		update_cpuid(vcpu);
+
 	return 0;
 }
 
@@ -666,11 +735,6 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
 }
 EXPORT_SYMBOL_GPL(kvm_get_dr);
 
-static inline u32 bit(int bitno)
-{
-	return 1 << (bitno & 31);
-}
-
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -1814,6 +1878,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 	r = 0;
 	kvm_apic_set_version(vcpu);
 	kvm_x86_ops->cpuid_update(vcpu);
+	update_cpuid(vcpu);
 
 out_free:
 	vfree(cpuid_entries);
@@ -1837,6 +1902,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
 	vcpu->arch.cpuid_nent = cpuid->nent;
 	kvm_apic_set_version(vcpu);
 	kvm_x86_ops->cpuid_update(vcpu);
+	update_cpuid(vcpu);
 	return 0;
 
 out:
@@ -1917,7 +1983,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
 		0 /* Reserved, DCA */ | F(XMM4_1) |
 		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
-		0 /* Reserved, XSAVE, OSXSAVE */;
+		0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */;
 	/* cpuid 0x80000001.ecx */
 	const u32 kvm_supported_word6_x86_features =
 		F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
@@ -1932,7 +1998,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
 	switch (function) {
 	case 0:
-		entry->eax = min(entry->eax, (u32)0xb);
+		entry->eax = min(entry->eax, (u32)0xd);
 		break;
 	case 1:
 		entry->edx &= kvm_supported_word0_x86_features;
@@ -1990,6 +2056,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		}
 		break;
 	}
+	case 0xd: {
+		int i;
+
+		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+		for (i = 1; *nent < maxnent; ++i) {
+			if (entry[i - 1].eax == 0 && i != 2)
+				break;
+			do_cpuid_1_ent(&entry[i], function, i);
+			entry[i].flags |=
+			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+			++*nent;
+		}
+		break;
+	}
 	case KVM_CPUID_SIGNATURE: {
 		char signature[12] = "KVMKVMKVM\0\0";
 		u32 *sigptr = (u32 *)signature;
@@ -4125,6 +4205,9 @@ int kvm_arch_init(void *opaque)
 
 	perf_register_guest_info_callbacks(&kvm_guest_cbs);
 
+	if (cpu_has_xsave)
+		host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+
 	return 0;
 
 out:
@@ -4523,6 +4606,25 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
 	}
 }
 
+static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
+{
+	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
+			!vcpu->guest_xcr0_loaded) {
+		/* kvm_set_xcr() also depends on this */
+		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
+		vcpu->guest_xcr0_loaded = 1;
+	}
+}
+
+static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->guest_xcr0_loaded) {
+		if (vcpu->arch.xcr0 != host_xcr0)
+			xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
+		vcpu->guest_xcr0_loaded = 0;
+	}
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -4568,6 +4670,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->prepare_guest_switch(vcpu);
 	if (vcpu->fpu_active)
 		kvm_load_guest_fpu(vcpu);
+	kvm_load_guest_xcr0(vcpu);
 
 	atomic_set(&vcpu->guest_mode, 1);
 	smp_wmb();
@@ -5124,6 +5227,11 @@ int fx_init(struct kvm_vcpu *vcpu)
 
 	fpu_finit(&vcpu->arch.guest_fpu);
 
+	/*
+	 * Ensure guest xcr0 is valid for loading
+	 */
+	vcpu->arch.xcr0 = XSTATE_FP;
+
 	vcpu->arch.cr0 |= X86_CR0_ET;
 
 	return 0;
@@ -5140,6 +5248,12 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 	if (vcpu->guest_fpu_loaded)
 		return;
 
+	/*
+	 * Restore all possible states in the guest,
+	 * and assume host would use all available bits.
+	 * Guest xcr0 would be loaded later.
+	 */
+	kvm_put_guest_xcr0(vcpu);
 	vcpu->guest_fpu_loaded = 1;
 	unlazy_fpu(current);
 	fpu_restore_checking(&vcpu->arch.guest_fpu);
@@ -5148,6 +5262,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
+	kvm_put_guest_xcr0(vcpu);
+
 	if (!vcpu->guest_fpu_loaded)
 		return;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2c62319727e..2d96555cd4e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -88,7 +88,7 @@ struct kvm_vcpu {
 	int srcu_idx;
 
 	int fpu_active;
-	int guest_fpu_loaded;
+	int guest_fpu_loaded, guest_xcr0_loaded;
 	wait_queue_head_t wq;
 	int sigset_active;
 	sigset_t sigset;
-- 
cgit v1.2.3-70-g09d2


From 2d5b5a665508c60577c1088e0405850a965b6795 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Sun, 13 Jun 2010 17:29:39 +0800
Subject: KVM: x86: XSAVE/XRSTOR live migration support

This patch enables save/restore of the xsave state.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 Documentation/kvm/api.txt    |  74 +++++++++++++++++++++++
 arch/x86/include/asm/kvm.h   |  22 +++++++
 arch/x86/include/asm/xsave.h |   7 ++-
 arch/x86/kvm/x86.c           | 139 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/kvm.h          |  12 ++++
 5 files changed, 252 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 159b4efe1b0..ffba03f55bd 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -922,6 +922,80 @@ Define which vcpu is the Bootstrap Processor (BSP).  Values are the same
 as the vcpu id in KVM_CREATE_VCPU.  If this ioctl is not called, the default
 is vcpu 0.
 
+4.41 KVM_GET_XSAVE
+
+Capability: KVM_CAP_XSAVE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xsave (out)
+Returns: 0 on success, -1 on error
+
+struct kvm_xsave {
+	__u32 region[1024];
+};
+
+This ioctl copies the current vcpu's xsave struct to userspace.
+
+4.42 KVM_SET_XSAVE
+
+Capability: KVM_CAP_XSAVE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xsave (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_xsave {
+	__u32 region[1024];
+};
+
+This ioctl copies userspace's xsave struct to the kernel.
+
+4.43 KVM_GET_XCRS
+
+Capability: KVM_CAP_XCRS
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xcrs (out)
+Returns: 0 on success, -1 on error
+
+struct kvm_xcr {
+	__u32 xcr;
+	__u32 reserved;
+	__u64 value;
+};
+
+struct kvm_xcrs {
+	__u32 nr_xcrs;
+	__u32 flags;
+	struct kvm_xcr xcrs[KVM_MAX_XCRS];
+	__u64 padding[16];
+};
+
+This ioctl copies the current vcpu's xcrs to userspace.
+
+4.44 KVM_SET_XCRS
+
+Capability: KVM_CAP_XCRS
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xcrs (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_xcr {
+	__u32 xcr;
+	__u32 reserved;
+	__u64 value;
+};
+
+struct kvm_xcrs {
+	__u32 nr_xcrs;
+	__u32 flags;
+	struct kvm_xcr xcrs[KVM_MAX_XCRS];
+	__u64 padding[16];
+};
+
+This ioctl sets the vcpu's xcrs to the values userspace specifies.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index ff90055c7f0..4d8dcbdfc12 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -22,6 +22,8 @@
 #define __KVM_HAVE_XEN_HVM
 #define __KVM_HAVE_VCPU_EVENTS
 #define __KVM_HAVE_DEBUGREGS
+#define __KVM_HAVE_XSAVE
+#define __KVM_HAVE_XCRS
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
@@ -299,4 +301,24 @@ struct kvm_debugregs {
 	__u64 reserved[9];
 };
 
+/* for KVM_CAP_XSAVE */
+struct kvm_xsave {
+	__u32 region[1024];
+};
+
+#define KVM_MAX_XCRS	16
+
+struct kvm_xcr {
+	__u32 xcr;
+	__u32 reserved;
+	__u64 value;
+};
+
+struct kvm_xcrs {
+	__u32 nr_xcrs;
+	__u32 flags;
+	struct kvm_xcr xcrs[KVM_MAX_XCRS];
+	__u64 padding[16];
+};
+
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 29ee4e4c64c..32c36668fa7 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -13,8 +13,11 @@
 
 #define FXSAVE_SIZE	512
 
-#define XSTATE_YMM_SIZE 256
-#define XSTATE_YMM_OFFSET (512 + 64)
+#define XSAVE_HDR_SIZE	    64
+#define XSAVE_HDR_OFFSET    FXSAVE_SIZE
+
+#define XSAVE_YMM_SIZE	    256
+#define XSAVE_YMM_OFFSET    (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
 
 /*
  * These are the features that the OS can handle currently.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 795999e1ac1..0c8dc9614e7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1680,6 +1680,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PCI_SEGMENT:
 	case KVM_CAP_DEBUGREGS:
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
+	case KVM_CAP_XSAVE:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -1703,6 +1704,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_MCE:
 		r = KVM_MAX_MCE_BANKS;
 		break;
+	case KVM_CAP_XCRS:
+		r = cpu_has_xsave;
+		break;
 	default:
 		r = 0;
 		break;
@@ -2355,6 +2359,77 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+					 struct kvm_xsave *guest_xsave)
+{
+	if (cpu_has_xsave)
+		memcpy(guest_xsave->region,
+			&vcpu->arch.guest_fpu.state->xsave,
+			sizeof(struct xsave_struct));
+	else {
+		memcpy(guest_xsave->region,
+			&vcpu->arch.guest_fpu.state->fxsave,
+			sizeof(struct i387_fxsave_struct));
+		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
+			XSTATE_FPSSE;
+	}
+}
+
+static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
+					struct kvm_xsave *guest_xsave)
+{
+	u64 xstate_bv =
+		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
+
+	if (cpu_has_xsave)
+		memcpy(&vcpu->arch.guest_fpu.state->xsave,
+			guest_xsave->region, sizeof(struct xsave_struct));
+	else {
+		if (xstate_bv & ~XSTATE_FPSSE)
+			return -EINVAL;
+		memcpy(&vcpu->arch.guest_fpu.state->fxsave,
+			guest_xsave->region, sizeof(struct i387_fxsave_struct));
+	}
+	return 0;
+}
+
+static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
+					struct kvm_xcrs *guest_xcrs)
+{
+	if (!cpu_has_xsave) {
+		guest_xcrs->nr_xcrs = 0;
+		return;
+	}
+
+	guest_xcrs->nr_xcrs = 1;
+	guest_xcrs->flags = 0;
+	guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
+	guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
+}
+
+/* Apply userspace XCRs (only XCR0 is supported); 0 or -EINVAL on failure. */
+static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
+				       struct kvm_xcrs *guest_xcrs)
+{
+	int i, r = 0;
+
+	if (!cpu_has_xsave)
+		return -EINVAL;
+	if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
+		return -EINVAL;
+
+	for (i = 0; i < guest_xcrs->nr_xcrs; i++)
+		/* Only support XCR0 currently; look at entry i, not entry 0 */
+		if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
+			r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
+				guest_xcrs->xcrs[i].value);
+			break;
+		}
+	if (r)
+		r = -EINVAL;
+	return r;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -2556,6 +2631,70 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
 		break;
 	}
+	case KVM_GET_XSAVE: {
+		struct kvm_xsave *xsave;
+
+		xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
+		r = -ENOMEM;
+		if (!xsave)
+			break;
+
+		kvm_vcpu_ioctl_x86_get_xsave(vcpu, xsave);
+
+		r = 0;
+		if (copy_to_user(argp, xsave, sizeof(struct kvm_xsave)))
+			r = -EFAULT;
+		kfree(xsave);	/* bounce buffer is local to this case */
+		break;
+	}
+	case KVM_SET_XSAVE: {
+		struct kvm_xsave *xsave;
+
+		xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
+		r = -ENOMEM;
+		if (!xsave)
+			break;
+
+		r = -EFAULT;
+		if (!copy_from_user(xsave, argp, sizeof(struct kvm_xsave)))
+			r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, xsave);
+
+		kfree(xsave);	/* bounce buffer is local to this case */
+		break;
+	}
+	case KVM_GET_XCRS: {
+		struct kvm_xcrs *xcrs;
+
+		xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
+		r = -ENOMEM;
+		if (!xcrs)
+			break;
+
+		kvm_vcpu_ioctl_x86_get_xcrs(vcpu, xcrs);
+
+		r = 0;
+		if (copy_to_user(argp, xcrs, sizeof(struct kvm_xcrs)))
+			r = -EFAULT;
+
+		kfree(xcrs);	/* bounce buffer is local to this case */
+		break;
+	}
+	case KVM_SET_XCRS: {
+		struct kvm_xcrs *xcrs;
+
+		xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
+		r = -ENOMEM;
+		if (!xcrs)
+			break;
+
+		r = -EFAULT;
+		if (!copy_from_user(xcrs, argp, sizeof(struct kvm_xcrs)))
+			r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, xcrs);
+
+		/* bounce buffer is local to this case; free it on all paths */
+		kfree(xcrs);
+		break;
+	}
 	default:
 		r = -EINVAL;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 23ea0225390..6fd40f540a8 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -524,6 +524,12 @@ struct kvm_enable_cap {
 #define KVM_CAP_PPC_OSI 52
 #define KVM_CAP_PPC_UNSET_IRQ 53
 #define KVM_CAP_ENABLE_CAP 54
+#ifdef __KVM_HAVE_XSAVE
+#define KVM_CAP_XSAVE 55
+#endif
+#ifdef __KVM_HAVE_XCRS
+#define KVM_CAP_XCRS 56
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -714,6 +720,12 @@ struct kvm_clock_data {
 #define KVM_GET_DEBUGREGS         _IOR(KVMIO,  0xa1, struct kvm_debugregs)
 #define KVM_SET_DEBUGREGS         _IOW(KVMIO,  0xa2, struct kvm_debugregs)
 #define KVM_ENABLE_CAP            _IOW(KVMIO,  0xa3, struct kvm_enable_cap)
+/* Available with KVM_CAP_XSAVE */
+#define KVM_GET_XSAVE		  _IOR(KVMIO,  0xa4, struct kvm_xsave)
+#define KVM_SET_XSAVE		  _IOW(KVMIO,  0xa5, struct kvm_xsave)
+/* Available with KVM_CAP_XCRS */
+#define KVM_GET_XCRS		  _IOR(KVMIO,  0xa6, struct kvm_xcrs)
+#define KVM_SET_XCRS		  _IOW(KVMIO,  0xa7, struct kvm_xcrs)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
-- 
cgit v1.2.3-70-g09d2


From a1f4d39500ad8ed61825eff061debff42386ab5b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 21 Jun 2010 11:44:20 +0300
Subject: KVM: Remove memory alias support

As advertised in feature-removal-schedule.txt.  Equivalent support is provided
by overlapping memory regions.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 Documentation/feature-removal-schedule.txt |  11 ---
 Documentation/kvm/api.txt                  |  12 +--
 arch/ia64/kvm/kvm-ia64.c                   |   5 --
 arch/powerpc/kvm/powerpc.c                 |   5 --
 arch/s390/kvm/kvm-s390.c                   |   5 --
 arch/x86/include/asm/kvm_host.h            |  21 -----
 arch/x86/kvm/mmu.c                         |  17 +---
 arch/x86/kvm/paging_tmpl.h                 |   3 +-
 arch/x86/kvm/x86.c                         | 125 -----------------------------
 arch/x86/kvm/x86.h                         |   7 --
 include/linux/kvm.h                        |   1 +
 include/linux/kvm_host.h                   |   6 --
 virt/kvm/kvm_main.c                        |  18 +----
 13 files changed, 11 insertions(+), 225 deletions(-)

(limited to 'include')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 1571c0c83db..ad1e90dd278 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -538,17 +538,6 @@ Who:	Jan Kiszka <jan.kiszka@web.de>
 
 ----------------------------
 
-What:	KVM memory aliases support
-When:	July 2010
-Why:	Memory aliasing support is used for speeding up guest vga access
-	through the vga windows.
-
-	Modern userspace no longer uses this feature, so it's just bitrotted
-	code and can be removed with no impact.
-Who:	Avi Kivity <avi@redhat.com>
-
-----------------------------
-
 What:	xtime, wall_to_monotonic
 When:	2.6.36+
 Files:	kernel/time/timekeeping.c include/linux/time.h
diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index ffba03f55bd..7e415943a11 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -226,17 +226,7 @@ Type: vm ioctl
 Parameters: struct kvm_memory_alias (in)
 Returns: 0 (success), -1 (error)
 
-struct kvm_memory_alias {
-	__u32 slot;  /* this has a different namespace than memory slots */
-	__u32 flags;
-	__u64 guest_phys_addr;
-	__u64 memory_size;
-	__u64 target_phys_addr;
-};
-
-Defines a guest physical address space region as an alias to another
-region.  Useful for aliased address, for example the VGA low memory
-window. Should not be used with userspace memory.
+This ioctl is obsolete and has been removed.
 
 4.9 KVM_RUN
 
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 91760e80e26..bd510beb43a 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1946,11 +1946,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 	return vcpu->arch.timer_fired;
 }
 
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
-{
-	return gfn;
-}
-
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index b5ebdfbed20..72a4ad86ee9 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -36,11 +36,6 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
-{
-	return gfn;
-}
-
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
 	return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 08a3b35d30b..4fe68650535 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -723,11 +723,6 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 {
 }
 
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
-{
-	return gfn;
-}
-
 static int __init kvm_s390_init(void)
 {
 	int ret;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2ec2e27a403..a57cdeacc4d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -69,8 +69,6 @@
 
 #define IOPL_SHIFT 12
 
-#define KVM_ALIAS_SLOTS 4
-
 #define KVM_PERMILLE_MMU_PAGES 20
 #define KVM_MIN_ALLOC_MMU_PAGES 64
 #define KVM_MMU_HASH_SHIFT 10
@@ -362,24 +360,7 @@ struct kvm_vcpu_arch {
 	u64 hv_vapic;
 };
 
-struct kvm_mem_alias {
-	gfn_t base_gfn;
-	unsigned long npages;
-	gfn_t target_gfn;
-#define KVM_ALIAS_INVALID     1UL
-	unsigned long flags;
-};
-
-#define KVM_ARCH_HAS_UNALIAS_INSTANTIATION
-
-struct kvm_mem_aliases {
-	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
-	int naliases;
-};
-
 struct kvm_arch {
-	struct kvm_mem_aliases *aliases;
-
 	unsigned int n_free_mmu_pages;
 	unsigned int n_requested_mmu_pages;
 	unsigned int n_alloc_mmu_pages;
@@ -655,8 +636,6 @@ void kvm_disable_tdp(void);
 int complete_pio(struct kvm_vcpu *vcpu);
 bool kvm_check_iopl(struct kvm_vcpu *vcpu);
 
-struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
-
 static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
 {
 	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8c2f580956d..c5501bc1010 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -434,9 +434,7 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
 	int *write_count;
 	int i;
 
-	gfn = unalias_gfn(kvm, gfn);
-
-	slot = gfn_to_memslot_unaliased(kvm, gfn);
+	slot = gfn_to_memslot(kvm, gfn);
 	for (i = PT_DIRECTORY_LEVEL;
 	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
 		write_count   = slot_largepage_idx(gfn, slot, i);
@@ -450,8 +448,7 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
 	int *write_count;
 	int i;
 
-	gfn = unalias_gfn(kvm, gfn);
-	slot = gfn_to_memslot_unaliased(kvm, gfn);
+	slot = gfn_to_memslot(kvm, gfn);
 	for (i = PT_DIRECTORY_LEVEL;
 	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
 		write_count   = slot_largepage_idx(gfn, slot, i);
@@ -467,8 +464,7 @@ static int has_wrprotected_page(struct kvm *kvm,
 	struct kvm_memory_slot *slot;
 	int *largepage_idx;
 
-	gfn = unalias_gfn(kvm, gfn);
-	slot = gfn_to_memslot_unaliased(kvm, gfn);
+	slot = gfn_to_memslot(kvm, gfn);
 	if (slot) {
 		largepage_idx = slot_largepage_idx(gfn, slot, level);
 		return *largepage_idx;
@@ -521,7 +517,6 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 
 /*
  * Take gfn and return the reverse mapping to it.
- * Note: gfn must be unaliased before this function get called
  */
 
 static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
@@ -561,7 +556,6 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 
 	if (!is_rmap_spte(*spte))
 		return count;
-	gfn = unalias_gfn(vcpu->kvm, gfn);
 	sp = page_header(__pa(spte));
 	kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
 	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
@@ -698,7 +692,6 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 	u64 *spte;
 	int i, write_protected = 0;
 
-	gfn = unalias_gfn(kvm, gfn);
 	rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL);
 
 	spte = rmap_next(kvm, rmapp, NULL);
@@ -885,7 +878,6 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 
 	sp = page_header(__pa(spte));
 
-	gfn = unalias_gfn(vcpu->kvm, gfn);
 	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
 	kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
@@ -3510,8 +3502,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
 		if (sp->unsync)
 			continue;
 
-		gfn = unalias_gfn(vcpu->kvm, sp->gfn);
-		slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn);
+		slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
 		rmapp = &slot->rmap[gfn - slot->base_gfn];
 
 		spte = rmap_next(vcpu->kvm, rmapp, NULL);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 863920f649f..a21a86ef9e2 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -576,7 +576,6 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
  * Using the cached information from sp->gfns is safe because:
  * - The spte has a reference to the struct page, so the pfn for a given gfn
  *   can't change unless all sptes pointing to it are nuked first.
- * - Alias changes zap the entire shadow cache.
  */
 static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			    bool clear_unsync)
@@ -611,7 +610,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			return -EINVAL;
 
 		gfn = gpte_to_gfn(gpte);
-		if (unalias_gfn(vcpu->kvm, gfn) != sp->gfns[i] ||
+		if (gfn != sp->gfns[i] ||
 		      !is_present_gpte(gpte) || !(gpte & PT_ACCESSED_MASK)) {
 			u64 nonpresent;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8e60b6c9c0b..62596d373a4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2740,115 +2740,6 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
 	return kvm->arch.n_alloc_mmu_pages;
 }
 
-gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
-{
-	int i;
-	struct kvm_mem_alias *alias;
-	struct kvm_mem_aliases *aliases;
-
-	aliases = kvm_aliases(kvm);
-
-	for (i = 0; i < aliases->naliases; ++i) {
-		alias = &aliases->aliases[i];
-		if (alias->flags & KVM_ALIAS_INVALID)
-			continue;
-		if (gfn >= alias->base_gfn
-		    && gfn < alias->base_gfn + alias->npages)
-			return alias->target_gfn + gfn - alias->base_gfn;
-	}
-	return gfn;
-}
-
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
-{
-	int i;
-	struct kvm_mem_alias *alias;
-	struct kvm_mem_aliases *aliases;
-
-	aliases = kvm_aliases(kvm);
-
-	for (i = 0; i < aliases->naliases; ++i) {
-		alias = &aliases->aliases[i];
-		if (gfn >= alias->base_gfn
-		    && gfn < alias->base_gfn + alias->npages)
-			return alias->target_gfn + gfn - alias->base_gfn;
-	}
-	return gfn;
-}
-
-/*
- * Set a new alias region.  Aliases map a portion of physical memory into
- * another portion.  This is useful for memory windows, for example the PC
- * VGA region.
- */
-static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
-					 struct kvm_memory_alias *alias)
-{
-	int r, n;
-	struct kvm_mem_alias *p;
-	struct kvm_mem_aliases *aliases, *old_aliases;
-
-	r = -EINVAL;
-	/* General sanity checks */
-	if (alias->memory_size & (PAGE_SIZE - 1))
-		goto out;
-	if (alias->guest_phys_addr & (PAGE_SIZE - 1))
-		goto out;
-	if (alias->slot >= KVM_ALIAS_SLOTS)
-		goto out;
-	if (alias->guest_phys_addr + alias->memory_size
-	    < alias->guest_phys_addr)
-		goto out;
-	if (alias->target_phys_addr + alias->memory_size
-	    < alias->target_phys_addr)
-		goto out;
-
-	r = -ENOMEM;
-	aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
-	if (!aliases)
-		goto out;
-
-	mutex_lock(&kvm->slots_lock);
-
-	/* invalidate any gfn reference in case of deletion/shrinking */
-	memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
-	aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID;
-	old_aliases = kvm->arch.aliases;
-	rcu_assign_pointer(kvm->arch.aliases, aliases);
-	synchronize_srcu_expedited(&kvm->srcu);
-	kvm_mmu_zap_all(kvm);
-	kfree(old_aliases);
-
-	r = -ENOMEM;
-	aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
-	if (!aliases)
-		goto out_unlock;
-
-	memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
-
-	p = &aliases->aliases[alias->slot];
-	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
-	p->npages = alias->memory_size >> PAGE_SHIFT;
-	p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
-	p->flags &= ~(KVM_ALIAS_INVALID);
-
-	for (n = KVM_ALIAS_SLOTS; n > 0; --n)
-		if (aliases->aliases[n - 1].npages)
-			break;
-	aliases->naliases = n;
-
-	old_aliases = kvm->arch.aliases;
-	rcu_assign_pointer(kvm->arch.aliases, aliases);
-	synchronize_srcu_expedited(&kvm->srcu);
-	kfree(old_aliases);
-	r = 0;
-
-out_unlock:
-	mutex_unlock(&kvm->slots_lock);
-out:
-	return r;
-}
-
 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 {
 	int r;
@@ -3056,7 +2947,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	union {
 		struct kvm_pit_state ps;
 		struct kvm_pit_state2 ps2;
-		struct kvm_memory_alias alias;
 		struct kvm_pit_config pit_config;
 	} u;
 
@@ -3101,14 +2991,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	case KVM_GET_NR_MMU_PAGES:
 		r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
 		break;
-	case KVM_SET_MEMORY_ALIAS:
-		r = -EFAULT;
-		if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias)))
-			goto out;
-		r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias);
-		if (r)
-			goto out;
-		break;
 	case KVM_CREATE_IRQCHIP: {
 		struct kvm_pic *vpic;
 
@@ -5559,12 +5441,6 @@ struct  kvm *kvm_arch_create_vm(void)
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
 
-	kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
-	if (!kvm->arch.aliases) {
-		kfree(kvm);
-		return ERR_PTR(-ENOMEM);
-	}
-
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
@@ -5622,7 +5498,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	if (kvm->arch.ept_identity_pagetable)
 		put_page(kvm->arch.ept_identity_pagetable);
 	cleanup_srcu_struct(&kvm->srcu);
-	kfree(kvm->arch.aliases);
 	kfree(kvm);
 }
 
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index f4b54458285..b7a404722d2 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -65,13 +65,6 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
 	return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
 }
 
-static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm)
-{
-	return rcu_dereference_check(kvm->arch.aliases,
-			srcu_read_lock_held(&kvm->srcu)
-			|| lockdep_is_held(&kvm->slots_lock));
-}
-
 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
 
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 6fd40f540a8..636fc381c89 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -619,6 +619,7 @@ struct kvm_clock_data {
  */
 #define KVM_CREATE_VCPU           _IO(KVMIO,   0x41)
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO,  0x42, struct kvm_dirty_log)
+/* KVM_SET_MEMORY_ALIAS is obsolete: */
 #define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO,  0x43, struct kvm_memory_alias)
 #define KVM_SET_NR_MMU_PAGES      _IO(KVMIO,   0x44)
 #define KVM_GET_NR_MMU_PAGES      _IO(KVMIO,   0x45)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2d96555cd4e..240e460777b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -286,8 +286,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 				int user_alloc);
 void kvm_disable_largepages(void);
 void kvm_arch_flush_shadow(struct kvm *kvm);
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
-gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn);
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
@@ -564,10 +562,6 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se
 }
 #endif
 
-#ifndef KVM_ARCH_HAS_UNALIAS_INSTANTIATION
-#define unalias_gfn_instantiation unalias_gfn
-#endif
-
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 
 #define KVM_MAX_IRQ_ROUTES 1024
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 84a090644d9..65417e3d846 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -841,7 +841,7 @@ int kvm_is_error_hva(unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 
-struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
+struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
 	struct kvm_memslots *slots = kvm_memslots(kvm);
@@ -855,20 +855,13 @@ struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 	}
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
-
-struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
-{
-	gfn = unalias_gfn(kvm, gfn);
-	return gfn_to_memslot_unaliased(kvm, gfn);
-}
+EXPORT_SYMBOL_GPL(gfn_to_memslot);
 
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
 	struct kvm_memslots *slots = kvm_memslots(kvm);
 
-	gfn = unalias_gfn_instantiation(kvm, gfn);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
@@ -913,7 +906,6 @@ int memslot_id(struct kvm *kvm, gfn_t gfn)
 	struct kvm_memslots *slots = kvm_memslots(kvm);
 	struct kvm_memory_slot *memslot = NULL;
 
-	gfn = unalias_gfn(kvm, gfn);
 	for (i = 0; i < slots->nmemslots; ++i) {
 		memslot = &slots->memslots[i];
 
@@ -934,8 +926,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *slot;
 
-	gfn = unalias_gfn_instantiation(kvm, gfn);
-	slot = gfn_to_memslot_unaliased(kvm, gfn);
+	slot = gfn_to_memslot(kvm, gfn);
 	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return bad_hva();
 	return gfn_to_hva_memslot(slot, gfn);
@@ -1202,8 +1193,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *memslot;
 
-	gfn = unalias_gfn(kvm, gfn);
-	memslot = gfn_to_memslot_unaliased(kvm, gfn);
+	memslot = gfn_to_memslot(kvm, gfn);
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 
-- 
cgit v1.2.3-70-g09d2


From a8eeb04a44dd6dc4c8158953d9bae48849c9a188 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 10 May 2010 12:34:53 +0300
Subject: KVM: Add mini-API for vcpu->requests

Makes it a little more readable and hackable.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c     |  2 +-
 arch/x86/kvm/mmu.c       |  6 +++---
 arch/x86/kvm/svm.c       |  2 +-
 arch/x86/kvm/timer.c     |  2 +-
 arch/x86/kvm/vmx.c       |  2 +-
 arch/x86/kvm/x86.c       | 27 +++++++++++++--------------
 include/linux/kvm_host.h | 15 +++++++++++++++
 virt/kvm/kvm_main.c      |  4 ++--
 8 files changed, 37 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 49573c78c24..77d8c0f4817 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -534,7 +534,7 @@ static void __report_tpr_access(struct kvm_lapic *apic, bool write)
 	struct kvm_vcpu *vcpu = apic->vcpu;
 	struct kvm_run *run = vcpu->run;
 
-	set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests);
+	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
 	run->tpr_access.rip = kvm_rip_read(vcpu);
 	run->tpr_access.is_write = write;
 }
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c5501bc1010..690a7fc58c1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1378,7 +1378,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 
 		mmu_page_add_parent_pte(vcpu, sp, parent_pte);
 		if (sp->unsync_children) {
-			set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
+			kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
 			kvm_mmu_mark_parents_unsync(sp);
 		} else if (sp->unsync)
 			kvm_mmu_mark_parents_unsync(sp);
@@ -2131,7 +2131,7 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
 	int ret = 0;
 
 	if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) {
-		set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
 		ret = 1;
 	}
 
@@ -2329,7 +2329,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 {
 	++vcpu->stat.tlb_flush;
-	set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 }
 
 static void paging_new_cr3(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f7a6fdcf8ef..587b99d37d4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1494,7 +1494,7 @@ static void svm_handle_mce(struct vcpu_svm *svm)
 		 */
 		pr_err("KVM: Guest triggered AMD Erratum 383\n");
 
-		set_bit(KVM_REQ_TRIPLE_FAULT, &svm->vcpu.requests);
+		kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
 
 		return;
 	}
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c
index 564548fbb3d..e16a0dbe74d 100644
--- a/arch/x86/kvm/timer.c
+++ b/arch/x86/kvm/timer.c
@@ -32,7 +32,7 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
 	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
 		atomic_inc(&ktimer->pending);
 		/* FIXME: this code should not know anything about vcpus */
-		set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
+		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
 	}
 
 	if (waitqueue_active(q))
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 345a3547051..661c6e199b4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -899,7 +899,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		unsigned long sysenter_esp;
 
 		kvm_migrate_timers(vcpu);
-		set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
+		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 		local_irq_disable();
 		list_add(&vmx->local_vcpus_link,
 			 &per_cpu(vcpus_on_cpu, cpu));
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9be6e4e5e8e..7ef44107a14 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -296,7 +296,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 	prev_nr = vcpu->arch.exception.nr;
 	if (prev_nr == DF_VECTOR) {
 		/* triple fault -> shutdown */
-		set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
 		return;
 	}
 	class1 = exception_class(prev_nr);
@@ -948,7 +948,7 @@ static int kvm_request_guest_time_update(struct kvm_vcpu *v)
 
 	if (!vcpu->time_page)
 		return 0;
-	set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
+	kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v);
 	return 1;
 }
 
@@ -2253,7 +2253,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
 			printk(KERN_DEBUG "kvm: set_mce: "
 			       "injects mce exception while "
 			       "previous one is in progress!\n");
-			set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
 			return 0;
 		}
 		if (banks[1] & MCI_STATUS_VAL)
@@ -4617,7 +4617,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		vcpu->run->request_interrupt_window;
 
 	if (vcpu->requests)
-		if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
+		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
 			kvm_mmu_unload(vcpu);
 
 	r = kvm_mmu_reload(vcpu);
@@ -4625,26 +4625,25 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		goto out;
 
 	if (vcpu->requests) {
-		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
+		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
 			__kvm_migrate_timers(vcpu);
-		if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
+		if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu))
 			kvm_write_guest_time(vcpu);
-		if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
+		if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
 			kvm_mmu_sync_roots(vcpu);
-		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
+		if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
 			kvm_x86_ops->tlb_flush(vcpu);
-		if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
-				       &vcpu->requests)) {
+		if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
 			vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
 			r = 0;
 			goto out;
 		}
-		if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
+		if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
 			vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
 			r = 0;
 			goto out;
 		}
-		if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
+		if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
 			vcpu->fpu_active = 0;
 			kvm_x86_ops->fpu_deactivate(vcpu);
 		}
@@ -4773,7 +4772,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 			kvm_vcpu_block(vcpu);
 			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
-			if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
+			if (kvm_check_request(KVM_REQ_UNHALT, vcpu))
 			{
 				switch(vcpu->arch.mp_state) {
 				case KVM_MP_STATE_HALTED:
@@ -5255,7 +5254,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 	vcpu->guest_fpu_loaded = 0;
 	fpu_save_init(&vcpu->arch.guest_fpu);
 	++vcpu->stat.fpu_reload;
-	set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
+	kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
 	trace_kvm_fpu(0);
 }
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 240e460777b..c8a9d628898 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -624,5 +624,20 @@ static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 
 #endif
 
+static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
+{
+	set_bit(req, &vcpu->requests);
+}
+
+static inline bool kvm_make_check_request(int req, struct kvm_vcpu *vcpu)
+{
+	return test_and_set_bit(req, &vcpu->requests);
+}
+
+static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
+{
+	return test_and_clear_bit(req, &vcpu->requests);
+}
+
 #endif
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 65417e3d846..5bd2f34ba57 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -145,7 +145,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 	raw_spin_lock(&kvm->requests_lock);
 	me = smp_processor_id();
 	kvm_for_each_vcpu(i, vcpu, kvm) {
-		if (test_and_set_bit(req, &vcpu->requests))
+		if (kvm_make_check_request(req, vcpu))
 			continue;
 		cpu = vcpu->cpu;
 		if (cpus != NULL && cpu != -1 && cpu != me)
@@ -1212,7 +1212,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
 		if (kvm_arch_vcpu_runnable(vcpu)) {
-			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
+			kvm_make_request(KVM_REQ_UNHALT, vcpu);
 			break;
 		}
 		if (kvm_cpu_has_pending_timer(vcpu))
-- 
cgit v1.2.3-70-g09d2


From 0719837c0832a7b305e42327caa7d330462360ea Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 10 May 2010 13:08:26 +0300
Subject: KVM: Reduce atomic operations on vcpu->requests

Usually the vcpu->requests bitmap is sparse, so a test_and_clear_bit() for
each request generates a large number of unneeded atomics if any bit is set.

Replace with a separate test/clear sequence.  This is safe since there is
no clear_bit() outside the vcpu thread.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c8a9d628898..e820eb57910 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -636,7 +636,12 @@ static inline bool kvm_make_check_request(int req, struct kvm_vcpu *vcpu)
 
 static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
 {
-	return test_and_clear_bit(req, &vcpu->requests);
+	if (test_bit(req, &vcpu->requests)) {
+		clear_bit(req, &vcpu->requests);
+		return true;
+	} else {
+		return false;
+	}
 }
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From e36d96f7cfaa71870c407131eb4fbd38ea285c01 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 21 Jun 2010 10:56:36 +0300
Subject: KVM: Keep slot ID in memory slot structure

May be used for distinguishing between internal and user slots, or for sorting
slots in size order.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 1 +
 virt/kvm/kvm_main.c      | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e820eb57910..e796326f364 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -124,6 +124,7 @@ struct kvm_memory_slot {
 	} *lpage_info[KVM_NR_PAGE_SIZES - 1];
 	unsigned long userspace_addr;
 	int user_alloc;
+	int id;
 };
 
 static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5bd2f34ba57..74f73192094 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -570,6 +570,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
 	new = old = *memslot;
 
+	new.id = mem->slot;
 	new.base_gfn = base_gfn;
 	new.npages = npages;
 	new.flags = mem->flags;
-- 
cgit v1.2.3-70-g09d2


From 6ee0578b4daaea01c96b172c6aacca43fd9807a6 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Fri, 30 Jul 2010 14:57:37 -0700
Subject: workqueue: mark init_workqueues() as early_initcall()

Mark init_workqueues() as early_initcall() and thus it will be initialized
before smp bringup. init_workqueues() registers for the hotcpu notifier
and thus it should cope with the processors that are brought online after
the workqueues are initialized.

x86 smp bringup code uses workqueues and uses a workaround for the
cold boot process (as the workqueues are initialized post smp_init()).
Marking init_workqueues() as early_initcall() will pave the way for
cleaning up this code.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/workqueue.h | 1 -
 init/main.c               | 2 --
 kernel/workqueue.c        | 4 +++-
 3 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 5f76001c4e6..51dc9a727e5 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -327,7 +327,6 @@ extern int schedule_delayed_work_on(int cpu, struct delayed_work *work,
 extern int schedule_on_each_cpu(work_func_t func);
 extern int keventd_up(void);
 
-extern void init_workqueues(void);
 int execute_in_process_context(work_func_t fn, struct execute_work *);
 
 extern int flush_work(struct work_struct *work);
diff --git a/init/main.c b/init/main.c
index 3bdb152f412..5f2ec2cdd90 100644
--- a/init/main.c
+++ b/init/main.c
@@ -32,7 +32,6 @@
 #include <linux/start_kernel.h>
 #include <linux/security.h>
 #include <linux/smp.h>
-#include <linux/workqueue.h>
 #include <linux/profile.h>
 #include <linux/rcupdate.h>
 #include <linux/moduleparam.h>
@@ -786,7 +785,6 @@ static void __init do_initcalls(void)
  */
 static void __init do_basic_setup(void)
 {
-	init_workqueues();
 	cpuset_init_smp();
 	usermodehelper_init();
 	init_tmpfs();
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1105c474073..e2eb351d915 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3507,7 +3507,7 @@ out_unlock:
 }
 #endif /* CONFIG_FREEZER */
 
-void __init init_workqueues(void)
+static int __init init_workqueues(void)
 {
 	unsigned int cpu;
 	int i;
@@ -3559,4 +3559,6 @@ void __init init_workqueues(void)
 	system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
 					    WQ_UNBOUND_MAX_ACTIVE);
 	BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq);
+	return 0;
 }
+early_initcall(init_workqueues);
-- 
cgit v1.2.3-70-g09d2


From 77a63f3d1e0a3e7ede8d10f569e8481b13ff47c5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 1 Aug 2010 13:40:40 -0400
Subject: NFS: Fix a typo in include/linux/nfs_fs.h

nfs_commit_inode() needs to be defined irrespectively of whether or not
we are supporting NFSv3 and NFSv4.

Allow the compiler to optimise away code in the NFSv2-only case by
converting it into an inlined stub function.

Reported-and-tested-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/write.c         | 5 -----
 include/linux/nfs_fs.h | 6 ++++++
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index bb72ad34d51..9f81bdd91c5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1454,11 +1454,6 @@ out_mark_dirty:
 	return ret;
 }
 #else
-int nfs_commit_inode(struct inode *inode, int how)
-{
-	return 0;
-}
-
 static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
 {
 	return 0;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index f6e2455f13d..bad4d121b16 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -496,6 +496,12 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 extern int  nfs_commit_inode(struct inode *, int);
 extern struct nfs_write_data *nfs_commitdata_alloc(void);
 extern void nfs_commit_free(struct nfs_write_data *wdata);
+#else
+static inline int
+nfs_commit_inode(struct inode *inode, int how)
+{
+	return 0;
+}
 #endif
 
 static inline int
-- 
cgit v1.2.3-70-g09d2


From 0814a979a64a5ae61c7567496d090e204ecabd2b Mon Sep 17 00:00:00 2001
From: Anatolij Gustschin <agust@denx.de>
Date: Fri, 23 Jul 2010 04:00:36 +0000
Subject: powerpc/5121: move fsl-diu-fb.h to include/linux

Some DIU structures will be used in platform code in
a subsequent MPC5121 DIU patch, so we move this header
to include/linux to be able to include it elsewhere.

Signed-off-by: Anatolij Gustschin <agust@denx.de>
Acked-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/video/fsl-diu-fb.c |   2 +-
 drivers/video/fsl-diu-fb.h | 223 ---------------------------------------------
 include/linux/fsl-diu-fb.h | 223 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 224 insertions(+), 224 deletions(-)
 delete mode 100644 drivers/video/fsl-diu-fb.h
 create mode 100644 include/linux/fsl-diu-fb.h

(limited to 'include')

diff --git a/drivers/video/fsl-diu-fb.c b/drivers/video/fsl-diu-fb.c
index 9b8c9911122..48905d5f4e8 100644
--- a/drivers/video/fsl-diu-fb.c
+++ b/drivers/video/fsl-diu-fb.c
@@ -34,7 +34,7 @@
 #include <linux/of_platform.h>
 
 #include <sysdev/fsl_soc.h>
-#include "fsl-diu-fb.h"
+#include <linux/fsl-diu-fb.h>
 
 /*
  * These parameters give default parameters
diff --git a/drivers/video/fsl-diu-fb.h b/drivers/video/fsl-diu-fb.h
deleted file mode 100644
index fc295d7ea46..00000000000
--- a/drivers/video/fsl-diu-fb.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
- *
- *  Freescale DIU Frame Buffer device driver
- *
- *  Authors: Hongjun Chen <hong-jun.chen@freescale.com>
- *           Paul Widmer <paul.widmer@freescale.com>
- *           Srikanth Srinivasan <srikanth.srinivasan@freescale.com>
- *           York Sun <yorksun@freescale.com>
- *
- *   Based on imxfb.c Copyright (C) 2004 S.Hauer, Pengutronix
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- */
-
-#ifndef __FSL_DIU_FB_H__
-#define __FSL_DIU_FB_H__
-
-/* Arbitrary threshold to determine the allocation method
- * See mpc8610fb_set_par(), map_video_memory(), and unmap_video_memory()
- */
-#define MEM_ALLOC_THRESHOLD (1024*768*4+32)
-/* Minimum value that the pixel clock can be set to in pico seconds
- * This is determined by platform clock/3 where the minimum platform
- * clock is 533MHz. This gives 5629 pico seconds.
- */
-#define MIN_PIX_CLK 5629
-#define MAX_PIX_CLK 96096
-
-#include <linux/types.h>
-
-struct mfb_alpha {
-	int enable;
-	int alpha;
-};
-
-struct mfb_chroma_key {
-	int enable;
-	__u8  red_max;
-	__u8  green_max;
-	__u8  blue_max;
-	__u8  red_min;
-	__u8  green_min;
-	__u8  blue_min;
-};
-
-struct aoi_display_offset {
-	int x_aoi_d;
-	int y_aoi_d;
-};
-
-#define MFB_SET_CHROMA_KEY	_IOW('M', 1, struct mfb_chroma_key)
-#define MFB_WAIT_FOR_VSYNC	_IOW('F', 0x20, u_int32_t)
-#define MFB_SET_BRIGHTNESS	_IOW('M', 3, __u8)
-
-#define MFB_SET_ALPHA		0x80014d00
-#define MFB_GET_ALPHA		0x40014d00
-#define MFB_SET_AOID		0x80084d04
-#define MFB_GET_AOID		0x40084d04
-#define MFB_SET_PIXFMT		0x80014d08
-#define MFB_GET_PIXFMT		0x40014d08
-
-#define FBIOGET_GWINFO		0x46E0
-#define FBIOPUT_GWINFO		0x46E1
-
-#ifdef __KERNEL__
-#include <linux/spinlock.h>
-
-/*
- * These are the fields of area descriptor(in DDR memory) for every plane
- */
-struct diu_ad {
-	/* Word 0(32-bit) in DDR memory */
-/* 	__u16 comp; */
-/* 	__u16 pixel_s:2; */
-/* 	__u16 pallete:1; */
-/* 	__u16 red_c:2; */
-/* 	__u16 green_c:2; */
-/* 	__u16 blue_c:2; */
-/* 	__u16 alpha_c:3; */
-/* 	__u16 byte_f:1; */
-/* 	__u16 res0:3; */
-
-	__be32 pix_fmt; /* hard coding pixel format */
-
-	/* Word 1(32-bit) in DDR memory */
-	__le32 addr;
-
-	/* Word 2(32-bit) in DDR memory */
-/* 	__u32 delta_xs:11; */
-/* 	__u32 res1:1; */
-/* 	__u32 delta_ys:11; */
-/* 	__u32 res2:1; */
-/* 	__u32 g_alpha:8; */
-	__le32 src_size_g_alpha;
-
-	/* Word 3(32-bit) in DDR memory */
-/* 	__u32 delta_xi:11; */
-/* 	__u32 res3:5; */
-/* 	__u32 delta_yi:11; */
-/* 	__u32 res4:3; */
-/* 	__u32 flip:2; */
-	__le32 aoi_size;
-
-	/* Word 4(32-bit) in DDR memory */
-	/*__u32 offset_xi:11;
-	__u32 res5:5;
-	__u32 offset_yi:11;
-	__u32 res6:5;
-	*/
-	__le32 offset_xyi;
-
-	/* Word 5(32-bit) in DDR memory */
-	/*__u32 offset_xd:11;
-	__u32 res7:5;
-	__u32 offset_yd:11;
-	__u32 res8:5; */
-	__le32 offset_xyd;
-
-
-	/* Word 6(32-bit) in DDR memory */
-	__u8 ckmax_r;
-	__u8 ckmax_g;
-	__u8 ckmax_b;
-	__u8 res9;
-
-	/* Word 7(32-bit) in DDR memory */
-	__u8 ckmin_r;
-	__u8 ckmin_g;
-	__u8 ckmin_b;
-	__u8 res10;
-/* 	__u32 res10:8; */
-
-	/* Word 8(32-bit) in DDR memory */
-	__le32 next_ad;
-
-	/* Word 9(32-bit) in DDR memory, just for 64-bit aligned */
-	__u32 paddr;
-} __attribute__ ((packed));
-
-/* DIU register map */
-struct diu {
-	__be32 desc[3];
-	__be32 gamma;
-	__be32 pallete;
-	__be32 cursor;
-	__be32 curs_pos;
-	__be32 diu_mode;
-	__be32 bgnd;
-	__be32 bgnd_wb;
-	__be32 disp_size;
-	__be32 wb_size;
-	__be32 wb_mem_addr;
-	__be32 hsyn_para;
-	__be32 vsyn_para;
-	__be32 syn_pol;
-	__be32 thresholds;
-	__be32 int_status;
-	__be32 int_mask;
-	__be32 colorbar[8];
-	__be32 filling;
-	__be32 plut;
-} __attribute__ ((packed));
-
-struct diu_hw {
-	struct diu *diu_reg;
-	spinlock_t reg_lock;
-
-	__u32 mode;		/* DIU operation mode */
-};
-
-struct diu_addr {
-	__u8 __iomem *vaddr;	/* Virtual address */
-	dma_addr_t paddr;	/* Physical address */
-	__u32 	   offset;
-};
-
-struct diu_pool {
-	struct diu_addr ad;
-	struct diu_addr gamma;
-	struct diu_addr pallete;
-	struct diu_addr cursor;
-};
-
-#define FSL_DIU_BASE_OFFSET	0x2C000	/* Offset of DIU */
-#define INT_LCDC		64	/* DIU interrupt number */
-
-#define FSL_AOI_NUM	6	/* 5 AOIs and one dummy AOI */
-				/* 1 for plane 0, 2 for plane 1&2 each */
-
-/* Minimum X and Y resolutions */
-#define MIN_XRES	64
-#define MIN_YRES	64
-
-/* HW cursor parameters */
-#define MAX_CURS		32
-
-/* Modes of operation of DIU */
-#define MFB_MODE0	0	/* DIU off */
-#define MFB_MODE1	1	/* All three planes output to display */
-#define MFB_MODE2	2	/* Plane 1 to display, planes 2+3 written back*/
-#define MFB_MODE3	3	/* All three planes written back to memory */
-#define MFB_MODE4	4	/* Color bar generation */
-
-/* INT_STATUS/INT_MASK field descriptions */
-#define INT_VSYNC	0x01	/* Vsync interrupt  */
-#define INT_VSYNC_WB	0x02	/* Vsync interrupt for write back operation */
-#define INT_UNDRUN	0x04	/* Under run exception interrupt */
-#define INT_PARERR	0x08	/* Display parameters error interrupt */
-#define INT_LS_BF_VS	0x10	/* Lines before vsync. interrupt */
-
-/* Panels'operation modes */
-#define MFB_TYPE_OUTPUT	0	/* Panel output to display */
-#define MFB_TYPE_OFF	1	/* Panel off */
-#define MFB_TYPE_WB	2	/* Panel written back to memory */
-#define MFB_TYPE_TEST	3	/* Panel generate color bar */
-
-#endif /* __KERNEL__ */
-#endif /* __FSL_DIU_FB_H__ */
diff --git a/include/linux/fsl-diu-fb.h b/include/linux/fsl-diu-fb.h
new file mode 100644
index 00000000000..fc295d7ea46
--- /dev/null
+++ b/include/linux/fsl-diu-fb.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ *  Freescale DIU Frame Buffer device driver
+ *
+ *  Authors: Hongjun Chen <hong-jun.chen@freescale.com>
+ *           Paul Widmer <paul.widmer@freescale.com>
+ *           Srikanth Srinivasan <srikanth.srinivasan@freescale.com>
+ *           York Sun <yorksun@freescale.com>
+ *
+ *   Based on imxfb.c Copyright (C) 2004 S.Hauer, Pengutronix
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef __FSL_DIU_FB_H__
+#define __FSL_DIU_FB_H__
+
+/* Arbitrary threshold to determine the allocation method
+ * See mpc8610fb_set_par(), map_video_memory(), and unmap_video_memory()
+ */
+#define MEM_ALLOC_THRESHOLD (1024*768*4+32)
+/* Minimum value that the pixel clock can be set to in pico seconds
+ * This is determined by platform clock/3 where the minimum platform
+ * clock is 533MHz. This gives 5629 pico seconds.
+ */
+#define MIN_PIX_CLK 5629
+#define MAX_PIX_CLK 96096
+
+#include <linux/types.h>
+
+struct mfb_alpha {
+	int enable;
+	int alpha;
+};
+
+struct mfb_chroma_key {
+	int enable;
+	__u8  red_max;
+	__u8  green_max;
+	__u8  blue_max;
+	__u8  red_min;
+	__u8  green_min;
+	__u8  blue_min;
+};
+
+struct aoi_display_offset {
+	int x_aoi_d;
+	int y_aoi_d;
+};
+
+#define MFB_SET_CHROMA_KEY	_IOW('M', 1, struct mfb_chroma_key)
+#define MFB_WAIT_FOR_VSYNC	_IOW('F', 0x20, u_int32_t)
+#define MFB_SET_BRIGHTNESS	_IOW('M', 3, __u8)
+
+#define MFB_SET_ALPHA		0x80014d00
+#define MFB_GET_ALPHA		0x40014d00
+#define MFB_SET_AOID		0x80084d04
+#define MFB_GET_AOID		0x40084d04
+#define MFB_SET_PIXFMT		0x80014d08
+#define MFB_GET_PIXFMT		0x40014d08
+
+#define FBIOGET_GWINFO		0x46E0
+#define FBIOPUT_GWINFO		0x46E1
+
+#ifdef __KERNEL__
+#include <linux/spinlock.h>
+
+/*
+ * These are the fields of area descriptor(in DDR memory) for every plane
+ */
+struct diu_ad {
+	/* Word 0(32-bit) in DDR memory */
+/* 	__u16 comp; */
+/* 	__u16 pixel_s:2; */
+/* 	__u16 pallete:1; */
+/* 	__u16 red_c:2; */
+/* 	__u16 green_c:2; */
+/* 	__u16 blue_c:2; */
+/* 	__u16 alpha_c:3; */
+/* 	__u16 byte_f:1; */
+/* 	__u16 res0:3; */
+
+	__be32 pix_fmt; /* hard coding pixel format */
+
+	/* Word 1(32-bit) in DDR memory */
+	__le32 addr;
+
+	/* Word 2(32-bit) in DDR memory */
+/* 	__u32 delta_xs:11; */
+/* 	__u32 res1:1; */
+/* 	__u32 delta_ys:11; */
+/* 	__u32 res2:1; */
+/* 	__u32 g_alpha:8; */
+	__le32 src_size_g_alpha;
+
+	/* Word 3(32-bit) in DDR memory */
+/* 	__u32 delta_xi:11; */
+/* 	__u32 res3:5; */
+/* 	__u32 delta_yi:11; */
+/* 	__u32 res4:3; */
+/* 	__u32 flip:2; */
+	__le32 aoi_size;
+
+	/* Word 4(32-bit) in DDR memory */
+	/*__u32 offset_xi:11;
+	__u32 res5:5;
+	__u32 offset_yi:11;
+	__u32 res6:5;
+	*/
+	__le32 offset_xyi;
+
+	/* Word 5(32-bit) in DDR memory */
+	/*__u32 offset_xd:11;
+	__u32 res7:5;
+	__u32 offset_yd:11;
+	__u32 res8:5; */
+	__le32 offset_xyd;
+
+
+	/* Word 6(32-bit) in DDR memory */
+	__u8 ckmax_r;
+	__u8 ckmax_g;
+	__u8 ckmax_b;
+	__u8 res9;
+
+	/* Word 7(32-bit) in DDR memory */
+	__u8 ckmin_r;
+	__u8 ckmin_g;
+	__u8 ckmin_b;
+	__u8 res10;
+/* 	__u32 res10:8; */
+
+	/* Word 8(32-bit) in DDR memory */
+	__le32 next_ad;
+
+	/* Word 9(32-bit) in DDR memory, just for 64-bit aligned */
+	__u32 paddr;
+} __attribute__ ((packed));
+
+/* DIU register map */
+struct diu {
+	__be32 desc[3];
+	__be32 gamma;
+	__be32 pallete;
+	__be32 cursor;
+	__be32 curs_pos;
+	__be32 diu_mode;
+	__be32 bgnd;
+	__be32 bgnd_wb;
+	__be32 disp_size;
+	__be32 wb_size;
+	__be32 wb_mem_addr;
+	__be32 hsyn_para;
+	__be32 vsyn_para;
+	__be32 syn_pol;
+	__be32 thresholds;
+	__be32 int_status;
+	__be32 int_mask;
+	__be32 colorbar[8];
+	__be32 filling;
+	__be32 plut;
+} __attribute__ ((packed));
+
+struct diu_hw {
+	struct diu *diu_reg;
+	spinlock_t reg_lock;
+
+	__u32 mode;		/* DIU operation mode */
+};
+
+struct diu_addr {
+	__u8 __iomem *vaddr;	/* Virtual address */
+	dma_addr_t paddr;	/* Physical address */
+	__u32 	   offset;
+};
+
+struct diu_pool {
+	struct diu_addr ad;
+	struct diu_addr gamma;
+	struct diu_addr pallete;
+	struct diu_addr cursor;
+};
+
+#define FSL_DIU_BASE_OFFSET	0x2C000	/* Offset of DIU */
+#define INT_LCDC		64	/* DIU interrupt number */
+
+#define FSL_AOI_NUM	6	/* 5 AOIs and one dummy AOI */
+				/* 1 for plane 0, 2 for plane 1&2 each */
+
+/* Minimum X and Y resolutions */
+#define MIN_XRES	64
+#define MIN_YRES	64
+
+/* HW cursor parameters */
+#define MAX_CURS		32
+
+/* Modes of operation of DIU */
+#define MFB_MODE0	0	/* DIU off */
+#define MFB_MODE1	1	/* All three planes output to display */
+#define MFB_MODE2	2	/* Plane 1 to display, planes 2+3 written back*/
+#define MFB_MODE3	3	/* All three planes written back to memory */
+#define MFB_MODE4	4	/* Color bar generation */
+
+/* INT_STATUS/INT_MASK field descriptions */
+#define INT_VSYNC	0x01	/* Vsync interrupt  */
+#define INT_VSYNC_WB	0x02	/* Vsync interrupt for write back operation */
+#define INT_UNDRUN	0x04	/* Under run exception interrupt */
+#define INT_PARERR	0x08	/* Display parameters error interrupt */
+#define INT_LS_BF_VS	0x10	/* Lines before vsync. interrupt */
+
+/* Panels'operation modes */
+#define MFB_TYPE_OUTPUT	0	/* Panel output to display */
+#define MFB_TYPE_OFF	1	/* Panel off */
+#define MFB_TYPE_WB	2	/* Panel written back to memory */
+#define MFB_TYPE_TEST	3	/* Panel generate color bar */
+
+#endif /* __KERNEL__ */
+#endif /* __FSL_DIU_FB_H__ */
-- 
cgit v1.2.3-70-g09d2


From 819ce45afebd77a9de736fa5304ba8352d11dff9 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 20 Jul 2010 18:41:24 +0200
Subject: tracing: Drop cpparg() macro

Drop the cpparg() macro that wraps CPP parameters. We already have
the PARAMS() macro for that, so there is no need to have several versions.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
---
 include/trace/ftrace.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index fb783d94fc5..a9377c0083a 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -75,15 +75,12 @@
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
 	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 
-#undef __cpparg
-#define __cpparg(arg...) arg
-
 /* Callbacks are meaningless to ftrace. */
 #undef TRACE_EVENT_FN
 #define TRACE_EVENT_FN(name, proto, args, tstruct,			\
 		assign, print, reg, unreg)				\
-	TRACE_EVENT(name, __cpparg(proto), __cpparg(args),		\
-		__cpparg(tstruct), __cpparg(assign), __cpparg(print))	\
+	TRACE_EVENT(name, PARAMS(proto), PARAMS(args),			\
+		PARAMS(tstruct), PARAMS(assign), PARAMS(print))		\
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-- 
cgit v1.2.3-70-g09d2


From 08354809d6c73eb73973e132502a0a4e53250971 Mon Sep 17 00:00:00 2001
From: Jassi Brar <jassi.brar@samsung.com>
Date: Fri, 25 Jun 2010 18:21:19 +0900
Subject: ahci_platform: Provide for vendor specific init

Some AHCI implementations may use Vendor Specific HBA[A0h, FFh]
and/or Port[70h, 7Fh] registers to 'prepare' for initialization.
For that, the platform needs the memory-mapped address of the AHCI registers.

This patch adds the 'mmio' argument and reorders the call to
platform init function.

Signed-off-by: Jassi Brar <jassi.brar@samsung.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/ahci_platform.c   | 25 +++++++++++++++----------
 include/linux/ahci_platform.h |  4 +++-
 2 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index 5e11b160f24..68ef6b563b7 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -54,19 +54,13 @@ static int __init ahci_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	if (pdata && pdata->init) {
-		rc = pdata->init(dev);
-		if (rc)
-			return rc;
-	}
-
 	if (pdata && pdata->ata_port_info)
 		pi = *pdata->ata_port_info;
 
 	hpriv = devm_kzalloc(dev, sizeof(*hpriv), GFP_KERNEL);
 	if (!hpriv) {
-		rc = -ENOMEM;
-		goto err0;
+		dev_err(dev, "can't alloc ahci_host_priv\n");
+		return -ENOMEM;
 	}
 
 	hpriv->flags |= (unsigned long)pi.private_data;
@@ -74,8 +68,19 @@ static int __init ahci_probe(struct platform_device *pdev)
 	hpriv->mmio = devm_ioremap(dev, mem->start, resource_size(mem));
 	if (!hpriv->mmio) {
 		dev_err(dev, "can't map %pR\n", mem);
-		rc = -ENOMEM;
-		goto err0;
+		return -ENOMEM;
+	}
+
+	/*
+	 * Some platforms might need to prepare for mmio region access,
+	 * which could be done in the following init call. So, the mmio
+	 * region shouldn't be accessed before init (if provided) has
+	 * returned successfully.
+	 */
+	if (pdata && pdata->init) {
+		rc = pdata->init(dev, hpriv->mmio);
+		if (rc)
+			return rc;
 	}
 
 	ahci_save_initial_config(dev, hpriv,
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index f7dd576dd5a..be3d9a77d6e 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -15,11 +15,13 @@
 #ifndef _AHCI_PLATFORM_H
 #define _AHCI_PLATFORM_H
 
+#include <linux/compiler.h>
+
 struct device;
 struct ata_port_info;
 
 struct ahci_platform_data {
-	int (*init)(struct device *dev);
+	int (*init)(struct device *dev, void __iomem *addr);
 	void (*exit)(struct device *dev);
 	const struct ata_port_info *ata_port_info;
 	unsigned int force_port_map;
-- 
cgit v1.2.3-70-g09d2


From 5b6ae5ba0c45c4d04721537308728688414c9e6b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 30 Jul 2010 11:42:42 +0200
Subject: libata: more PCI IDs for jmicron controllers

Add support for JMB364 and 369.

Patch-originally-from: Aries Lee <arieslee@jmicron.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/pci/quirks.c    | 6 ++++++
 include/linux/pci_ids.h | 2 ++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 477345d4164..a0c20d9e839 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1459,6 +1459,7 @@ static void quirk_jmicron_ata(struct pci_dev *pdev)
 	switch (pdev->device) {
 	case PCI_DEVICE_ID_JMICRON_JMB360: /* SATA single port */
 	case PCI_DEVICE_ID_JMICRON_JMB362: /* SATA dual ports */
+	case PCI_DEVICE_ID_JMICRON_JMB364: /* SATA dual ports */
 		/* The controller should be in single function ahci mode */
 		conf1 |= 0x0002A100; /* Set 8, 13, 15, 17 */
 		break;
@@ -1470,6 +1471,7 @@ static void quirk_jmicron_ata(struct pci_dev *pdev)
 		/* Fall through */
 	case PCI_DEVICE_ID_JMICRON_JMB361:
 	case PCI_DEVICE_ID_JMICRON_JMB363:
+	case PCI_DEVICE_ID_JMICRON_JMB369:
 		/* Enable dual function mode, AHCI on fn 0, IDE fn1 */
 		/* Set the class codes correctly and then direct IDE 0 */
 		conf1 |= 0x00C2A1B3; /* Set 0, 1, 4, 5, 7, 8, 13, 15, 17, 22, 23 */
@@ -1496,16 +1498,20 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB360, qui
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB361, quirk_jmicron_ata);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB362, quirk_jmicron_ata);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB363, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB364, quirk_jmicron_ata);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB365, quirk_jmicron_ata);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB366, quirk_jmicron_ata);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB368, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB369, quirk_jmicron_ata);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB360, quirk_jmicron_ata);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB361, quirk_jmicron_ata);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB362, quirk_jmicron_ata);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB363, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB364, quirk_jmicron_ata);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB365, quirk_jmicron_ata);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB366, quirk_jmicron_ata);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB368, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB369, quirk_jmicron_ata);
 
 #endif
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 3bedcc149c8..eb200e6beb6 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2324,9 +2324,11 @@
 #define PCI_DEVICE_ID_JMICRON_JMB361	0x2361
 #define PCI_DEVICE_ID_JMICRON_JMB362	0x2362
 #define PCI_DEVICE_ID_JMICRON_JMB363	0x2363
+#define PCI_DEVICE_ID_JMICRON_JMB364	0x2364
 #define PCI_DEVICE_ID_JMICRON_JMB365	0x2365
 #define PCI_DEVICE_ID_JMICRON_JMB366	0x2366
 #define PCI_DEVICE_ID_JMICRON_JMB368	0x2368
+#define PCI_DEVICE_ID_JMICRON_JMB369	0x2369
 #define PCI_DEVICE_ID_JMICRON_JMB38X_SD	0x2381
 #define PCI_DEVICE_ID_JMICRON_JMB38X_MMC 0x2382
 #define PCI_DEVICE_ID_JMICRON_JMB38X_MS	0x2383
-- 
cgit v1.2.3-70-g09d2


From e7aeeba6a8fb86ac52bcffa0b72942f784f2b37f Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexdeucher@gmail.com>
Date: Fri, 4 Jun 2010 13:10:12 -0400
Subject: drm/radeon/kms/r6xx+: add query for tile config (v2)

Userspace needs this information to access tiled
buffers via the CPU.

v2: rebased on evergreen accel changes

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/evergreen.c  |  1 +
 drivers/gpu/drm/radeon/r600.c       |  2 +-
 drivers/gpu/drm/radeon/radeon.h     |  3 +++
 drivers/gpu/drm/radeon/radeon_drv.c |  3 ++-
 drivers/gpu/drm/radeon/radeon_kms.c | 12 ++++++++++++
 drivers/gpu/drm/radeon/rv770.c      |  3 ++-
 include/drm/radeon_drm.h            |  1 +
 7 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 1b7da39cc58..957d5067ad9 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1132,6 +1132,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
 								 rdev->config.evergreen.max_backends) &
 								EVERGREEN_MAX_BACKENDS_MASK));
 
+	rdev->config.evergreen.tile_config = gb_addr_config;
 	WREG32(GB_BACKEND_MAP, gb_backend_map);
 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 15fe6c21403..aa36ef69ba6 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1623,7 +1623,7 @@ void r600_gpu_init(struct radeon_device *rdev)
 							 r600_count_pipe_bits((cc_rb_backend_disable &
 									       R6XX_MAX_BACKENDS_MASK) >> 16)),
 							(cc_rb_backend_disable >> 16));
-
+	rdev->config.r600.tile_config = tiling_config;
 	tiling_config |= BACKEND_MAP(backend_map);
 	WREG32(GB_TILING_CONFIG, tiling_config);
 	WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index d4d776d2f1e..be8420e65f0 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -914,6 +914,7 @@ struct r600_asic {
 	unsigned		tiling_nbanks;
 	unsigned		tiling_npipes;
 	unsigned		tiling_group_size;
+	unsigned		tile_config;
 	struct r100_gpu_lockup	lockup;
 };
 
@@ -938,6 +939,7 @@ struct rv770_asic {
 	unsigned		tiling_nbanks;
 	unsigned		tiling_npipes;
 	unsigned		tiling_group_size;
+	unsigned		tile_config;
 	struct r100_gpu_lockup	lockup;
 };
 
@@ -963,6 +965,7 @@ struct evergreen_asic {
 	unsigned tiling_nbanks;
 	unsigned tiling_npipes;
 	unsigned tiling_group_size;
+	unsigned tile_config;
 };
 
 union radeon_asic_config {
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index ed0ceb3fc40..6f8a2e57287 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -46,9 +46,10 @@
  * - 2.3.0 - add MSPOS + 3D texture + r500 VAP regs
  * - 2.4.0 - add crtc id query
  * - 2.5.0 - add get accel 2 to work around ddx breakage for evergreen
+ * - 2.6.0 - add tiling config query (r6xx+)
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	5
+#define KMS_DRIVER_MINOR	6
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 70fda6361cd..9012e6fbadb 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -147,6 +147,18 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	case RADEON_INFO_ACCEL_WORKING2:
 		value = rdev->accel_working;
 		break;
+	case RADEON_INFO_TILING_CONFIG:
+		if (rdev->family >= CHIP_CEDAR)
+			value = rdev->config.evergreen.tile_config;
+		else if (rdev->family >= CHIP_RV770)
+			value = rdev->config.rv770.tile_config;
+		else if (rdev->family >= CHIP_R600)
+			value = rdev->config.r600.tile_config;
+		else {
+			DRM_DEBUG("tiling config is r6xx+ only!\n");
+			return -EINVAL;
+		}
+		break;
 	default:
 		DRM_DEBUG("Invalid request %d\n", info->request);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 836c15ab84d..236fe668192 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -674,8 +674,9 @@ static void rv770_gpu_init(struct radeon_device *rdev)
 								 r600_count_pipe_bits((cc_rb_backend_disable &
 										       R7XX_MAX_BACKENDS_MASK) >> 16)),
 								(cc_rb_backend_disable >> 16));
-	gb_tiling_config |= BACKEND_MAP(backend_map);
 
+	rdev->config.rv770.tile_config = gb_tiling_config;
+	gb_tiling_config |= BACKEND_MAP(backend_map);
 
 	WREG32(GB_TILING_CONFIG, gb_tiling_config);
 	WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index 5347063e9d5..ac5f0403d53 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -904,6 +904,7 @@ struct drm_radeon_cs {
 #define RADEON_INFO_ACCEL_WORKING	0x03
 #define RADEON_INFO_CRTC_FROM_ID	0x04
 #define RADEON_INFO_ACCEL_WORKING2	0x05
+#define RADEON_INFO_TILING_CONFIG	0x06
 
 struct drm_radeon_info {
 	uint32_t		request;
-- 
cgit v1.2.3-70-g09d2


From e190bfe56841551b1ad5abb42ebd0c4798cc8c01 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Thu, 22 Jul 2010 17:06:18 +0200
Subject: drm: Import driver for the sil164 I2C TMDS transmitter.

sil164 transmitters are used for DVI outputs on Intel/nvidia and ATI setups.

So far only nouveau can use this driver.

Signed-off-by: Francisco Jerez <currojerez@riseup.net>
Tested-by: Patrice Mandin <patmandin@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/i2c/Makefile     |   3 +
 drivers/gpu/drm/i2c/sil164_drv.c | 462 +++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/nouveau/Kconfig  |   9 +
 include/drm/i2c/sil164.h         |  63 ++++++
 4 files changed, 537 insertions(+)
 create mode 100644 drivers/gpu/drm/i2c/sil164_drv.c
 create mode 100644 include/drm/i2c/sil164.h

(limited to 'include')

diff --git a/drivers/gpu/drm/i2c/Makefile b/drivers/gpu/drm/i2c/Makefile
index 6d2abaf35ba..92862563e7e 100644
--- a/drivers/gpu/drm/i2c/Makefile
+++ b/drivers/gpu/drm/i2c/Makefile
@@ -2,3 +2,6 @@ ccflags-y := -Iinclude/drm
 
 ch7006-y := ch7006_drv.o ch7006_mode.o
 obj-$(CONFIG_DRM_I2C_CH7006) += ch7006.o
+
+sil164-y := sil164_drv.o
+obj-$(CONFIG_DRM_I2C_SIL164) += sil164.o
diff --git a/drivers/gpu/drm/i2c/sil164_drv.c b/drivers/gpu/drm/i2c/sil164_drv.c
new file mode 100644
index 00000000000..0b6773290c0
--- /dev/null
+++ b/drivers/gpu/drm/i2c/sil164_drv.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright (C) 2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "drm_crtc_helper.h"
+#include "drm_encoder_slave.h"
+#include "i2c/sil164.h"
+
+struct sil164_priv {
+	struct sil164_encoder_params config;
+	struct i2c_client *duallink_slave;
+
+	uint8_t saved_state[0x10];
+	uint8_t saved_slave_state[0x10];
+};
+
+#define to_sil164_priv(x) \
+	((struct sil164_priv *)to_encoder_slave(x)->slave_priv)
+
+#define sil164_dbg(client, format, ...) do {				\
+		if (drm_debug & DRM_UT_KMS)				\
+			dev_printk(KERN_DEBUG, &client->dev,		\
+				   "%s: " format, __func__, ## __VA_ARGS__); \
+	} while (0)
+#define sil164_info(client, format, ...)		\
+	dev_info(&client->dev, format, ## __VA_ARGS__)
+#define sil164_err(client, format, ...)			\
+	dev_err(&client->dev, format, ## __VA_ARGS__)
+
+#define SIL164_I2C_ADDR_MASTER			0x38
+#define SIL164_I2C_ADDR_SLAVE			0x39
+
+/* HW register definitions */
+
+#define SIL164_VENDOR_LO			0x0
+#define SIL164_VENDOR_HI			0x1
+#define SIL164_DEVICE_LO			0x2
+#define SIL164_DEVICE_HI			0x3
+#define SIL164_REVISION				0x4
+#define SIL164_FREQ_MIN				0x6
+#define SIL164_FREQ_MAX				0x7
+#define SIL164_CONTROL0				0x8
+#  define SIL164_CONTROL0_POWER_ON		0x01
+#  define SIL164_CONTROL0_EDGE_RISING		0x02
+#  define SIL164_CONTROL0_INPUT_24BIT		0x04
+#  define SIL164_CONTROL0_DUAL_EDGE		0x08
+#  define SIL164_CONTROL0_HSYNC_ON		0x10
+#  define SIL164_CONTROL0_VSYNC_ON		0x20
+#define SIL164_DETECT				0x9
+#  define SIL164_DETECT_INTR_STAT		0x01
+#  define SIL164_DETECT_HOTPLUG_STAT		0x02
+#  define SIL164_DETECT_RECEIVER_STAT		0x04
+#  define SIL164_DETECT_INTR_MODE_RECEIVER	0x00
+#  define SIL164_DETECT_INTR_MODE_HOTPLUG	0x08
+#  define SIL164_DETECT_OUT_MODE_HIGH		0x00
+#  define SIL164_DETECT_OUT_MODE_INTR		0x10
+#  define SIL164_DETECT_OUT_MODE_RECEIVER	0x20
+#  define SIL164_DETECT_OUT_MODE_HOTPLUG	0x30
+#  define SIL164_DETECT_VSWING_STAT		0x80
+#define SIL164_CONTROL1				0xa
+#  define SIL164_CONTROL1_DESKEW_ENABLE		0x10
+#  define SIL164_CONTROL1_DESKEW_INCR_SHIFT	5
+#define SIL164_GPIO				0xb
+#define SIL164_CONTROL2				0xc
+#  define SIL164_CONTROL2_FILTER_ENABLE		0x01
+#  define SIL164_CONTROL2_FILTER_SETTING_SHIFT	1
+#  define SIL164_CONTROL2_DUALLINK_MASTER	0x40
+#  define SIL164_CONTROL2_SYNC_CONT		0x80
+#define SIL164_DUALLINK				0xd
+#  define SIL164_DUALLINK_ENABLE		0x10
+#  define SIL164_DUALLINK_SKEW_SHIFT		5
+#define SIL164_PLLZONE				0xe
+#  define SIL164_PLLZONE_STAT			0x08
+#  define SIL164_PLLZONE_FORCE_ON		0x10
+#  define SIL164_PLLZONE_FORCE_HIGH		0x20
+
+/* HW access functions */
+
+static void
+sil164_write(struct i2c_client *client, uint8_t addr, uint8_t val)
+{
+	uint8_t buf[] = {addr, val};
+	int ret;
+
+	ret = i2c_master_send(client, buf, ARRAY_SIZE(buf));
+	if (ret < 0)
+		sil164_err(client, "Error %d writing to subaddress 0x%x\n",
+			   ret, addr);
+}
+
+static uint8_t
+sil164_read(struct i2c_client *client, uint8_t addr)
+{
+	uint8_t val;
+	int ret;
+
+	ret = i2c_master_send(client, &addr, sizeof(addr));
+	if (ret < 0)
+		goto fail;
+
+	ret = i2c_master_recv(client, &val, sizeof(val));
+	if (ret < 0)
+		goto fail;
+
+	return val;
+
+fail:
+	sil164_err(client, "Error %d reading from subaddress 0x%x\n",
+		   ret, addr);
+	return 0;
+}
+
+static void
+sil164_save_state(struct i2c_client *client, uint8_t *state)
+{
+	int reg;	/* register subaddress, also the index into state[] */
+
+	for (reg = SIL164_CONTROL0; reg <= SIL164_PLLZONE; reg++)
+		state[reg] = sil164_read(client, reg);
+}
+
+static void
+sil164_restore_state(struct i2c_client *client, uint8_t *state)
+{
+	int reg;	/* register subaddress, also the index into state[] */
+
+	for (reg = SIL164_CONTROL0; reg <= SIL164_PLLZONE; reg++)
+		sil164_write(client, reg, state[reg]);
+}
+
+static void
+sil164_set_power_state(struct i2c_client *client, bool on)
+{
+	uint8_t control0 = sil164_read(client, SIL164_CONTROL0);
+
+	if (on)
+		control0 |= SIL164_CONTROL0_POWER_ON;
+	else
+		control0 &= ~SIL164_CONTROL0_POWER_ON;
+
+	sil164_write(client, SIL164_CONTROL0, control0);
+}
+
+static void
+sil164_init_state(struct i2c_client *client,
+		  struct sil164_encoder_params *config,
+		  bool duallink)
+{
+	sil164_write(client, SIL164_CONTROL0,
+		     SIL164_CONTROL0_HSYNC_ON |
+		     SIL164_CONTROL0_VSYNC_ON |
+		     (config->input_edge ? SIL164_CONTROL0_EDGE_RISING : 0) |
+		     (config->input_width ? SIL164_CONTROL0_INPUT_24BIT : 0) |
+		     (config->input_dual ? SIL164_CONTROL0_DUAL_EDGE : 0));
+
+	sil164_write(client, SIL164_DETECT,
+		     SIL164_DETECT_INTR_STAT |
+		     SIL164_DETECT_OUT_MODE_RECEIVER);
+
+	sil164_write(client, SIL164_CONTROL1,
+		     (config->input_skew ? SIL164_CONTROL1_DESKEW_ENABLE : 0) |
+		     (((config->input_skew + 4) & 0x7)
+		      << SIL164_CONTROL1_DESKEW_INCR_SHIFT));
+
+	sil164_write(client, SIL164_CONTROL2,
+		     SIL164_CONTROL2_SYNC_CONT |
+		     (config->pll_filter ? 0 : SIL164_CONTROL2_FILTER_ENABLE) |
+		     (4 << SIL164_CONTROL2_FILTER_SETTING_SHIFT));
+
+	sil164_write(client, SIL164_PLLZONE, 0);
+
+	if (duallink)
+		sil164_write(client, SIL164_DUALLINK,
+			     SIL164_DUALLINK_ENABLE |
+			     (((config->duallink_skew + 4) & 0x7)
+			      << SIL164_DUALLINK_SKEW_SHIFT));
+	else
+		sil164_write(client, SIL164_DUALLINK, 0);
+}
+
+/* DRM encoder functions */
+
+static void
+sil164_encoder_set_config(struct drm_encoder *encoder, void *params)
+{
+	struct sil164_priv *priv = to_sil164_priv(encoder);
+
+	priv->config = *(struct sil164_encoder_params *)params;
+}
+
+static void
+sil164_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct sil164_priv *priv = to_sil164_priv(encoder);
+	bool on = (mode == DRM_MODE_DPMS_ON);
+	bool duallink = (on && encoder->crtc->mode.clock > 165000);
+
+	sil164_set_power_state(drm_i2c_encoder_get_client(encoder), on);
+
+	if (priv->duallink_slave)
+		sil164_set_power_state(priv->duallink_slave, duallink);
+}
+
+static void
+sil164_encoder_save(struct drm_encoder *encoder)
+{
+	struct sil164_priv *priv = to_sil164_priv(encoder);
+
+	sil164_save_state(drm_i2c_encoder_get_client(encoder),
+			  priv->saved_state);
+
+	if (priv->duallink_slave)
+		sil164_save_state(priv->duallink_slave,
+				  priv->saved_slave_state);
+}
+
+static void
+sil164_encoder_restore(struct drm_encoder *encoder)
+{
+	struct sil164_priv *priv = to_sil164_priv(encoder);
+
+	sil164_restore_state(drm_i2c_encoder_get_client(encoder),
+			     priv->saved_state);
+
+	if (priv->duallink_slave)
+		sil164_restore_state(priv->duallink_slave,
+				     priv->saved_slave_state);
+}
+
+static bool
+sil164_encoder_mode_fixup(struct drm_encoder *encoder,
+			  struct drm_display_mode *mode,
+			  struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static int
+sil164_encoder_mode_valid(struct drm_encoder *encoder,
+			  struct drm_display_mode *mode)
+{
+	struct sil164_priv *priv = to_sil164_priv(encoder);
+
+	if (mode->clock < 32000)
+		return MODE_CLOCK_LOW;
+
+	if (mode->clock > 330000 ||
+	    (mode->clock > 165000 && !priv->duallink_slave))
+		return MODE_CLOCK_HIGH;
+
+	return MODE_OK;
+}
+
+static void
+sil164_encoder_mode_set(struct drm_encoder *encoder,
+			struct drm_display_mode *mode,
+			struct drm_display_mode *adjusted_mode)
+{
+	struct sil164_priv *priv = to_sil164_priv(encoder);
+	bool duallink = adjusted_mode->clock > 165000;
+
+	sil164_init_state(drm_i2c_encoder_get_client(encoder),
+			  &priv->config, duallink);
+
+	if (priv->duallink_slave)
+		sil164_init_state(priv->duallink_slave,
+				  &priv->config, duallink);
+
+	sil164_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
+}
+
+static enum drm_connector_status
+sil164_encoder_detect(struct drm_encoder *encoder,
+		      struct drm_connector *connector)
+{
+	struct i2c_client *client = drm_i2c_encoder_get_client(encoder);
+
+	if (sil164_read(client, SIL164_DETECT) & SIL164_DETECT_HOTPLUG_STAT)
+		return connector_status_connected;
+	else
+		return connector_status_disconnected;
+}
+
+static int
+sil164_encoder_get_modes(struct drm_encoder *encoder,
+			 struct drm_connector *connector)
+{
+	return 0;
+}
+
+static int
+sil164_encoder_create_resources(struct drm_encoder *encoder,
+				struct drm_connector *connector)
+{
+	return 0;
+}
+
+static int
+sil164_encoder_set_property(struct drm_encoder *encoder,
+			    struct drm_connector *connector,
+			    struct drm_property *property,
+			    uint64_t val)
+{
+	return 0;
+}
+
+static void
+sil164_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct sil164_priv *priv = to_sil164_priv(encoder);
+
+	if (priv->duallink_slave)
+		i2c_unregister_device(priv->duallink_slave);
+
+	kfree(priv);
+	drm_i2c_encoder_destroy(encoder);
+}
+
+static struct drm_encoder_slave_funcs sil164_encoder_funcs = {
+	.set_config = sil164_encoder_set_config,
+	.destroy = sil164_encoder_destroy,
+	.dpms = sil164_encoder_dpms,
+	.save = sil164_encoder_save,
+	.restore = sil164_encoder_restore,
+	.mode_fixup = sil164_encoder_mode_fixup,
+	.mode_valid = sil164_encoder_mode_valid,
+	.mode_set = sil164_encoder_mode_set,
+	.detect = sil164_encoder_detect,
+	.get_modes = sil164_encoder_get_modes,
+	.create_resources = sil164_encoder_create_resources,
+	.set_property = sil164_encoder_set_property,
+};
+
+/* I2C driver functions */
+
+static int
+sil164_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+	int vendor = sil164_read(client, SIL164_VENDOR_HI) << 8 |
+		sil164_read(client, SIL164_VENDOR_LO);
+	int device = sil164_read(client, SIL164_DEVICE_HI) << 8 |
+		sil164_read(client, SIL164_DEVICE_LO);
+	int rev = sil164_read(client, SIL164_REVISION);
+
+	if (vendor != 0x1 || device != 0x6) {
+		sil164_dbg(client, "Unknown device %x:%x.%x\n",
+			   vendor, device, rev);
+		return -ENODEV;
+	}
+
+	sil164_info(client, "Detected device %x:%x.%x\n",
+		    vendor, device, rev);
+
+	return 0;
+}
+
+static int
+sil164_remove(struct i2c_client *client)
+{
+	return 0;
+}
+
+static struct i2c_client *
+sil164_detect_slave(struct i2c_client *client)
+{
+	struct i2c_adapter *adap = client->adapter;
+	struct i2c_msg msg = {
+		.addr = SIL164_I2C_ADDR_SLAVE,
+		.len = 0,	/* empty message: only checks the address responds */
+	};
+	const struct i2c_board_info info = {
+		I2C_BOARD_INFO("sil164", SIL164_I2C_ADDR_SLAVE)
+	};
+
+	if (i2c_transfer(adap, &msg, 1) != 1) {
+		sil164_dbg(adap, "No dual-link slave found.\n");
+		return NULL;
+	}
+
+	return i2c_new_device(adap, &info);
+}
+
+static int
+sil164_encoder_init(struct i2c_client *client,
+		    struct drm_device *dev,
+		    struct drm_encoder_slave *encoder)
+{
+	struct sil164_priv *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	encoder->slave_priv = priv;
+	encoder->slave_funcs = &sil164_encoder_funcs;
+
+	priv->duallink_slave = sil164_detect_slave(client);
+
+	return 0;
+}
+
+static const struct i2c_device_id sil164_ids[] = {
+	{ "sil164", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, sil164_ids);
+
+static struct drm_i2c_encoder_driver sil164_driver = {
+	.i2c_driver = {
+		.probe = sil164_probe,
+		.remove = sil164_remove,
+		.driver = {
+			.name = "sil164",
+		},
+		.id_table = sil164_ids,
+	},
+	.encoder_init = sil164_encoder_init,
+};
+
+/* Module initialization */
+
+static int __init
+sil164_init(void)
+{
+	return drm_i2c_encoder_register(THIS_MODULE, &sil164_driver);
+}
+
+static void __exit
+sil164_exit(void)
+{
+	drm_i2c_encoder_unregister(&sil164_driver);
+}
+
+MODULE_AUTHOR("Francisco Jerez <currojerez@riseup.net>");
+MODULE_DESCRIPTION("Silicon Image sil164 TMDS transmitter driver");
+MODULE_LICENSE("GPL and additional rights");
+
+module_init(sil164_init);
+module_exit(sil164_exit);
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index b6f5239c2ef..d2d28048efb 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -41,4 +41,13 @@ config DRM_I2C_CH7006
 
 	  This driver is currently only useful if you're also using
 	  the nouveau driver.
+
+config DRM_I2C_SIL164
+	tristate "Silicon Image sil164 TMDS transmitter"
+	default m if DRM_NOUVEAU
+	help
+	  Support for sil164 and similar single-link (or dual-link
+	  when used in pairs) TMDS transmitters, used in some nVidia
+	  video cards.
+
 endmenu
diff --git a/include/drm/i2c/sil164.h b/include/drm/i2c/sil164.h
new file mode 100644
index 00000000000..205e27384c8
--- /dev/null
+++ b/include/drm/i2c/sil164.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DRM_I2C_SIL164_H__
+#define __DRM_I2C_SIL164_H__
+
+/**
+ * struct sil164_encoder_params
+ *
+ * Describes how the sil164 is connected to the GPU. It should be used
+ * as the @params parameter of its @set_config method.
+ *
+ * See "http://www.siliconimage.com/docs/SiI-DS-0021-E-164.pdf".
+ */
+struct sil164_encoder_params {
+	enum {
+		SIL164_INPUT_EDGE_FALLING = 0,
+		SIL164_INPUT_EDGE_RISING
+	} input_edge;
+
+	enum {
+		SIL164_INPUT_WIDTH_12BIT = 0,
+		SIL164_INPUT_WIDTH_24BIT
+	} input_width;
+
+	enum {
+		SIL164_INPUT_SINGLE_EDGE = 0,
+		SIL164_INPUT_DUAL_EDGE
+	} input_dual;
+
+	enum {
+		SIL164_PLL_FILTER_ON = 0,
+		SIL164_PLL_FILTER_OFF,
+	} pll_filter;
+
+	int input_skew; /** < Allowed range [-4, 3], use 0 for no de-skew. */
+	int duallink_skew; /** < Allowed range [-4, 3]. */
+};
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From ab9e1f5966591dc3e811418e96ba04f284c52458 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 13 Jul 2010 11:11:11 +1000
Subject: drm/radeon: add basic zmask/hiz support (v4)

This interface allows userspace to request hyperz support. It probably
needs more locking, and reporting that hyperz is available is inherently
racy, since someone else might claim it before you do.

v2: modify so we pass 0 valued packets to let DDX/r300c keep working.
also fixed incorrect 0x4f1c reference.

v3: fixup zb_bw_cntl so older drivers keep working

v4: add locking, fixup SC_HYPERZ_EN - patch stream to disable hiz

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/r100.c         |  5 ++++
 drivers/gpu/drm/radeon/r100d.h        |  2 ++
 drivers/gpu/drm/radeon/r300.c         | 44 ++++++++++++++++++++++++++++++++---
 drivers/gpu/drm/radeon/r300d.h        |  2 ++
 drivers/gpu/drm/radeon/radeon.h       |  2 ++
 drivers/gpu/drm/radeon/radeon_drv.c   |  2 +-
 drivers/gpu/drm/radeon/radeon_kms.c   | 13 ++++++++++-
 drivers/gpu/drm/radeon/reg_srcs/r300  | 13 -----------
 drivers/gpu/drm/radeon/reg_srcs/r420  | 14 +----------
 drivers/gpu/drm/radeon/reg_srcs/rs600 | 13 -----------
 drivers/gpu/drm/radeon/reg_srcs/rv515 | 13 -----------
 include/drm/radeon_drm.h              |  1 +
 12 files changed, 67 insertions(+), 57 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 4c48df46435..e817a0bb5eb 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1803,6 +1803,11 @@ static int r100_packet3_check(struct radeon_cs_parser *p,
 			return r;
 		break;
 		/* triggers drawing using indices to vertex buffer */
+	case PACKET3_3D_CLEAR_HIZ:
+	case PACKET3_3D_CLEAR_ZMASK:
+		if (p->rdev->hyperz_filp != p->filp)
+			return -EINVAL;
+		break;
 	case PACKET3_NOP:
 		break;
 	default:
diff --git a/drivers/gpu/drm/radeon/r100d.h b/drivers/gpu/drm/radeon/r100d.h
index d016b16fa11..b121b6c678d 100644
--- a/drivers/gpu/drm/radeon/r100d.h
+++ b/drivers/gpu/drm/radeon/r100d.h
@@ -48,10 +48,12 @@
 #define		PACKET3_3D_DRAW_IMMD		0x29
 #define		PACKET3_3D_DRAW_INDX		0x2A
 #define		PACKET3_3D_LOAD_VBPNTR		0x2F
+#define		PACKET3_3D_CLEAR_ZMASK		0x32
 #define		PACKET3_INDX_BUFFER		0x33
 #define		PACKET3_3D_DRAW_VBUF_2		0x34
 #define		PACKET3_3D_DRAW_IMMD_2		0x35
 #define		PACKET3_3D_DRAW_INDX_2		0x36
+#define		PACKET3_3D_CLEAR_HIZ		0x37
 #define		PACKET3_BITBLT_MULTI		0x9B
 
 #define PACKET0(reg, n)	(CP_PACKET0 |					\
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 58eab5d4730..c827738ad7d 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -1048,14 +1048,47 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 		/* RB3D_COLOR_CHANNEL_MASK */
 		track->color_channel_mask = idx_value;
 		break;
-	case 0x4d1c:
+	case 0x43a4:
+		/* SC_HYPERZ_EN */
+		/* r300c emits this register - we need to disable hyperz for it
+		 * without complaining */
+		if (p->rdev->hyperz_filp != p->filp) {
+			if (idx_value & 0x1)
+				ib[idx] = idx_value & ~1;
+		}
+		break;
+	case 0x4f1c:
 		/* ZB_BW_CNTL */
 		track->zb_cb_clear = !!(idx_value & (1 << 5));
+		if (p->rdev->hyperz_filp != p->filp) {
+			if (idx_value & (R300_HIZ_ENABLE |
+					 R300_RD_COMP_ENABLE |
+					 R300_WR_COMP_ENABLE |
+					 R300_FAST_FILL_ENABLE))
+				goto fail;
+		}
 		break;
 	case 0x4e04:
 		/* RB3D_BLENDCNTL */
 		track->blend_read_enable = !!(idx_value & (1 << 2));
 		break;
+	case 0x4f28: /* ZB_DEPTHCLEARVALUE */
+		break;
+	case 0x4f30: /* ZB_MASK_OFFSET */
+	case 0x4f34: /* ZB_ZMASK_PITCH */
+	case 0x4f44: /* ZB_HIZ_OFFSET */
+	case 0x4f54: /* ZB_HIZ_PITCH */
+		if (idx_value && (p->rdev->hyperz_filp != p->filp))
+			goto fail;
+		break;
+	case 0x4028:
+		if (idx_value && (p->rdev->hyperz_filp != p->filp))
+			goto fail;
+		/* GB_Z_PEQ_CONFIG */
+		if (p->rdev->family >= CHIP_RV350)
+			break;
+		goto fail;
+		break;
 	case 0x4be8:
 		/* valid register only on RV530 */
 		if (p->rdev->family == CHIP_RV530)
@@ -1066,8 +1099,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 	}
 	return 0;
 fail:
-	printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
-	       reg, idx);
+	printk(KERN_ERR "Forbidden register 0x%04X in cs at %d (val=%08x)\n",
+	       reg, idx, idx_value);
 	return -EINVAL;
 }
 
@@ -1161,6 +1194,11 @@ static int r300_packet3_check(struct radeon_cs_parser *p,
 			return r;
 		}
 		break;
+	case PACKET3_3D_CLEAR_HIZ:
+	case PACKET3_3D_CLEAR_ZMASK:
+		if (p->rdev->hyperz_filp != p->filp)
+			return -EINVAL;
+		break;
 	case PACKET3_NOP:
 		break;
 	default:
diff --git a/drivers/gpu/drm/radeon/r300d.h b/drivers/gpu/drm/radeon/r300d.h
index 968a33317fb..0c036c60d9d 100644
--- a/drivers/gpu/drm/radeon/r300d.h
+++ b/drivers/gpu/drm/radeon/r300d.h
@@ -48,10 +48,12 @@
 #define		PACKET3_3D_DRAW_IMMD		0x29
 #define		PACKET3_3D_DRAW_INDX		0x2A
 #define		PACKET3_3D_LOAD_VBPNTR		0x2F
+#define		PACKET3_3D_CLEAR_ZMASK		0x32
 #define		PACKET3_INDX_BUFFER		0x33
 #define		PACKET3_3D_DRAW_VBUF_2		0x34
 #define		PACKET3_3D_DRAW_IMMD_2		0x35
 #define		PACKET3_3D_DRAW_INDX_2		0x36
+#define		PACKET3_3D_CLEAR_HIZ		0x37
 #define		PACKET3_BITBLT_MULTI		0x9B
 
 #define PACKET0(reg, n)	(CP_PACKET0 |					\
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index c84f9a31155..368fecf0c2b 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1098,6 +1098,8 @@ struct radeon_device {
 
 	bool powered_down;
 	struct notifier_block acpi_nb;
+	/* only one userspace can use Hyperz features at a time */
+	struct drm_file *hyperz_filp;
 };
 
 int radeon_device_init(struct radeon_device *rdev,
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 6f8a2e57287..795403b0e2c 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -46,7 +46,7 @@
  * - 2.3.0 - add MSPOS + 3D texture + r500 VAP regs
  * - 2.4.0 - add crtc id query
  * - 2.5.0 - add get accel 2 to work around ddx breakage for evergreen
- * - 2.6.0 - add tiling config query (r6xx+)
+ * - 2.6.0 - add tiling config query (r6xx+), add initial HiZ support (r300->r500)
  */
 #define KMS_DRIVER_MAJOR	2
 #define KMS_DRIVER_MINOR	6
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index dd0a78e954a..e5b70542738 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -159,6 +159,17 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 			DRM_DEBUG_KMS("tiling config is r6xx+ only!\n");
 			return -EINVAL;
 		}
 		break;
+	case RADEON_INFO_WANT_HYPERZ:
+		/* hyperz has a single owner: grant it only if unclaimed */
+		mutex_lock(&dev->struct_mutex);
+		if (rdev->hyperz_filp)
+			value = 0;
+		else {
+			rdev->hyperz_filp = filp;
+			value = 1;
+		}
+		mutex_unlock(&dev->struct_mutex);
+		break;
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->request);
@@ -199,9 +210,11 @@ void radeon_driver_postclose_kms(struct drm_device *dev,
 void radeon_driver_preclose_kms(struct drm_device *dev,
 				struct drm_file *file_priv)
 {
+	struct radeon_device *rdev = dev->dev_private;
+	if (rdev->hyperz_filp == file_priv)
+		rdev->hyperz_filp = NULL;
 }
 
-
 /*
  * VBlank related functions.
  */
diff --git a/drivers/gpu/drm/radeon/reg_srcs/r300 b/drivers/gpu/drm/radeon/reg_srcs/r300
index 1e97b2d129f..b506ec1cab4 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/r300
+++ b/drivers/gpu/drm/radeon/reg_srcs/r300
@@ -187,7 +187,6 @@ r300 0x4f60
 0x4364 RS_INST_13
 0x4368 RS_INST_14
 0x436C RS_INST_15
-0x43A4 SC_HYPERZ_EN
 0x43A8 SC_EDGERULE
 0x43B0 SC_CLIP_0_A
 0x43B4 SC_CLIP_0_B
@@ -716,16 +715,4 @@ r300 0x4f60
 0x4F08 ZB_STENCILREFMASK
 0x4F14 ZB_ZTOP
 0x4F18 ZB_ZCACHE_CTLSTAT
-0x4F1C ZB_BW_CNTL
-0x4F28 ZB_DEPTHCLEARVALUE
-0x4F30 ZB_ZMASK_OFFSET
-0x4F34 ZB_ZMASK_PITCH
-0x4F38 ZB_ZMASK_WRINDEX
-0x4F3C ZB_ZMASK_DWORD
-0x4F40 ZB_ZMASK_RDINDEX
-0x4F44 ZB_HIZ_OFFSET
-0x4F48 ZB_HIZ_WRINDEX
-0x4F4C ZB_HIZ_DWORD
-0x4F50 ZB_HIZ_RDINDEX
-0x4F54 ZB_HIZ_PITCH
 0x4F58 ZB_ZPASS_DATA
diff --git a/drivers/gpu/drm/radeon/reg_srcs/r420 b/drivers/gpu/drm/radeon/reg_srcs/r420
index e958980d00f..8c1214c2390 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/r420
+++ b/drivers/gpu/drm/radeon/reg_srcs/r420
@@ -130,6 +130,7 @@ r420 0x4f60
 0x401C GB_SELECT
 0x4020 GB_AA_CONFIG
 0x4024 GB_FIFO_SIZE
+0x4028 GB_Z_PEQ_CONFIG
 0x4100 TX_INVALTAGS
 0x4200 GA_POINT_S0
 0x4204 GA_POINT_T0
@@ -187,7 +188,6 @@ r420 0x4f60
 0x4364 RS_INST_13
 0x4368 RS_INST_14
 0x436C RS_INST_15
-0x43A4 SC_HYPERZ_EN
 0x43A8 SC_EDGERULE
 0x43B0 SC_CLIP_0_A
 0x43B4 SC_CLIP_0_B
@@ -782,16 +782,4 @@ r420 0x4f60
 0x4F08 ZB_STENCILREFMASK
 0x4F14 ZB_ZTOP
 0x4F18 ZB_ZCACHE_CTLSTAT
-0x4F1C ZB_BW_CNTL
-0x4F28 ZB_DEPTHCLEARVALUE
-0x4F30 ZB_ZMASK_OFFSET
-0x4F34 ZB_ZMASK_PITCH
-0x4F38 ZB_ZMASK_WRINDEX
-0x4F3C ZB_ZMASK_DWORD
-0x4F40 ZB_ZMASK_RDINDEX
-0x4F44 ZB_HIZ_OFFSET
-0x4F48 ZB_HIZ_WRINDEX
-0x4F4C ZB_HIZ_DWORD
-0x4F50 ZB_HIZ_RDINDEX
-0x4F54 ZB_HIZ_PITCH
 0x4F58 ZB_ZPASS_DATA
diff --git a/drivers/gpu/drm/radeon/reg_srcs/rs600 b/drivers/gpu/drm/radeon/reg_srcs/rs600
index 83e8bc0c2bb..0828d80396f 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/rs600
+++ b/drivers/gpu/drm/radeon/reg_srcs/rs600
@@ -187,7 +187,6 @@ rs600 0x6d40
 0x4364 RS_INST_13
 0x4368 RS_INST_14
 0x436C RS_INST_15
-0x43A4 SC_HYPERZ_EN
 0x43A8 SC_EDGERULE
 0x43B0 SC_CLIP_0_A
 0x43B4 SC_CLIP_0_B
@@ -782,16 +781,4 @@ rs600 0x6d40
 0x4F08 ZB_STENCILREFMASK
 0x4F14 ZB_ZTOP
 0x4F18 ZB_ZCACHE_CTLSTAT
-0x4F1C ZB_BW_CNTL
-0x4F28 ZB_DEPTHCLEARVALUE
-0x4F30 ZB_ZMASK_OFFSET
-0x4F34 ZB_ZMASK_PITCH
-0x4F38 ZB_ZMASK_WRINDEX
-0x4F3C ZB_ZMASK_DWORD
-0x4F40 ZB_ZMASK_RDINDEX
-0x4F44 ZB_HIZ_OFFSET
-0x4F48 ZB_HIZ_WRINDEX
-0x4F4C ZB_HIZ_DWORD
-0x4F50 ZB_HIZ_RDINDEX
-0x4F54 ZB_HIZ_PITCH
 0x4F58 ZB_ZPASS_DATA
diff --git a/drivers/gpu/drm/radeon/reg_srcs/rv515 b/drivers/gpu/drm/radeon/reg_srcs/rv515
index 1e46233985e..8293855f5f0 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/rv515
+++ b/drivers/gpu/drm/radeon/reg_srcs/rv515
@@ -235,7 +235,6 @@ rv515 0x6d40
 0x4354 RS_INST_13
 0x4358 RS_INST_14
 0x435C RS_INST_15
-0x43A4 SC_HYPERZ_EN
 0x43A8 SC_EDGERULE
 0x43B0 SC_CLIP_0_A
 0x43B4 SC_CLIP_0_B
@@ -479,17 +478,5 @@ rv515 0x6d40
 0x4F08 ZB_STENCILREFMASK
 0x4F14 ZB_ZTOP
 0x4F18 ZB_ZCACHE_CTLSTAT
-0x4F1C ZB_BW_CNTL
-0x4F28 ZB_DEPTHCLEARVALUE
-0x4F30 ZB_ZMASK_OFFSET
-0x4F34 ZB_ZMASK_PITCH
-0x4F38 ZB_ZMASK_WRINDEX
-0x4F3C ZB_ZMASK_DWORD
-0x4F40 ZB_ZMASK_RDINDEX
-0x4F44 ZB_HIZ_OFFSET
-0x4F48 ZB_HIZ_WRINDEX
-0x4F4C ZB_HIZ_DWORD
-0x4F50 ZB_HIZ_RDINDEX
-0x4F54 ZB_HIZ_PITCH
 0x4F58 ZB_ZPASS_DATA
 0x4FD4 ZB_STENCILREFMASK_BF
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index ac5f0403d53..0acaf8f9143 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -905,6 +905,7 @@ struct drm_radeon_cs {
 #define RADEON_INFO_CRTC_FROM_ID	0x04
 #define RADEON_INFO_ACCEL_WORKING2	0x05
 #define RADEON_INFO_TILING_CONFIG	0x06
+#define RADEON_INFO_WANT_HYPERZ		0x07
 
 struct drm_radeon_info {
 	uint32_t		request;
-- 
cgit v1.2.3-70-g09d2


From 5689cc53fa9d09b5bf41b1b1a7c90bd6c112ab40 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 1 Jul 2010 16:00:12 +0200
Subject: KVM: Use u64 for frame data types

For 32bit machines where the physical address width is
larger than the virtual address width the frame number types
in KVM may overflow. Fix this by changing them to u64.

[sfr: fix build on 32-bit ppc]

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/powerpc/kvm/44x_tlb.c | 3 ++-
 include/linux/kvm_types.h  | 4 ++--
 virt/kvm/iommu.c           | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 812312542e5..9b9b5cdea84 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -316,7 +316,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
 	gfn = gpaddr >> PAGE_SHIFT;
 	new_page = gfn_to_page(vcpu->kvm, gfn);
 	if (is_error_page(new_page)) {
-		printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
+		printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n",
+			(unsigned long long)gfn);
 		kvm_release_page_clean(new_page);
 		return;
 	}
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index fb46efbeabe..7ac0d4eee43 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -32,11 +32,11 @@
 
 typedef unsigned long  gva_t;
 typedef u64            gpa_t;
-typedef unsigned long  gfn_t;
+typedef u64            gfn_t;
 
 typedef unsigned long  hva_t;
 typedef u64            hpa_t;
-typedef unsigned long  hfn_t;
+typedef u64            hfn_t;
 
 typedef hfn_t pfn_t;
 
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 779559552ce..62a9caf0563 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -108,7 +108,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 			      get_order(page_size), flags);
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
-			       "iommu failed to map pfn=%lx\n", pfn);
+			       "iommu failed to map pfn=%llx\n", pfn);
 			goto unmap_pages;
 		}
 
-- 
cgit v1.2.3-70-g09d2


From edba23e51578f7cb6781461568489fc1825db4ac Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Wed, 7 Jul 2010 20:16:45 +0300
Subject: KVM: Return EFAULT from kvm ioctl when guest accesses bad area

Currently, if a guest accesses an address that belongs to a memory slot but
is not backed by a page, or the page is read-only, KVM treats it like an MMIO
access. Remove that capability. It was never part of the interface and should
not be relied upon.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c       |  4 +++-
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      | 28 ++++++++++++++++++++++++----
 3 files changed, 28 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d8d48329cb8..89d7a2cae53 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2078,7 +2078,9 @@ static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn)
 	if (is_hwpoison_pfn(pfn)) {
 		kvm_send_hwpoison_signal(kvm, gfn);
 		return 0;
-	}
+	} else if (is_fault_pfn(pfn))
+		return -EFAULT;
+
 	return 1;
 }
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e796326f364..8055067b6be 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -269,6 +269,7 @@ extern pfn_t bad_pfn;
 int is_error_page(struct page *page);
 int is_error_pfn(pfn_t pfn);
 int is_hwpoison_pfn(pfn_t pfn);
+int is_fault_pfn(pfn_t pfn);
 int kvm_is_error_hva(unsigned long addr);
 int kvm_set_memory_region(struct kvm *kvm,
 			  struct kvm_userspace_memory_region *mem,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 630d1224f18..b78b794c103 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -96,6 +96,9 @@ static bool largepages_enabled = true;
 static struct page *hwpoison_page;
 static pfn_t hwpoison_pfn;
 
+static struct page *fault_page;
+static pfn_t fault_pfn;
+
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn)) {
@@ -815,13 +818,13 @@ EXPORT_SYMBOL_GPL(kvm_disable_largepages);
 
 int is_error_page(struct page *page)
 {
-	return page == bad_page || page == hwpoison_page;
+	return page == bad_page || page == hwpoison_page || page == fault_page;
 }
 EXPORT_SYMBOL_GPL(is_error_page);
 
 int is_error_pfn(pfn_t pfn)
 {
-	return pfn == bad_pfn || pfn == hwpoison_pfn;
+	return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn;
 }
 EXPORT_SYMBOL_GPL(is_error_pfn);
 
@@ -831,6 +834,12 @@ int is_hwpoison_pfn(pfn_t pfn)
 }
 EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
 
+int is_fault_pfn(pfn_t pfn)
+{
+	return pfn == fault_pfn;
+}
+EXPORT_SYMBOL_GPL(is_fault_pfn);
+
 static inline unsigned long bad_hva(void)
 {
 	return PAGE_OFFSET;
@@ -959,8 +968,8 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
 		if (vma == NULL || addr < vma->vm_start ||
 		    !(vma->vm_flags & VM_PFNMAP)) {
 			up_read(&current->mm->mmap_sem);
-			get_page(bad_page);
-			return page_to_pfn(bad_page);
+			get_page(fault_page);
+			return page_to_pfn(fault_page);
 		}
 
 		pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
@@ -2226,6 +2235,15 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 
 	hwpoison_pfn = page_to_pfn(hwpoison_page);
 
+	fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+	if (fault_page == NULL) {
+		r = -ENOMEM;
+		goto out_free_0;
+	}
+
+	fault_pfn = page_to_pfn(fault_page);
+
 	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
 		r = -ENOMEM;
 		goto out_free_0;
@@ -2298,6 +2316,8 @@ out_free_1:
 out_free_0a:
 	free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
+	if (fault_page)
+		__free_page(fault_page);
 	if (hwpoison_page)
 		__free_page(hwpoison_page);
 	__free_page(bad_page);
-- 
cgit v1.2.3-70-g09d2


From 4a994358b919c3b14de61be5e30d9edc9089ba3f Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 11 Jul 2010 15:32:23 +0300
Subject: KVM: Convert mask notifiers to use irqchip/pin instead of gsi

Devices register mask notifiers by gsi, but an irqchip only knows about
irqchip/pin, so the irqchip/pin must be converted to a gsi before
looking up the mask notifiers to call.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 include/linux/kvm_host.h |  3 ++-
 virt/kvm/ioapic.c        |  2 +-
 virt/kvm/irq_comm.c      | 12 ++++++++----
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8055067b6be..c13cc48697a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -447,7 +447,8 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
 				    struct kvm_irq_mask_notifier *kimn);
 void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn);
-void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
+void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
+			     bool mask);
 
 #ifdef __KVM_HAVE_IOAPIC
 void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 1149c60b198..0b9df8303dc 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -152,7 +152,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 		update_handled_vectors(ioapic);
 		mask_after = e->fields.mask;
 		if (mask_before != mask_after)
-			kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
+			kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
 		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
 		    && ioapic->irr & (1 << index))
 			ioapic_service(ioapic, index);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 06cf61e729d..369e38010ad 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -279,15 +279,19 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 	synchronize_rcu();
 }
 
-void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
+void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
+			     bool mask)
 {
 	struct kvm_irq_mask_notifier *kimn;
 	struct hlist_node *n;
+	int gsi;
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
-		if (kimn->irq == irq)
-			kimn->func(kimn, mask);
+	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
+			if (kimn->irq == gsi)
+				kimn->func(kimn, mask);
 	rcu_read_unlock();
 }
 
-- 
cgit v1.2.3-70-g09d2


From ea0d3ab239fba48d6e998b19c28d78f765963007 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Wed, 2 Jun 2010 13:24:43 +0900
Subject: LSM: Remove unused arguments from security_path_truncate().

When commit be6d3e56a6b9b3a4ee44a0685e39e595073c6f0d "introduce new LSM hooks
where vfsmount is available." was proposed, regarding security_path_truncate(),
only "struct file *" argument (which AppArmor wanted to use) was removed.
But the length and time_attrs arguments are used by neither TOMOYO nor AppArmor.
Thus, let's remove these arguments.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/namei.c               |  3 +--
 fs/open.c                |  5 ++---
 include/linux/security.h | 11 +++--------
 security/capability.c    |  3 +--
 security/security.c      |  5 ++---
 security/tomoyo/tomoyo.c |  3 +--
 6 files changed, 10 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index 868d0cb9d47..fe34c2b879f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1484,8 +1484,7 @@ static int handle_truncate(struct path *path)
 	 */
 	error = locks_verify_locked(inode);
 	if (!error)
-		error = security_path_truncate(path, 0,
-				       ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
+		error = security_path_truncate(path);
 	if (!error) {
 		error = do_truncate(path->dentry, 0,
 				    ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
diff --git a/fs/open.c b/fs/open.c
index 5463266db9e..a54ed85209c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -110,7 +110,7 @@ static long do_sys_truncate(const char __user *pathname, loff_t length)
 
 	error = locks_verify_truncate(inode, NULL, length);
 	if (!error)
-		error = security_path_truncate(&path, length, 0);
+		error = security_path_truncate(&path);
 	if (!error)
 		error = do_truncate(path.dentry, length, 0, NULL);
 
@@ -165,8 +165,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 
 	error = locks_verify_truncate(inode, file, length);
 	if (!error)
-		error = security_path_truncate(&file->f_path, length,
-					       ATTR_MTIME|ATTR_CTIME);
+		error = security_path_truncate(&file->f_path);
 	if (!error)
 		error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
 out_putf:
diff --git a/include/linux/security.h b/include/linux/security.h
index 0c881917046..723a93df756 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -470,8 +470,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * @path_truncate:
  *	Check permission before truncating a file.
  *	@path contains the path structure for the file.
- *	@length is the new length of the file.
- *	@time_attrs is the flags passed to do_truncate().
  *	Return 0 if permission is granted.
  * @inode_getattr:
  *	Check permission before obtaining file attributes.
@@ -1412,8 +1410,7 @@ struct security_operations {
 	int (*path_rmdir) (struct path *dir, struct dentry *dentry);
 	int (*path_mknod) (struct path *dir, struct dentry *dentry, int mode,
 			   unsigned int dev);
-	int (*path_truncate) (struct path *path, loff_t length,
-			      unsigned int time_attrs);
+	int (*path_truncate) (struct path *path);
 	int (*path_symlink) (struct path *dir, struct dentry *dentry,
 			     const char *old_name);
 	int (*path_link) (struct dentry *old_dentry, struct path *new_dir,
@@ -2806,8 +2803,7 @@ int security_path_mkdir(struct path *dir, struct dentry *dentry, int mode);
 int security_path_rmdir(struct path *dir, struct dentry *dentry);
 int security_path_mknod(struct path *dir, struct dentry *dentry, int mode,
 			unsigned int dev);
-int security_path_truncate(struct path *path, loff_t length,
-			   unsigned int time_attrs);
+int security_path_truncate(struct path *path);
 int security_path_symlink(struct path *dir, struct dentry *dentry,
 			  const char *old_name);
 int security_path_link(struct dentry *old_dentry, struct path *new_dir,
@@ -2841,8 +2837,7 @@ static inline int security_path_mknod(struct path *dir, struct dentry *dentry,
 	return 0;
 }
 
-static inline int security_path_truncate(struct path *path, loff_t length,
-					 unsigned int time_attrs)
+static inline int security_path_truncate(struct path *path)
 {
 	return 0;
 }
diff --git a/security/capability.c b/security/capability.c
index 8168e3ecd5b..4aeb699da1b 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -268,8 +268,7 @@ static int cap_path_rename(struct path *old_path, struct dentry *old_dentry,
 	return 0;
 }
 
-static int cap_path_truncate(struct path *path, loff_t length,
-			     unsigned int time_attrs)
+static int cap_path_truncate(struct path *path)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index 351942a4ca0..e8c87b8601b 100644
--- a/security/security.c
+++ b/security/security.c
@@ -417,12 +417,11 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
 					 new_dentry);
 }
 
-int security_path_truncate(struct path *path, loff_t length,
-			   unsigned int time_attrs)
+int security_path_truncate(struct path *path)
 {
 	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
 		return 0;
-	return security_ops->path_truncate(path, length, time_attrs);
+	return security_ops->path_truncate(path);
 }
 
 int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 57d442e7339..7be732cadd4 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -93,8 +93,7 @@ static int tomoyo_bprm_check_security(struct linux_binprm *bprm)
 	return tomoyo_check_open_permission(domain, &bprm->file->f_path, O_RDONLY);
 }
 
-static int tomoyo_path_truncate(struct path *path, loff_t length,
-				unsigned int time_attrs)
+static int tomoyo_path_truncate(struct path *path)
 {
 	return tomoyo_path_perm(TOMOYO_TYPE_TRUNCATE, path);
 }
-- 
cgit v1.2.3-70-g09d2


From af4f136056c984b0aa67feed7d3170b958370b2f Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Thu, 1 Jul 2010 15:07:43 -0400
Subject: security: move LSM xattrnames to xattr.h

Make the security extended attributes names global. Updated to move
the remaining Smack xattrs.

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/capability.h |  3 ---
 include/linux/xattr.h      | 14 ++++++++++++++
 security/selinux/hooks.c   |  3 ---
 security/smack/smack.h     | 10 ----------
 4 files changed, 14 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index 39e5ff512fb..90012b9ddbf 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -49,9 +49,6 @@ typedef struct __user_cap_data_struct {
 } __user *cap_user_data_t;
 
 
-#define XATTR_CAPS_SUFFIX "capability"
-#define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
-
 #define VFS_CAP_REVISION_MASK	0xFF000000
 #define VFS_CAP_REVISION_SHIFT	24
 #define VFS_CAP_FLAGS_MASK	~VFS_CAP_REVISION_MASK
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 0cfa1e9c4cc..f1e5bde4b35 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -33,6 +33,20 @@
 #define XATTR_USER_PREFIX "user."
 #define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1)
 
+/* Security namespace */
+#define XATTR_SELINUX_SUFFIX "selinux"
+#define XATTR_NAME_SELINUX XATTR_SECURITY_PREFIX XATTR_SELINUX_SUFFIX
+
+#define XATTR_SMACK_SUFFIX "SMACK64"
+#define XATTR_SMACK_IPIN "SMACK64IPIN"
+#define XATTR_SMACK_IPOUT "SMACK64IPOUT"
+#define XATTR_NAME_SMACK XATTR_SECURITY_PREFIX XATTR_SMACK_SUFFIX
+#define XATTR_NAME_SMACKIPIN	XATTR_SECURITY_PREFIX XATTR_SMACK_IPIN
+#define XATTR_NAME_SMACKIPOUT	XATTR_SECURITY_PREFIX XATTR_SMACK_IPOUT
+
+#define XATTR_CAPS_SUFFIX "capability"
+#define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
+
 struct inode;
 struct dentry;
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 0f524b7d102..85338f0c048 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -87,9 +87,6 @@
 #include "netlabel.h"
 #include "audit.h"
 
-#define XATTR_SELINUX_SUFFIX "selinux"
-#define XATTR_NAME_SELINUX XATTR_SECURITY_PREFIX XATTR_SELINUX_SUFFIX
-
 #define NUM_SEL_MNT_OPTS 5
 
 extern int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm);
diff --git a/security/smack/smack.h b/security/smack/smack.h
index c6e9acae72e..43ae747a5aa 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -123,16 +123,6 @@ struct smack_known {
 #define SMK_FSHAT	"smackfshat="
 #define SMK_FSROOT	"smackfsroot="
 
-/*
- * xattr names
- */
-#define XATTR_SMACK_SUFFIX	"SMACK64"
-#define XATTR_SMACK_IPIN	"SMACK64IPIN"
-#define XATTR_SMACK_IPOUT	"SMACK64IPOUT"
-#define XATTR_NAME_SMACK	XATTR_SECURITY_PREFIX XATTR_SMACK_SUFFIX
-#define XATTR_NAME_SMACKIPIN	XATTR_SECURITY_PREFIX XATTR_SMACK_IPIN
-#define XATTR_NAME_SMACKIPOUT	XATTR_SECURITY_PREFIX XATTR_SMACK_IPOUT
-
 #define SMACK_CIPSO_OPTION 	"-CIPSO"
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 9cfcac810e8993fa7a5bfd24b1a21f1dbbb03a7b Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Fri, 23 Jul 2010 11:43:51 -0400
Subject: vfs: re-introduce MAY_CHDIR

Currently MAY_ACCESS means that filesystems must check the permissions
right then and not rely on cached results or the results of future
operations on the object.  This can be because of a call to sys_access() or
because of a call to chdir() which needs to check search without relying on
any future operations inside that dir.  I plan to use MAY_ACCESS for other
purposes in the security system, so I split the MAY_ACCESS and the
MAY_CHDIR cases.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by:  Stephen D. Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/fuse/dir.c      | 2 +-
 fs/nfs/dir.c       | 2 +-
 fs/open.c          | 6 +++---
 include/linux/fs.h | 1 +
 4 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 3cdc5f78a40..431be0795b6 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1016,7 +1016,7 @@ static int fuse_permission(struct inode *inode, int mask)
 		   exist.  So if permissions are revoked this won't be
 		   noticed immediately, only after the attribute
 		   timeout has expired */
-	} else if (mask & MAY_ACCESS) {
+	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
 		err = fuse_access(inode, mask);
 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
 		if (!(inode->i_mode & S_IXUGO)) {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e60416d3f81..832e9e23932 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1953,7 +1953,7 @@ int nfs_permission(struct inode *inode, int mask)
 	if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
 		goto out;
 	/* Is this sys_access() ? */
-	if (mask & MAY_ACCESS)
+	if (mask & (MAY_ACCESS | MAY_CHDIR))
 		goto force_lookup;
 
 	switch (inode->i_mode & S_IFMT) {
diff --git a/fs/open.c b/fs/open.c
index a54ed85209c..0d1fa3dc0ef 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -366,7 +366,7 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename)
 	if (error)
 		goto out;
 
-	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
+	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
 	if (error)
 		goto dput_and_out;
 
@@ -395,7 +395,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
 	if (!S_ISDIR(inode->i_mode))
 		goto out_putf;
 
-	error = inode_permission(inode, MAY_EXEC | MAY_ACCESS);
+	error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
 	if (!error)
 		set_fs_pwd(current->fs, &file->f_path);
 out_putf:
@@ -413,7 +413,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)
 	if (error)
 		goto out;
 
-	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
+	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
 	if (error)
 		goto dput_and_out;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 68ca1b0491a..7d94b72f034 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -53,6 +53,7 @@ struct inodes_stat_t {
 #define MAY_APPEND 8
 #define MAY_ACCESS 16
 #define MAY_OPEN 32
+#define MAY_CHDIR 64
 
 /*
  * flags in file.f_mode.  Note that FMODE_READ and FMODE_WRITE must correspond
-- 
cgit v1.2.3-70-g09d2


From b782e0a68d17894d9a618ffea55b33639faa6bb4 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Fri, 23 Jul 2010 11:44:03 -0400
Subject: SELinux: special dontaudit for access checks

Currently there are a number of applications (nautilus being the main one) which
call access() on files in order to determine how they should be displayed.  It
is normal and expected that nautilus will want to see if files are executable
or if they are really read/write-able.  access() should return the real
permission.  SELinux policy checks are done in access() and can result in lots
of AVC denials as policy denies RWX on files which DAC allows.  Currently
SELinux must dontaudit actual attempts to read/write/execute a file in
order to silence these messages (and not flood the logs.)  But dontaudit rules
like that can hide real attacks.  This patch adds a new common file
permission audit_access.  This permission is special in that it is meaningless
and should never show up in an allow rule.  Instead the only place this
permission has meaning is in a dontaudit rule like so:

dontaudit nautilus_t sbin_t:file audit_access

With such a rule if nautilus just checks access() we will still get denied and
thus userspace will still get the correct answer but we will not log the denial.
If nautilus attempted to actually perform one of the forbidden actions
(rather than just querying access(2) about it) we would still log a denial.
This type of dontaudit rule should be used sparingly, as it could be a
method for an attacker to probe the system permissions without detection.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by:  Stephen D. Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/lsm_audit.h           |  5 +++++
 security/selinux/avc.c              | 24 ++++++++++++++++++++++--
 security/selinux/hooks.c            | 20 +++++++++++++++-----
 security/selinux/include/classmap.h |  2 +-
 4 files changed, 43 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index 6907251d520..788f0ab937a 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -90,6 +90,11 @@ struct common_audit_data {
 			u32 requested;
 			u32 audited;
 			u32 denied;
+			/*
+			 * auditdeny is a bit tricky and unintuitive.  See the
+			 * comments in avc.c for its meaning and usage.
+			 */
+			u32 auditdeny;
 			struct av_decision *avd;
 			int result;
 		} selinux_audit_data;
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 3662b0f15ec..9da6420e205 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -488,9 +488,29 @@ void avc_audit(u32 ssid, u32 tsid,
 	struct common_audit_data stack_data;
 	u32 denied, audited;
 	denied = requested & ~avd->allowed;
-	if (denied)
+	if (denied) {
 		audited = denied & avd->auditdeny;
-	else if (result)
+		/*
+		 * a->selinux_audit_data.auditdeny is TRICKY!  Setting a bit in
+		 * this field means that ANY denials should NOT be audited if
+		 * the policy contains an explicit dontaudit rule for that
+		 * permission.  Take notice that this is unrelated to the
+		 * actual permissions that were denied.  As an example lets
+		 * assume:
+		 *
+		 * denied == READ
+		 * avd.auditdeny & ACCESS == 0 (not set means explicit rule)
+		 * selinux_audit_data.auditdeny & ACCESS == 1
+		 *
+		 * We will NOT audit the denial even though the denied
+		 * permission was READ and the auditdeny checks were for
+		 * ACCESS
+		 */
+		if (a &&
+		    a->selinux_audit_data.auditdeny &&
+		    !(a->selinux_audit_data.auditdeny & avd->auditdeny))
+			audited = 0;
+	} else if (result)
 		audited = denied = requested;
 	else
 		audited = requested & avd->auditallow;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 0c98846f188..650947a72a2 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2644,16 +2644,26 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *na
 static int selinux_inode_permission(struct inode *inode, int mask)
 {
 	const struct cred *cred = current_cred();
+	struct common_audit_data ad;
+	u32 perms;
+	bool from_access;
 
+	from_access = mask & MAY_ACCESS;
 	mask &= (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND);
 
-	if (!mask) {
-		/* No permission to check.  Existence test. */
+	/* No permission to check.  Existence test. */
+	if (!mask)
 		return 0;
-	}
 
-	return inode_has_perm(cred, inode,
-			      file_mask_to_av(inode->i_mode, mask), NULL);
+	COMMON_AUDIT_DATA_INIT(&ad, FS);
+	ad.u.fs.inode = inode;
+
+	if (from_access)
+		ad.selinux_audit_data.auditdeny |= FILE__AUDIT_ACCESS;
+
+	perms = file_mask_to_av(inode->i_mode, mask);
+
+	return inode_has_perm(cred, inode, perms, &ad);
 }
 
 static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 8b32e959bb2..d64603e10db 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -2,7 +2,7 @@
     "getattr", "setattr", "lock", "relabelfrom", "relabelto", "append"
 
 #define COMMON_FILE_PERMS COMMON_FILE_SOCK_PERMS, "unlink", "link", \
-    "rename", "execute", "swapon", "quotaon", "mounton"
+    "rename", "execute", "swapon", "quotaon", "mounton", "audit_access"
 
 #define COMMON_SOCK_PERMS COMMON_FILE_SOCK_PERMS, "bind", "connect", \
     "listen", "accept", "getopt", "setopt", "shutdown", "recvfrom",  \
-- 
cgit v1.2.3-70-g09d2


From 67012e8209df95a8290d135753ff5145431a666e Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Thu, 29 Jul 2010 14:47:58 -0700
Subject: AppArmor: basic auditing infrastructure.

Update lsm_audit for AppArmor-specific data, and add the core routines
AppArmor uses for auditing.

Signed-off-by: John Johansen <john.johansen@canonical.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/lsm_audit.h         |  27 +++++
 security/apparmor/audit.c         | 215 ++++++++++++++++++++++++++++++++++++++
 security/apparmor/include/audit.h | 123 ++++++++++++++++++++++
 3 files changed, 365 insertions(+)
 create mode 100644 security/apparmor/audit.c
 create mode 100644 security/apparmor/include/audit.h

(limited to 'include')

diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index 788f0ab937a..112a5503335 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -98,6 +98,33 @@ struct common_audit_data {
 			struct av_decision *avd;
 			int result;
 		} selinux_audit_data;
+#endif
+#ifdef CONFIG_SECURITY_APPARMOR
+		struct {
+			int error;
+			int op;
+			int type;
+			void *profile;
+			const char *name;
+			const char *info;
+			union {
+				void *target;
+				struct {
+					long pos;
+					void *target;
+				} iface;
+				struct {
+					int rlim;
+					unsigned long max;
+				} rlim;
+				struct {
+					const char *target;
+					u32 request;
+					u32 denied;
+					uid_t ouid;
+				} fs;
+			};
+		} apparmor_audit_data;
 #endif
 	};
 	/* these callback will be implemented by a specific LSM */
diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c
new file mode 100644
index 00000000000..96502b22b26
--- /dev/null
+++ b/security/apparmor/audit.c
@@ -0,0 +1,215 @@
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor auditing functions
+ *
+ * Copyright (C) 1998-2008 Novell/SUSE
+ * Copyright 2009-2010 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <linux/audit.h>
+#include <linux/socket.h>
+
+#include "include/apparmor.h"
+#include "include/audit.h"
+#include "include/policy.h"
+
+const char *op_table[] = {
+	"null",
+
+	"sysctl",
+	"capable",
+
+	"unlink",
+	"mkdir",
+	"rmdir",
+	"mknod",
+	"truncate",
+	"link",
+	"symlink",
+	"rename_src",
+	"rename_dest",
+	"chmod",
+	"chown",
+	"getattr",
+	"open",
+
+	"file_perm",
+	"file_lock",
+	"file_mmap",
+	"file_mprotect",
+
+	"create",
+	"post_create",
+	"bind",
+	"connect",
+	"listen",
+	"accept",
+	"sendmsg",
+	"recvmsg",
+	"getsockname",
+	"getpeername",
+	"getsockopt",
+	"setsockopt",
+	"socket_shutdown",
+
+	"ptrace",
+
+	"exec",
+	"change_hat",
+	"change_profile",
+	"change_onexec",
+
+	"setprocattr",
+	"setrlimit",
+
+	"profile_replace",
+	"profile_load",
+	"profile_remove"
+};
+
+const char *audit_mode_names[] = {
+	"normal",
+	"quiet_denied",
+	"quiet",
+	"noquiet",
+	"all"
+};
+
+static char *aa_audit_type[] = {
+	"AUDIT",
+	"ALLOWED",
+	"DENIED",
+	"HINT",
+	"STATUS",
+	"ERROR",
+	"KILLED"
+};
+
+/*
+ * Currently AppArmor auditing is fed straight into the audit framework.
+ *
+ * TODO:
+ * netlink interface for complain mode
+ * user auditing, - send user auditing to netlink interface
+ * system control of whether user audit messages go to system log
+ */
+
+/**
+ * audit_pre - core AppArmor function.
+ * @ab: audit buffer to fill (NOT NULL)
+ * @ca: audit structure containing data to audit (NOT NULL)
+ *
+ * Record common AppArmor audit data from @ca
+ */
+static void audit_pre(struct audit_buffer *ab, void *ca)
+{
+	struct common_audit_data *sa = ca;
+	struct task_struct *tsk = sa->tsk ? sa->tsk : current;
+
+	if (aa_g_audit_header) {
+		audit_log_format(ab, "apparmor=");
+		audit_log_string(ab, aa_audit_type[sa->aad.type]);
+	}
+
+	if (sa->aad.op) {
+		audit_log_format(ab, " operation=");
+		audit_log_string(ab, op_table[sa->aad.op]);
+	}
+
+	if (sa->aad.info) {
+		audit_log_format(ab, " info=");
+		audit_log_string(ab, sa->aad.info);
+		if (sa->aad.error)
+			audit_log_format(ab, " error=%d", sa->aad.error);
+	}
+
+	if (sa->aad.profile) {
+		struct aa_profile *profile = sa->aad.profile;
+		pid_t pid;
+		rcu_read_lock();
+		pid = tsk->real_parent->pid;
+		rcu_read_unlock();
+		audit_log_format(ab, " parent=%d", pid);
+		if (profile->ns != root_ns) {
+			audit_log_format(ab, " namespace=");
+			audit_log_untrustedstring(ab, profile->ns->base.hname);
+		}
+		audit_log_format(ab, " profile=");
+		audit_log_untrustedstring(ab, profile->base.hname);
+	}
+
+	if (sa->aad.name) {
+		audit_log_format(ab, " name=");
+		audit_log_untrustedstring(ab, sa->aad.name);
+	}
+}
+
+/**
+ * aa_audit_msg - Log a message to the audit subsystem
+ * @sa: audit event structure (NOT NULL)
+ * @cb: optional callback fn for type specific fields (MAYBE NULL)
+ */
+void aa_audit_msg(int type, struct common_audit_data *sa,
+		  void (*cb) (struct audit_buffer *, void *))
+{
+	sa->aad.type = type;
+	sa->lsm_pre_audit = audit_pre;
+	sa->lsm_post_audit = cb;
+	common_lsm_audit(sa);
+}
+
+/**
+ * aa_audit - Log a profile based audit event to the audit subsystem
+ * @type: audit type for the message
+ * @profile: profile to check against (NOT NULL)
+ * @gfp: allocation flags to use
+ * @sa: audit event (NOT NULL)
+ * @cb: optional callback fn for type specific fields (MAYBE NULL)
+ *
+ * Handle default message switching based off of audit mode flags
+ *
+ * Returns: error on failure
+ */
+int aa_audit(int type, struct aa_profile *profile, gfp_t gfp,
+	     struct common_audit_data *sa,
+	     void (*cb) (struct audit_buffer *, void *))
+{
+	BUG_ON(!profile);
+
+	if (type == AUDIT_APPARMOR_AUTO) {
+		if (likely(!sa->aad.error)) {
+			if (AUDIT_MODE(profile) != AUDIT_ALL)
+				return 0;
+			type = AUDIT_APPARMOR_AUDIT;
+		} else if (COMPLAIN_MODE(profile))
+			type = AUDIT_APPARMOR_ALLOWED;
+		else
+			type = AUDIT_APPARMOR_DENIED;
+	}
+	if (AUDIT_MODE(profile) == AUDIT_QUIET ||
+	    (type == AUDIT_APPARMOR_DENIED &&
+	     AUDIT_MODE(profile) == AUDIT_QUIET))
+		return sa->aad.error;
+
+	if (KILL_MODE(profile) && type == AUDIT_APPARMOR_DENIED)
+		type = AUDIT_APPARMOR_KILL;
+
+	if (!unconfined(profile))
+		sa->aad.profile = profile;
+
+	aa_audit_msg(type, sa, cb);
+
+	if (sa->aad.type == AUDIT_APPARMOR_KILL)
+		(void)send_sig_info(SIGKILL, NULL, sa->tsk ? sa->tsk : current);
+
+	if (sa->aad.type == AUDIT_APPARMOR_ALLOWED)
+		return complain_error(sa->aad.error);
+
+	return sa->aad.error;
+}
diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
new file mode 100644
index 00000000000..1951786d32e
--- /dev/null
+++ b/security/apparmor/include/audit.h
@@ -0,0 +1,123 @@
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor auditing function definitions.
+ *
+ * Copyright (C) 1998-2008 Novell/SUSE
+ * Copyright 2009-2010 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#ifndef __AA_AUDIT_H
+#define __AA_AUDIT_H
+
+#include <linux/audit.h>
+#include <linux/fs.h>
+#include <linux/lsm_audit.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "file.h"
+
+struct aa_profile;
+
+extern const char *audit_mode_names[];
+#define AUDIT_MAX_INDEX 5
+
+#define AUDIT_APPARMOR_AUTO 0	/* auto choose audit message type */
+
+enum audit_mode {
+	AUDIT_NORMAL,		/* follow normal auditing of accesses */
+	AUDIT_QUIET_DENIED,	/* quiet all denied access messages */
+	AUDIT_QUIET,		/* quiet all messages */
+	AUDIT_NOQUIET,		/* do not quiet audit messages */
+	AUDIT_ALL		/* audit all accesses */
+};
+
+enum audit_type {
+	AUDIT_APPARMOR_AUDIT,
+	AUDIT_APPARMOR_ALLOWED,
+	AUDIT_APPARMOR_DENIED,
+	AUDIT_APPARMOR_HINT,
+	AUDIT_APPARMOR_STATUS,
+	AUDIT_APPARMOR_ERROR,
+	AUDIT_APPARMOR_KILL
+};
+
+extern const char *op_table[];
+enum aa_ops {
+	OP_NULL,
+
+	OP_SYSCTL,
+	OP_CAPABLE,
+
+	OP_UNLINK,
+	OP_MKDIR,
+	OP_RMDIR,
+	OP_MKNOD,
+	OP_TRUNC,
+	OP_LINK,
+	OP_SYMLINK,
+	OP_RENAME_SRC,
+	OP_RENAME_DEST,
+	OP_CHMOD,
+	OP_CHOWN,
+	OP_GETATTR,
+	OP_OPEN,
+
+	OP_FPERM,
+	OP_FLOCK,
+	OP_FMMAP,
+	OP_FMPROT,
+
+	OP_CREATE,
+	OP_POST_CREATE,
+	OP_BIND,
+	OP_CONNECT,
+	OP_LISTEN,
+	OP_ACCEPT,
+	OP_SENDMSG,
+	OP_RECVMSG,
+	OP_GETSOCKNAME,
+	OP_GETPEERNAME,
+	OP_GETSOCKOPT,
+	OP_SETSOCKOPT,
+	OP_SOCK_SHUTDOWN,
+
+	OP_PTRACE,
+
+	OP_EXEC,
+	OP_CHANGE_HAT,
+	OP_CHANGE_PROFILE,
+	OP_CHANGE_ONEXEC,
+
+	OP_SETPROCATTR,
+	OP_SETRLIMIT,
+
+	OP_PROF_REPL,
+	OP_PROF_LOAD,
+	OP_PROF_RM,
+};
+
+
+/* define a short hand for apparmor_audit_data portion of common_audit_data */
+#define aad apparmor_audit_data
+
+void aa_audit_msg(int type, struct common_audit_data *sa,
+		  void (*cb) (struct audit_buffer *, void *));
+int aa_audit(int type, struct aa_profile *profile, gfp_t gfp,
+	     struct common_audit_data *sa,
+	     void (*cb) (struct audit_buffer *, void *));
+
+static inline int complain_error(int error)
+{
+	if (error == -EPERM || error == -EACCES)
+		return 0;
+	return error;
+}
+
+#endif /* __AA_AUDIT_H */
-- 
cgit v1.2.3-70-g09d2


From a51dca9cd3bb4ec5a05bfb6feabf024a5c808a37 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Mon, 2 Aug 2010 08:43:25 -0400
Subject: jbd2: Use atomic variables to avoid taking t_handle_lock in
 jbd2_journal_stop

By using an atomic_t for t_updates and t_outstanding_credits, this
should allow us to not need to take transaction t_handle_lock in
jbd2_journal_stop().

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/checkpoint.c  |  2 +-
 fs/jbd2/commit.c      | 13 +++++-----
 fs/jbd2/transaction.c | 69 +++++++++++++++++++++++++++++----------------------
 include/linux/jbd2.h  |  8 +++---
 4 files changed, 51 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 076d1cc44f9..f8cdc02520f 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -775,7 +775,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
 	J_ASSERT(transaction->t_log_list == NULL);
 	J_ASSERT(transaction->t_checkpoint_list == NULL);
 	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
-	J_ASSERT(transaction->t_updates == 0);
+	J_ASSERT(atomic_read(&transaction->t_updates) == 0);
 	J_ASSERT(journal->j_committing_transaction != transaction);
 	J_ASSERT(journal->j_running_transaction != transaction);
 
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index af056810acb..fbd2c564e91 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -417,12 +417,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 					      stats.run.rs_locked);
 
 	spin_lock(&commit_transaction->t_handle_lock);
-	while (commit_transaction->t_updates) {
+	while (atomic_read(&commit_transaction->t_updates)) {
 		DEFINE_WAIT(wait);
 
 		prepare_to_wait(&journal->j_wait_updates, &wait,
 					TASK_UNINTERRUPTIBLE);
-		if (commit_transaction->t_updates) {
+		if (atomic_read(&commit_transaction->t_updates)) {
 			spin_unlock(&commit_transaction->t_handle_lock);
 			spin_unlock(&journal->j_state_lock);
 			schedule();
@@ -433,7 +433,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	}
 	spin_unlock(&commit_transaction->t_handle_lock);
 
-	J_ASSERT (commit_transaction->t_outstanding_credits <=
+	J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
 			journal->j_max_transaction_buffers);
 
 	/*
@@ -527,11 +527,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	stats.run.rs_logging = jiffies;
 	stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
 					       stats.run.rs_logging);
-	stats.run.rs_blocks = commit_transaction->t_outstanding_credits;
+	stats.run.rs_blocks =
+		atomic_read(&commit_transaction->t_outstanding_credits);
 	stats.run.rs_blocks_logged = 0;
 
 	J_ASSERT(commit_transaction->t_nr_buffers <=
-		 commit_transaction->t_outstanding_credits);
+		 atomic_read(&commit_transaction->t_outstanding_credits));
 
 	err = 0;
 	descriptor = NULL;
@@ -616,7 +617,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 		 * the free space in the log, but this counter is changed
 		 * by jbd2_journal_next_log_block() also.
 		 */
-		commit_transaction->t_outstanding_credits--;
+		atomic_dec(&commit_transaction->t_outstanding_credits);
 
 		/* Bump b_count to prevent truncate from stumbling over
                    the shadowed buffer!  @@@ This can go if we ever get
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 001e95fb0fe..9c64c7ec48d 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -55,6 +55,8 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
 	transaction->t_tid = journal->j_transaction_sequence++;
 	transaction->t_expires = jiffies + journal->j_commit_interval;
 	spin_lock_init(&transaction->t_handle_lock);
+	atomic_set(&transaction->t_updates, 0);
+	atomic_set(&transaction->t_outstanding_credits, 0);
 	INIT_LIST_HEAD(&transaction->t_inode_list);
 	INIT_LIST_HEAD(&transaction->t_private_list);
 
@@ -177,7 +179,7 @@ repeat_locked:
 	 * checkpoint to free some more log space.
 	 */
 	spin_lock(&transaction->t_handle_lock);
-	needed = transaction->t_outstanding_credits + nblocks;
+	needed = atomic_read(&transaction->t_outstanding_credits) + nblocks;
 
 	if (needed > journal->j_max_transaction_buffers) {
 		/*
@@ -240,11 +242,12 @@ repeat_locked:
 	}
 
 	handle->h_transaction = transaction;
-	transaction->t_outstanding_credits += nblocks;
-	transaction->t_updates++;
+	atomic_add(nblocks, &transaction->t_outstanding_credits);
+	atomic_inc(&transaction->t_updates);
 	transaction->t_handle_count++;
 	jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
-		  handle, nblocks, transaction->t_outstanding_credits,
+		  handle, nblocks,
+		  atomic_read(&transaction->t_outstanding_credits),
 		  __jbd2_log_space_left(journal));
 	spin_unlock(&transaction->t_handle_lock);
 	spin_unlock(&journal->j_state_lock);
@@ -369,7 +372,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
 	}
 
 	spin_lock(&transaction->t_handle_lock);
-	wanted = transaction->t_outstanding_credits + nblocks;
+	wanted = atomic_read(&transaction->t_outstanding_credits) + nblocks;
 
 	if (wanted > journal->j_max_transaction_buffers) {
 		jbd_debug(3, "denied handle %p %d blocks: "
@@ -384,7 +387,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
 	}
 
 	handle->h_buffer_credits += nblocks;
-	transaction->t_outstanding_credits += nblocks;
+	atomic_add(nblocks, &transaction->t_outstanding_credits);
 	result = 0;
 
 	jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
@@ -426,15 +429,14 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
 	 * First unlink the handle from its current transaction, and start the
 	 * commit on that.
 	 */
-	J_ASSERT(transaction->t_updates > 0);
+	J_ASSERT(atomic_read(&transaction->t_updates) > 0);
 	J_ASSERT(journal_current_handle() == handle);
 
 	spin_lock(&journal->j_state_lock);
 	spin_lock(&transaction->t_handle_lock);
-	transaction->t_outstanding_credits -= handle->h_buffer_credits;
-	transaction->t_updates--;
-
-	if (!transaction->t_updates)
+	atomic_sub(handle->h_buffer_credits,
+		   &transaction->t_outstanding_credits);
+	if (atomic_dec_and_test(&transaction->t_updates))
 		wake_up(&journal->j_wait_updates);
 	spin_unlock(&transaction->t_handle_lock);
 
@@ -481,7 +483,7 @@ void jbd2_journal_lock_updates(journal_t *journal)
 			break;
 
 		spin_lock(&transaction->t_handle_lock);
-		if (!transaction->t_updates) {
+		if (!atomic_read(&transaction->t_updates)) {
 			spin_unlock(&transaction->t_handle_lock);
 			break;
 		}
@@ -1258,7 +1260,8 @@ int jbd2_journal_stop(handle_t *handle)
 {
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
-	int err;
+	int err, wait_for_commit = 0;
+	tid_t tid;
 	pid_t pid;
 
 	J_ASSERT(journal_current_handle() == handle);
@@ -1266,7 +1269,7 @@ int jbd2_journal_stop(handle_t *handle)
 	if (is_handle_aborted(handle))
 		err = -EIO;
 	else {
-		J_ASSERT(transaction->t_updates > 0);
+		J_ASSERT(atomic_read(&transaction->t_updates) > 0);
 		err = 0;
 	}
 
@@ -1334,14 +1337,8 @@ int jbd2_journal_stop(handle_t *handle)
 	if (handle->h_sync)
 		transaction->t_synchronous_commit = 1;
 	current->journal_info = NULL;
-	spin_lock(&transaction->t_handle_lock);
-	transaction->t_outstanding_credits -= handle->h_buffer_credits;
-	transaction->t_updates--;
-	if (!transaction->t_updates) {
-		wake_up(&journal->j_wait_updates);
-		if (journal->j_barrier_count)
-			wake_up(&journal->j_wait_transaction_locked);
-	}
+	atomic_sub(handle->h_buffer_credits,
+		   &transaction->t_outstanding_credits);
 
 	/*
 	 * If the handle is marked SYNC, we need to set another commit
@@ -1350,15 +1347,13 @@ int jbd2_journal_stop(handle_t *handle)
 	 * transaction is too old now.
 	 */
 	if (handle->h_sync ||
-			transaction->t_outstanding_credits >
-				journal->j_max_transaction_buffers ||
-			time_after_eq(jiffies, transaction->t_expires)) {
+	    (atomic_read(&transaction->t_outstanding_credits) >
+	     journal->j_max_transaction_buffers) ||
+	    time_after_eq(jiffies, transaction->t_expires)) {
 		/* Do this even for aborted journals: an abort still
 		 * completes the commit thread, it just doesn't write
 		 * anything to disk. */
-		tid_t tid = transaction->t_tid;
 
-		spin_unlock(&transaction->t_handle_lock);
 		jbd_debug(2, "transaction too old, requesting commit for "
 					"handle %p\n", handle);
 		/* This is non-blocking */
@@ -1369,11 +1364,25 @@ int jbd2_journal_stop(handle_t *handle)
 		 * to wait for the commit to complete.
 		 */
 		if (handle->h_sync && !(current->flags & PF_MEMALLOC))
-			err = jbd2_log_wait_commit(journal, tid);
-	} else {
-		spin_unlock(&transaction->t_handle_lock);
+			wait_for_commit = 1;
 	}
 
+	/*
+	 * Once we drop t_updates, if it goes to zero the transaction
+	 * could start committing on us and eventually disappear.  So
+	 * once we do this, we must not dereference transaction
+	 * pointer again.
+	 */
+	tid = transaction->t_tid;
+	if (atomic_dec_and_test(&transaction->t_updates)) {
+		wake_up(&journal->j_wait_updates);
+		if (journal->j_barrier_count)
+			wake_up(&journal->j_wait_transaction_locked);
+	}
+
+	if (wait_for_commit)
+		err = jbd2_log_wait_commit(journal, tid);
+
 	lock_map_release(&handle->h_lockdep_map);
 
 	jbd2_free_handle(handle);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 5a72bc75b27..a72ce21de0e 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -601,13 +601,13 @@ struct transaction_s
 	 * Number of outstanding updates running on this transaction
 	 * [t_handle_lock]
 	 */
-	int			t_updates;
+	atomic_t		t_updates;
 
 	/*
 	 * Number of buffers reserved for use by all handles in this transaction
 	 * handle but not yet modified. [t_handle_lock]
 	 */
-	int			t_outstanding_credits;
+	atomic_t		t_outstanding_credits;
 
 	/*
 	 * Forward and backward links for the circular list of all transactions
@@ -1258,8 +1258,8 @@ static inline int jbd_space_needed(journal_t *journal)
 {
 	int nblocks = journal->j_max_transaction_buffers;
 	if (journal->j_committing_transaction)
-		nblocks += journal->j_committing_transaction->
-					t_outstanding_credits;
+		nblocks += atomic_read(&journal->j_committing_transaction->
+				       t_outstanding_credits);
 	return nblocks;
 }
 
-- 
cgit v1.2.3-70-g09d2


From ee92d37861a90b8f14fa621ae5abcfb29a89aaa9 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Mon, 2 Aug 2010 17:06:19 +0200
Subject: netfilter: nf_conntrack_extend: introduce __nf_ct_ext_exist()

Some users of nf_ct_ext_exist() know ct->ext isn't NULL. For these users, the
check for ct->ext isn't necessary, so the function __nf_ct_ext_exist() can be
used instead.

The type of the return value of nf_ct_ext_exist() is changed to bool.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_extend.h |  9 +++++++--
 net/netfilter/nf_conntrack_extend.c         | 22 ++++++++++++----------
 2 files changed, 19 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index 32d15bd6efa..0772d296dfd 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -28,9 +28,14 @@ struct nf_ct_ext {
 	char data[0];
 };
 
-static inline int nf_ct_ext_exist(const struct nf_conn *ct, u8 id)
+static inline bool __nf_ct_ext_exist(const struct nf_ct_ext *ext, u8 id)
 {
-	return (ct->ext && ct->ext->offset[id]);
+	return !!ext->offset[id];
+}
+
+static inline bool nf_ct_ext_exist(const struct nf_conn *ct, u8 id)
+{
+	return (ct->ext && __nf_ct_ext_exist(ct->ext, id));
 }
 
 static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id)
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index fdc8fb4ae10..7dcf7a40419 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -23,9 +23,10 @@ void __nf_ct_ext_destroy(struct nf_conn *ct)
 {
 	unsigned int i;
 	struct nf_ct_ext_type *t;
+	struct nf_ct_ext *ext = ct->ext;
 
 	for (i = 0; i < NF_CT_EXT_NUM; i++) {
-		if (!nf_ct_ext_exist(ct, i))
+		if (!__nf_ct_ext_exist(ext, i))
 			continue;
 
 		rcu_read_lock();
@@ -73,44 +74,45 @@ static void __nf_ct_ext_free_rcu(struct rcu_head *head)
 
 void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
 {
-	struct nf_ct_ext *new;
+	struct nf_ct_ext *old, *new;
 	int i, newlen, newoff;
 	struct nf_ct_ext_type *t;
 
 	/* Conntrack must not be confirmed to avoid races on reallocation. */
 	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
 
-	if (!ct->ext)
+	old = ct->ext;
+	if (!old)
 		return nf_ct_ext_create(&ct->ext, id, gfp);
 
-	if (nf_ct_ext_exist(ct, id))
+	if (__nf_ct_ext_exist(old, id))
 		return NULL;
 
 	rcu_read_lock();
 	t = rcu_dereference(nf_ct_ext_types[id]);
 	BUG_ON(t == NULL);
 
-	newoff = ALIGN(ct->ext->len, t->align);
+	newoff = ALIGN(old->len, t->align);
 	newlen = newoff + t->len;
 	rcu_read_unlock();
 
-	new = __krealloc(ct->ext, newlen, gfp);
+	new = __krealloc(old, newlen, gfp);
 	if (!new)
 		return NULL;
 
-	if (new != ct->ext) {
+	if (new != old) {
 		for (i = 0; i < NF_CT_EXT_NUM; i++) {
-			if (!nf_ct_ext_exist(ct, i))
+			if (!__nf_ct_ext_exist(old, i))
 				continue;
 
 			rcu_read_lock();
 			t = rcu_dereference(nf_ct_ext_types[i]);
 			if (t && t->move)
 				t->move((void *)new + new->offset[i],
-					(void *)ct->ext + ct->ext->offset[i]);
+					(void *)old + old->offset[i]);
 			rcu_read_unlock();
 		}
-		call_rcu(&ct->ext->rcu, __nf_ct_ext_free_rcu);
+		call_rcu(&old->rcu, __nf_ct_ext_free_rcu);
 		ct->ext = new;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 5c0d2374a16fcb52096df914ee57720987677be5 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Mon, 2 Aug 2010 17:12:44 +0200
Subject: ipvs: provide default ip_vs_conn_{in,out}_get_proto

This removes duplicate code by providing a default implementation
which is used by 3 of the 4 modules that provide these calls.

Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/ip_vs.h                   | 12 ++++++++
 net/netfilter/ipvs/ip_vs_conn.c       | 45 ++++++++++++++++++++++++++++
 net/netfilter/ipvs/ip_vs_proto_sctp.c | 53 ++-------------------------------
 net/netfilter/ipvs/ip_vs_proto_tcp.c  | 50 ++-----------------------------
 net/netfilter/ipvs/ip_vs_proto_udp.c  | 56 ++---------------------------------
 5 files changed, 63 insertions(+), 153 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 1f9e51180bd..a4747a0f730 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -632,10 +632,22 @@ extern struct ip_vs_conn *ip_vs_ct_in_get
 (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
  const union nf_inet_addr *d_addr, __be16 d_port);
 
+struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
+					    struct ip_vs_protocol *pp,
+					    const struct ip_vs_iphdr *iph,
+					    unsigned int proto_off,
+					    int inverse);
+
 extern struct ip_vs_conn *ip_vs_conn_out_get
 (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
  const union nf_inet_addr *d_addr, __be16 d_port);
 
+struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
+					     struct ip_vs_protocol *pp,
+					     const struct ip_vs_iphdr *iph,
+					     unsigned int proto_off,
+					     int inverse);
+
 /* put back the conn without restarting its timer */
 static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
 {
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 654544e7226..b71c69a2db1 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -271,6 +271,29 @@ struct ip_vs_conn *ip_vs_conn_in_get
 	return cp;
 }
 
+struct ip_vs_conn *
+ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
+			struct ip_vs_protocol *pp,
+			const struct ip_vs_iphdr *iph,
+			unsigned int proto_off, int inverse)
+{
+	__be16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse))
+		return ip_vs_conn_in_get(af, iph->protocol,
+					 &iph->saddr, pptr[0],
+					 &iph->daddr, pptr[1]);
+	else
+		return ip_vs_conn_in_get(af, iph->protocol,
+					 &iph->daddr, pptr[1],
+					 &iph->saddr, pptr[0]);
+}
+EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto);
+
 /* Get reference to connection template */
 struct ip_vs_conn *ip_vs_ct_in_get
 (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
@@ -356,6 +379,28 @@ struct ip_vs_conn *ip_vs_conn_out_get
 	return ret;
 }
 
+struct ip_vs_conn *
+ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
+			 struct ip_vs_protocol *pp,
+			 const struct ip_vs_iphdr *iph,
+			 unsigned int proto_off, int inverse)
+{
+	__be16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse))
+		return ip_vs_conn_out_get(af, iph->protocol,
+					  &iph->saddr, pptr[0],
+					  &iph->daddr, pptr[1]);
+	else
+		return ip_vs_conn_out_get(af, iph->protocol,
+					  &iph->daddr, pptr[1],
+					  &iph->saddr, pptr[0]);
+}
+EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
 
 /*
  *      Put back the conn and restart its timer with its timeout
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index db5575967c1..4c0855cb006 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -8,55 +8,6 @@
 #include <net/sctp/checksum.h>
 #include <net/ip_vs.h>
 
-
-static struct ip_vs_conn *
-sctp_conn_in_get(int af,
-		 const struct sk_buff *skb,
-		 struct ip_vs_protocol *pp,
-		 const struct ip_vs_iphdr *iph,
-		 unsigned int proto_off,
-		 int inverse)
-{
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse)) 
-		return ip_vs_conn_in_get(af, iph->protocol,
-					 &iph->saddr, pptr[0],
-					 &iph->daddr, pptr[1]);
-	else 
-		return ip_vs_conn_in_get(af, iph->protocol,
-					 &iph->daddr, pptr[1],
-					 &iph->saddr, pptr[0]);
-}
-
-static struct ip_vs_conn *
-sctp_conn_out_get(int af,
-		  const struct sk_buff *skb,
-		  struct ip_vs_protocol *pp,
-		  const struct ip_vs_iphdr *iph,
-		  unsigned int proto_off,
-		  int inverse)
-{
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse)) 
-		return ip_vs_conn_out_get(af, iph->protocol,
-					  &iph->saddr, pptr[0],
-					  &iph->daddr, pptr[1]);
-	else 
-		return ip_vs_conn_out_get(af, iph->protocol,
-					  &iph->daddr, pptr[1],
-					  &iph->saddr, pptr[0]);
-}
-
 static int
 sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		   int *verdict, struct ip_vs_conn **cpp)
@@ -1169,8 +1120,8 @@ struct ip_vs_protocol ip_vs_protocol_sctp = {
 	.register_app = sctp_register_app,
 	.unregister_app = sctp_unregister_app,
 	.conn_schedule = sctp_conn_schedule,
-	.conn_in_get = sctp_conn_in_get,
-	.conn_out_get = sctp_conn_out_get,
+	.conn_in_get = ip_vs_conn_in_get_proto,
+	.conn_out_get = ip_vs_conn_out_get_proto,
 	.snat_handler = sctp_snat_handler,
 	.dnat_handler = sctp_dnat_handler,
 	.csum_check = sctp_csum_check,
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 91d28e07374..282d24de859 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -27,52 +27,6 @@
 
 #include <net/ip_vs.h>
 
-
-static struct ip_vs_conn *
-tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		const struct ip_vs_iphdr *iph, unsigned int proto_off,
-		int inverse)
-{
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse)) {
-		return ip_vs_conn_in_get(af, iph->protocol,
-					 &iph->saddr, pptr[0],
-					 &iph->daddr, pptr[1]);
-	} else {
-		return ip_vs_conn_in_get(af, iph->protocol,
-					 &iph->daddr, pptr[1],
-					 &iph->saddr, pptr[0]);
-	}
-}
-
-static struct ip_vs_conn *
-tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
-		 int inverse)
-{
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse)) {
-		return ip_vs_conn_out_get(af, iph->protocol,
-					  &iph->saddr, pptr[0],
-					  &iph->daddr, pptr[1]);
-	} else {
-		return ip_vs_conn_out_get(af, iph->protocol,
-					  &iph->daddr, pptr[1],
-					  &iph->saddr, pptr[0]);
-	}
-}
-
-
 static int
 tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		  int *verdict, struct ip_vs_conn **cpp)
@@ -721,8 +675,8 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
 	.register_app =		tcp_register_app,
 	.unregister_app =	tcp_unregister_app,
 	.conn_schedule =	tcp_conn_schedule,
-	.conn_in_get =		tcp_conn_in_get,
-	.conn_out_get =		tcp_conn_out_get,
+	.conn_in_get =		ip_vs_conn_in_get_proto,
+	.conn_out_get =		ip_vs_conn_out_get_proto,
 	.snat_handler =		tcp_snat_handler,
 	.dnat_handler =		tcp_dnat_handler,
 	.csum_check =		tcp_csum_check,
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index e7a6885e016..8553231b5d4 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -27,58 +27,6 @@
 #include <net/ip.h>
 #include <net/ip6_checksum.h>
 
-static struct ip_vs_conn *
-udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		const struct ip_vs_iphdr *iph, unsigned int proto_off,
-		int inverse)
-{
-	struct ip_vs_conn *cp;
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse)) {
-		cp = ip_vs_conn_in_get(af, iph->protocol,
-				       &iph->saddr, pptr[0],
-				       &iph->daddr, pptr[1]);
-	} else {
-		cp = ip_vs_conn_in_get(af, iph->protocol,
-				       &iph->daddr, pptr[1],
-				       &iph->saddr, pptr[0]);
-	}
-
-	return cp;
-}
-
-
-static struct ip_vs_conn *
-udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
-		 int inverse)
-{
-	struct ip_vs_conn *cp;
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse)) {
-		cp = ip_vs_conn_out_get(af, iph->protocol,
-					&iph->saddr, pptr[0],
-					&iph->daddr, pptr[1]);
-	} else {
-		cp = ip_vs_conn_out_get(af, iph->protocol,
-					&iph->daddr, pptr[1],
-					&iph->saddr, pptr[0]);
-	}
-
-	return cp;
-}
-
-
 static int
 udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		  int *verdict, struct ip_vs_conn **cpp)
@@ -520,8 +468,8 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
 	.init =			udp_init,
 	.exit =			udp_exit,
 	.conn_schedule =	udp_conn_schedule,
-	.conn_in_get =		udp_conn_in_get,
-	.conn_out_get =		udp_conn_out_get,
+	.conn_in_get =		ip_vs_conn_in_get_proto,
+	.conn_out_get =		ip_vs_conn_out_get_proto,
 	.snat_handler =		udp_snat_handler,
 	.dnat_handler =		udp_dnat_handler,
 	.csum_check =		udp_csum_check,
-- 
cgit v1.2.3-70-g09d2


From f43dc98b3be36551143e3bbaf1bb3067835c24f4 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Mon, 2 Aug 2010 17:20:54 +0200
Subject: netfilter: nf_nat: make unique_tuple return void

The only user of unique_tuple(), get_unique_tuple(), doesn't care about the
return value of unique_tuple(), so make unique_tuple() return void (nothing).

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_nat_protocol.h   | 8 ++++----
 net/ipv4/netfilter/nf_nat_proto_common.c  | 8 ++++----
 net/ipv4/netfilter/nf_nat_proto_dccp.c    | 6 +++---
 net/ipv4/netfilter/nf_nat_proto_gre.c     | 8 ++++----
 net/ipv4/netfilter/nf_nat_proto_icmp.c    | 6 +++---
 net/ipv4/netfilter/nf_nat_proto_sctp.c    | 6 +++---
 net/ipv4/netfilter/nf_nat_proto_tcp.c     | 5 ++---
 net/ipv4/netfilter/nf_nat_proto_udp.c     | 5 ++---
 net/ipv4/netfilter/nf_nat_proto_udplite.c | 6 +++---
 net/ipv4/netfilter/nf_nat_proto_unknown.c | 4 ++--
 10 files changed, 30 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h
index c398017ccfa..df17bac46bf 100644
--- a/include/net/netfilter/nf_nat_protocol.h
+++ b/include/net/netfilter/nf_nat_protocol.h
@@ -27,9 +27,9 @@ struct nf_nat_protocol {
 
 	/* Alter the per-proto part of the tuple (depending on
 	   maniptype), to give a unique tuple in the given range if
-	   possible; return false if not.  Per-protocol part of tuple
-	   is initialized to the incoming packet. */
-	bool (*unique_tuple)(struct nf_conntrack_tuple *tuple,
+	   possible.  Per-protocol part of tuple is initialized to the
+	   incoming packet. */
+	void (*unique_tuple)(struct nf_conntrack_tuple *tuple,
 			     const struct nf_nat_range *range,
 			     enum nf_nat_manip_type maniptype,
 			     const struct nf_conn *ct);
@@ -63,7 +63,7 @@ extern bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
 				  const union nf_conntrack_man_proto *min,
 				  const union nf_conntrack_man_proto *max);
 
-extern bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+extern void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 				      const struct nf_nat_range *range,
 				      enum nf_nat_manip_type maniptype,
 				      const struct nf_conn *ct,
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
index 6c4f11f5144..2844a0383a1 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -34,7 +34,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
 }
 EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
 
-bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 			       const struct nf_nat_range *range,
 			       enum nf_nat_manip_type maniptype,
 			       const struct nf_conn *ct,
@@ -53,7 +53,7 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
 		/* If it's dst rewrite, can't change port */
 		if (maniptype == IP_NAT_MANIP_DST)
-			return false;
+			return;
 
 		if (ntohs(*portptr) < 1024) {
 			/* Loose convention: >> 512 is credential passing */
@@ -87,9 +87,9 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 			continue;
 		if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
 			*rover = off;
-		return true;
+		return;
 	}
-	return false;
+	return;
 }
 EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
 
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
index 22485ce306d..570faf2667b 100644
--- a/net/ipv4/netfilter/nf_nat_proto_dccp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c
@@ -22,14 +22,14 @@
 
 static u_int16_t dccp_port_rover;
 
-static bool
+static void
 dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
 		  const struct nf_nat_range *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
-	return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
-					 &dccp_port_rover);
+	nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
+				  &dccp_port_rover);
 }
 
 static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index d7e89201351..89933ab6f63 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -37,7 +37,7 @@ MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
 MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
 
 /* generate unique tuple ... */
-static bool
+static void
 gre_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 const struct nf_nat_range *range,
 		 enum nf_nat_manip_type maniptype,
@@ -50,7 +50,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
 	/* If there is no master conntrack we are not PPTP,
 	   do not change tuples */
 	if (!ct->master)
-		return false;
+		return;
 
 	if (maniptype == IP_NAT_MANIP_SRC)
 		keyptr = &tuple->src.u.gre.key;
@@ -71,11 +71,11 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
 	for (i = 0; i < range_size; i++, key++) {
 		*keyptr = htons(min + key % range_size);
 		if (!nf_nat_used_tuple(tuple, ct))
-			return true;
+			return;
 	}
 
 	pr_debug("%p: no NAT mapping\n", ct);
-	return false;
+	return;
 }
 
 /* manipulate a GRE packet according to maniptype */
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 19a8b0b07d8..97003fe312e 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -27,7 +27,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
 	       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
 }
 
-static bool
+static void
 icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
 		  const struct nf_nat_range *range,
 		  enum nf_nat_manip_type maniptype,
@@ -46,9 +46,9 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
 		tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
 					     (id % range_size));
 		if (!nf_nat_used_tuple(tuple, ct))
-			return true;
+			return;
 	}
-	return false;
+	return;
 }
 
 static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
index 3fc598eeeb1..756331d4266 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -16,14 +16,14 @@
 
 static u_int16_t nf_sctp_port_rover;
 
-static bool
+static void
 sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
 		  const struct nf_nat_range *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
-	return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
-					 &nf_sctp_port_rover);
+	nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
+				  &nf_sctp_port_rover);
 }
 
 static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 399e2cfa263..aa460a595d5 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -20,14 +20,13 @@
 
 static u_int16_t tcp_port_rover;
 
-static bool
+static void
 tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 const struct nf_nat_range *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
-	return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
-					 &tcp_port_rover);
+	nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover);
 }
 
 static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index 9e61c79492e..dfe65c7e292 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -19,14 +19,13 @@
 
 static u_int16_t udp_port_rover;
 
-static bool
+static void
 udp_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 const struct nf_nat_range *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
-	return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
-					 &udp_port_rover);
+	nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover);
 }
 
 static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
index 440a229bbd8..3cc8c8af39e 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udplite.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c
@@ -18,14 +18,14 @@
 
 static u_int16_t udplite_port_rover;
 
-static bool
+static void
 udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
 		     const struct nf_nat_range *range,
 		     enum nf_nat_manip_type maniptype,
 		     const struct nf_conn *ct)
 {
-	return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
-					 &udplite_port_rover);
+	nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
+				  &udplite_port_rover);
 }
 
 static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index 14381c62ace..a50f2bc1c73 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -26,14 +26,14 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
+static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
 				 const struct nf_nat_range *range,
 				 enum nf_nat_manip_type maniptype,
 				 const struct nf_conn *ct)
 {
 	/* Sorry: we can't help you; if it's not unique, we can't frob
 	   anything. */
-	return false;
+	return;
 }
 
 static bool
-- 
cgit v1.2.3-70-g09d2


From b126468e08d92aaeffa58ef04d70e417241dadc1 Mon Sep 17 00:00:00 2001
From: Fang Wenqi <anton.fang@gmail.com>
Date: Tue, 1 Jun 2010 02:43:06 +0000
Subject: virtio_9p.h needs <linux/types.h>

Found with make headers_check:
include/linux/virtio_9p.h:15: found __[us]{8,16,32,64} type without #include <linux/types.h>

Signed-off-by: Fang Wenqi <antonf@turbolinux.com.cn>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/linux/virtio_9p.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h
index 5cf11765146..395c38a47ad 100644
--- a/include/linux/virtio_9p.h
+++ b/include/linux/virtio_9p.h
@@ -4,6 +4,7 @@
  * compatible drivers/servers. */
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
+#include <linux/types.h>
 
 /* The feature bitmap for virtio 9P */
 
-- 
cgit v1.2.3-70-g09d2


From 0204fe2a20da12ddae1b564712ceeebc55214f97 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Tue, 1 Jun 2010 17:30:35 -0300
Subject: V4L/DVB: IR: add RC6 keymap for Windows Media Center Ed. remotes

This is the RC6 keymap for the Windows Media Center Edition remotes
that come bundled with MCE/eHome Infrared Remote transceivers. Tested
with 3 different variants of the remote, but it's possible there are
still some additional keys missing, but it's simple enough to add them
in later...

This patch also adds an IR_TYPE_ALL convenience macro to make life
easier for receivers that support all IR protocols.

v2: fix an erroneous comment that referred to imon devices

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/IR/keymaps/Makefile     |   1 +
 drivers/media/IR/keymaps/rc-rc6-mce.c | 105 ++++++++++++++++++++++++++++++++++
 include/media/rc-map.h                |   4 ++
 3 files changed, 110 insertions(+)
 create mode 100644 drivers/media/IR/keymaps/rc-rc6-mce.c

(limited to 'include')

diff --git a/drivers/media/IR/keymaps/Makefile b/drivers/media/IR/keymaps/Makefile
index aea649fbcf5..c3def729d75 100644
--- a/drivers/media/IR/keymaps/Makefile
+++ b/drivers/media/IR/keymaps/Makefile
@@ -57,6 +57,7 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \
 			rc-pv951.o \
 			rc-rc5-hauppauge-new.o \
 			rc-rc5-tv.o \
+			rc-rc6-mce.o \
 			rc-real-audio-220-32-keys.o \
 			rc-tbs-nec.o \
 			rc-terratec-cinergy-xs.o \
diff --git a/drivers/media/IR/keymaps/rc-rc6-mce.c b/drivers/media/IR/keymaps/rc-rc6-mce.c
new file mode 100644
index 00000000000..c6726a8039b
--- /dev/null
+++ b/drivers/media/IR/keymaps/rc-rc6-mce.c
@@ -0,0 +1,105 @@
+/* rc-rc6-mce.c - Keytable for Windows Media Center RC-6 remotes for use
+ * with the Media Center Edition eHome Infrared Transceiver.
+ *
+ * Copyright (c) 2010 by Jarod Wilson <jarod@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <media/rc-map.h>
+
+static struct ir_scancode rc6_mce[] = {
+	{ 0x800f0415, KEY_REWIND },
+	{ 0x800f0414, KEY_FASTFORWARD },
+	{ 0x800f041b, KEY_PREVIOUS },
+	{ 0x800f041a, KEY_NEXT },
+
+	{ 0x800f0416, KEY_PLAY },
+	{ 0x800f0418, KEY_PAUSE },
+	{ 0x800f0419, KEY_STOP },
+	{ 0x800f0417, KEY_RECORD },
+
+	{ 0x800f041e, KEY_UP },
+	{ 0x800f041f, KEY_DOWN },
+	{ 0x800f0420, KEY_LEFT },
+	{ 0x800f0421, KEY_RIGHT },
+
+	{ 0x800f040b, KEY_ENTER },
+	{ 0x800f0422, KEY_OK },
+	{ 0x800f0423, KEY_EXIT },
+	{ 0x800f040a, KEY_DELETE },
+
+	{ 0x800f040e, KEY_MUTE },
+	{ 0x800f0410, KEY_VOLUMEUP },
+	{ 0x800f0411, KEY_VOLUMEDOWN },
+	{ 0x800f0412, KEY_CHANNELUP },
+	{ 0x800f0413, KEY_CHANNELDOWN },
+
+	{ 0x800f0401, KEY_NUMERIC_1 },
+	{ 0x800f0402, KEY_NUMERIC_2 },
+	{ 0x800f0403, KEY_NUMERIC_3 },
+	{ 0x800f0404, KEY_NUMERIC_4 },
+	{ 0x800f0405, KEY_NUMERIC_5 },
+	{ 0x800f0406, KEY_NUMERIC_6 },
+	{ 0x800f0407, KEY_NUMERIC_7 },
+	{ 0x800f0408, KEY_NUMERIC_8 },
+	{ 0x800f0409, KEY_NUMERIC_9 },
+	{ 0x800f0400, KEY_NUMERIC_0 },
+
+	{ 0x800f041d, KEY_NUMERIC_STAR },
+	{ 0x800f041c, KEY_NUMERIC_POUND },
+
+	{ 0x800f0446, KEY_TV },
+	{ 0x800f0447, KEY_AUDIO }, /* My Music */
+	{ 0x800f0448, KEY_PVR }, /* RecordedTV */
+	{ 0x800f0449, KEY_CAMERA },
+	{ 0x800f044a, KEY_VIDEO },
+	{ 0x800f0424, KEY_DVD },
+	{ 0x800f0425, KEY_TUNER }, /* LiveTV */
+	{ 0x800f0450, KEY_RADIO },
+
+	{ 0x800f044c, KEY_LANGUAGE },
+	{ 0x800f0427, KEY_ZOOM }, /* Aspect */
+
+	{ 0x800f045b, KEY_RED },
+	{ 0x800f045c, KEY_GREEN },
+	{ 0x800f045d, KEY_YELLOW },
+	{ 0x800f045e, KEY_BLUE },
+
+	{ 0x800f040f, KEY_INFO },
+	{ 0x800f0426, KEY_EPG }, /* Guide */
+	{ 0x800f045a, KEY_SUBTITLE }, /* Caption/Teletext */
+	{ 0x800f044d, KEY_TITLE },
+
+	{ 0x800f040c, KEY_POWER },
+	{ 0x800f040d, KEY_PROG1 }, /* Windows MCE button */
+
+};
+
+static struct rc_keymap rc6_mce_map = {
+	.map = {
+		.scan    = rc6_mce,
+		.size    = ARRAY_SIZE(rc6_mce),
+		.ir_type = IR_TYPE_RC6,
+		.name    = RC_MAP_RC6_MCE,
+	}
+};
+
+static int __init init_rc_map_rc6_mce(void)
+{
+	return ir_register_map(&rc6_mce_map);
+}
+
+static void __exit exit_rc_map_rc6_mce(void)
+{
+	ir_unregister_map(&rc6_mce_map);
+}
+
+module_init(init_rc_map_rc6_mce)
+module_exit(exit_rc_map_rc6_mce)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jarod Wilson <jarod@redhat.com>");
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index c78e99a435b..36ee280d42a 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -19,6 +19,9 @@
 #define IR_TYPE_SONY	(1  << 4)	/* Sony12/15/20 protocol */
 #define IR_TYPE_OTHER	(1u << 31)
 
+#define IR_TYPE_ALL (IR_TYPE_RC5 | IR_TYPE_NEC  | IR_TYPE_RC6  | \
+		     IR_TYPE_JVC | IR_TYPE_SONY | IR_TYPE_OTHER)
+
 struct ir_scancode {
 	u32	scancode;
 	u32	keycode;
@@ -107,6 +110,7 @@ void rc_map_init(void);
 #define RC_MAP_PV951                     "rc-pv951"
 #define RC_MAP_RC5_HAUPPAUGE_NEW         "rc-rc5-hauppauge-new"
 #define RC_MAP_RC5_TV                    "rc-rc5-tv"
+#define RC_MAP_RC6_MCE                   "rc-rc6-mce"
 #define RC_MAP_REAL_AUDIO_220_32_KEYS    "rc-real-audio-220-32-keys"
 #define RC_MAP_TBS_NEC                   "rc-tbs-nec"
 #define RC_MAP_TERRATEC_CINERGY_XS       "rc-terratec-cinergy-xs"
-- 
cgit v1.2.3-70-g09d2


From 0dc50942d6f23989ffb3024aa2271941ec44aea8 Mon Sep 17 00:00:00 2001
From: David Härdeman <david@hardeman.nu>
Date: Mon, 7 Jun 2010 16:32:33 -0300
Subject: V4L/DVB: ir-core: partially convert ir-kbd-i2c.c to not use
 ir-functions.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Partially convert drivers/media/video/ir-kbd-i2c.c to
not use ir-functions.c

Signed-off-by: David Härdeman <david@hardeman.nu>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/ir-kbd-i2c.c | 14 ++++----------
 include/media/ir-kbd-i2c.h       |  2 +-
 2 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c
index 29d43974265..27ae8bbfb47 100644
--- a/drivers/media/video/ir-kbd-i2c.c
+++ b/drivers/media/video/ir-kbd-i2c.c
@@ -47,7 +47,7 @@
 #include <linux/i2c-id.h>
 #include <linux/workqueue.h>
 
-#include <media/ir-common.h>
+#include <media/ir-core.h>
 #include <media/ir-kbd-i2c.h>
 
 /* ----------------------------------------------------------------------- */
@@ -272,11 +272,8 @@ static void ir_key_poll(struct IR_i2c *ir)
 		return;
 	}
 
-	if (0 == rc) {
-		ir_input_nokey(ir->input, &ir->ir);
-	} else {
-		ir_input_keydown(ir->input, &ir->ir, ir_key);
-	}
+	if (rc)
+		ir_keydown(ir->input, ir_key, 0);
 }
 
 static void ir_work(struct work_struct *work)
@@ -439,10 +436,7 @@ static int ir_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		 dev_name(&client->dev));
 
 	/* init + register input device */
-	err = ir_input_init(input_dev, &ir->ir, ir_type);
-	if (err < 0)
-		goto err_out_free;
-
+	ir->ir_type = ir_type;
 	input_dev->id.bustype = BUS_I2C;
 	input_dev->name       = ir->name;
 	input_dev->phys       = ir->phys;
diff --git a/include/media/ir-kbd-i2c.h b/include/media/ir-kbd-i2c.h
index 0506e45c9a4..5e96d7a430b 100644
--- a/include/media/ir-kbd-i2c.h
+++ b/include/media/ir-kbd-i2c.h
@@ -11,7 +11,7 @@ struct IR_i2c {
 	struct i2c_client      *c;
 	struct input_dev       *input;
 	struct ir_input_state  ir;
-
+	u64                    ir_type;
 	/* Used to avoid fast repeating */
 	unsigned char          old;
 
-- 
cgit v1.2.3-70-g09d2


From 9b7c54d926284c5277cff3ef3cfe29f26568306a Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Wed, 16 Jun 2010 17:55:25 -0300
Subject: V4L/DVB: IR: add tx callbacks to ir-core

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/ir-core.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/media/ir-core.h b/include/media/ir-core.h
index ad1303f20e0..9b957af2158 100644
--- a/include/media/ir-core.h
+++ b/include/media/ir-core.h
@@ -47,15 +47,21 @@ enum rc_driver_type {
  *	is opened.
  * @close: callback to allow drivers to disable polling/irq when IR input device
  *	is opened.
+ * @s_tx_mask: set transmitter mask (for devices with multiple tx outputs)
+ * @s_tx_carrier: set transmit carrier frequency
+ * @tx_ir: transmit IR
  */
 struct ir_dev_props {
 	enum rc_driver_type	driver_type;
 	unsigned long		allowed_protos;
 	u32			scanmask;
-	void 			*priv;
+	void			*priv;
 	int			(*change_protocol)(void *priv, u64 ir_type);
 	int			(*open)(void *priv);
 	void			(*close)(void *priv);
+	int			(*s_tx_mask)(void *priv, u32 mask);
+	int			(*s_tx_carrier)(void *priv, u32 carrier);
+	int			(*tx_ir)(void *priv, const char *buf, u32 n);
 };
 
 struct ir_input_dev {
-- 
cgit v1.2.3-70-g09d2


From f6a20eb1a2d35660240cd1eb8dc2bd6504a0c6c5 Mon Sep 17 00:00:00 2001
From: Klaus Schmidinger <Klaus.Schmidinger@tvdr.de>
Date: Thu, 1 Jul 2010 01:37:34 -0300
Subject: V4L/DVB: Add FE_CAN_TURBO_FEC

Some (North American) providers use a non-standard mode called
"8psk turbo fec". Since there is no flag in the driver that
would allow an application to determine whether a particular
device can handle "turbo fec", the attached patch introduces
FE_CAN_TURBO_FEC.

Since there is no flag in the SI data that would indicate
that a transponder uses "turbo fec", VDR will assume that
all 8psk transponders on DVB-S use "turbo fec".

Tested-by: Derek Kelly <user.vdr@gmail.com>
Signed-off-by: Klaus Schmidinger <Klaus.Schmidinger@tvdr.de>
Signed-off-by: Douglas Schilling Landgraf <dougsland@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/dvb/dvb-usb/gp8psk-fe.c | 2 +-
 include/linux/dvb/frontend.h          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/media/dvb/dvb-usb/gp8psk-fe.c b/drivers/media/dvb/dvb-usb/gp8psk-fe.c
index 7a7f1b2b681..dbdb5347b2a 100644
--- a/drivers/media/dvb/dvb-usb/gp8psk-fe.c
+++ b/drivers/media/dvb/dvb-usb/gp8psk-fe.c
@@ -349,7 +349,7 @@ static struct dvb_frontend_ops gp8psk_fe_ops = {
 			 * FE_CAN_QAM_16 is for compatibility
 			 * (Myth incorrectly detects Turbo-QPSK as plain QAM-16)
 			 */
-			FE_CAN_QPSK | FE_CAN_QAM_16
+			FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_TURBO_FEC
 	},
 
 	.release = gp8psk_fe_release,
diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index b6cb5425cde..493a2bf85f6 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -62,6 +62,7 @@ typedef enum fe_caps {
 	FE_CAN_8VSB			= 0x200000,
 	FE_CAN_16VSB			= 0x400000,
 	FE_HAS_EXTENDED_CAPS		= 0x800000,   /* We need more bitspace for newer APIs, indicate this. */
+	FE_CAN_TURBO_FEC		= 0x8000000,  /* frontend supports "turbo fec modulation" */
 	FE_CAN_2G_MODULATION		= 0x10000000, /* frontend supports "2nd generation modulation" (DVB-S2) */
 	FE_NEEDS_BENDING		= 0x20000000, /* not supported anymore, don't use (frontend requires frequency bending) */
 	FE_CAN_RECOVER			= 0x40000000, /* frontend can recover from a cable unplug automatically */
-- 
cgit v1.2.3-70-g09d2


From 1ece36097d0170a41fc129b8b1823a36ec2fb5c6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Sat, 3 Jul 2010 18:06:13 -0300
Subject: V4L/DVB: Increment DVB API version

A new flag was added to the frontend capabilities. Increment
API minor revision.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/dvb/version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dvb/version.h b/include/linux/dvb/version.h
index 540b0583d9f..5a7546c1268 100644
--- a/include/linux/dvb/version.h
+++ b/include/linux/dvb/version.h
@@ -24,6 +24,6 @@
 #define _DVBVERSION_H_
 
 #define DVB_API_VERSION 5
-#define DVB_API_VERSION_MINOR 1
+#define DVB_API_VERSION_MINOR 2
 
 #endif /*_DVBVERSION_H_*/
-- 
cgit v1.2.3-70-g09d2


From 4a62a5ab59742331a4e17ccaa894968d40ed9b16 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Sat, 3 Jul 2010 01:06:57 -0300
Subject: V4L/DVB: IR: add lirc device interface

v2: currently unused ioctls are included, but #if 0'd out

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/IR/Kconfig    |  11 +
 drivers/media/IR/Makefile   |   1 +
 drivers/media/IR/lirc_dev.c | 761 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/media/IR/lirc_dev.h | 226 +++++++++++++
 include/media/lirc.h        | 163 ++++++++++
 5 files changed, 1162 insertions(+)
 create mode 100644 drivers/media/IR/lirc_dev.c
 create mode 100644 drivers/media/IR/lirc_dev.h
 create mode 100644 include/media/lirc.h

(limited to 'include')

diff --git a/drivers/media/IR/Kconfig b/drivers/media/IR/Kconfig
index 797a6c36d98..5d2c37dcf11 100644
--- a/drivers/media/IR/Kconfig
+++ b/drivers/media/IR/Kconfig
@@ -8,6 +8,17 @@ config VIDEO_IR
 	depends on IR_CORE
 	default IR_CORE
 
+config LIRC
+	tristate
+	default y
+
+	---help---
+	   Enable this option to build the Linux Infrared Remote
+	   Control (LIRC) core device interface driver. The LIRC
+	   interface passes raw IR to and from userspace, where the
+	   LIRC daemon handles protocol decoding for IR reception and
+	   encoding for IR transmitting (aka "blasting").
+
 source "drivers/media/IR/keymaps/Kconfig"
 
 config IR_NEC_DECODER
diff --git a/drivers/media/IR/Makefile b/drivers/media/IR/Makefile
index b43fe36d88b..3ba00bb8bea 100644
--- a/drivers/media/IR/Makefile
+++ b/drivers/media/IR/Makefile
@@ -5,6 +5,7 @@ obj-y += keymaps/
 
 obj-$(CONFIG_IR_CORE) += ir-core.o
 obj-$(CONFIG_VIDEO_IR) += ir-common.o
+obj-$(CONFIG_LIRC) += lirc_dev.o
 obj-$(CONFIG_IR_NEC_DECODER) += ir-nec-decoder.o
 obj-$(CONFIG_IR_RC5_DECODER) += ir-rc5-decoder.o
 obj-$(CONFIG_IR_RC6_DECODER) += ir-rc6-decoder.o
diff --git a/drivers/media/IR/lirc_dev.c b/drivers/media/IR/lirc_dev.c
new file mode 100644
index 00000000000..9e141d51df9
--- /dev/null
+++ b/drivers/media/IR/lirc_dev.c
@@ -0,0 +1,761 @@
+/*
+ * LIRC base driver
+ *
+ * by Artur Lipowski <alipowski@interia.pl>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/ioctl.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/completion.h>
+#include <linux/errno.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
+#include <linux/unistd.h>
+#include <linux/kthread.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+
+#include <media/lirc.h>
+#include "lirc_dev.h"
+
+static int debug;
+
+#define IRCTL_DEV_NAME	"BaseRemoteCtl"
+#define NOPLUG		-1
+#define LOGHEAD		"lirc_dev (%s[%d]): "
+
+static dev_t lirc_base_dev;
+
+struct irctl {
+	struct lirc_driver d;
+	int attached;
+	int open;
+
+	struct mutex irctl_lock;
+	struct lirc_buffer *buf;
+	unsigned int chunk_size;
+
+	struct task_struct *task;
+	long jiffies_to_wait;
+
+	struct cdev cdev;
+};
+
+static DEFINE_MUTEX(lirc_dev_lock);
+
+static struct irctl *irctls[MAX_IRCTL_DEVICES];
+
+/* Only used for sysfs but defined to void otherwise */
+static struct class *lirc_class;
+
+/*  helper function
+ *  initializes the irctl structure
+ */
+static void init_irctl(struct irctl *ir)
+{
+	dev_dbg(ir->d.dev, LOGHEAD "initializing irctl\n",
+		ir->d.name, ir->d.minor);
+	mutex_init(&ir->irctl_lock);
+	ir->d.minor = NOPLUG;
+}
+
+static void cleanup(struct irctl *ir)
+{
+	dev_dbg(ir->d.dev, LOGHEAD "cleaning up\n", ir->d.name, ir->d.minor);
+
+	device_destroy(lirc_class, MKDEV(MAJOR(lirc_base_dev), ir->d.minor));
+
+	if (ir->buf != ir->d.rbuf) {
+		lirc_buffer_free(ir->buf);
+		kfree(ir->buf);
+	}
+	ir->buf = NULL;
+}
+
+/*  helper function
+ *  reads key codes from driver and puts them into buffer
+ *  returns 0 on success
+ */
+static int add_to_buf(struct irctl *ir)
+{
+	if (ir->d.add_to_buf) {
+		int res = -ENODATA;
+		int got_data = 0;
+
+		/*
+		 * service the device as long as it is returning
+		 * data and we have space
+		 */
+get_data:
+		res = ir->d.add_to_buf(ir->d.data, ir->buf);
+		if (res == 0) {
+			got_data++;
+			goto get_data;
+		}
+
+		if (res == -ENODEV)
+			kthread_stop(ir->task);
+
+		return got_data ? 0 : res;
+	}
+
+	return 0;
+}
+
+/* main function of the polling thread
+ */
+static int lirc_thread(void *irctl)
+{
+	struct irctl *ir = irctl;
+
+	dev_dbg(ir->d.dev, LOGHEAD "poll thread started\n",
+		ir->d.name, ir->d.minor);
+
+	do {
+		if (ir->open) {
+			if (ir->jiffies_to_wait) {
+				set_current_state(TASK_INTERRUPTIBLE);
+				schedule_timeout(ir->jiffies_to_wait);
+			}
+			if (kthread_should_stop())
+				break;
+			if (!add_to_buf(ir))
+				wake_up_interruptible(&ir->buf->wait_poll);
+		} else {
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule();
+		}
+	} while (!kthread_should_stop());
+
+	dev_dbg(ir->d.dev, LOGHEAD "poll thread ended\n",
+		ir->d.name, ir->d.minor);
+
+	return 0;
+}
+
+
+static struct file_operations fops = {
+	.owner		= THIS_MODULE,
+	.read		= lirc_dev_fop_read,
+	.write		= lirc_dev_fop_write,
+	.poll		= lirc_dev_fop_poll,
+	.ioctl		= lirc_dev_fop_ioctl,
+	.open		= lirc_dev_fop_open,
+	.release	= lirc_dev_fop_close,
+};
+
+static int lirc_cdev_add(struct irctl *ir)
+{
+	int retval;
+	struct lirc_driver *d = &ir->d;
+
+	if (d->fops) {
+		cdev_init(&ir->cdev, d->fops);
+		ir->cdev.owner = d->owner;
+	} else {
+		cdev_init(&ir->cdev, &fops);
+		ir->cdev.owner = THIS_MODULE;
+	}
+	kobject_set_name(&ir->cdev.kobj, "lirc%d", d->minor);
+
+	retval = cdev_add(&ir->cdev, MKDEV(MAJOR(lirc_base_dev), d->minor), 1);
+	if (retval)
+		kobject_put(&ir->cdev.kobj);
+
+	return retval;
+}
+
+int lirc_register_driver(struct lirc_driver *d)
+{
+	struct irctl *ir;
+	int minor;
+	int bytes_in_key;
+	unsigned int chunk_size;
+	unsigned int buffer_size;
+	int err;
+
+	if (!d) {
+		printk(KERN_ERR "lirc_dev: lirc_register_driver: "
+		       "driver pointer must be not NULL!\n");
+		err = -EBADRQC;
+		goto out;
+	}
+
+	if (MAX_IRCTL_DEVICES <= d->minor) {
+		dev_err(d->dev, "lirc_dev: lirc_register_driver: "
+			"\"minor\" must be between 0 and %d (%d)!\n",
+			MAX_IRCTL_DEVICES-1, d->minor);
+		err = -EBADRQC;
+		goto out;
+	}
+
+	if (1 > d->code_length || (BUFLEN * 8) < d->code_length) {
+		dev_err(d->dev, "lirc_dev: lirc_register_driver: "
+			"code length in bits for minor (%d) "
+			"must be less than %d!\n",
+			d->minor, BUFLEN * 8);
+		err = -EBADRQC;
+		goto out;
+	}
+
+	dev_dbg(d->dev, "lirc_dev: lirc_register_driver: sample_rate: %d\n",
+		d->sample_rate);
+	if (d->sample_rate) {
+		if (2 > d->sample_rate || HZ < d->sample_rate) {
+			dev_err(d->dev, "lirc_dev: lirc_register_driver: "
+				"sample_rate must be between 2 and %d!\n", HZ);
+			err = -EBADRQC;
+			goto out;
+		}
+		if (!d->add_to_buf) {
+			dev_err(d->dev, "lirc_dev: lirc_register_driver: "
+				"add_to_buf cannot be NULL when "
+				"sample_rate is set\n");
+			err = -EBADRQC;
+			goto out;
+		}
+	} else if (!(d->fops && d->fops->read) && !d->rbuf) {
+		dev_err(d->dev, "lirc_dev: lirc_register_driver: "
+			"fops->read and rbuf cannot all be NULL!\n");
+		err = -EBADRQC;
+		goto out;
+	} else if (!d->rbuf) {
+		if (!(d->fops && d->fops->read && d->fops->poll &&
+		      d->fops->ioctl)) {
+			dev_err(d->dev, "lirc_dev: lirc_register_driver: "
+				"neither read, poll nor ioctl can be NULL!\n");
+			err = -EBADRQC;
+			goto out;
+		}
+	}
+
+	mutex_lock(&lirc_dev_lock);
+
+	minor = d->minor;
+
+	if (minor < 0) {
+		/* find first free slot for driver */
+		for (minor = 0; minor < MAX_IRCTL_DEVICES; minor++)
+			if (!irctls[minor])
+				break;
+		if (MAX_IRCTL_DEVICES == minor) {
+			dev_err(d->dev, "lirc_dev: lirc_register_driver: "
+				"no free slots for drivers!\n");
+			err = -ENOMEM;
+			goto out_lock;
+		}
+	} else if (irctls[minor]) {
+		dev_err(d->dev, "lirc_dev: lirc_register_driver: "
+			"minor (%d) just registered!\n", minor);
+		err = -EBUSY;
+		goto out_lock;
+	}
+
+	ir = kzalloc(sizeof(struct irctl), GFP_KERNEL);
+	if (!ir) {
+		err = -ENOMEM;
+		goto out_lock;
+	}
+	init_irctl(ir);
+	irctls[minor] = ir;
+	d->minor = minor;
+
+	if (d->sample_rate) {
+		ir->jiffies_to_wait = HZ / d->sample_rate;
+	} else {
+		/* it means - wait for external event in task queue */
+		ir->jiffies_to_wait = 0;
+	}
+
+	/* some safety check 8-) */
+	d->name[sizeof(d->name)-1] = '\0';
+
+	bytes_in_key = BITS_TO_LONGS(d->code_length) +
+			(d->code_length % 8 ? 1 : 0);
+	buffer_size = d->buffer_size ? d->buffer_size : BUFLEN / bytes_in_key;
+	chunk_size  = d->chunk_size  ? d->chunk_size  : bytes_in_key;
+
+	if (d->rbuf) {
+		ir->buf = d->rbuf;
+	} else {
+		ir->buf = kmalloc(sizeof(struct lirc_buffer), GFP_KERNEL);
+		if (!ir->buf) {
+			err = -ENOMEM;
+			goto out_lock;
+		}
+		err = lirc_buffer_init(ir->buf, chunk_size, buffer_size);
+		if (err) {
+			kfree(ir->buf);
+			goto out_lock;
+		}
+	}
+	ir->chunk_size = ir->buf->chunk_size;
+
+	if (d->features == 0)
+		d->features = LIRC_CAN_REC_LIRCCODE;
+
+	ir->d = *d;
+	ir->d.minor = minor;
+
+	device_create(lirc_class, ir->d.dev,
+		      MKDEV(MAJOR(lirc_base_dev), ir->d.minor), NULL,
+		      "lirc%u", ir->d.minor);
+
+	if (d->sample_rate) {
+		/* try to fire up polling thread */
+		ir->task = kthread_run(lirc_thread, (void *)ir, "lirc_dev");
+		if (IS_ERR(ir->task)) {
+			dev_err(d->dev, "lirc_dev: lirc_register_driver: "
+				"cannot run poll thread for minor = %d\n",
+				d->minor);
+			err = -ECHILD;
+			goto out_sysfs;
+		}
+	}
+
+	err = lirc_cdev_add(ir);
+	if (err)
+		goto out_sysfs;
+
+	ir->attached = 1;
+	mutex_unlock(&lirc_dev_lock);
+
+	dev_info(ir->d.dev, "lirc_dev: driver %s registered at minor = %d\n",
+		 ir->d.name, ir->d.minor);
+	return minor;
+
+out_sysfs:
+	device_destroy(lirc_class, MKDEV(MAJOR(lirc_base_dev), ir->d.minor));
+out_lock:
+	mutex_unlock(&lirc_dev_lock);
+out:
+	return err;
+}
+EXPORT_SYMBOL(lirc_register_driver);
+
+int lirc_unregister_driver(int minor)
+{
+	struct irctl *ir;
+
+	if (minor < 0 || minor >= MAX_IRCTL_DEVICES) {
+		printk(KERN_ERR "lirc_dev: lirc_unregister_driver: "
+		       "\"minor (%d)\" must be between 0 and %d!\n",
+		       minor, MAX_IRCTL_DEVICES-1);
+		return -EBADRQC;
+	}
+
+	ir = irctls[minor];
+
+	mutex_lock(&lirc_dev_lock);
+
+	if (ir->d.minor != minor) {
+		printk(KERN_ERR "lirc_dev: lirc_unregister_driver: "
+		       "minor (%d) device not registered!", minor);
+		mutex_unlock(&lirc_dev_lock);
+		return -ENOENT;
+	}
+
+	/* end up polling thread */
+	if (ir->task)
+		kthread_stop(ir->task);
+
+	dev_dbg(ir->d.dev, "lirc_dev: driver %s unregistered from minor = %d\n",
+		ir->d.name, ir->d.minor);
+
+	ir->attached = 0;
+	if (ir->open) {
+		dev_dbg(ir->d.dev, LOGHEAD "releasing opened driver\n",
+			ir->d.name, ir->d.minor);
+		wake_up_interruptible(&ir->buf->wait_poll);
+		mutex_lock(&ir->irctl_lock);
+		ir->d.set_use_dec(ir->d.data);
+		module_put(ir->d.owner);
+		mutex_unlock(&ir->irctl_lock);
+		cdev_del(&ir->cdev);
+	} else {
+		cleanup(ir);
+		cdev_del(&ir->cdev);
+		kfree(ir);
+		irctls[minor] = NULL;
+	}
+
+	mutex_unlock(&lirc_dev_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(lirc_unregister_driver);
+
+int lirc_dev_fop_open(struct inode *inode, struct file *file)
+{
+	struct irctl *ir;
+	int retval = 0;
+
+	if (iminor(inode) >= MAX_IRCTL_DEVICES) {
+		printk(KERN_WARNING "lirc_dev [%d]: open result = -ENODEV\n",
+		       iminor(inode));
+		return -ENODEV;
+	}
+
+	if (mutex_lock_interruptible(&lirc_dev_lock))
+		return -ERESTARTSYS;
+
+	ir = irctls[iminor(inode)];
+	if (!ir) {
+		retval = -ENODEV;
+		goto error;
+	}
+
+	dev_dbg(ir->d.dev, LOGHEAD "open called\n", ir->d.name, ir->d.minor);
+
+	if (ir->d.minor == NOPLUG) {
+		retval = -ENODEV;
+		goto error;
+	}
+
+	if (ir->open) {
+		retval = -EBUSY;
+		goto error;
+	}
+
+	if (try_module_get(ir->d.owner)) {
+		++ir->open;
+		retval = ir->d.set_use_inc(ir->d.data);
+
+		if (retval) {
+			module_put(ir->d.owner);
+			--ir->open;
+		} else {
+			lirc_buffer_clear(ir->buf);
+		}
+		if (ir->task)
+			wake_up_process(ir->task);
+	}
+
+error:
+	if (ir)
+		dev_dbg(ir->d.dev, LOGHEAD "open result = %d\n",
+			ir->d.name, ir->d.minor, retval);
+
+	mutex_unlock(&lirc_dev_lock);
+
+	return retval;
+}
+EXPORT_SYMBOL(lirc_dev_fop_open);
+
+int lirc_dev_fop_close(struct inode *inode, struct file *file)
+{
+	struct irctl *ir = irctls[iminor(inode)];
+
+	dev_dbg(ir->d.dev, LOGHEAD "close called\n", ir->d.name, ir->d.minor);
+
+	WARN_ON(mutex_lock_killable(&lirc_dev_lock));
+
+	--ir->open;
+	if (ir->attached) {
+		ir->d.set_use_dec(ir->d.data);
+		module_put(ir->d.owner);
+	} else {
+		cleanup(ir);
+		irctls[ir->d.minor] = NULL;
+		kfree(ir);
+	}
+
+	mutex_unlock(&lirc_dev_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(lirc_dev_fop_close);
+
+unsigned int lirc_dev_fop_poll(struct file *file, poll_table *wait)
+{
+	struct irctl *ir = irctls[iminor(file->f_dentry->d_inode)];
+	unsigned int ret;
+
+	dev_dbg(ir->d.dev, LOGHEAD "poll called\n", ir->d.name, ir->d.minor);
+
+	if (!ir->attached) {
+		mutex_unlock(&ir->irctl_lock);
+		return POLLERR;
+	}
+
+	poll_wait(file, &ir->buf->wait_poll, wait);
+
+	if (ir->buf)
+		if (lirc_buffer_empty(ir->buf))
+			ret = 0;
+		else
+			ret = POLLIN | POLLRDNORM;
+	else
+		ret = POLLERR;
+
+	dev_dbg(ir->d.dev, LOGHEAD "poll result = %d\n",
+		ir->d.name, ir->d.minor, ret);
+
+	return ret;
+}
+EXPORT_SYMBOL(lirc_dev_fop_poll);
+
+int lirc_dev_fop_ioctl(struct inode *inode, struct file *file,
+		       unsigned int cmd, unsigned long arg)
+{
+	unsigned long mode;
+	int result = 0;
+	struct irctl *ir = irctls[iminor(inode)];
+
+	dev_dbg(ir->d.dev, LOGHEAD "ioctl called (0x%x)\n",
+		ir->d.name, ir->d.minor, cmd);
+
+	if (ir->d.minor == NOPLUG || !ir->attached) {
+		dev_dbg(ir->d.dev, LOGHEAD "ioctl result = -ENODEV\n",
+			ir->d.name, ir->d.minor);
+		return -ENODEV;
+	}
+
+	mutex_lock(&ir->irctl_lock);
+
+	switch (cmd) {
+	case LIRC_GET_FEATURES:
+		result = put_user(ir->d.features, (unsigned long *)arg);
+		break;
+	case LIRC_GET_REC_MODE:
+		if (!(ir->d.features & LIRC_CAN_REC_MASK)) {
+			result = -ENOSYS;
+			break;
+		}
+
+		result = put_user(LIRC_REC2MODE
+				  (ir->d.features & LIRC_CAN_REC_MASK),
+				  (unsigned long *)arg);
+		break;
+	case LIRC_SET_REC_MODE:
+		if (!(ir->d.features & LIRC_CAN_REC_MASK)) {
+			result = -ENOSYS;
+			break;
+		}
+
+		result = get_user(mode, (unsigned long *)arg);
+		if (!result && !(LIRC_MODE2REC(mode) & ir->d.features))
+			result = -EINVAL;
+		/*
+		 * FIXME: We should actually set the mode somehow but
+		 * for now, lirc_serial doesn't support mode changing either
+		 */
+		break;
+	case LIRC_GET_LENGTH:
+		result = put_user(ir->d.code_length, (unsigned long *)arg);
+		break;
+	case LIRC_GET_MIN_TIMEOUT:
+		if (!(ir->d.features & LIRC_CAN_SET_REC_TIMEOUT) ||
+		    ir->d.min_timeout == 0) {
+			result = -ENOSYS;
+			break;
+		}
+
+		result = put_user(ir->d.min_timeout, (unsigned long *)arg);
+		break;
+	case LIRC_GET_MAX_TIMEOUT:
+		if (!(ir->d.features & LIRC_CAN_SET_REC_TIMEOUT) ||
+		    ir->d.max_timeout == 0) {
+			result = -ENOSYS;
+			break;
+		}
+
+		result = put_user(ir->d.max_timeout, (unsigned long *)arg);
+		break;
+	default:
+		result = -EINVAL;
+	}
+
+	dev_dbg(ir->d.dev, LOGHEAD "ioctl result = %d\n",
+		ir->d.name, ir->d.minor, result);
+
+	mutex_unlock(&ir->irctl_lock);
+
+	return result;
+}
+EXPORT_SYMBOL(lirc_dev_fop_ioctl);
+
+ssize_t lirc_dev_fop_read(struct file *file,
+			  char *buffer,
+			  size_t length,
+			  loff_t *ppos)
+{
+	struct irctl *ir = irctls[iminor(file->f_dentry->d_inode)];
+	unsigned char buf[ir->chunk_size];
+	int ret = 0, written = 0;
+	DECLARE_WAITQUEUE(wait, current);
+
+	dev_dbg(ir->d.dev, LOGHEAD "read called\n", ir->d.name, ir->d.minor);
+
+	if (mutex_lock_interruptible(&ir->irctl_lock))
+		return -ERESTARTSYS;
+	if (!ir->attached) {
+		mutex_unlock(&ir->irctl_lock);
+		return -ENODEV;
+	}
+
+	if (length % ir->chunk_size) {
+		dev_dbg(ir->d.dev, LOGHEAD "read result = -EINVAL\n",
+			ir->d.name, ir->d.minor);
+		mutex_unlock(&ir->irctl_lock);
+		return -EINVAL;
+	}
+
+	/*
+	 * we add ourselves to the task queue before buffer check
+	 * to avoid losing scan code (in case when queue is awaken somewhere
+	 * between while condition checking and scheduling)
+	 */
+	add_wait_queue(&ir->buf->wait_poll, &wait);
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	/*
+	 * while we didn't provide 'length' bytes, device is opened in blocking
+	 * mode and 'copy_to_user' is happy, wait for data.
+	 */
+	while (written < length && ret == 0) {
+		if (lirc_buffer_empty(ir->buf)) {
+			/* According to the read(2) man page, 'written' can be
+			 * returned as less than 'length', instead of blocking
+			 * again, returning -EWOULDBLOCK, or returning
+			 * -ERESTARTSYS */
+			if (written)
+				break;
+			if (file->f_flags & O_NONBLOCK) {
+				ret = -EWOULDBLOCK;
+				break;
+			}
+			if (signal_pending(current)) {
+				ret = -ERESTARTSYS;
+				break;
+			}
+
+			mutex_unlock(&ir->irctl_lock);
+			schedule();
+			set_current_state(TASK_INTERRUPTIBLE);
+
+			if (mutex_lock_interruptible(&ir->irctl_lock)) {
+				ret = -ERESTARTSYS;
+				break;
+			}
+
+			if (!ir->attached) {
+				ret = -ENODEV;
+				break;
+			}
+		} else {
+			lirc_buffer_read(ir->buf, buf);
+			ret = copy_to_user((void *)buffer+written, buf,
+					   ir->buf->chunk_size);
+			written += ir->buf->chunk_size;
+		}
+	}
+
+	remove_wait_queue(&ir->buf->wait_poll, &wait);
+	set_current_state(TASK_RUNNING);
+	mutex_unlock(&ir->irctl_lock);
+
+	dev_dbg(ir->d.dev, LOGHEAD "read result = %s (%d)\n",
+		ir->d.name, ir->d.minor, ret ? "-EFAULT" : "OK", ret);
+
+	return ret ? ret : written;
+}
+EXPORT_SYMBOL(lirc_dev_fop_read);
+
+void *lirc_get_pdata(struct file *file)
+{
+	void *data = NULL;
+
+	if (file && file->f_dentry && file->f_dentry->d_inode &&
+	    file->f_dentry->d_inode->i_rdev) {
+		struct irctl *ir;
+		ir = irctls[iminor(file->f_dentry->d_inode)];
+		data = ir->d.data;
+	}
+
+	return data;
+}
+EXPORT_SYMBOL(lirc_get_pdata);
+
+
+ssize_t lirc_dev_fop_write(struct file *file, const char *buffer,
+			   size_t length, loff_t *ppos)
+{
+	struct irctl *ir = irctls[iminor(file->f_dentry->d_inode)];
+
+	dev_dbg(ir->d.dev, LOGHEAD "write called\n", ir->d.name, ir->d.minor);
+
+	if (!ir->attached)
+		return -ENODEV;
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL(lirc_dev_fop_write);
+
+
+static int __init lirc_dev_init(void)
+{
+	int retval;
+
+	lirc_class = class_create(THIS_MODULE, "lirc");
+	if (IS_ERR(lirc_class)) {
+		retval = PTR_ERR(lirc_class);
+		printk(KERN_ERR "lirc_dev: class_create failed\n");
+		goto error;
+	}
+
+	retval = alloc_chrdev_region(&lirc_base_dev, 0, MAX_IRCTL_DEVICES,
+				     IRCTL_DEV_NAME);
+	if (retval) {
+		class_destroy(lirc_class);
+		printk(KERN_ERR "lirc_dev: alloc_chrdev_region failed\n");
+		goto error;
+	}
+
+
+	printk(KERN_INFO "lirc_dev: IR Remote Control driver registered, "
+	       "major %d \n", MAJOR(lirc_base_dev));
+
+error:
+	return retval;
+}
+
+
+
+static void __exit lirc_dev_exit(void)
+{
+	class_destroy(lirc_class);
+	unregister_chrdev_region(lirc_base_dev, MAX_IRCTL_DEVICES);
+	printk(KERN_INFO "lirc_dev: module unloaded\n");
+}
+
+module_init(lirc_dev_init);
+module_exit(lirc_dev_exit);
+
+MODULE_DESCRIPTION("LIRC base driver module");
+MODULE_AUTHOR("Artur Lipowski");
+MODULE_LICENSE("GPL");
+
+module_param(debug, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug, "Enable debugging messages");
diff --git a/drivers/media/IR/lirc_dev.h b/drivers/media/IR/lirc_dev.h
new file mode 100644
index 00000000000..4afd96a38a4
--- /dev/null
+++ b/drivers/media/IR/lirc_dev.h
@@ -0,0 +1,226 @@
+/*
+ * LIRC base driver
+ *
+ * by Artur Lipowski <alipowski@interia.pl>
+ *        This code is licensed under GNU GPL
+ *
+ */
+
+#ifndef _LINUX_LIRC_DEV_H
+#define _LINUX_LIRC_DEV_H
+
+#define MAX_IRCTL_DEVICES 4
+#define BUFLEN            16
+
+#define mod(n, div) ((n) % (div))
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/ioctl.h>
+#include <linux/poll.h>
+#include <linux/kfifo.h>
+#include <media/lirc.h>
+
+struct lirc_buffer {
+	wait_queue_head_t wait_poll;
+	spinlock_t fifo_lock;
+	unsigned int chunk_size;
+	unsigned int size; /* in chunks */
+	/* Using chunks instead of bytes is intended to simplify boundary
+	 * checking and should allow for some performance fine tuning later */
+	struct kfifo fifo;
+	u8 fifo_initialized;
+};
+
+static inline void lirc_buffer_clear(struct lirc_buffer *buf)
+{
+	unsigned long flags;
+
+	if (buf->fifo_initialized) {
+		spin_lock_irqsave(&buf->fifo_lock, flags);
+		kfifo_reset(&buf->fifo);
+		spin_unlock_irqrestore(&buf->fifo_lock, flags);
+	} else
+		WARN(1, "calling %s on an uninitialized lirc_buffer\n",
+		     __func__);
+}
+
+static inline int lirc_buffer_init(struct lirc_buffer *buf,
+				    unsigned int chunk_size,
+				    unsigned int size)
+{
+	int ret;
+
+	init_waitqueue_head(&buf->wait_poll);
+	spin_lock_init(&buf->fifo_lock);
+	buf->chunk_size = chunk_size;
+	buf->size = size;
+	ret = kfifo_alloc(&buf->fifo, size * chunk_size, GFP_KERNEL);
+	if (ret == 0)
+		buf->fifo_initialized = 1;
+
+	return ret;
+}
+
+static inline void lirc_buffer_free(struct lirc_buffer *buf)
+{
+	if (buf->fifo_initialized) {
+		kfifo_free(&buf->fifo);
+		buf->fifo_initialized = 0;
+	} else
+		WARN(1, "calling %s on an uninitialized lirc_buffer\n",
+		     __func__);
+}
+
+static inline int lirc_buffer_len(struct lirc_buffer *buf)
+{
+	int len;
+	unsigned long flags;
+
+	spin_lock_irqsave(&buf->fifo_lock, flags);
+	len = kfifo_len(&buf->fifo);
+	spin_unlock_irqrestore(&buf->fifo_lock, flags);
+
+	return len;
+}
+
+static inline int lirc_buffer_full(struct lirc_buffer *buf)
+{
+	return lirc_buffer_len(buf) == buf->size * buf->chunk_size;
+}
+
+static inline int lirc_buffer_empty(struct lirc_buffer *buf)
+{
+	return !lirc_buffer_len(buf);
+}
+
+static inline int lirc_buffer_available(struct lirc_buffer *buf)
+{
+	return buf->size - (lirc_buffer_len(buf) / buf->chunk_size);
+}
+
+static inline unsigned int lirc_buffer_read(struct lirc_buffer *buf,
+					    unsigned char *dest)
+{
+	unsigned int ret = 0;
+
+	if (lirc_buffer_len(buf) >= buf->chunk_size)
+		ret = kfifo_out_locked(&buf->fifo, dest, buf->chunk_size,
+				       &buf->fifo_lock);
+	return ret;
+
+}
+
+static inline unsigned int lirc_buffer_write(struct lirc_buffer *buf,
+					     unsigned char *orig)
+{
+	unsigned int ret;
+
+	ret = kfifo_in_locked(&buf->fifo, orig, buf->chunk_size,
+			      &buf->fifo_lock);
+
+	return ret;
+}
+
+struct lirc_driver {
+	char name[40];
+	int minor;
+	unsigned long code_length;
+	unsigned int buffer_size; /* in chunks holding one code each */
+	int sample_rate;
+	unsigned long features;
+
+	unsigned int chunk_size;
+
+	void *data;
+	int min_timeout;
+	int max_timeout;
+	int (*add_to_buf) (void *data, struct lirc_buffer *buf);
+	struct lirc_buffer *rbuf;
+	int (*set_use_inc) (void *data);
+	void (*set_use_dec) (void *data);
+	struct file_operations *fops;
+	struct device *dev;
+	struct module *owner;
+};
+
+/* name:
+ * this string will be used for logs
+ *
+ * minor:
+ * indicates minor device (/dev/lirc) number for registered driver
+ * if caller fills it with negative value, then the first free minor
+ * number will be used (if available)
+ *
+ * code_length:
+ * length of the remote control key code expressed in bits
+ *
+ * sample_rate:
+ *
+ * data:
+ * it may point to any driver data and this pointer will be passed to
+ * all callback functions
+ *
+ * add_to_buf:
+ * add_to_buf will be called after the specified period of time or
+ * triggered by an external event; this behavior depends on the value
+ * of sample_rate. This function will be called in user context. This
+ * routine should return 0 if data was added to the buffer and
+ * -ENODATA if none was available. This should add some number of bits
+ * evenly divisible by code_length to the buffer
+ *
+ * rbuf:
+ * if not NULL, it will be used as a read buffer, you will have to
+ * write to the buffer by other means, like irq's (see also
+ * lirc_serial.c).
+ *
+ * set_use_inc:
+ * set_use_inc will be called after device is opened
+ *
+ * set_use_dec:
+ * set_use_dec will be called after device is closed
+ *
+ * fops:
+ * file_operations for drivers which don't fit the current driver model.
+ *
+ * Some ioctl's can be directly handled by lirc_dev if the driver's
+ * ioctl function is NULL or if it returns -ENOIOCTLCMD (see also
+ * lirc_serial.c).
+ *
+ * owner:
+ * the module owning this struct
+ *
+ */
+
+
+/* following functions can be called ONLY from user context
+ *
+ * returns a negative value on error, or the minor number
+ * of the registered device on success;
+ * the contents of the structure pointed to by d are copied
+ */
+extern int lirc_register_driver(struct lirc_driver *d);
+
+/* returns negative value on error or 0 if success
+*/
+extern int lirc_unregister_driver(int minor);
+
+/* Returns the private data stored in the lirc_driver
+ * associated with the given device file pointer.
+ */
+void *lirc_get_pdata(struct file *file);
+
+/* default file operations
+ * used by drivers if they override only some operations
+ */
+int lirc_dev_fop_open(struct inode *inode, struct file *file);
+int lirc_dev_fop_close(struct inode *inode, struct file *file);
+unsigned int lirc_dev_fop_poll(struct file *file, poll_table *wait);
+int lirc_dev_fop_ioctl(struct inode *inode, struct file *file,
+		       unsigned int cmd, unsigned long arg);
+ssize_t lirc_dev_fop_read(struct file *file, char *buffer, size_t length,
+			  loff_t *ppos);
+ssize_t lirc_dev_fop_write(struct file *file, const char *buffer, size_t length,
+			   loff_t *ppos);
+
+#endif
diff --git a/include/media/lirc.h b/include/media/lirc.h
new file mode 100644
index 00000000000..8dffd4f47bf
--- /dev/null
+++ b/include/media/lirc.h
@@ -0,0 +1,163 @@
+/*
+ * lirc.h - linux infrared remote control header file
+ * last modified 2010/06/03 by Jarod Wilson
+ */
+
+#ifndef _LINUX_LIRC_H
+#define _LINUX_LIRC_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define PULSE_BIT       0x01000000
+#define PULSE_MASK      0x00FFFFFF
+
+#define LIRC_MODE2_SPACE     0x00000000
+#define LIRC_MODE2_PULSE     0x01000000
+#define LIRC_MODE2_FREQUENCY 0x02000000
+#define LIRC_MODE2_TIMEOUT   0x03000000
+
+#define LIRC_VALUE_MASK      0x00FFFFFF
+#define LIRC_MODE2_MASK      0xFF000000
+
+#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE)
+#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE)
+#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY)
+#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT)
+
+#define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK)
+#define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK)
+
+#define LIRC_IS_SPACE(val) (LIRC_MODE2(val) == LIRC_MODE2_SPACE)
+#define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE)
+#define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY)
+#define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT)
+
+/*** lirc compatible hardware features ***/
+
+#define LIRC_MODE2SEND(x) (x)
+#define LIRC_SEND2MODE(x) (x)
+#define LIRC_MODE2REC(x) ((x) << 16)
+#define LIRC_REC2MODE(x) ((x) >> 16)
+
+#define LIRC_MODE_RAW                  0x00000001
+#define LIRC_MODE_PULSE                0x00000002
+#define LIRC_MODE_MODE2                0x00000004
+#define LIRC_MODE_LIRCCODE             0x00000010
+
+
+#define LIRC_CAN_SEND_RAW              LIRC_MODE2SEND(LIRC_MODE_RAW)
+#define LIRC_CAN_SEND_PULSE            LIRC_MODE2SEND(LIRC_MODE_PULSE)
+#define LIRC_CAN_SEND_MODE2            LIRC_MODE2SEND(LIRC_MODE_MODE2)
+#define LIRC_CAN_SEND_LIRCCODE         LIRC_MODE2SEND(LIRC_MODE_LIRCCODE)
+
+#define LIRC_CAN_SEND_MASK             0x0000003f
+
+#define LIRC_CAN_SET_SEND_CARRIER      0x00000100
+#define LIRC_CAN_SET_SEND_DUTY_CYCLE   0x00000200
+#define LIRC_CAN_SET_TRANSMITTER_MASK  0x00000400
+
+#define LIRC_CAN_REC_RAW               LIRC_MODE2REC(LIRC_MODE_RAW)
+#define LIRC_CAN_REC_PULSE             LIRC_MODE2REC(LIRC_MODE_PULSE)
+#define LIRC_CAN_REC_MODE2             LIRC_MODE2REC(LIRC_MODE_MODE2)
+#define LIRC_CAN_REC_LIRCCODE          LIRC_MODE2REC(LIRC_MODE_LIRCCODE)
+
+#define LIRC_CAN_REC_MASK              LIRC_MODE2REC(LIRC_CAN_SEND_MASK)
+
+#define LIRC_CAN_SET_REC_CARRIER       (LIRC_CAN_SET_SEND_CARRIER << 16)
+#define LIRC_CAN_SET_REC_DUTY_CYCLE    (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16)
+
+#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000
+#define LIRC_CAN_SET_REC_CARRIER_RANGE    0x80000000
+#define LIRC_CAN_GET_REC_RESOLUTION       0x20000000
+#define LIRC_CAN_SET_REC_TIMEOUT          0x10000000
+#define LIRC_CAN_SET_REC_FILTER           0x08000000
+
+#define LIRC_CAN_MEASURE_CARRIER          0x02000000
+
+#define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK)
+#define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK)
+
+#define LIRC_CAN_NOTIFY_DECODE            0x01000000
+
+/*** IOCTL commands for lirc driver ***/
+
+#define LIRC_GET_FEATURES              _IOR('i', 0x00000000, __u32)
+
+#define LIRC_GET_SEND_MODE             _IOR('i', 0x00000001, __u32)
+#define LIRC_GET_REC_MODE              _IOR('i', 0x00000002, __u32)
+#define LIRC_GET_SEND_CARRIER          _IOR('i', 0x00000003, __u32)
+#define LIRC_GET_REC_CARRIER           _IOR('i', 0x00000004, __u32)
+#define LIRC_GET_SEND_DUTY_CYCLE       _IOR('i', 0x00000005, __u32)
+#define LIRC_GET_REC_DUTY_CYCLE        _IOR('i', 0x00000006, __u32)
+#define LIRC_GET_REC_RESOLUTION        _IOR('i', 0x00000007, __u32)
+
+#define LIRC_GET_MIN_TIMEOUT           _IOR('i', 0x00000008, __u32)
+#define LIRC_GET_MAX_TIMEOUT           _IOR('i', 0x00000009, __u32)
+
+#if 0	/* these ioctls are not used at the moment */
+#define LIRC_GET_MIN_FILTER_PULSE      _IOR('i', 0x0000000a, __u32)
+#define LIRC_GET_MAX_FILTER_PULSE      _IOR('i', 0x0000000b, __u32)
+#define LIRC_GET_MIN_FILTER_SPACE      _IOR('i', 0x0000000c, __u32)
+#define LIRC_GET_MAX_FILTER_SPACE      _IOR('i', 0x0000000d, __u32)
+#endif
+
+/* code length in bits, currently only for LIRC_MODE_LIRCCODE */
+#define LIRC_GET_LENGTH                _IOR('i', 0x0000000f, __u32)
+
+#define LIRC_SET_SEND_MODE             _IOW('i', 0x00000011, __u32)
+#define LIRC_SET_REC_MODE              _IOW('i', 0x00000012, __u32)
+/* Note: these can reset the corresponding pulse_width */
+#define LIRC_SET_SEND_CARRIER          _IOW('i', 0x00000013, __u32)
+#define LIRC_SET_REC_CARRIER           _IOW('i', 0x00000014, __u32)
+#define LIRC_SET_SEND_DUTY_CYCLE       _IOW('i', 0x00000015, __u32)
+#define LIRC_SET_REC_DUTY_CYCLE        _IOW('i', 0x00000016, __u32)
+#define LIRC_SET_TRANSMITTER_MASK      _IOW('i', 0x00000017, __u32)
+
+/*
+ * when a timeout != 0 is set the driver will send a
+ * LIRC_MODE2_TIMEOUT data packet, otherwise LIRC_MODE2_TIMEOUT is
+ * never sent, timeout is disabled by default
+ */
+#define LIRC_SET_REC_TIMEOUT           _IOW('i', 0x00000018, __u32)
+
+#if 0	/* these ioctls are not used at the moment */
+/*
+ * pulses shorter than this are filtered out by hardware (software
+ * emulation in lirc_dev?)
+ */
+#define LIRC_SET_REC_FILTER_PULSE      _IOW('i', 0x00000019, __u32)
+/*
+ * spaces shorter than this are filtered out by hardware (software
+ * emulation in lirc_dev?)
+ */
+#define LIRC_SET_REC_FILTER_SPACE      _IOW('i', 0x0000001a, __u32)
+/*
+ * if filter cannot be set independently for pulse/space, this should
+ * be used
+ */
+#define LIRC_SET_REC_FILTER            _IOW('i', 0x0000001b, __u32)
+#endif
+
+/*
+ * to set a range use
+ * LIRC_SET_REC_DUTY_CYCLE_RANGE/LIRC_SET_REC_CARRIER_RANGE with the
+ * lower bound first and later
+ * LIRC_SET_REC_DUTY_CYCLE/LIRC_SET_REC_CARRIER with the upper bound
+ */
+
+#define LIRC_SET_REC_DUTY_CYCLE_RANGE  _IOW('i', 0x0000001e, __u32)
+#define LIRC_SET_REC_CARRIER_RANGE     _IOW('i', 0x0000001f, __u32)
+
+#define LIRC_NOTIFY_DECODE             _IO('i', 0x00000020)
+
+#if 0	/* these ioctls are not used at the moment */
+/*
+ * from the next key press on the driver will send
+ * LIRC_MODE2_FREQUENCY packets
+ */
+#define LIRC_MEASURE_CARRIER_ENABLE    _IO('i', 0x00000021)
+#define LIRC_MEASURE_CARRIER_DISABLE   _IO('i', 0x00000022)
+#endif
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From ca4146985db7cbb97816e9b961b8db79e63d9e86 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Sat, 3 Jul 2010 01:07:53 -0300
Subject: V4L/DVB: IR: add ir-core to lirc userspace decoder bridge driver

v2: copy of buffer data from userspace done inside this plugin/driver,
keeping the actual drivers minimal, and more flexible in what we can
deliver to them later on (they may be fed from within kernelspace later
on, by an in-kernel IR encoder).

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/IR/Kconfig         |  10 ++
 drivers/media/IR/Makefile        |   1 +
 drivers/media/IR/ir-core-priv.h  |  13 ++
 drivers/media/IR/ir-lirc-codec.c | 284 +++++++++++++++++++++++++++++++++++++++
 drivers/media/IR/ir-raw-event.c  |   1 +
 drivers/media/IR/ir-sysfs.c      |   1 +
 include/media/rc-map.h           |   4 +-
 7 files changed, 313 insertions(+), 1 deletion(-)
 create mode 100644 drivers/media/IR/ir-lirc-codec.c

(limited to 'include')

diff --git a/drivers/media/IR/Kconfig b/drivers/media/IR/Kconfig
index 5d2c37dcf11..40094c007ac 100644
--- a/drivers/media/IR/Kconfig
+++ b/drivers/media/IR/Kconfig
@@ -69,6 +69,16 @@ config IR_SONY_DECODER
 	   Enable this option if you have an infrared remote control which
 	   uses the Sony protocol, and you need software decoding support.
 
+config IR_LIRC_CODEC
+	tristate "Enable IR to LIRC bridge"
+	depends on IR_CORE
+	depends on LIRC
+	default y
+
+	---help---
+	   Enable this option to pass raw IR to and from userspace via
+	   the LIRC interface.
+
 config IR_IMON
 	tristate "SoundGraph iMON Receiver and Display"
 	depends on USB_ARCH_HAS_HCD
diff --git a/drivers/media/IR/Makefile b/drivers/media/IR/Makefile
index 3ba00bb8bea..2ae4f3abfdb 100644
--- a/drivers/media/IR/Makefile
+++ b/drivers/media/IR/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_IR_RC5_DECODER) += ir-rc5-decoder.o
 obj-$(CONFIG_IR_RC6_DECODER) += ir-rc6-decoder.o
 obj-$(CONFIG_IR_JVC_DECODER) += ir-jvc-decoder.o
 obj-$(CONFIG_IR_SONY_DECODER) += ir-sony-decoder.o
+obj-$(CONFIG_IR_LIRC_CODEC) += ir-lirc-codec.o
 
 # stand-alone IR receivers/transmitters
 obj-$(CONFIG_IR_IMON) += imon.o
diff --git a/drivers/media/IR/ir-core-priv.h b/drivers/media/IR/ir-core-priv.h
index 0a82b22d382..babd52061bc 100644
--- a/drivers/media/IR/ir-core-priv.h
+++ b/drivers/media/IR/ir-core-priv.h
@@ -73,6 +73,11 @@ struct ir_raw_event_ctrl {
 		bool first;
 		bool toggle;
 	} jvc;
+	struct lirc_codec {
+		struct ir_input_dev *ir_dev;
+		struct lirc_driver *drv;
+		int lircdata;
+	} lirc;
 };
 
 /* macros for IR decoders */
@@ -164,4 +169,12 @@ void ir_raw_init(void);
 #define load_sony_decode()	0
 #endif
 
+/* from ir-lirc-codec.c */
+#ifdef CONFIG_IR_LIRC_CODEC_MODULE
+#define load_lirc_codec()	request_module("ir-lirc-codec")
+#else
+#define load_lirc_codec()	0
+#endif
+
+
 #endif /* _IR_RAW_EVENT */
diff --git a/drivers/media/IR/ir-lirc-codec.c b/drivers/media/IR/ir-lirc-codec.c
new file mode 100644
index 00000000000..aff31d1b13d
--- /dev/null
+++ b/drivers/media/IR/ir-lirc-codec.c
@@ -0,0 +1,284 @@
+/* ir-lirc-codec.c - ir-core to classic lirc interface bridge
+ *
+ * Copyright (C) 2010 by Jarod Wilson <jarod@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <media/lirc.h>
+#include <media/ir-core.h>
+#include "ir-core-priv.h"
+#include "lirc_dev.h"
+
+#define LIRCBUF_SIZE 256
+
+/**
+ * ir_lirc_decode() - Send raw IR data to lirc_dev to be relayed to the
+ *		      lircd userspace daemon for decoding.
+ * @input_dev:	the struct input_dev descriptor of the device
+ * @ev:		the struct ir_raw_event descriptor of the pulse/space
+ *
+ * This function returns -EINVAL if the lirc interfaces aren't wired up.
+ */
+static int ir_lirc_decode(struct input_dev *input_dev, struct ir_raw_event ev)
+{
+	struct ir_input_dev *ir_dev = input_get_drvdata(input_dev);
+
+	if (!(ir_dev->raw->enabled_protocols & IR_TYPE_LIRC))
+		return 0;
+
+	if (!ir_dev->raw->lirc.drv || !ir_dev->raw->lirc.drv->rbuf)
+		return -EINVAL;
+
+	IR_dprintk(2, "LIRC data transfer started (%uus %s)\n",
+		   TO_US(ev.duration), TO_STR(ev.pulse));
+
+	ir_dev->raw->lirc.lircdata += ev.duration / 1000;
+	if (ev.pulse)
+		ir_dev->raw->lirc.lircdata |= PULSE_BIT;
+
+	lirc_buffer_write(ir_dev->raw->lirc.drv->rbuf,
+			  (unsigned char *) &ir_dev->raw->lirc.lircdata);
+	wake_up(&ir_dev->raw->lirc.drv->rbuf->wait_poll);
+
+	ir_dev->raw->lirc.lircdata = 0;
+
+	return 0;
+}
+
+static ssize_t ir_lirc_transmit_ir(struct file *file, const char *buf,
+				   size_t n, loff_t *ppos)
+{
+	struct lirc_codec *lirc;
+	struct ir_input_dev *ir_dev;
+	int *txbuf; /* buffer with values to transmit */
+	int ret = 0, count;
+
+	lirc = lirc_get_pdata(file);
+	if (!lirc)
+		return -EFAULT;
+
+	if (n % sizeof(int))
+		return -EINVAL;
+
+	count = n / sizeof(int);
+	if (count > LIRCBUF_SIZE || count % 2 == 0)
+		return -EINVAL;
+
+	txbuf = kzalloc(sizeof(int) * LIRCBUF_SIZE, GFP_KERNEL);
+	if (!txbuf)
+		return -ENOMEM;
+
+	if (copy_from_user(txbuf, buf, n)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ir_dev = lirc->ir_dev;
+	if (!ir_dev) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (ir_dev->props && ir_dev->props->tx_ir)
+		ret = ir_dev->props->tx_ir(ir_dev->props->priv, txbuf, (u32)n);
+
+out:
+	kfree(txbuf);
+	return ret;
+}
+
+static int ir_lirc_ioctl(struct inode *node, struct file *filep,
+			 unsigned int cmd, unsigned long arg)
+{
+	struct lirc_codec *lirc;
+	struct ir_input_dev *ir_dev;
+	int ret = 0;
+	void *drv_data;
+	__u32 val;	/* ioctl payloads are declared as __u32 in lirc.h */
+
+	lirc = lirc_get_pdata(filep);
+	if (!lirc)
+		return -EFAULT;
+
+	ir_dev = lirc->ir_dev;
+	if (!ir_dev || !ir_dev->props || !ir_dev->props->priv)
+		return -EFAULT;
+
+	drv_data = ir_dev->props->priv;
+
+	switch (cmd) {
+	case LIRC_SET_TRANSMITTER_MASK:
+		ret = get_user(val, (__u32 __user *)arg);
+		if (ret)
+			return ret;
+
+		if (ir_dev->props && ir_dev->props->s_tx_mask)
+			ret = ir_dev->props->s_tx_mask(drv_data, val);
+		else
+			return -EINVAL;
+		break;
+
+	case LIRC_SET_SEND_CARRIER:
+		ret = get_user(val, (__u32 __user *)arg);
+		if (ret)
+			return ret;
+
+		if (ir_dev->props && ir_dev->props->s_tx_carrier)
+			ir_dev->props->s_tx_carrier(drv_data, val);
+		else
+			return -EINVAL;
+		break;
+
+	case LIRC_GET_SEND_MODE:
+		val = LIRC_CAN_SEND_PULSE & LIRC_CAN_SEND_MASK;
+		ret = put_user(val, (__u32 __user *)arg); /* 4 bytes, not a long */
+		break;
+
+	case LIRC_SET_SEND_MODE:
+		ret = get_user(val, (__u32 __user *)arg);
+		if (ret)
+			return ret;
+
+		if (val != (LIRC_MODE_PULSE & LIRC_CAN_SEND_MASK))
+			return -EINVAL;
+		break;
+
+	default:	/* anything else is handled by the generic lirc_dev code */
+		return lirc_dev_fop_ioctl(node, filep, cmd, arg);
+	}
+
+	return ret;
+}
+
+static int ir_lirc_open(void *data)
+{
+	return 0;
+}
+
+static void ir_lirc_close(void *data)
+{
+	return;
+}
+
+static struct file_operations lirc_fops = {
+	.owner		= THIS_MODULE,
+	.write		= ir_lirc_transmit_ir,
+	.ioctl		= ir_lirc_ioctl,
+	.read		= lirc_dev_fop_read,
+	.poll		= lirc_dev_fop_poll,
+	.open		= lirc_dev_fop_open,
+	.release	= lirc_dev_fop_close,
+};
+
+/* Set up a lirc_driver plus read buffer and register them with lirc_dev. */
+static int ir_lirc_register(struct input_dev *input_dev)
+{
+	struct ir_input_dev *ir_dev = input_get_drvdata(input_dev);
+	struct lirc_driver *drv;
+	struct lirc_buffer *rbuf;
+	int rc = -ENOMEM;
+
+	drv = kzalloc(sizeof(struct lirc_driver), GFP_KERNEL);
+	if (!drv)
+		return rc;
+
+	rbuf = kzalloc(sizeof(struct lirc_buffer), GFP_KERNEL);
+	if (!rbuf)	/* test the buffer just allocated, not drv again */
+		goto rbuf_alloc_failed;
+
+	rc = lirc_buffer_init(rbuf, sizeof(int), LIRCBUF_SIZE);
+	if (rc)
+		goto rbuf_init_failed;
+
+	drv->features = LIRC_CAN_REC_MODE2;
+	if (ir_dev->props->tx_ir) {
+		drv->features |= LIRC_CAN_SEND_PULSE;
+		if (ir_dev->props->s_tx_mask)
+			drv->features |= LIRC_CAN_SET_TRANSMITTER_MASK;
+		if (ir_dev->props->s_tx_carrier)
+			drv->features |= LIRC_CAN_SET_SEND_CARRIER;
+	}
+
+	snprintf(drv->name, sizeof(drv->name), "ir-lirc-codec (%s)",
+		 ir_dev->driver_name);
+	drv->minor = -1;	/* negative: ask for the first free minor */
+	drv->data = &ir_dev->raw->lirc;
+	drv->rbuf = rbuf;
+	drv->set_use_inc = &ir_lirc_open;
+	drv->set_use_dec = &ir_lirc_close;
+	drv->code_length = sizeof(struct ir_raw_event) * 8;
+	drv->fops = &lirc_fops;
+	drv->dev = &ir_dev->dev;
+	drv->owner = THIS_MODULE;
+
+	drv->minor = lirc_register_driver(drv);
+	if (drv->minor < 0) {
+		rc = -ENODEV;
+		goto lirc_register_failed;
+	}
+
+	ir_dev->raw->lirc.drv = drv;
+	ir_dev->raw->lirc.ir_dev = ir_dev;
+	ir_dev->raw->lirc.lircdata = PULSE_MASK;
+
+	return 0;
+
+lirc_register_failed:
+	lirc_buffer_free(rbuf);	/* release the kfifo from lirc_buffer_init() */
+rbuf_init_failed:
+	kfree(rbuf);
+rbuf_alloc_failed:
+	kfree(drv);
+
+	return rc;
+}
+
+static int ir_lirc_unregister(struct input_dev *input_dev)
+{
+	struct ir_input_dev *ir_dev = input_get_drvdata(input_dev);
+	struct lirc_codec *lirc = &ir_dev->raw->lirc;
+
+	lirc_unregister_driver(lirc->drv->minor);
+	lirc_buffer_free(lirc->drv->rbuf);
+	kfree(lirc->drv->rbuf);	/* lirc_buffer_free() only drops the kfifo */
+	kfree(lirc->drv);
+	return 0;
+}
+
+static struct ir_raw_handler lirc_handler = {
+	.protocols	= IR_TYPE_LIRC,
+	.decode		= ir_lirc_decode,
+	.raw_register	= ir_lirc_register,
+	.raw_unregister	= ir_lirc_unregister,
+};
+
+static int __init ir_lirc_codec_init(void)
+{
+	ir_raw_handler_register(&lirc_handler);
+
+	printk(KERN_INFO "IR LIRC bridge handler initialized\n");
+	return 0;
+}
+
+static void __exit ir_lirc_codec_exit(void)
+{
+	ir_raw_handler_unregister(&lirc_handler);
+}
+
+module_init(ir_lirc_codec_init);
+module_exit(ir_lirc_codec_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jarod Wilson <jarod@redhat.com>");
+MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
+MODULE_DESCRIPTION("LIRC IR handler bridge");
diff --git a/drivers/media/IR/ir-raw-event.c b/drivers/media/IR/ir-raw-event.c
index 5f98ab82305..6f192ef31db 100644
--- a/drivers/media/IR/ir-raw-event.c
+++ b/drivers/media/IR/ir-raw-event.c
@@ -253,6 +253,7 @@ static void init_decoders(struct work_struct *work)
 	load_rc6_decode();
 	load_jvc_decode();
 	load_sony_decode();
+	load_lirc_codec();
 
 	/* If needed, we may later add some init code. In this case,
 	   it is needed to change the CONFIG_MODULE test at ir-core.h
diff --git a/drivers/media/IR/ir-sysfs.c b/drivers/media/IR/ir-sysfs.c
index f176fbff948..6273047e915 100644
--- a/drivers/media/IR/ir-sysfs.c
+++ b/drivers/media/IR/ir-sysfs.c
@@ -43,6 +43,7 @@ static struct {
 	{ IR_TYPE_RC6,		"rc-6"		},
 	{ IR_TYPE_JVC,		"jvc"		},
 	{ IR_TYPE_SONY,		"sony"		},
+	{ IR_TYPE_LIRC,		"lirc"		},
 };
 
 #define PROTO_NONE	"none"
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index 36ee280d42a..f982144685e 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -17,10 +17,12 @@
 #define IR_TYPE_RC6	(1  << 2)	/* Philips RC6 protocol */
 #define IR_TYPE_JVC	(1  << 3)	/* JVC protocol */
 #define IR_TYPE_SONY	(1  << 4)	/* Sony12/15/20 protocol */
+#define IR_TYPE_LIRC	(1  << 30)	/* Pass raw IR to lirc userspace */
 #define IR_TYPE_OTHER	(1u << 31)
 
 #define IR_TYPE_ALL (IR_TYPE_RC5 | IR_TYPE_NEC  | IR_TYPE_RC6  | \
-		     IR_TYPE_JVC | IR_TYPE_SONY | IR_TYPE_OTHER)
+		     IR_TYPE_JVC | IR_TYPE_SONY | IR_TYPE_LIRC | \
+		     IR_TYPE_OTHER)
 
 struct ir_scancode {
 	u32	scancode;
-- 
cgit v1.2.3-70-g09d2


From 30eb1be718a4753dd1912eb35af4cdaa25cefea9 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Fri, 2 Jul 2010 00:38:09 -0300
Subject: V4L/DVB: IR TX: incoming IR buffer now an int pointer

incoming IR buffer now an int pointer, and not fed from userspace

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/ir-core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/media/ir-core.h b/include/media/ir-core.h
index 9b957af2158..513e60dd101 100644
--- a/include/media/ir-core.h
+++ b/include/media/ir-core.h
@@ -61,7 +61,7 @@ struct ir_dev_props {
 	void			(*close)(void *priv);
 	int			(*s_tx_mask)(void *priv, u32 mask);
 	int			(*s_tx_carrier)(void *priv, u32 carrier);
-	int			(*tx_ir)(void *priv, const char *buf, u32 n);
+	int			(*tx_ir)(void *priv, int *txbuf, u32 n);
 };
 
 struct ir_input_dev {
-- 
cgit v1.2.3-70-g09d2


From 15f135d0cfc1ce762889bb804549da4081087597 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Sat, 3 Jul 2010 01:08:52 -0300
Subject: V4L/DVB: IR: add empty lirc pseudo-keymap

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/IR/keymaps/Makefile  |  1 +
 drivers/media/IR/keymaps/rc-lirc.c | 41 ++++++++++++++++++++++++++++++++++++++
 include/media/rc-map.h             |  1 +
 3 files changed, 43 insertions(+)
 create mode 100644 drivers/media/IR/keymaps/rc-lirc.c

(limited to 'include')

diff --git a/drivers/media/IR/keymaps/Makefile b/drivers/media/IR/keymaps/Makefile
index c3def729d75..86d3d1f2eaa 100644
--- a/drivers/media/IR/keymaps/Makefile
+++ b/drivers/media/IR/keymaps/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \
 			rc-kaiomy.o \
 			rc-kworld-315u.o \
 			rc-kworld-plus-tv-analog.o \
+			rc-lirc.o \
 			rc-manli.o \
 			rc-msi-tvanywhere.o \
 			rc-msi-tvanywhere-plus.o \
diff --git a/drivers/media/IR/keymaps/rc-lirc.c b/drivers/media/IR/keymaps/rc-lirc.c
new file mode 100644
index 00000000000..43fcf903508
--- /dev/null
+++ b/drivers/media/IR/keymaps/rc-lirc.c
@@ -0,0 +1,41 @@
+/* rc-lirc.c - Empty dummy keytable, for use when it's preferred to pass
+ * all raw IR data to the lirc userspace decoder.
+ *
+ * Copyright (c) 2010 by Jarod Wilson <jarod@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <media/ir-core.h>
+
+static struct ir_scancode lirc[] = {
+	{ },
+};
+
+static struct rc_keymap lirc_map = {
+	.map = {
+		.scan    = lirc,
+		.size    = ARRAY_SIZE(lirc),
+		.ir_type = IR_TYPE_LIRC,
+		.name    = RC_MAP_LIRC,
+	}
+};
+
+static int __init init_rc_map_lirc(void)
+{
+	return ir_register_map(&lirc_map);
+}
+
+static void __exit exit_rc_map_lirc(void)
+{
+	ir_unregister_map(&lirc_map);
+}
+
+module_init(init_rc_map_lirc)
+module_exit(exit_rc_map_lirc)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jarod Wilson <jarod@redhat.com>");
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index f982144685e..a329858c4b4 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -92,6 +92,7 @@ void rc_map_init(void);
 #define RC_MAP_KAIOMY                    "rc-kaiomy"
 #define RC_MAP_KWORLD_315U               "rc-kworld-315u"
 #define RC_MAP_KWORLD_PLUS_TV_ANALOG     "rc-kworld-plus-tv-analog"
+#define RC_MAP_LIRC                      "rc-lirc"
 #define RC_MAP_MANLI                     "rc-manli"
 #define RC_MAP_MSI_TVANYWHERE_PLUS       "rc-msi-tvanywhere-plus"
 #define RC_MAP_MSI_TVANYWHERE            "rc-msi-tvanywhere"
-- 
cgit v1.2.3-70-g09d2


From 33c38283f03d8ea0358229fc03c1beebe67aed0e Mon Sep 17 00:00:00 2001
From: Pawel Osciak <p.osciak@samsung.com>
Date: Tue, 11 May 2010 10:36:28 -0300
Subject: V4L/DVB: videobuf: rename videobuf_alloc to videobuf_alloc_vb

These functions allocate videobuf_buffer structures only. Renaming in order
to prevent confusion with functions allocating actual video buffer memory.

Rename the functions in videobuf-core.h videobuf-dma-sg.c as well.

Signed-off-by: Pawel Osciak <p.osciak@samsung.com>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/videobuf-core.c       | 14 +++++++-------
 drivers/media/video/videobuf-dma-contig.c |  4 ++--
 drivers/media/video/videobuf-dma-sg.c     |  6 +++---
 drivers/media/video/videobuf-vmalloc.c    |  4 ++--
 include/media/videobuf-core.h             |  4 ++--
 5 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/videobuf-core.c b/drivers/media/video/videobuf-core.c
index 7d3378437de..4d565838795 100644
--- a/drivers/media/video/videobuf-core.c
+++ b/drivers/media/video/videobuf-core.c
@@ -52,18 +52,18 @@ MODULE_LICENSE("GPL");
 #define CALL(q, f, arg...)						\
 	((q->int_ops->f) ? q->int_ops->f(arg) : 0)
 
-struct videobuf_buffer *videobuf_alloc(struct videobuf_queue *q)
+struct videobuf_buffer *videobuf_alloc_vb(struct videobuf_queue *q)
 {
 	struct videobuf_buffer *vb;
 
 	BUG_ON(q->msize < sizeof(*vb));
 
-	if (!q->int_ops || !q->int_ops->alloc) {
+	if (!q->int_ops || !q->int_ops->alloc_vb) {
 		printk(KERN_ERR "No specific ops defined!\n");
 		BUG();
 	}
 
-	vb = q->int_ops->alloc(q->msize);
+	vb = q->int_ops->alloc_vb(q->msize);
 	if (NULL != vb) {
 		init_waitqueue_head(&vb->done);
 		vb->magic = MAGIC_BUFFER;
@@ -71,7 +71,7 @@ struct videobuf_buffer *videobuf_alloc(struct videobuf_queue *q)
 
 	return vb;
 }
-EXPORT_SYMBOL_GPL(videobuf_alloc);
+EXPORT_SYMBOL_GPL(videobuf_alloc_vb);
 
 #define WAITON_CONDITION (vb->state != VIDEOBUF_ACTIVE &&\
 				vb->state != VIDEOBUF_QUEUED)
@@ -359,7 +359,7 @@ int __videobuf_mmap_setup(struct videobuf_queue *q,
 
 	/* Allocate and initialize buffers */
 	for (i = 0; i < bcount; i++) {
-		q->bufs[i] = videobuf_alloc(q);
+		q->bufs[i] = videobuf_alloc_vb(q);
 
 		if (NULL == q->bufs[i])
 			break;
@@ -766,7 +766,7 @@ static ssize_t videobuf_read_zerocopy(struct videobuf_queue *q,
 	MAGIC_CHECK(q->int_ops->magic, MAGIC_QTYPE_OPS);
 
 	/* setup stuff */
-	q->read_buf = videobuf_alloc(q);
+	q->read_buf = videobuf_alloc_vb(q);
 	if (NULL == q->read_buf)
 		return -ENOMEM;
 
@@ -871,7 +871,7 @@ ssize_t videobuf_read_one(struct videobuf_queue *q,
 	if (NULL == q->read_buf) {
 		/* need to capture a new frame */
 		retval = -ENOMEM;
-		q->read_buf = videobuf_alloc(q);
+		q->read_buf = videobuf_alloc_vb(q);
 
 		dprintk(1, "video alloc=0x%p\n", q->read_buf);
 		if (NULL == q->read_buf)
diff --git a/drivers/media/video/videobuf-dma-contig.c b/drivers/media/video/videobuf-dma-contig.c
index 74730c624cf..98e292c3518 100644
--- a/drivers/media/video/videobuf-dma-contig.c
+++ b/drivers/media/video/videobuf-dma-contig.c
@@ -190,7 +190,7 @@ static int videobuf_dma_contig_user_get(struct videobuf_dma_contig_memory *mem,
 	return ret;
 }
 
-static struct videobuf_buffer *__videobuf_alloc(size_t size)
+static struct videobuf_buffer *__videobuf_alloc_vb(size_t size)
 {
 	struct videobuf_dma_contig_memory *mem;
 	struct videobuf_buffer *vb;
@@ -338,7 +338,7 @@ error:
 static struct videobuf_qtype_ops qops = {
 	.magic        = MAGIC_QTYPE_OPS,
 
-	.alloc        = __videobuf_alloc,
+	.alloc_vb     = __videobuf_alloc_vb,
 	.iolock       = __videobuf_iolock,
 	.mmap_mapper  = __videobuf_mmap_mapper,
 	.vaddr        = __videobuf_to_vaddr,
diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c
index 8359e6badd3..a9b10917857 100644
--- a/drivers/media/video/videobuf-dma-sg.c
+++ b/drivers/media/video/videobuf-dma-sg.c
@@ -428,7 +428,7 @@ static const struct vm_operations_struct videobuf_vm_ops = {
 	struct videobuf_dma_sg_memory
  */
 
-static struct videobuf_buffer *__videobuf_alloc(size_t size)
+static struct videobuf_buffer *__videobuf_alloc_vb(size_t size)
 {
 	struct videobuf_dma_sg_memory *mem;
 	struct videobuf_buffer *vb;
@@ -638,7 +638,7 @@ done:
 static struct videobuf_qtype_ops sg_ops = {
 	.magic        = MAGIC_QTYPE_OPS,
 
-	.alloc        = __videobuf_alloc,
+	.alloc_vb     = __videobuf_alloc_vb,
 	.iolock       = __videobuf_iolock,
 	.sync         = __videobuf_sync,
 	.mmap_mapper  = __videobuf_mmap_mapper,
@@ -654,7 +654,7 @@ void *videobuf_sg_alloc(size_t size)
 
 	q.msize = size;
 
-	return videobuf_alloc(&q);
+	return videobuf_alloc_vb(&q);
 }
 EXPORT_SYMBOL_GPL(videobuf_sg_alloc);
 
diff --git a/drivers/media/video/videobuf-vmalloc.c b/drivers/media/video/videobuf-vmalloc.c
index 583728f4c22..cf5be6bfd74 100644
--- a/drivers/media/video/videobuf-vmalloc.c
+++ b/drivers/media/video/videobuf-vmalloc.c
@@ -135,7 +135,7 @@ static const struct vm_operations_struct videobuf_vm_ops = {
 	struct videobuf_dma_sg_memory
  */
 
-static struct videobuf_buffer *__videobuf_alloc(size_t size)
+static struct videobuf_buffer *__videobuf_alloc_vb(size_t size)
 {
 	struct videobuf_vmalloc_memory *mem;
 	struct videobuf_buffer *vb;
@@ -293,7 +293,7 @@ error:
 static struct videobuf_qtype_ops qops = {
 	.magic        = MAGIC_QTYPE_OPS,
 
-	.alloc        = __videobuf_alloc,
+	.alloc_vb     = __videobuf_alloc_vb,
 	.iolock       = __videobuf_iolock,
 	.mmap_mapper  = __videobuf_mmap_mapper,
 	.vaddr        = videobuf_to_vmalloc,
diff --git a/include/media/videobuf-core.h b/include/media/videobuf-core.h
index f91a736c133..a157cd166e6 100644
--- a/include/media/videobuf-core.h
+++ b/include/media/videobuf-core.h
@@ -127,7 +127,7 @@ struct videobuf_queue_ops {
 struct videobuf_qtype_ops {
 	u32                     magic;
 
-	struct videobuf_buffer *(*alloc)(size_t size);
+	struct videobuf_buffer *(*alloc_vb)(size_t size);
 	void *(*vaddr)		(struct videobuf_buffer *buf);
 	int (*iolock)		(struct videobuf_queue *q,
 				 struct videobuf_buffer *vb,
@@ -173,7 +173,7 @@ int videobuf_waiton(struct videobuf_buffer *vb, int non_blocking, int intr);
 int videobuf_iolock(struct videobuf_queue *q, struct videobuf_buffer *vb,
 		struct v4l2_framebuffer *fbuf);
 
-struct videobuf_buffer *videobuf_alloc(struct videobuf_queue *q);
+struct videobuf_buffer *videobuf_alloc_vb(struct videobuf_queue *q);
 
 /* Used on videobuf-dvb */
 void *videobuf_queue_to_vaddr(struct videobuf_queue *q,
-- 
cgit v1.2.3-70-g09d2


From 952684035a91334dbe33b15063514cab5e7c6907 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 11 May 2010 10:36:30 -0300
Subject: V4L/DVB: videobuf: Remove the videobuf_sg_dma_map/unmap functions

Instead of creating dirty wrappers around videobuf_dma_map/unmap that
create a dummy videobuf_queue structure, modify videobuf_dma_map/unmap
to take a device pointer argument and use it directly. The
videobuf_sg_dma_map/unmap then become unused and can be removed.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/common/saa7146_fops.c        |  2 +-
 drivers/media/video/bt8xx/bttv-risc.c      |  2 +-
 drivers/media/video/cx23885/cx23885-core.c |  2 +-
 drivers/media/video/cx88/cx88-alsa.c       |  4 ++--
 drivers/media/video/cx88/cx88-core.c       |  2 +-
 drivers/media/video/omap24xxcam.c          |  2 +-
 drivers/media/video/pxa_camera.c           |  2 +-
 drivers/media/video/saa7134/saa7134-alsa.c | 10 +++++-----
 drivers/media/video/saa7134/saa7134-core.c |  2 +-
 drivers/media/video/videobuf-dma-sg.c      | 32 +++++-------------------------
 drivers/staging/cx25821/cx25821-alsa.c     |  4 ++--
 drivers/staging/cx25821/cx25821-core.c     |  2 +-
 include/media/videobuf-dma-sg.h            | 20 +++++++++++--------
 13 files changed, 34 insertions(+), 52 deletions(-)

(limited to 'include')

diff --git a/drivers/media/common/saa7146_fops.c b/drivers/media/common/saa7146_fops.c
index 7364b9642d0..4da2a54cb8b 100644
--- a/drivers/media/common/saa7146_fops.c
+++ b/drivers/media/common/saa7146_fops.c
@@ -57,7 +57,7 @@ void saa7146_dma_free(struct saa7146_dev *dev,struct videobuf_queue *q,
 	BUG_ON(in_interrupt());
 
 	videobuf_waiton(&buf->vb,0,0);
-	videobuf_dma_unmap(q, dma);
+	videobuf_dma_unmap(q->dev, dma);
 	videobuf_dma_free(dma);
 	buf->vb.state = VIDEOBUF_NEEDS_INIT;
 }
diff --git a/drivers/media/video/bt8xx/bttv-risc.c b/drivers/media/video/bt8xx/bttv-risc.c
index c24b1c100e1..0fa9f39f37a 100644
--- a/drivers/media/video/bt8xx/bttv-risc.c
+++ b/drivers/media/video/bt8xx/bttv-risc.c
@@ -583,7 +583,7 @@ bttv_dma_free(struct videobuf_queue *q,struct bttv *btv, struct bttv_buffer *buf
 
 	BUG_ON(in_interrupt());
 	videobuf_waiton(&buf->vb,0,0);
-	videobuf_dma_unmap(q, dma);
+	videobuf_dma_unmap(q->dev, dma);
 	videobuf_dma_free(dma);
 	btcx_riscmem_free(btv->c.pci,&buf->bottom);
 	btcx_riscmem_free(btv->c.pci,&buf->top);
diff --git a/drivers/media/video/cx23885/cx23885-core.c b/drivers/media/video/cx23885/cx23885-core.c
index 0dde57e96d3..161ae7316c9 100644
--- a/drivers/media/video/cx23885/cx23885-core.c
+++ b/drivers/media/video/cx23885/cx23885-core.c
@@ -1142,7 +1142,7 @@ void cx23885_free_buffer(struct videobuf_queue *q, struct cx23885_buffer *buf)
 
 	BUG_ON(in_interrupt());
 	videobuf_waiton(&buf->vb, 0, 0);
-	videobuf_dma_unmap(q, dma);
+	videobuf_dma_unmap(q->dev, dma);
 	videobuf_dma_free(dma);
 	btcx_riscmem_free(to_pci_dev(q->dev), &buf->risc);
 	buf->vb.state = VIDEOBUF_NEEDS_INIT;
diff --git a/drivers/media/video/cx88/cx88-alsa.c b/drivers/media/video/cx88/cx88-alsa.c
index 33082c96745..07fe905f657 100644
--- a/drivers/media/video/cx88/cx88-alsa.c
+++ b/drivers/media/video/cx88/cx88-alsa.c
@@ -283,7 +283,7 @@ static int dsp_buffer_free(snd_cx88_card_t *chip)
 	BUG_ON(!chip->dma_size);
 
 	dprintk(2,"Freeing buffer\n");
-	videobuf_sg_dma_unmap(&chip->pci->dev, chip->dma_risc);
+	videobuf_dma_unmap(&chip->pci->dev, chip->dma_risc);
 	videobuf_dma_free(chip->dma_risc);
 	btcx_riscmem_free(chip->pci,&chip->buf->risc);
 	kfree(chip->buf);
@@ -409,7 +409,7 @@ static int snd_cx88_hw_params(struct snd_pcm_substream * substream,
 	if (ret < 0)
 		goto error;
 
-	ret = videobuf_sg_dma_map(&chip->pci->dev, dma);
+	ret = videobuf_dma_map(&chip->pci->dev, dma);
 	if (ret < 0)
 		goto error;
 
diff --git a/drivers/media/video/cx88/cx88-core.c b/drivers/media/video/cx88/cx88-core.c
index 8b21457111b..85eb266fb35 100644
--- a/drivers/media/video/cx88/cx88-core.c
+++ b/drivers/media/video/cx88/cx88-core.c
@@ -218,7 +218,7 @@ cx88_free_buffer(struct videobuf_queue *q, struct cx88_buffer *buf)
 
 	BUG_ON(in_interrupt());
 	videobuf_waiton(&buf->vb,0,0);
-	videobuf_dma_unmap(q, dma);
+	videobuf_dma_unmap(q->dev, dma);
 	videobuf_dma_free(dma);
 	btcx_riscmem_free(to_pci_dev(q->dev), &buf->risc);
 	buf->vb.state = VIDEOBUF_NEEDS_INIT;
diff --git a/drivers/media/video/omap24xxcam.c b/drivers/media/video/omap24xxcam.c
index f85b2ed8a2d..926a5aa6f7f 100644
--- a/drivers/media/video/omap24xxcam.c
+++ b/drivers/media/video/omap24xxcam.c
@@ -426,7 +426,7 @@ static void omap24xxcam_vbq_release(struct videobuf_queue *vbq,
 			     dma->direction);
 		dma->direction = DMA_NONE;
 	} else {
-		videobuf_dma_unmap(vbq, videobuf_to_dma(vb));
+		videobuf_dma_unmap(vbq->dev, videobuf_to_dma(vb));
 		videobuf_dma_free(videobuf_to_dma(vb));
 	}
 
diff --git a/drivers/media/video/pxa_camera.c b/drivers/media/video/pxa_camera.c
index fb242f6cfb1..5835acf7fa7 100644
--- a/drivers/media/video/pxa_camera.c
+++ b/drivers/media/video/pxa_camera.c
@@ -276,7 +276,7 @@ static void free_buffer(struct videobuf_queue *vq, struct pxa_buffer *buf)
 	 * longer in STATE_QUEUED or STATE_ACTIVE
 	 */
 	videobuf_waiton(&buf->vb, 0, 0);
-	videobuf_dma_unmap(vq, dma);
+	videobuf_dma_unmap(vq->dev, dma);
 	videobuf_dma_free(dma);
 
 	for (i = 0; i < ARRAY_SIZE(buf->dmas); i++) {
diff --git a/drivers/media/video/saa7134/saa7134-alsa.c b/drivers/media/video/saa7134/saa7134-alsa.c
index d3bd82ad010..5bca2abb31e 100644
--- a/drivers/media/video/saa7134/saa7134-alsa.c
+++ b/drivers/media/video/saa7134/saa7134-alsa.c
@@ -630,7 +630,7 @@ static int snd_card_saa7134_hw_params(struct snd_pcm_substream * substream,
 	/* release the old buffer */
 	if (substream->runtime->dma_area) {
 		saa7134_pgtable_free(dev->pci, &dev->dmasound.pt);
-		videobuf_sg_dma_unmap(&dev->pci->dev, &dev->dmasound.dma);
+		videobuf_dma_unmap(&dev->pci->dev, &dev->dmasound.dma);
 		dsp_buffer_free(dev);
 		substream->runtime->dma_area = NULL;
 	}
@@ -646,12 +646,12 @@ static int snd_card_saa7134_hw_params(struct snd_pcm_substream * substream,
 		return err;
 	}
 
-	if (0 != (err = videobuf_sg_dma_map(&dev->pci->dev, &dev->dmasound.dma))) {
+	if (0 != (err = videobuf_dma_map(&dev->pci->dev, &dev->dmasound.dma))) {
 		dsp_buffer_free(dev);
 		return err;
 	}
 	if (0 != (err = saa7134_pgtable_alloc(dev->pci,&dev->dmasound.pt))) {
-		videobuf_sg_dma_unmap(&dev->pci->dev, &dev->dmasound.dma);
+		videobuf_dma_unmap(&dev->pci->dev, &dev->dmasound.dma);
 		dsp_buffer_free(dev);
 		return err;
 	}
@@ -660,7 +660,7 @@ static int snd_card_saa7134_hw_params(struct snd_pcm_substream * substream,
 						dev->dmasound.dma.sglen,
 						0))) {
 		saa7134_pgtable_free(dev->pci, &dev->dmasound.pt);
-		videobuf_sg_dma_unmap(&dev->pci->dev, &dev->dmasound.dma);
+		videobuf_dma_unmap(&dev->pci->dev, &dev->dmasound.dma);
 		dsp_buffer_free(dev);
 		return err;
 	}
@@ -696,7 +696,7 @@ static int snd_card_saa7134_hw_free(struct snd_pcm_substream * substream)
 
 	if (substream->runtime->dma_area) {
 		saa7134_pgtable_free(dev->pci, &dev->dmasound.pt);
-		videobuf_sg_dma_unmap(&dev->pci->dev, &dev->dmasound.dma);
+		videobuf_dma_unmap(&dev->pci->dev, &dev->dmasound.dma);
 		dsp_buffer_free(dev);
 		substream->runtime->dma_area = NULL;
 	}
diff --git a/drivers/media/video/saa7134/saa7134-core.c b/drivers/media/video/saa7134/saa7134-core.c
index 90f23188129..40bc635e8a3 100644
--- a/drivers/media/video/saa7134/saa7134-core.c
+++ b/drivers/media/video/saa7134/saa7134-core.c
@@ -256,7 +256,7 @@ void saa7134_dma_free(struct videobuf_queue *q,struct saa7134_buf *buf)
 	BUG_ON(in_interrupt());
 
 	videobuf_waiton(&buf->vb,0,0);
-	videobuf_dma_unmap(q, dma);
+	videobuf_dma_unmap(q->dev, dma);
 	videobuf_dma_free(dma);
 	buf->vb.state = VIDEOBUF_NEEDS_INIT;
 }
diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c
index a9b10917857..17b1f89e813 100644
--- a/drivers/media/video/videobuf-dma-sg.c
+++ b/drivers/media/video/videobuf-dma-sg.c
@@ -235,7 +235,7 @@ int videobuf_dma_init_overlay(struct videobuf_dmabuf *dma, int direction,
 }
 EXPORT_SYMBOL_GPL(videobuf_dma_init_overlay);
 
-int videobuf_dma_map(struct videobuf_queue *q, struct videobuf_dmabuf *dma)
+int videobuf_dma_map(struct device *dev, struct videobuf_dmabuf *dma)
 {
 	MAGIC_CHECK(dma->magic, MAGIC_DMABUF);
 	BUG_ON(0 == dma->nr_pages);
@@ -263,7 +263,7 @@ int videobuf_dma_map(struct videobuf_queue *q, struct videobuf_dmabuf *dma)
 		return -ENOMEM;
 	}
 	if (!dma->bus_addr) {
-		dma->sglen = dma_map_sg(q->dev, dma->sglist,
+		dma->sglen = dma_map_sg(dev, dma->sglist,
 					dma->nr_pages, dma->direction);
 		if (0 == dma->sglen) {
 			printk(KERN_WARNING
@@ -279,14 +279,14 @@ int videobuf_dma_map(struct videobuf_queue *q, struct videobuf_dmabuf *dma)
 }
 EXPORT_SYMBOL_GPL(videobuf_dma_map);
 
-int videobuf_dma_unmap(struct videobuf_queue *q, struct videobuf_dmabuf *dma)
+int videobuf_dma_unmap(struct device *dev, struct videobuf_dmabuf *dma)
 {
 	MAGIC_CHECK(dma->magic, MAGIC_DMABUF);
 
 	if (!dma->sglen)
 		return 0;
 
-	dma_unmap_sg(q->dev, dma->sglist, dma->sglen, dma->direction);
+	dma_unmap_sg(dev, dma->sglist, dma->sglen, dma->direction);
 
 	vfree(dma->sglist);
 	dma->sglist = NULL;
@@ -322,28 +322,6 @@ EXPORT_SYMBOL_GPL(videobuf_dma_free);
 
 /* --------------------------------------------------------------------- */
 
-int videobuf_sg_dma_map(struct device *dev, struct videobuf_dmabuf *dma)
-{
-	struct videobuf_queue q;
-
-	q.dev = dev;
-
-	return videobuf_dma_map(&q, dma);
-}
-EXPORT_SYMBOL_GPL(videobuf_sg_dma_map);
-
-int videobuf_sg_dma_unmap(struct device *dev, struct videobuf_dmabuf *dma)
-{
-	struct videobuf_queue q;
-
-	q.dev = dev;
-
-	return videobuf_dma_unmap(&q, dma);
-}
-EXPORT_SYMBOL_GPL(videobuf_sg_dma_unmap);
-
-/* --------------------------------------------------------------------- */
-
 static void videobuf_vm_open(struct vm_area_struct *vma)
 {
 	struct videobuf_mapping *map = vma->vm_private_data;
@@ -520,7 +498,7 @@ static int __videobuf_iolock(struct videobuf_queue *q,
 	default:
 		BUG();
 	}
-	err = videobuf_dma_map(q, &mem->dma);
+	err = videobuf_dma_map(q->dev, &mem->dma);
 	if (0 != err)
 		return err;
 
diff --git a/drivers/staging/cx25821/cx25821-alsa.c b/drivers/staging/cx25821/cx25821-alsa.c
index 1798975a69b..4ce8790b05e 100644
--- a/drivers/staging/cx25821/cx25821-alsa.c
+++ b/drivers/staging/cx25821/cx25821-alsa.c
@@ -331,7 +331,7 @@ static int dsp_buffer_free(struct cx25821_audio_dev *chip)
 	BUG_ON(!chip->dma_size);
 
 	dprintk(2, "Freeing buffer\n");
-	videobuf_sg_dma_unmap(&chip->pci->dev, chip->dma_risc);
+	videobuf_dma_unmap(&chip->pci->dev, chip->dma_risc);
 	videobuf_dma_free(chip->dma_risc);
 	btcx_riscmem_free(chip->pci, &chip->buf->risc);
 	kfree(chip->buf);
@@ -470,7 +470,7 @@ static int snd_cx25821_hw_params(struct snd_pcm_substream *substream,
 	if (ret < 0)
 		goto error;
 
-	ret = videobuf_sg_dma_map(&chip->pci->dev, dma);
+	ret = videobuf_dma_map(&chip->pci->dev, dma);
 	if (ret < 0)
 		goto error;
 
diff --git a/drivers/staging/cx25821/cx25821-core.c b/drivers/staging/cx25821/cx25821-core.c
index be44195783d..c487c19256b 100644
--- a/drivers/staging/cx25821/cx25821-core.c
+++ b/drivers/staging/cx25821/cx25821-core.c
@@ -1320,7 +1320,7 @@ void cx25821_free_buffer(struct videobuf_queue *q, struct cx25821_buffer *buf)
 
 	BUG_ON(in_interrupt());
 	videobuf_waiton(&buf->vb, 0, 0);
-	videobuf_dma_unmap(q, dma);
+	videobuf_dma_unmap(q->dev, dma);
 	videobuf_dma_free(dma);
 	btcx_riscmem_free(to_pci_dev(q->dev), &buf->risc);
 	buf->vb.state = VIDEOBUF_NEEDS_INIT;
diff --git a/include/media/videobuf-dma-sg.h b/include/media/videobuf-dma-sg.h
index a195f3b9c00..80130100e45 100644
--- a/include/media/videobuf-dma-sg.h
+++ b/include/media/videobuf-dma-sg.h
@@ -87,6 +87,16 @@ struct videobuf_dma_sg_memory {
 	struct videobuf_dmabuf  dma;
 };
 
+/*
+ * Scatter-gather DMA buffer API.
+ *
+ * These functions provide a simple way to create a page list and a
+ * scatter-gather list from a kernel, userspace or physical address and map the
+ * memory for DMA operation.
+ *
+ * Despite the name, this is totally unrelated to videobuf, except that
+ * videobuf-dma-sg uses the same API internally.
+ */
 void videobuf_dma_init(struct videobuf_dmabuf *dma);
 int videobuf_dma_init_user(struct videobuf_dmabuf *dma, int direction,
 			   unsigned long data, unsigned long size);
@@ -96,8 +106,8 @@ int videobuf_dma_init_overlay(struct videobuf_dmabuf *dma, int direction,
 			      dma_addr_t addr, int nr_pages);
 int videobuf_dma_free(struct videobuf_dmabuf *dma);
 
-int videobuf_dma_map(struct videobuf_queue *q, struct videobuf_dmabuf *dma);
-int videobuf_dma_unmap(struct videobuf_queue *q, struct videobuf_dmabuf *dma);
+int videobuf_dma_map(struct device *dev, struct videobuf_dmabuf *dma);
+int videobuf_dma_unmap(struct device *dev, struct videobuf_dmabuf *dma);
 struct videobuf_dmabuf *videobuf_to_dma(struct videobuf_buffer *buf);
 
 void *videobuf_sg_alloc(size_t size);
@@ -111,11 +121,5 @@ void videobuf_queue_sg_init(struct videobuf_queue *q,
 			 unsigned int msize,
 			 void *priv);
 
-/*FIXME: these variants are used only on *-alsa code, where videobuf is
- * used without queue
- */
-int videobuf_sg_dma_map(struct device *dev, struct videobuf_dmabuf *dma);
-int videobuf_sg_dma_unmap(struct device *dev, struct videobuf_dmabuf *dma);
-
 #endif /* _VIDEOBUF_DMA_SG_H */
 
-- 
cgit v1.2.3-70-g09d2


From 7181772d8915e6025ee4f2f6c5b16064689646f0 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 11 May 2010 10:36:32 -0300
Subject: V4L/DVB: videobuf: Don't export videobuf_(vmalloc|pages)_to_sg

Those functions are only called inside videobuf-dma-sg.c, make them
static.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/videobuf-dma-sg.c | 18 ++++++++++++++----
 include/media/videobuf-dma-sg.h       | 17 -----------------
 2 files changed, 14 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c
index 17b1f89e813..8924e51408c 100644
--- a/drivers/media/video/videobuf-dma-sg.c
+++ b/drivers/media/video/videobuf-dma-sg.c
@@ -57,7 +57,13 @@ MODULE_LICENSE("GPL");
 
 /* --------------------------------------------------------------------- */
 
-struct scatterlist *videobuf_vmalloc_to_sg(unsigned char *virt, int nr_pages)
+/*
+ * Return a scatterlist for some page-aligned vmalloc()'ed memory
+ * block (NULL on errors).  Memory for the scatterlist is allocated
+ * using vmalloc.  The caller must free the memory with vfree().
+ */
+static struct scatterlist *videobuf_vmalloc_to_sg(unsigned char *virt,
+						  int nr_pages)
 {
 	struct scatterlist *sglist;
 	struct page *pg;
@@ -81,10 +87,14 @@ err:
 	vfree(sglist);
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(videobuf_vmalloc_to_sg);
 
-struct scatterlist *videobuf_pages_to_sg(struct page **pages, int nr_pages,
-					 int offset)
+/*
+ * Return a scatterlist for an array of userpages (NULL on errors).
+ * Memory for the scatterlist is allocated using vmalloc.  The caller
+ * must free the memory with vfree().
+ */
+static struct scatterlist *videobuf_pages_to_sg(struct page **pages,
+						int nr_pages, int offset)
 {
 	struct scatterlist *sglist;
 	int i;
diff --git a/include/media/videobuf-dma-sg.h b/include/media/videobuf-dma-sg.h
index 80130100e45..913860e9c84 100644
--- a/include/media/videobuf-dma-sg.h
+++ b/include/media/videobuf-dma-sg.h
@@ -24,23 +24,6 @@
 
 /* --------------------------------------------------------------------- */
 
-/*
- * Return a scatterlist for some page-aligned vmalloc()'ed memory
- * block (NULL on errors).  Memory for the scatterlist is allocated
- * using kmalloc.  The caller must free the memory.
- */
-struct scatterlist *videobuf_vmalloc_to_sg(unsigned char *virt, int nr_pages);
-
-/*
- * Return a scatterlist for a an array of userpages (NULL on errors).
- * Memory for the scatterlist is allocated using kmalloc.  The caller
- * must free the memory.
- */
-struct scatterlist *videobuf_pages_to_sg(struct page **pages, int nr_pages,
-					 int offset);
-
-/* --------------------------------------------------------------------- */
-
 /*
  * A small set of helper functions to manage buffers (both userland
  * and kernel) for DMA.
-- 
cgit v1.2.3-70-g09d2


From 959794ddc05ab6fbcd458bc093e7f0b92633d052 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 11 May 2010 10:36:33 -0300
Subject: V4L/DVB: videobuf: Remove videobuf_mapping start and end fields

The fields are assigned but never used, remove them.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/videobuf-dma-contig.c | 2 --
 drivers/media/video/videobuf-dma-sg.c     | 2 --
 drivers/media/video/videobuf-vmalloc.c    | 2 --
 include/media/videobuf-core.h             | 2 --
 4 files changed, 8 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/videobuf-dma-contig.c b/drivers/media/video/videobuf-dma-contig.c
index 98e292c3518..372b87efcd0 100644
--- a/drivers/media/video/videobuf-dma-contig.c
+++ b/drivers/media/video/videobuf-dma-contig.c
@@ -280,8 +280,6 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
 		return -ENOMEM;
 
 	buf->map = map;
-	map->start = vma->vm_start;
-	map->end = vma->vm_end;
 	map->q = q;
 
 	buf->baddr = vma->vm_start;
diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c
index 8924e51408c..2d64040594b 100644
--- a/drivers/media/video/videobuf-dma-sg.c
+++ b/drivers/media/video/videobuf-dma-sg.c
@@ -608,8 +608,6 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
 	}
 
 	map->count    = 1;
-	map->start    = vma->vm_start;
-	map->end      = vma->vm_end;
 	map->q        = q;
 	vma->vm_ops   = &videobuf_vm_ops;
 	vma->vm_flags |= VM_DONTEXPAND | VM_RESERVED;
diff --git a/drivers/media/video/videobuf-vmalloc.c b/drivers/media/video/videobuf-vmalloc.c
index cf5be6bfd74..f0d7cb8d4c7 100644
--- a/drivers/media/video/videobuf-vmalloc.c
+++ b/drivers/media/video/videobuf-vmalloc.c
@@ -245,8 +245,6 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
 		return -ENOMEM;
 
 	buf->map = map;
-	map->start = vma->vm_start;
-	map->end   = vma->vm_end;
 	map->q     = q;
 
 	buf->baddr = vma->vm_start;
diff --git a/include/media/videobuf-core.h b/include/media/videobuf-core.h
index a157cd166e6..f2c41cebf45 100644
--- a/include/media/videobuf-core.h
+++ b/include/media/videobuf-core.h
@@ -54,8 +54,6 @@ struct videobuf_queue;
 
 struct videobuf_mapping {
 	unsigned int count;
-	unsigned long start;
-	unsigned long end;
 	struct videobuf_queue *q;
 };
 
-- 
cgit v1.2.3-70-g09d2


From bb6dbe74806a17bcec8396c57ca7fd9a889e3b27 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 11 May 2010 10:36:34 -0300
Subject: V4L/DVB: videobuf: Rename vmalloc fields to vaddr

The videobuf_dmabuf and videobuf_vmalloc_memory structures have a vmalloc
field to store the kernel virtual address of vmalloc'ed buffers. Rename
the field to vaddr.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/cx88/cx88-alsa.c       |  2 +-
 drivers/media/video/saa7134/saa7134-alsa.c |  2 +-
 drivers/media/video/videobuf-dma-sg.c      | 18 +++++++++---------
 drivers/media/video/videobuf-vmalloc.c     | 30 +++++++++++++++---------------
 drivers/staging/cx25821/cx25821-alsa.c     |  2 +-
 include/media/videobuf-dma-sg.h            |  2 +-
 include/media/videobuf-vmalloc.h           |  2 +-
 7 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/cx88/cx88-alsa.c b/drivers/media/video/cx88/cx88-alsa.c
index ba499d04de4..9209d5b87e0 100644
--- a/drivers/media/video/cx88/cx88-alsa.c
+++ b/drivers/media/video/cx88/cx88-alsa.c
@@ -426,7 +426,7 @@ static int snd_cx88_hw_params(struct snd_pcm_substream * substream,
 	chip->buf = buf;
 	chip->dma_risc = dma;
 
-	substream->runtime->dma_area = chip->dma_risc->vmalloc;
+	substream->runtime->dma_area = chip->dma_risc->vaddr;
 	substream->runtime->dma_bytes = chip->dma_size;
 	substream->runtime->dma_addr = 0;
 	return 0;
diff --git a/drivers/media/video/saa7134/saa7134-alsa.c b/drivers/media/video/saa7134/saa7134-alsa.c
index 5bca2abb31e..68b7e8d10de 100644
--- a/drivers/media/video/saa7134/saa7134-alsa.c
+++ b/drivers/media/video/saa7134/saa7134-alsa.c
@@ -669,7 +669,7 @@ static int snd_card_saa7134_hw_params(struct snd_pcm_substream * substream,
 	   byte, but it doesn't work. So I allocate the DMA using the
 	   V4L functions, and force ALSA to use that as the DMA area */
 
-	substream->runtime->dma_area = dev->dmasound.dma.vmalloc;
+	substream->runtime->dma_area = dev->dmasound.dma.vaddr;
 	substream->runtime->dma_bytes = dev->dmasound.bufsize;
 	substream->runtime->dma_addr = 0;
 
diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c
index 2d64040594b..06f9a9c2a39 100644
--- a/drivers/media/video/videobuf-dma-sg.c
+++ b/drivers/media/video/videobuf-dma-sg.c
@@ -211,17 +211,17 @@ int videobuf_dma_init_kernel(struct videobuf_dmabuf *dma, int direction,
 	dprintk(1, "init kernel [%d pages]\n", nr_pages);
 
 	dma->direction = direction;
-	dma->vmalloc = vmalloc_32(nr_pages << PAGE_SHIFT);
-	if (NULL == dma->vmalloc) {
+	dma->vaddr = vmalloc_32(nr_pages << PAGE_SHIFT);
+	if (NULL == dma->vaddr) {
 		dprintk(1, "vmalloc_32(%d pages) failed\n", nr_pages);
 		return -ENOMEM;
 	}
 
 	dprintk(1, "vmalloc is at addr 0x%08lx, size=%d\n",
-				(unsigned long)dma->vmalloc,
+				(unsigned long)dma->vaddr,
 				nr_pages << PAGE_SHIFT);
 
-	memset(dma->vmalloc, 0, nr_pages << PAGE_SHIFT);
+	memset(dma->vaddr, 0, nr_pages << PAGE_SHIFT);
 	dma->nr_pages = nr_pages;
 
 	return 0;
@@ -254,8 +254,8 @@ int videobuf_dma_map(struct device *dev, struct videobuf_dmabuf *dma)
 		dma->sglist = videobuf_pages_to_sg(dma->pages, dma->nr_pages,
 						   dma->offset);
 	}
-	if (dma->vmalloc) {
-		dma->sglist = videobuf_vmalloc_to_sg(dma->vmalloc,
+	if (dma->vaddr) {
+		dma->sglist = videobuf_vmalloc_to_sg(dma->vaddr,
 						     dma->nr_pages);
 	}
 	if (dma->bus_addr) {
@@ -319,8 +319,8 @@ int videobuf_dma_free(struct videobuf_dmabuf *dma)
 		dma->pages = NULL;
 	}
 
-	vfree(dma->vmalloc);
-	dma->vmalloc = NULL;
+	vfree(dma->vaddr);
+	dma->vaddr = NULL;
 
 	if (dma->bus_addr)
 		dma->bus_addr = 0;
@@ -444,7 +444,7 @@ static void *__videobuf_to_vaddr(struct videobuf_buffer *buf)
 
 	MAGIC_CHECK(mem->magic, MAGIC_SG_MEM);
 
-	return mem->dma.vmalloc;
+	return mem->dma.vaddr;
 }
 
 static int __videobuf_iolock(struct videobuf_queue *q,
diff --git a/drivers/media/video/videobuf-vmalloc.c b/drivers/media/video/videobuf-vmalloc.c
index f0d7cb8d4c7..e7fe31d54f0 100644
--- a/drivers/media/video/videobuf-vmalloc.c
+++ b/drivers/media/video/videobuf-vmalloc.c
@@ -102,10 +102,10 @@ static void videobuf_vm_close(struct vm_area_struct *vma)
 				   called with IRQ's disabled
 				 */
 				dprintk(1, "%s: buf[%d] freeing (%p)\n",
-					__func__, i, mem->vmalloc);
+					__func__, i, mem->vaddr);
 
-				vfree(mem->vmalloc);
-				mem->vmalloc = NULL;
+				vfree(mem->vaddr);
+				mem->vaddr = NULL;
 			}
 
 			q->bufs[i]->map   = NULL;
@@ -170,7 +170,7 @@ static int __videobuf_iolock(struct videobuf_queue *q,
 		dprintk(1, "%s memory method MMAP\n", __func__);
 
 		/* All handling should be done by __videobuf_mmap_mapper() */
-		if (!mem->vmalloc) {
+		if (!mem->vaddr) {
 			printk(KERN_ERR "memory is not alloced/mmapped.\n");
 			return -EINVAL;
 		}
@@ -189,13 +189,13 @@ static int __videobuf_iolock(struct videobuf_queue *q,
 		 * read() method.
 		 */
 
-		mem->vmalloc = vmalloc_user(pages);
-		if (!mem->vmalloc) {
+		mem->vaddr = vmalloc_user(pages);
+		if (!mem->vaddr) {
 			printk(KERN_ERR "vmalloc (%d pages) failed\n", pages);
 			return -ENOMEM;
 		}
 		dprintk(1, "vmalloc is at addr %p (%d pages)\n",
-			mem->vmalloc, pages);
+			mem->vaddr, pages);
 
 #if 0
 		int rc;
@@ -254,18 +254,18 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
 	MAGIC_CHECK(mem->magic, MAGIC_VMAL_MEM);
 
 	pages = PAGE_ALIGN(vma->vm_end - vma->vm_start);
-	mem->vmalloc = vmalloc_user(pages);
-	if (!mem->vmalloc) {
+	mem->vaddr = vmalloc_user(pages);
+	if (!mem->vaddr) {
 		printk(KERN_ERR "vmalloc (%d pages) failed\n", pages);
 		goto error;
 	}
-	dprintk(1, "vmalloc is at addr %p (%d pages)\n", mem->vmalloc, pages);
+	dprintk(1, "vmalloc is at addr %p (%d pages)\n", mem->vaddr, pages);
 
 	/* Try to remap memory */
-	retval = remap_vmalloc_range(vma, mem->vmalloc, 0);
+	retval = remap_vmalloc_range(vma, mem->vaddr, 0);
 	if (retval < 0) {
 		printk(KERN_ERR "mmap: remap failed with error %d. ", retval);
-		vfree(mem->vmalloc);
+		vfree(mem->vaddr);
 		goto error;
 	}
 
@@ -317,7 +317,7 @@ void *videobuf_to_vmalloc(struct videobuf_buffer *buf)
 	BUG_ON(!mem);
 	MAGIC_CHECK(mem->magic, MAGIC_VMAL_MEM);
 
-	return mem->vmalloc;
+	return mem->vaddr;
 }
 EXPORT_SYMBOL_GPL(videobuf_to_vmalloc);
 
@@ -339,8 +339,8 @@ void videobuf_vmalloc_free(struct videobuf_buffer *buf)
 
 	MAGIC_CHECK(mem->magic, MAGIC_VMAL_MEM);
 
-	vfree(mem->vmalloc);
-	mem->vmalloc = NULL;
+	vfree(mem->vaddr);
+	mem->vaddr = NULL;
 
 	return;
 }
diff --git a/drivers/staging/cx25821/cx25821-alsa.c b/drivers/staging/cx25821/cx25821-alsa.c
index 0771a6a313a..a43b18816fa 100644
--- a/drivers/staging/cx25821/cx25821-alsa.c
+++ b/drivers/staging/cx25821/cx25821-alsa.c
@@ -492,7 +492,7 @@ static int snd_cx25821_hw_params(struct snd_pcm_substream *substream,
 	chip->buf = buf;
 	chip->dma_risc = dma;
 
-	substream->runtime->dma_area = chip->dma_risc->vmalloc;
+	substream->runtime->dma_area = chip->dma_risc->vaddr;
 	substream->runtime->dma_bytes = chip->dma_size;
 	substream->runtime->dma_addr = 0;
 
diff --git a/include/media/videobuf-dma-sg.h b/include/media/videobuf-dma-sg.h
index 913860e9c84..97e07f46a0f 100644
--- a/include/media/videobuf-dma-sg.h
+++ b/include/media/videobuf-dma-sg.h
@@ -51,7 +51,7 @@ struct videobuf_dmabuf {
 	struct page         **pages;
 
 	/* for kernel buffers */
-	void                *vmalloc;
+	void                *vaddr;
 
 	/* for overlay buffers (pci-pci dma) */
 	dma_addr_t          bus_addr;
diff --git a/include/media/videobuf-vmalloc.h b/include/media/videobuf-vmalloc.h
index 851eb1a2ff2..e19403c18da 100644
--- a/include/media/videobuf-vmalloc.h
+++ b/include/media/videobuf-vmalloc.h
@@ -22,7 +22,7 @@
 struct videobuf_vmalloc_memory {
 	u32                 magic;
 
-	void                *vmalloc;
+	void                *vaddr;
 
 	/* remap_vmalloc_range seems to need to run
 	 * after mmap() on some cases */
-- 
cgit v1.2.3-70-g09d2


From 1b4e21c4f62eae6bdcb3e7bfdfc52171a24f3689 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Thu, 17 Jun 2010 11:11:51 -0300
Subject: V4L/DVB: uvcvideo: Define control information bits using macros

Use the macros instead of hardcoding numerical constants for the
controls information bitfield.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/uvc/uvc_ctrl.c | 12 ++++++------
 include/linux/usb/video.h          |  7 +++++++
 2 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/uvc/uvc_ctrl.c b/drivers/media/video/uvc/uvc_ctrl.c
index bd72100a21d..fa06cf512ec 100644
--- a/drivers/media/video/uvc/uvc_ctrl.c
+++ b/drivers/media/video/uvc/uvc_ctrl.c
@@ -1324,9 +1324,8 @@ static void uvc_ctrl_add_ctrl(struct uvc_device *dev,
 		/* Check if the device control information and length match
 		 * the user supplied information.
 		 */
-		__u32 flags;
 		__le16 size;
-		__u8 inf;
+		__u8 _info;
 
 		ret = uvc_query_ctrl(dev, UVC_GET_LEN, ctrl->entity->id,
 			dev->intfnum, info->selector, (__u8 *)&size, 2);
@@ -1345,7 +1344,7 @@ static void uvc_ctrl_add_ctrl(struct uvc_device *dev,
 		}
 
 		ret = uvc_query_ctrl(dev, UVC_GET_INFO, ctrl->entity->id,
-			dev->intfnum, info->selector, &inf, 1);
+				     dev->intfnum, info->selector, &_info, 1);
 		if (ret < 0) {
 			uvc_trace(UVC_TRACE_CONTROL,
 				"GET_INFO failed on control %pUl/%u (%d).\n",
@@ -1353,9 +1352,10 @@ static void uvc_ctrl_add_ctrl(struct uvc_device *dev,
 			return;
 		}
 
-		flags = info->flags;
-		if (((flags & UVC_CONTROL_GET_CUR) && !(inf & (1 << 0))) ||
-		    ((flags & UVC_CONTROL_SET_CUR) && !(inf & (1 << 1)))) {
+		if (((info->flags & UVC_CONTROL_GET_CUR) &&
+		    !(_info & UVC_CONTROL_CAP_GET)) ||
+		    ((info->flags & UVC_CONTROL_SET_CUR) &&
+		    !(_info & UVC_CONTROL_CAP_SET))) {
 			uvc_trace(UVC_TRACE_CONTROL, "Control %pUl/%u flags "
 				"don't match supported operations.\n",
 				info->entity, info->selector);
diff --git a/include/linux/usb/video.h b/include/linux/usb/video.h
index be436d9ee47..2d5b7fc6a26 100644
--- a/include/linux/usb/video.h
+++ b/include/linux/usb/video.h
@@ -160,5 +160,12 @@
 #define UVC_STATUS_TYPE_CONTROL				1
 #define UVC_STATUS_TYPE_STREAMING			2
 
+/* 4.1.2. Control Capabilities */
+#define UVC_CONTROL_CAP_GET				(1 << 0)
+#define UVC_CONTROL_CAP_SET				(1 << 1)
+#define UVC_CONTROL_CAP_DISABLED			(1 << 2)
+#define UVC_CONTROL_CAP_AUTOUPDATE			(1 << 3)
+#define UVC_CONTROL_CAP_ASYNCHRONOUS			(1 << 4)
+
 #endif /* __LINUX_USB_VIDEO_H */
 
-- 
cgit v1.2.3-70-g09d2


From 7751bdb3a095ad32dd4fcff3443cf8dd4cb1e748 Mon Sep 17 00:00:00 2001
From: Sripathi Kodi <sripathik@in.ibm.com>
Date: Fri, 4 Jun 2010 13:41:26 +0000
Subject: 9p: readdir implementation for 9p2000.L

This patch implements the kernel part of readdir() for 9p2000.L.

    Change from V3: Instead of inode, server now sends qids for each dirent

    SYNOPSIS

    size[4] Treaddir tag[2] fid[4] offset[8] count[4]
    size[4] Rreaddir tag[2] count[4] data[count]

    DESCRIPTION

    The readdir request asks the server to read the directory specified by 'fid'
    at an offset specified by 'offset' and return as many dirent structures as
    possible that fit into count bytes. Each dirent structure is laid out as
    follows.

            qid.type[1]
              the type of the file (directory, etc.), represented as a bit
              vector corresponding to the high 8 bits of the file's mode
              word.

            qid.vers[4]
              version number for given path

            qid.path[8]
              the file server's unique identification for the file

            offset[8]
              offset of the next dirent in the directory.

            type[1]
              type of this directory entry.

            name[256]
              name of this directory entry.

    This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L.
    This function sends P9_TREADDIR command to the server. In response the server
    sends a buffer filled with dirent structures. This is different from the
    existing v9fs_dir_readdir() call which receives stat structures from the server.
    This results in significant speedup of readdir() on large directories.
    For example, doing 'ls >/dev/null' on a directory with 10000 files on my
    laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds
    with the new readdir.

Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_dir.c         | 134 ++++++++++++++++++++++++++++++++++++++++++------
 include/net/9p/9p.h     |  17 ++++++
 include/net/9p/client.h |  18 +++++++
 net/9p/client.c         |  47 +++++++++++++++++
 net/9p/protocol.c       |  27 ++++++++++
 5 files changed, 227 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 36d961f342a..16c8a2a98c1 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -87,29 +87,19 @@ static void p9stat_init(struct p9_wstat *stbuf)
 }
 
 /**
- * v9fs_dir_readdir - read a directory
+ * v9fs_alloc_rdir_buf - Allocate buffer used for read and readdir
  * @filp: opened file structure
- * @dirent: directory structure ???
- * @filldir: function to populate directory structure ???
+ * @buflen: Length in bytes of buffer to allocate
  *
  */
 
-static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+static int v9fs_alloc_rdir_buf(struct file *filp, int buflen)
 {
-	int over;
-	struct p9_wstat st;
-	int err = 0;
-	struct p9_fid *fid;
-	int buflen;
-	int reclen = 0;
 	struct p9_rdir *rdir;
+	struct p9_fid *fid;
+	int err = 0;
 
-	P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
 	fid = filp->private_data;
-
-	buflen = fid->clnt->msize - P9_IOHDRSZ;
-
-	/* allocate rdir on demand */
 	if (!fid->rdir) {
 		rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
 
@@ -128,6 +118,36 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		spin_unlock(&filp->f_dentry->d_lock);
 		kfree(rdir);
 	}
+exit:
+	return err;
+}
+
+/**
+ * v9fs_dir_readdir - read a directory
+ * @filp: opened file structure
+ * @dirent: directory structure ???
+ * @filldir: function to populate directory structure ???
+ *
+ */
+
+static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	int over;
+	struct p9_wstat st;
+	int err = 0;
+	struct p9_fid *fid;
+	int buflen;
+	int reclen = 0;
+	struct p9_rdir *rdir;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
+	fid = filp->private_data;
+
+	buflen = fid->clnt->msize - P9_IOHDRSZ;
+
+	err = v9fs_alloc_rdir_buf(filp, buflen);
+	if (err)
+		goto exit;
 	rdir = (struct p9_rdir *) fid->rdir;
 
 	err = mutex_lock_interruptible(&rdir->mutex);
@@ -176,6 +196,88 @@ exit:
 	return err;
 }
 
+/**
+ * v9fs_dir_readdir_dotl - read a directory
+ * @filp: opened file structure
+ * @dirent: buffer to fill dirent structures
+ * @filldir: function to populate dirent structures
+ *
+ */
+static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
+						filldir_t filldir)
+{
+	int over;
+	int err = 0;
+	struct p9_fid *fid;
+	int buflen;
+	struct p9_rdir *rdir;
+	struct p9_dirent curdirent;
+	u64 oldoffset = 0;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
+	fid = filp->private_data;
+
+	buflen = fid->clnt->msize - P9_READDIRHDRSZ;
+
+	err = v9fs_alloc_rdir_buf(filp, buflen);
+	if (err)
+		goto exit;
+	rdir = (struct p9_rdir *) fid->rdir;
+
+	err = mutex_lock_interruptible(&rdir->mutex);
+	if (err)
+		return err;
+
+	while (err == 0) {
+		if (rdir->tail == rdir->head) {
+			err = p9_client_readdir(fid, rdir->buf, buflen,
+								filp->f_pos);
+			if (err <= 0)
+				goto unlock_and_exit;
+
+			rdir->head = 0;
+			rdir->tail = err;
+		}
+
+		while (rdir->head < rdir->tail) {
+
+			err = p9dirent_read(rdir->buf + rdir->head,
+						buflen - rdir->head, &curdirent,
+						fid->clnt->proto_version);
+			if (err < 0) {
+				P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
+				err = -EIO;
+				goto unlock_and_exit;
+			}
+
+			/* d_off in dirent structure tracks the offset into
+			 * the next dirent in the dir. However, filldir()
+			 * expects offset into the current dirent. Hence
+			 * while calling filldir send the offset from the
+			 * previous dirent structure.
+			 */
+			over = filldir(dirent, curdirent.d_name,
+					strlen(curdirent.d_name),
+					oldoffset, v9fs_qid2ino(&curdirent.qid),
+					curdirent.d_type);
+			oldoffset = curdirent.d_off;
+
+			if (over) {
+				err = 0;
+				goto unlock_and_exit;
+			}
+
+			filp->f_pos = curdirent.d_off;
+			rdir->head += err;
+		}
+	}
+
+unlock_and_exit:
+	mutex_unlock(&rdir->mutex);
+exit:
+	return err;
+}
+
 
 /**
  * v9fs_dir_release - close a directory
@@ -207,7 +309,7 @@ const struct file_operations v9fs_dir_operations = {
 const struct file_operations v9fs_dir_operations_dotl = {
 	.read = generic_read_dir,
 	.llseek = generic_file_llseek,
-	.readdir = v9fs_dir_readdir,
+	.readdir = v9fs_dir_readdir_dotl,
 	.open = v9fs_file_open,
 	.release = v9fs_dir_release,
 };
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 156c26bb8bd..f1b0b310265 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -133,6 +133,8 @@ enum p9_msg_t {
 	P9_RSTATFS,
 	P9_TRENAME = 20,
 	P9_RRENAME,
+	P9_TREADDIR = 40,
+	P9_RREADDIR,
 	P9_TVERSION = 100,
 	P9_RVERSION,
 	P9_TAUTH = 102,
@@ -275,6 +277,9 @@ enum p9_qid_t {
 /* ample room for Twrite/Rread header */
 #define P9_IOHDRSZ	24
 
+/* Room for readdir header */
+#define P9_READDIRHDRSZ	24
+
 /**
  * struct p9_str - length prefixed string type
  * @len: length of the string
@@ -485,6 +490,18 @@ struct p9_rwrite {
 	u32 count;
 };
 
+struct p9_treaddir {
+	u32 fid;
+	u64 offset;
+	u32 count;
+};
+
+struct p9_rreaddir {
+	u32 count;
+	u8 *data;
+};
+
+
 struct p9_tclunk {
 	u32 fid;
 };
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 7dd3ed85c78..2ec93685e6d 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -195,6 +195,21 @@ struct p9_fid {
 	struct list_head dlist;	/* list of all fids attached to a dentry */
 };
 
+/**
+ * struct p9_dirent - directory entry structure
+ * @qid: The p9 server qid for this dirent
+ * @d_off: offset to the next dirent
+ * @d_type: type of file
+ * @d_name: file name
+ */
+
+struct p9_dirent {
+	struct p9_qid qid;
+	u64 d_off;
+	unsigned char d_type;
+	char d_name[256];
+};
+
 int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb);
 int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name);
 int p9_client_version(struct p9_client *);
@@ -217,6 +232,9 @@ int p9_client_read(struct p9_fid *fid, char *data, char __user *udata,
 							u64 offset, u32 count);
 int p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
 							u64 offset, u32 count);
+int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset);
+int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
+							int proto_version);
 struct p9_wstat *p9_client_stat(struct p9_fid *fid);
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst);
 
diff --git a/net/9p/client.c b/net/9p/client.c
index 37c8da07a80..a80357483a4 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1432,3 +1432,50 @@ error:
 }
 EXPORT_SYMBOL(p9_client_rename);
 
+int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
+{
+	int err, rsize, total;
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	char *dataptr;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
+				fid->fid, (long long unsigned) offset, count);
+
+	err = 0;
+	clnt = fid->clnt;
+	total = 0;
+
+	rsize = fid->iounit;
+	if (!rsize || rsize > clnt->msize-P9_READDIRHDRSZ)
+		rsize = clnt->msize - P9_READDIRHDRSZ;
+
+	if (count < rsize)
+		rsize = count;
+
+	req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
+
+	err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto free_and_error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
+
+	if (data)
+		memmove(data, dataptr, count);
+
+	p9_free_req(clnt, req);
+	return count;
+
+free_and_error:
+	p9_free_req(clnt, req);
+error:
+	return err;
+}
+EXPORT_SYMBOL(p9_client_readdir);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 149f8216013..b645c826353 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -580,3 +580,30 @@ void p9pdu_reset(struct p9_fcall *pdu)
 	pdu->offset = 0;
 	pdu->size = 0;
 }
+
+int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
+						int proto_version)
+{
+	struct p9_fcall fake_pdu;
+	int ret;
+	char *nameptr;
+
+	fake_pdu.size = len;
+	fake_pdu.capacity = len;
+	fake_pdu.sdata = buf;
+	fake_pdu.offset = 0;
+
+	ret = p9pdu_readf(&fake_pdu, proto_version, "Qqbs", &dirent->qid,
+			&dirent->d_off, &dirent->d_type, &nameptr);
+	if (ret) {
+		P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret);
+		p9pdu_dump(1, &fake_pdu);
+		goto out;
+	}
+
+	strcpy(dirent->d_name, nameptr);
+
+out:
+	return fake_pdu.offset;
+}
+EXPORT_SYMBOL(p9dirent_read);
-- 
cgit v1.2.3-70-g09d2


From f085312204f384a0277a66c3c48ba8f9edcd58f2 Mon Sep 17 00:00:00 2001
From: Sripathi Kodi <sripathik@in.ibm.com>
Date: Mon, 12 Jul 2010 20:07:23 +0530
Subject: 9p: getattr client implementation for 9P2000.L protocol.

        SYNOPSIS

              size[4] Tgetattr tag[2] fid[4] request_mask[8]

              size[4] Rgetattr tag[2] lstat[n]

           DESCRIPTION

              The getattr transaction inquires about the file identified by fid.
              request_mask is a bit mask that specifies which fields of the
              stat structure the client is interested in.

              The reply will contain a machine-independent directory entry,
              laid out as follows:

                 st_result_mask[8]
                    Bit mask that indicates which fields in the stat structure
                    have been populated by the server

                 qid.type[1]
                    the type of the file (directory, etc.), represented as a bit
                    vector corresponding to the high 8 bits of the file's mode
                    word.

                 qid.vers[4]
                    version number for given path

                 qid.path[8]
                    the file server's unique identification for the file

                 st_mode[4]
                    Permission and flags

                 st_uid[4]
                    User id of owner

                 st_gid[4]
                    Group ID of owner

                 st_nlink[8]
                    Number of hard links

                 st_rdev[8]
                    Device ID (if special file)

                 st_size[8]
                    Size, in bytes

                 st_blksize[8]
                    Block size for file system IO

                 st_blocks[8]
                    Number of file system blocks allocated

                 st_atime_sec[8]
                    Time of last access, seconds

                 st_atime_nsec[8]
                    Time of last access, nanoseconds

                 st_mtime_sec[8]
                    Time of last modification, seconds

                 st_mtime_nsec[8]
                    Time of last modification, nanoseconds

                 st_ctime_sec[8]
                    Time of last status change, seconds

                 st_ctime_nsec[8]
                    Time of last status change, nanoseconds

                 st_btime_sec[8]
                    Time of creation (birth) of file, seconds

                 st_btime_nsec[8]
                    Time of creation (birth) of file, nanoseconds

                 st_gen[8]
                    Inode generation

                 st_data_version[8]
                    Data version number

              request_mask and result_mask bit masks contain the following bits
                 #define P9_STATS_MODE          0x00000001ULL
                 #define P9_STATS_NLINK         0x00000002ULL
                 #define P9_STATS_UID           0x00000004ULL
                 #define P9_STATS_GID           0x00000008ULL
                 #define P9_STATS_RDEV          0x00000010ULL
                 #define P9_STATS_ATIME         0x00000020ULL
                 #define P9_STATS_MTIME         0x00000040ULL
                 #define P9_STATS_CTIME         0x00000080ULL
                 #define P9_STATS_INO           0x00000100ULL
                 #define P9_STATS_SIZE          0x00000200ULL
                 #define P9_STATS_BLOCKS        0x00000400ULL

                 #define P9_STATS_BTIME         0x00000800ULL
                 #define P9_STATS_GEN           0x00001000ULL
                 #define P9_STATS_DATA_VERSION  0x00002000ULL

                 #define P9_STATS_BASIC         0x000007ffULL
                 #define P9_STATS_ALL           0x00003fffULL

        This patch implements the client side of getattr for 9P2000.L. It
        introduces a new structure, p9_stat_dotl, for getting Linux stat
        information along with the QID. The data layout is similar to the
        stat structure in Linux user space, with the following major
        differences:

        inode (st_ino) is not part of data. Instead qid is.

        device (st_dev) is not part of data because this doesn't make sense
        on the client.

        All time variables are 64 bits wide on the wire. The kernel seems to use
        32 bit variables for these fields. However, some architectures have
        used 64 bit variables, and glibc exposes 64 bit variables to user
        space on some architectures. Hence, to be on the safe side, we have made
        these 64 bit in the protocol. Refer to the comments in
        include/asm-generic/stat.h.

        Apart from the bit mask, st_result_mask, there are some additional
        fields: st_btime_sec, st_btime_nsec, st_gen and st_data_version. The
        bit mask is filled in by the server to indicate which stat fields it
        has populated. Currently there is no clean way for the server to
        obtain these additional fields, so it sends back just the basic
        fields.

Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com>
Signed-off-by: Eric Van Hensbegren <ericvh@gmail.com>
---
 fs/9p/v9fs_vfs.h        |   1 +
 fs/9p/vfs_inode.c       | 177 +++++++++++++++++++++++++++++++++++++++++++-----
 fs/9p/vfs_super.c       |  43 +++++++-----
 include/net/9p/9p.h     |  44 ++++++++++++
 include/net/9p/client.h |   3 +
 net/9p/client.c         |  59 ++++++++++++++++
 net/9p/protocol.c       |  28 ++++++++
 7 files changed, 321 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 32ef4009d03..f47c6bbb01b 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -55,6 +55,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode);
 void v9fs_clear_inode(struct inode *inode);
 ino_t v9fs_qid2ino(struct p9_qid *qid);
 void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
+void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *);
 int v9fs_dir_release(struct inode *inode, struct file *filp);
 int v9fs_file_open(struct inode *inode, struct file *file);
 void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 4331b3b5ee1..afcb8d88938 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -396,23 +396,14 @@ void v9fs_clear_inode(struct inode *inode)
 #endif
 }
 
-/**
- * v9fs_inode_from_fid - populate an inode by issuing a attribute request
- * @v9ses: session information
- * @fid: fid to issue attribute request for
- * @sb: superblock on which to create inode
- *
- */
-
 static struct inode *
-v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 	struct super_block *sb)
 {
 	int err, umode;
-	struct inode *ret;
+	struct inode *ret = NULL;
 	struct p9_wstat *st;
 
-	ret = NULL;
 	st = p9_client_stat(fid);
 	if (IS_ERR(st))
 		return ERR_CAST(st);
@@ -433,15 +424,62 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 #endif
 	p9stat_free(st);
 	kfree(st);
-
 	return ret;
-
 error:
 	p9stat_free(st);
 	kfree(st);
 	return ERR_PTR(err);
 }
 
+static struct inode *
+v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+	struct super_block *sb)
+{
+	struct inode *ret = NULL;
+	int err;
+	struct p9_stat_dotl *st;
+
+	st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+	if (IS_ERR(st))
+		return ERR_CAST(st);
+
+	ret = v9fs_get_inode(sb, st->st_mode);
+	if (IS_ERR(ret)) {
+		err = PTR_ERR(ret);
+		goto error;
+	}
+
+	v9fs_stat2inode_dotl(st, ret);
+	ret->i_ino = v9fs_qid2ino(&st->qid);
+#ifdef CONFIG_9P_FSCACHE
+	v9fs_vcookie_set_qid(ret, &st->qid);
+	v9fs_cache_inode_get_cookie(ret);
+#endif
+	kfree(st);
+	return ret;
+error:
+	kfree(st);
+	return ERR_PTR(err);
+}
+
+/**
+ * v9fs_inode_from_fid - Helper routine to populate an inode by
+ * issuing a attribute request
+ * @v9ses: session information
+ * @fid: fid to issue attribute request for
+ * @sb: superblock on which to create inode
+ *
+ */
+static inline struct inode *
+v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+			struct super_block *sb)
+{
+	if (v9fs_proto_dotl(v9ses))
+		return v9fs_inode_dotl(v9ses, fid, sb);
+	else
+		return v9fs_inode(v9ses, fid, sb);
+}
+
 /**
  * v9fs_remove - helper function to remove files and directories
  * @dir: directory inode that is being deleted
@@ -853,6 +891,42 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	return 0;
 }
 
+static int
+v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
+		 struct kstat *stat)
+{
+	int err;
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *fid;
+	struct p9_stat_dotl *st;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
+	err = -EPERM;
+	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+		return simple_getattr(mnt, dentry, stat);
+
+	fid = v9fs_fid_lookup(dentry);
+	if (IS_ERR(fid))
+		return PTR_ERR(fid);
+
+	/* Ask for all the fields in stat structure. Server will return
+	 * whatever it supports
+	 */
+
+	st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
+	if (IS_ERR(st))
+		return PTR_ERR(st);
+
+	v9fs_stat2inode_dotl(st, dentry->d_inode);
+	generic_fillattr(dentry->d_inode, stat);
+	/* Change block size to what the server returned */
+	stat->blksize = st->st_blksize;
+
+	kfree(st);
+	return 0;
+}
+
 /**
  * v9fs_vfs_setattr - set file metadata
  * @dentry: file whose metadata to set
@@ -979,6 +1053,77 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 	inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
 }
 
+/**
+ * v9fs_stat2inode_dotl - populate an inode structure with stat info
+ * @stat: stat structure
+ * @inode: inode to populate
+ * @sb: superblock of filesystem
+ *
+ */
+
+void
+v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
+{
+
+	if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
+		inode->i_atime.tv_sec = stat->st_atime_sec;
+		inode->i_atime.tv_nsec = stat->st_atime_nsec;
+		inode->i_mtime.tv_sec = stat->st_mtime_sec;
+		inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
+		inode->i_ctime.tv_sec = stat->st_ctime_sec;
+		inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+		inode->i_uid = stat->st_uid;
+		inode->i_gid = stat->st_gid;
+		inode->i_nlink = stat->st_nlink;
+		inode->i_mode = stat->st_mode;
+		inode->i_rdev = new_decode_dev(stat->st_rdev);
+
+		if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
+			init_special_inode(inode, inode->i_mode, inode->i_rdev);
+
+		i_size_write(inode, stat->st_size);
+		inode->i_blocks = stat->st_blocks;
+	} else {
+		if (stat->st_result_mask & P9_STATS_ATIME) {
+			inode->i_atime.tv_sec = stat->st_atime_sec;
+			inode->i_atime.tv_nsec = stat->st_atime_nsec;
+		}
+		if (stat->st_result_mask & P9_STATS_MTIME) {
+			inode->i_mtime.tv_sec = stat->st_mtime_sec;
+			inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
+		}
+		if (stat->st_result_mask & P9_STATS_CTIME) {
+			inode->i_ctime.tv_sec = stat->st_ctime_sec;
+			inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+		}
+		if (stat->st_result_mask & P9_STATS_UID)
+			inode->i_uid = stat->st_uid;
+		if (stat->st_result_mask & P9_STATS_GID)
+			inode->i_gid = stat->st_gid;
+		if (stat->st_result_mask & P9_STATS_NLINK)
+			inode->i_nlink = stat->st_nlink;
+		if (stat->st_result_mask & P9_STATS_MODE) {
+			inode->i_mode = stat->st_mode;
+			if ((S_ISBLK(inode->i_mode)) ||
+						(S_ISCHR(inode->i_mode)))
+				init_special_inode(inode, inode->i_mode,
+								inode->i_rdev);
+		}
+		if (stat->st_result_mask & P9_STATS_RDEV)
+			inode->i_rdev = new_decode_dev(stat->st_rdev);
+		if (stat->st_result_mask & P9_STATS_SIZE)
+			i_size_write(inode, stat->st_size);
+		if (stat->st_result_mask & P9_STATS_BLOCKS)
+			inode->i_blocks = stat->st_blocks;
+	}
+	if (stat->st_result_mask & P9_STATS_GEN)
+			inode->i_generation = stat->st_gen;
+
+	/* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
+	 * because the inode structure does not have fields for them.
+	 */
+}
+
 /**
  * v9fs_qid2ino - convert qid into inode number
  * @qid: qid to hash
@@ -1254,7 +1399,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotl = {
 	.rmdir = v9fs_vfs_rmdir,
 	.mknod = v9fs_vfs_mknod,
 	.rename = v9fs_vfs_rename,
-	.getattr = v9fs_vfs_getattr,
+	.getattr = v9fs_vfs_getattr_dotl,
 	.setattr = v9fs_vfs_setattr,
 };
 
@@ -1276,7 +1421,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
 };
 
 static const struct inode_operations v9fs_file_inode_operations_dotl = {
-	.getattr = v9fs_vfs_getattr,
+	.getattr = v9fs_vfs_getattr_dotl,
 	.setattr = v9fs_vfs_setattr,
 };
 
@@ -1292,6 +1437,6 @@ static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
 	.readlink = generic_readlink,
 	.follow_link = v9fs_vfs_follow_link,
 	.put_link = v9fs_vfs_put_link,
-	.getattr = v9fs_vfs_getattr,
+	.getattr = v9fs_vfs_getattr_dotl,
 	.setattr = v9fs_vfs_setattr,
 };
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index be74d020436..3623f692b44 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -107,7 +107,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 	struct inode *inode = NULL;
 	struct dentry *root = NULL;
 	struct v9fs_session_info *v9ses = NULL;
-	struct p9_wstat *st = NULL;
 	int mode = S_IRWXUGO | S_ISVTX;
 	struct p9_fid *fid;
 	int retval = 0;
@@ -124,16 +123,10 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 		goto close_session;
 	}
 
-	st = p9_client_stat(fid);
-	if (IS_ERR(st)) {
-		retval = PTR_ERR(st);
-		goto clunk_fid;
-	}
-
 	sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
 	if (IS_ERR(sb)) {
 		retval = PTR_ERR(sb);
-		goto free_stat;
+		goto clunk_fid;
 	}
 	v9fs_fill_super(sb, v9ses, flags, data);
 
@@ -151,22 +144,38 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 	}
 
 	sb->s_root = root;
-	root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
 
-	v9fs_stat2inode(st, root->d_inode, sb);
+	if (v9fs_proto_dotl(v9ses)) {
+		struct p9_stat_dotl *st = NULL;
+		st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+		if (IS_ERR(st)) {
+			retval = PTR_ERR(st);
+			goto clunk_fid;
+		}
+
+		v9fs_stat2inode_dotl(st, root->d_inode);
+		kfree(st);
+	} else {
+		struct p9_wstat *st = NULL;
+		st = p9_client_stat(fid);
+		if (IS_ERR(st)) {
+			retval = PTR_ERR(st);
+			goto clunk_fid;
+		}
+
+		root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
+		v9fs_stat2inode(st, root->d_inode, sb);
+
+		p9stat_free(st);
+		kfree(st);
+	}
 
 	v9fs_fid_add(root, fid);
-	p9stat_free(st);
-	kfree(st);
 
 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
 	simple_set_mnt(mnt, sb);
 	return 0;
 
-free_stat:
-	p9stat_free(st);
-	kfree(st);
-
 clunk_fid:
 	p9_client_clunk(fid);
 
@@ -176,8 +185,6 @@ close_session:
 	return retval;
 
 release_sb:
-	p9stat_free(st);
-	kfree(st);
 	deactivate_locked_super(sb);
 	return retval;
 }
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index f1b0b310265..ab12e1c9cc7 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -133,6 +133,8 @@ enum p9_msg_t {
 	P9_RSTATFS,
 	P9_TRENAME = 20,
 	P9_RRENAME,
+	P9_TGETATTR = 24,
+	P9_RGETATTR,
 	P9_TREADDIR = 40,
 	P9_RREADDIR,
 	P9_TVERSION = 100,
@@ -362,6 +364,48 @@ struct p9_wstat {
 	u32 n_muid;		/* 9p2000.u extensions */
 };
 
+struct p9_stat_dotl {
+	u64 st_result_mask;
+	struct p9_qid qid;
+	u32 st_mode;
+	u32 st_uid;
+	u32 st_gid;
+	u64 st_nlink;
+	u64 st_rdev;
+	u64 st_size;
+	u64 st_blksize;
+	u64 st_blocks;
+	u64 st_atime_sec;
+	u64 st_atime_nsec;
+	u64 st_mtime_sec;
+	u64 st_mtime_nsec;
+	u64 st_ctime_sec;
+	u64 st_ctime_nsec;
+	u64 st_btime_sec;
+	u64 st_btime_nsec;
+	u64 st_gen;
+	u64 st_data_version;
+};
+
+#define P9_STATS_MODE		0x00000001ULL
+#define P9_STATS_NLINK		0x00000002ULL
+#define P9_STATS_UID		0x00000004ULL
+#define P9_STATS_GID		0x00000008ULL
+#define P9_STATS_RDEV		0x00000010ULL
+#define P9_STATS_ATIME		0x00000020ULL
+#define P9_STATS_MTIME		0x00000040ULL
+#define P9_STATS_CTIME		0x00000080ULL
+#define P9_STATS_INO		0x00000100ULL
+#define P9_STATS_SIZE		0x00000200ULL
+#define P9_STATS_BLOCKS		0x00000400ULL
+
+#define P9_STATS_BTIME		0x00000800ULL
+#define P9_STATS_GEN		0x00001000ULL
+#define P9_STATS_DATA_VERSION	0x00002000ULL
+
+#define P9_STATS_BASIC		0x000007ffULL /* Mask for fields up to BLOCKS */
+#define P9_STATS_ALL		0x00003fffULL /* Mask for All fields above */
+
 /* Structures for Protocol Operations */
 struct p9_tstatfs {
 	u32 fid;
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 2ec93685e6d..6462eec435b 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -238,6 +238,9 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
 struct p9_wstat *p9_client_stat(struct p9_fid *fid);
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst);
 
+struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
+							u64 request_mask);
+
 struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req);
 
diff --git a/net/9p/client.c b/net/9p/client.c
index 4ff068e98f7..5e97118da3b 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1303,6 +1303,65 @@ error:
 }
 EXPORT_SYMBOL(p9_client_stat);
 
+struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
+							u64 request_mask)
+{
+	int err;
+	struct p9_client *clnt;
+	struct p9_stat_dotl *ret = kmalloc(sizeof(struct p9_stat_dotl),
+								GFP_KERNEL);
+	struct p9_req_t *req;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n",
+							fid->fid, request_mask);
+
+	if (!ret)
+		return ERR_PTR(-ENOMEM);
+
+	err = 0;
+	clnt = fid->clnt;
+
+	req = p9_client_rpc(clnt, P9_TGETATTR, "dq", fid->fid, request_mask);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
+
+	err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		p9_free_req(clnt, req);
+		goto error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P,
+		"<<< RGETATTR st_result_mask=%lld\n"
+		"<<< qid=%x.%llx.%x\n"
+		"<<< st_mode=%8.8x st_nlink=%llu\n"
+		"<<< st_uid=%d st_gid=%d\n"
+		"<<< st_rdev=%llx st_size=%llx st_blksize=%llu st_blocks=%llu\n"
+		"<<< st_atime_sec=%lld st_atime_nsec=%lld\n"
+		"<<< st_mtime_sec=%lld st_mtime_nsec=%lld\n"
+		"<<< st_ctime_sec=%lld st_ctime_nsec=%lld\n"
+		"<<< st_btime_sec=%lld st_btime_nsec=%lld\n"
+		"<<< st_gen=%lld st_data_version=%lld",
+		ret->st_result_mask, ret->qid.type, ret->qid.path,
+		ret->qid.version, ret->st_mode, ret->st_nlink, ret->st_uid,
+		ret->st_gid, ret->st_rdev, ret->st_size, ret->st_blksize,
+		ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec,
+		ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec,
+		ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec,
+		ret->st_gen, ret->st_data_version);
+
+	p9_free_req(clnt, req);
+	return ret;
+
+error:
+	kfree(ret);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(p9_client_getattr_dotl);
+
 static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
 {
 	int ret;
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index b645c826353..3e4f7769589 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -141,6 +141,7 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
 	D - data blob (int32_t size followed by void *, results are not freed)
 	T - array of strings (int16_t count, followed by strings)
 	R - array of qids (int16_t count, followed by qids)
+	A - stat for 9p2000.L (p9_stat_dotl)
 	? - if optional = 1, continue parsing
 */
 
@@ -340,6 +341,33 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				}
 			}
 			break;
+		case 'A': {
+				struct p9_stat_dotl *stbuf =
+				    va_arg(ap, struct p9_stat_dotl *);
+
+				memset(stbuf, 0, sizeof(struct p9_stat_dotl));
+				errcode =
+				    p9pdu_readf(pdu, proto_version,
+					"qQdddqqqqqqqqqqqqqqq",
+					&stbuf->st_result_mask,
+					&stbuf->qid,
+					&stbuf->st_mode,
+					&stbuf->st_uid, &stbuf->st_gid,
+					&stbuf->st_nlink,
+					&stbuf->st_rdev, &stbuf->st_size,
+					&stbuf->st_blksize, &stbuf->st_blocks,
+					&stbuf->st_atime_sec,
+					&stbuf->st_atime_nsec,
+					&stbuf->st_mtime_sec,
+					&stbuf->st_mtime_nsec,
+					&stbuf->st_ctime_sec,
+					&stbuf->st_ctime_nsec,
+					&stbuf->st_btime_sec,
+					&stbuf->st_btime_nsec,
+					&stbuf->st_gen,
+					&stbuf->st_data_version);
+			}
+			break;
 		case '?':
 			if ((proto_version != p9_proto_2000u) &&
 				(proto_version != p9_proto_2000L))
-- 
cgit v1.2.3-70-g09d2


From 87d7845aa0b157a62448dd3e339856f28befe1f4 Mon Sep 17 00:00:00 2001
From: Sripathi Kodi <sripathik@in.ibm.com>
Date: Fri, 18 Jun 2010 11:50:10 +0530
Subject: 9p: Implement client side of setattr for 9P2000.L protocol.

    SYNOPSIS

      size[4] Tsetattr tag[2] attr[n]

      size[4] Rsetattr tag[2]

    DESCRIPTION

      The setattr command changes some of the file status information.
      attr resembles the iattr structure used in Linux kernel. It
      specifies which status parameter is to be changed and to what
      value. It is laid out as follows:

         valid[4]
            specifies which status information is to be changed. Possible
            values are:
            ATTR_MODE       (1 << 0)
            ATTR_UID        (1 << 1)
            ATTR_GID        (1 << 2)
            ATTR_SIZE       (1 << 3)
            ATTR_ATIME      (1 << 4)
            ATTR_MTIME      (1 << 5)
            ATTR_ATIME_SET  (1 << 7)
            ATTR_MTIME_SET  (1 << 8)

            The last two bits indicate whether the time information
            is being sent by the client's user space. In the absence
            of these bits the server always uses the server's time.

         mode[4]
            File permission bits

         uid[4]
            Owner id of file

         gid[4]
            Group id of the file

         size[8]
            File size

         atime_sec[8]
            Time of last file access, seconds

         atime_nsec[8]
            Time of last file access, nanoseconds

         mtime_sec[8]
            Time of last file modification, seconds

         mtime_nsec[8]
            Time of last file modification, nanoseconds

Explanation of the patches:
--------------------------

*) The kernel just copies the relevant contents of the iattr structure
   to a p9_iattr_dotl structure and passes it down to the client. The
   only check it performs is calling inode_change_ok().
*) The p9_iattr_dotl structure does not have ctime and ia_file
   parameters because I don't think these are needed in our case.
   The client user space can request updating just ctime by calling
   chown(fd, -1, -1). This is handled on server side without a need
   for putting ctime on the wire.
*) The server currently supports changing mode, time, ownership and
   size of the file.
*) 9P RFC says "Either all the changes in wstat request happen, or
   none of them does: if the request succeeds, all changes were made;
   if it fails, none were."
   I have not done anything to implement this specifically because I
   don't see a reason.

Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com>
Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c       | 49 ++++++++++++++++++++++++++++++++++++++++++++++---
 include/net/9p/9p.h     | 28 ++++++++++++++++++++++++++++
 include/net/9p/client.h |  1 +
 net/9p/client.c         | 30 ++++++++++++++++++++++++++++++
 net/9p/protocol.c       | 17 +++++++++++++++++
 5 files changed, 122 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index afcb8d88938..a90324f4546 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -976,6 +976,49 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 	return retval;
 }
 
+/**
+ * v9fs_vfs_setattr_dotl - set file metadata with a 9P2000.L setattr request
+ * @dentry: file whose metadata to set
+ * @iattr: metadata assignment structure
+ * Returns the inode_setattr() result, or the first error hit before it.
+ */
+
+static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
+{
+	int retval;
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *fid;
+	struct p9_iattr_dotl p9attr;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "\n");
+
+	retval = inode_change_ok(dentry->d_inode, iattr);
+	if (retval)
+		return retval;
+	/* Copy every field; p9attr.valid says which of them are meaningful. */
+	p9attr.valid = iattr->ia_valid;
+	p9attr.mode = iattr->ia_mode;
+	p9attr.uid = iattr->ia_uid;
+	p9attr.gid = iattr->ia_gid;
+	p9attr.size = iattr->ia_size;
+	p9attr.atime_sec = iattr->ia_atime.tv_sec;
+	p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
+	p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
+	p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
+	/* NOTE(review): the -EPERM below is overwritten before any return. */
+	retval = -EPERM;
+	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	fid = v9fs_fid_lookup(dentry);
+	if (IS_ERR(fid))
+		return PTR_ERR(fid);
+	/* Update the local inode only once the server accepted the change. */
+	retval = p9_client_setattr(fid, &p9attr);
+	if (retval >= 0)
+		retval = inode_setattr(dentry->d_inode, iattr);
+
+	return retval;
+}
+
 /**
  * v9fs_stat2inode - populate an inode structure with mistat info
  * @stat: Plan 9 metadata (mistat) structure
@@ -1400,7 +1443,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotl = {
 	.mknod = v9fs_vfs_mknod,
 	.rename = v9fs_vfs_rename,
 	.getattr = v9fs_vfs_getattr_dotl,
-	.setattr = v9fs_vfs_setattr,
+	.setattr = v9fs_vfs_setattr_dotl,
 };
 
 static const struct inode_operations v9fs_dir_inode_operations = {
@@ -1422,7 +1465,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
 
 static const struct inode_operations v9fs_file_inode_operations_dotl = {
 	.getattr = v9fs_vfs_getattr_dotl,
-	.setattr = v9fs_vfs_setattr,
+	.setattr = v9fs_vfs_setattr_dotl,
 };
 
 static const struct inode_operations v9fs_symlink_inode_operations = {
@@ -1438,5 +1481,5 @@ static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
 	.follow_link = v9fs_vfs_follow_link,
 	.put_link = v9fs_vfs_put_link,
 	.getattr = v9fs_vfs_getattr_dotl,
-	.setattr = v9fs_vfs_setattr,
+	.setattr = v9fs_vfs_setattr_dotl,
 };
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index ab12e1c9cc7..7f64d72f6c6 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -135,6 +135,8 @@ enum p9_msg_t {
 	P9_RRENAME,
 	P9_TGETATTR = 24,
 	P9_RGETATTR,
+	P9_TSETATTR = 26,
+	P9_RSETATTR,
 	P9_TREADDIR = 40,
 	P9_RREADDIR,
 	P9_TVERSION = 100,
@@ -406,6 +408,32 @@ struct p9_stat_dotl {
 #define P9_STATS_BASIC		0x000007ffULL /* Mask for fields up to BLOCKS */
 #define P9_STATS_ALL		0x00003fffULL /* Mask for All fields above */
 
+/**
+ * struct p9_iattr_dotl - attributes carried by a 9P2000.L setattr request
+ * @valid: bitfield specifying which of the other fields are valid,
+ *         same as in struct iattr
+ * @mode: File permission bits
+ * @uid: user id of owner
+ * @gid: group id
+ * @size: File size
+ * @atime_sec: Last access time, seconds
+ * @atime_nsec: Last access time, nanoseconds
+ * @mtime_sec: Last modification time, seconds
+ * @mtime_nsec: Last modification time, nanoseconds
+ */
+
+struct p9_iattr_dotl {
+	u32 valid;
+	u32 mode;
+	u32 uid;
+	u32 gid;
+	u64 size;
+	u64 atime_sec;
+	u64 atime_nsec;
+	u64 mtime_sec;
+	u64 mtime_nsec;
+};
+
 /* Structures for Protocol Operations */
 struct p9_tstatfs {
 	u32 fid;
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 6462eec435b..afdc385152f 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -237,6 +237,7 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
 							int proto_version);
 struct p9_wstat *p9_client_stat(struct p9_fid *fid);
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst);
+int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *attr);
 
 struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
 							u64 request_mask);
diff --git a/net/9p/client.c b/net/9p/client.c
index 5e97118da3b..b2f70ec889c 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1426,6 +1426,36 @@ error:
 }
 EXPORT_SYMBOL(p9_client_wstat);
 
+/*
+ * Send a TSETATTR request carrying @p9attr for @fid.  Returns 0 once the
+ * reply has arrived, or the error p9_client_rpc() reported.
+ */
+int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
+{
+	struct p9_client *client = fid->clnt;
+	struct p9_req_t *request;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid);
+	P9_DPRINTK(P9_DEBUG_9P,
+		"    valid=%x mode=%x uid=%d gid=%d size=%lld\n"
+		"    atime_sec=%lld atime_nsec=%lld\n"
+		"    mtime_sec=%lld mtime_nsec=%lld\n",
+		p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid,
+		p9attr->size, p9attr->atime_sec, p9attr->atime_nsec,
+		p9attr->mtime_sec, p9attr->mtime_nsec);
+
+	/* "dI": the fid number followed by the marshalled attribute block. */
+	request = p9_client_rpc(client, P9_TSETATTR, "dI", fid->fid, p9attr);
+	if (IS_ERR(request))
+		return PTR_ERR(request);
+
+	/* Nothing is read back from the reply; just release the request. */
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid);
+	p9_free_req(client, request);
+	return 0;
+}
+EXPORT_SYMBOL(p9_client_setattr);
+
 int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
 {
 	int err;
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 3e4f7769589..3acd3afb20c 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -516,6 +516,23 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				}
 			}
 			break;
+		case 'I':{
+				struct p9_iattr_dotl *p9attr = va_arg(ap,
+							struct p9_iattr_dotl *);
+
+				errcode = p9pdu_writef(pdu, proto_version,
+							"ddddqqqqq",
+							p9attr->valid,
+							p9attr->mode,
+							p9attr->uid,
+							p9attr->gid,
+							p9attr->size,
+							p9attr->atime_sec,
+							p9attr->atime_nsec,
+							p9attr->mtime_sec,
+							p9attr->mtime_nsec);
+			}
+			break;
 		case '?':
 			if ((proto_version != p9_proto_2000u) &&
 				(proto_version != p9_proto_2000L))
-- 
cgit v1.2.3-70-g09d2


From 652df9a7fd03cb47a3f663f0c08a2bd086505e9b Mon Sep 17 00:00:00 2001
From: "Venkateswararao Jujjuri (JV)" <jvrao@linux.vnet.ibm.com>
Date: Thu, 3 Jun 2010 15:16:59 -0700
Subject: 9p: Define and implement TLINK for 9P2000.L

This patch adds a helper function to get the dentry from an inode and
uses it when creating a hard link.

SYNOPSIS

size[4] Tlink tag[2] dfid[4] oldfid[4] newpath[s]

size[4] Rlink tag[2]

DESCRIPTION

Create a link 'newpath' in the directory pointed to by dfid, linking to oldfid.

[sripathik@in.ibm.com : p9_client_link should not free req structure
if p9_client_rpc has returned an error.]

Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/net/9p/9p.h     |  2 ++
 include/net/9p/client.h |  1 +
 net/9p/client.c         | 19 +++++++++++++++++++
 3 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 7f64d72f6c6..5985c0f83db 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -139,6 +139,8 @@ enum p9_msg_t {
 	P9_RSETATTR,
 	P9_TREADDIR = 40,
 	P9_RREADDIR,
+	P9_TLINK = 70,
+	P9_RLINK,
 	P9_TVERSION = 100,
 	P9_RVERSION,
 	P9_TAUTH = 102,
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index afdc385152f..e36f11650e9 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -226,6 +226,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
 int p9_client_open(struct p9_fid *fid, int mode);
 int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
 							char *extension);
+int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, char *newname);
 int p9_client_clunk(struct p9_fid *fid);
 int p9_client_remove(struct p9_fid *fid);
 int p9_client_read(struct p9_fid *fid, char *data, char __user *udata,
diff --git a/net/9p/client.c b/net/9p/client.c
index b2f70ec889c..ad1c4489ab4 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1095,6 +1095,25 @@ error:
 }
 EXPORT_SYMBOL(p9_client_fcreate);
 
+int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	/* Create hard link @newname in directory @dfid, pointing at @oldfid. */
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n",
+			dfid->fid, oldfid->fid, newname);
+	clnt = dfid->clnt;
+	req = p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid,
+			newname);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+	/* Nothing is parsed from the reply, so the request is just released. */
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RLINK\n");
+	p9_free_req(clnt, req);
+	return 0;
+}
+EXPORT_SYMBOL(p9_client_link);
+
 int p9_client_clunk(struct p9_fid *fid)
 {
 	int err;
-- 
cgit v1.2.3-70-g09d2


From 50cc42ff3d7bc48a436c5a0413459ca7841b505f Mon Sep 17 00:00:00 2001
From: "Venkateswararao Jujjuri (JV)" <jvrao@linux.vnet.ibm.com>
Date: Wed, 9 Jun 2010 15:59:31 -0700
Subject: 9p: Define and implement TSYMLINK for 9P2000.L

Create a symbolic link

SYNOPSIS

size[4] Tsymlink tag[2] fid[4] name[s] symtgt[s] gid[4]

size[4] Rsymlink tag[2] qid[13]

DESCRIPTION

Create a symbolic link named 'name' pointing to 'symtgt'.
gid represents the effective group id of the caller.
The permissions of a symbolic link are irrelevant, hence they are omitted
from the protocol.

Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Reviewed-by: Sripathi Kodi <sripathik@in.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c       | 101 ++++++++++++++++++++++++++++++++++++++++++++++--
 include/net/9p/9p.h     |   4 ++
 include/net/9p/client.h |   2 +
 net/9p/client.c         |  34 ++++++++++++++++
 4 files changed, 137 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index e6ece237241..a7319364544 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1245,7 +1245,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 	if (IS_ERR(fid))
 		return PTR_ERR(fid);
 
-	if (!v9fs_proto_dotu(v9ses))
+	if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))
 		return -EBADF;
 
 	st = p9_client_stat(fid);
@@ -1350,6 +1350,99 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
 	return 0;
 }
 
+/**
+ * v9fs_vfs_symlink_dotl - create a symlink with a 9P2000.L TSYMLINK request
+ * @dir: directory inode containing symlink
+ * @dentry: dentry for symlink
+ * @symname: symlink data
+ *
+ * Returns the error of the first step that fails, else the last step's status.
+ * See Also: 9P2000.L RFC for more information
+ */
+
+static int
+v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
+		const char *symname)
+{
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *dfid;
+	struct p9_fid *fid = NULL;
+	struct inode *inode;
+	struct p9_qid qid;
+	char *name;
+	int err;
+	gid_t gid;
+
+	name = (char *) dentry->d_name.name;
+	P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
+			dir->i_ino, name, symname);
+	v9ses = v9fs_inode2v9ses(dir);
+
+	dfid = v9fs_fid_lookup(dentry->d_parent);
+	if (IS_ERR(dfid)) {
+		err = PTR_ERR(dfid);
+		P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+		return err;
+	}
+
+	/*
+	 * No failure test on the result: gid_t is an unsigned type, so a
+	 * "gid < 0" comparison is always false and would only leave a dead
+	 * branch that jumps to the exit path before 'err' is assigned.
+	 */
+	gid = v9fs_get_fsgid_for_create(dir);
+
+	/* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
+	err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
+
+	if (err < 0) {
+		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
+		goto error;
+	}
+
+	if (v9ses->cache) {
+		/* Now walk from the parent so we can get an unopened fid. */
+		fid = p9_client_walk(dfid, 1, &name, 1);
+		if (IS_ERR(fid)) {
+			err = PTR_ERR(fid);
+			P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+					err);
+			fid = NULL;
+			goto error;
+		}
+
+		/* instantiate inode and assign the unopened fid to dentry */
+		inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+		if (IS_ERR(inode)) {
+			err = PTR_ERR(inode);
+			P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+					err);
+			goto error;
+		}
+		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_instantiate(dentry, inode);
+		err = v9fs_fid_add(dentry, fid);
+		if (err < 0)
+			goto error;
+		fid = NULL;
+	} else {
+		/* Not in cached mode. No need to populate inode with stat */
+		inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
+		if (IS_ERR(inode)) {
+			err = PTR_ERR(inode);
+			goto error;
+		}
+		dentry->d_op = &v9fs_dentry_operations;
+		d_instantiate(dentry, inode);
+	}
+
+error:
+	if (fid)
+		p9_client_clunk(fid);
+
+	return err;
+}
+
 /**
  * v9fs_vfs_symlink - helper function to create symlinks
  * @dir: directory inode containing symlink
@@ -1527,7 +1620,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
 	.create = v9fs_vfs_create,
 	.lookup = v9fs_vfs_lookup,
 	.symlink = v9fs_vfs_symlink,
-	.link = v9fs_vfs_link_dotl,
+	.link = v9fs_vfs_link,
 	.unlink = v9fs_vfs_unlink,
 	.mkdir = v9fs_vfs_mkdir,
 	.rmdir = v9fs_vfs_rmdir,
@@ -1540,8 +1633,8 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
 static const struct inode_operations v9fs_dir_inode_operations_dotl = {
 	.create = v9fs_vfs_create,
 	.lookup = v9fs_vfs_lookup,
-	.symlink = v9fs_vfs_symlink,
-	.link = v9fs_vfs_link,
+	.link = v9fs_vfs_link_dotl,
+	.symlink = v9fs_vfs_symlink_dotl,
 	.unlink = v9fs_vfs_unlink,
 	.mkdir = v9fs_vfs_mkdir,
 	.rmdir = v9fs_vfs_rmdir,
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 5985c0f83db..44a6883d714 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -88,6 +88,8 @@ do { \
  * enum p9_msg_t - 9P message types
  * @P9_TSTATFS: file system status request
  * @P9_RSTATFS: file system status response
+ * @P9_TSYMLINK: make symlink request
+ * @P9_RSYMLINK: make symlink response
  * @P9_TRENAME: rename request
  * @P9_RRENAME: rename response
  * @P9_TVERSION: version handshake request
@@ -131,6 +133,8 @@ do { \
 enum p9_msg_t {
 	P9_TSTATFS = 8,
 	P9_RSTATFS,
+	P9_TSYMLINK = 16,
+	P9_RSYMLINK,
 	P9_TRENAME = 20,
 	P9_RRENAME,
 	P9_TGETATTR = 24,
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index e36f11650e9..2e039730920 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -227,6 +227,8 @@ int p9_client_open(struct p9_fid *fid, int mode);
 int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
 							char *extension);
 int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, char *newname);
+int p9_client_symlink(struct p9_fid *fid, char *name, char *symname, gid_t gid,
+							struct p9_qid *qid);
 int p9_client_clunk(struct p9_fid *fid);
 int p9_client_remove(struct p9_fid *fid);
 int p9_client_read(struct p9_fid *fid, char *data, char __user *udata,
diff --git a/net/9p/client.c b/net/9p/client.c
index ad1c4489ab4..e37e64cb939 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1095,6 +1095,40 @@ error:
 }
 EXPORT_SYMBOL(p9_client_fcreate);
 
+/*
+ * Send TSYMLINK to create symlink @name -> @symtgt in directory @dfid and
+ * store the qid from the reply in @qid.  Returns 0 or an error code.
+ */
+int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid,
+		struct p9_qid *qid)
+{
+	struct p9_client *client = dfid->clnt;
+	struct p9_req_t *request;
+	int status;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s  symtgt %s\n",
+			dfid->fid, name, symtgt);
+
+	request = p9_client_rpc(client, P9_TSYMLINK, "dssd", dfid->fid, name,
+			symtgt, gid);
+	if (IS_ERR(request))
+		return PTR_ERR(request);
+
+	/* The reply body is read as a single qid. */
+	status = p9pdu_readf(request->rc, client->proto_version, "Q", qid);
+	if (status) {
+		p9pdu_dump(1, request->rc);
+	} else {
+		P9_DPRINTK(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n",
+			qid->type, (unsigned long long)qid->path, qid->version);
+	}
+
+	/* The request is released whether or not the reply parsed. */
+	p9_free_req(client, request);
+	return status;
+}
+EXPORT_SYMBOL(p9_client_symlink);
+
 int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname)
 {
 	struct p9_client *clnt;
-- 
cgit v1.2.3-70-g09d2


From 4b43516ab19b748b48322937fd9307af17541c4d Mon Sep 17 00:00:00 2001
From: "M. Mohan Kumar" <mohan@in.ibm.com>
Date: Wed, 16 Jun 2010 14:27:01 +0530
Subject: 9p: Implement TMKNOD

Synopsis

    size[4] Tmknod tag[2] fid[4] name[s] mode[4] major[4] minor[4] gid[4]

    size[4] Rmknod tag[2] qid[13]

Description

    mknod asks the file server to create a device node with given major and
    minor number, mode and gid. The qid for the new device node is returned
    with the mknod reply message.

[sripathik@in.ibm.com: Fix error handling code]

Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c       | 106 ++++++++++++++++++++++++++++++++++++++++++++++--
 include/net/9p/9p.h     |   4 ++
 include/net/9p/client.h |   2 +
 net/9p/client.c         |  31 ++++++++++++++
 4 files changed, 140 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index a7319364544..4d9f45ec612 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -302,7 +302,13 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 	case S_IFBLK:
 	case S_IFCHR:
 	case S_IFSOCK:
-		if (!v9fs_proto_dotu(v9ses)) {
+		if (v9fs_proto_dotl(v9ses)) {
+			inode->i_op = &v9fs_file_inode_operations_dotl;
+			inode->i_fop = &v9fs_file_operations_dotl;
+		} else if (v9fs_proto_dotu(v9ses)) {
+			inode->i_op = &v9fs_file_inode_operations;
+			inode->i_fop = &v9fs_file_operations;
+		} else {
 			P9_DPRINTK(P9_DEBUG_ERROR,
 				   "special files without extended mode\n");
 			err = -EINVAL;
@@ -1616,6 +1622,100 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 	return retval;
 }
 
+/**
+ * v9fs_vfs_mknod_dotl - create a special file with a 9P2000.L mknod request
+ * @dir: inode of the directory in which the special file is created
+ * @dentry: dentry for file
+ * @mode: mode for creation
+ * @rdev: device associated with special file
+ * Returns -EINVAL for a bad @rdev, otherwise the result of the last step.
+ */
+static int
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
+		dev_t rdev)
+{
+	int err;
+	char *name;
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *fid = NULL, *dfid = NULL;
+	struct inode *inode;
+	gid_t gid;
+	struct p9_qid qid;
+	struct dentry *dir_entry;
+
+	P9_DPRINTK(P9_DEBUG_VFS,
+		" %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
+		dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
+
+	if (!new_valid_dev(rdev))
+		return -EINVAL;
+
+	v9ses = v9fs_inode2v9ses(dir);
+	dir_dentry = v9fs_dentry_from_dir_inode(dir);
+	dfid = v9fs_fid_lookup(dir_entry);
+	if (IS_ERR(dfid)) {
+		err = PTR_ERR(dfid);
+		P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+		dfid = NULL;
+		goto error;
+	}
+	/* NOTE(review): gid_t looks unsigned; "gid < 0" may never be true. */
+	gid = v9fs_get_fsgid_for_create(dir);
+	if (gid < 0) {
+		P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n");
+		goto error;
+	}
+
+	name = (char *) dentry->d_name.name;
+
+	err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
+	if (err < 0)
+		goto error;
+
+	/* instantiate inode and assign the unopened fid to the dentry */
+	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+		fid = p9_client_walk(dfid, 1, &name, 1);
+		if (IS_ERR(fid)) {
+			err = PTR_ERR(fid);
+			P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+				err);
+			fid = NULL;
+			goto error;
+		}
+
+		inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+		if (IS_ERR(inode)) {
+			err = PTR_ERR(inode);
+			P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+				err);
+			goto error;
+		}
+		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_instantiate(dentry, inode);
+		err = v9fs_fid_add(dentry, fid);
+		if (err < 0)
+			goto error;
+		fid = NULL;
+	} else {
+		/*
+		 * Not in cached mode. No need to populate inode with stat.
+		 * socket syscall returns a fd, so we need instantiate
+		 */
+		inode = v9fs_get_inode(dir->i_sb, mode);
+		if (IS_ERR(inode)) {
+			err = PTR_ERR(inode);
+			goto error;
+		}
+		dentry->d_op = &v9fs_dentry_operations;
+		d_instantiate(dentry, inode);
+	}
+
+error:
+	if (fid)
+		p9_client_clunk(fid);
+	return err;
+}
+
 static const struct inode_operations v9fs_dir_inode_operations_dotu = {
 	.create = v9fs_vfs_create,
 	.lookup = v9fs_vfs_lookup,
@@ -1624,7 +1724,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
 	.unlink = v9fs_vfs_unlink,
 	.mkdir = v9fs_vfs_mkdir,
 	.rmdir = v9fs_vfs_rmdir,
-	.mknod = v9fs_vfs_mknod,
+	.mknod = v9fs_vfs_mknod_dotl,
 	.rename = v9fs_vfs_rename,
 	.getattr = v9fs_vfs_getattr,
 	.setattr = v9fs_vfs_setattr,
@@ -1638,7 +1738,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotl = {
 	.unlink = v9fs_vfs_unlink,
 	.mkdir = v9fs_vfs_mkdir,
 	.rmdir = v9fs_vfs_rmdir,
-	.mknod = v9fs_vfs_mknod,
+	.mknod = v9fs_vfs_mknod_dotl,
 	.rename = v9fs_vfs_rename,
 	.getattr = v9fs_vfs_getattr_dotl,
 	.setattr = v9fs_vfs_setattr_dotl,
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 44a6883d714..ff32091d806 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -90,6 +90,8 @@ do { \
  * @P9_RSTATFS: file system status response
  * @P9_TSYMLINK: make symlink request
  * @P9_RSYMLINK: make symlink response
+ * @P9_TMKNOD: create a special file object request
+ * @P9_RMKNOD: create a special file object response
  * @P9_TRENAME: rename request
  * @P9_RRENAME: rename response
  * @P9_TVERSION: version handshake request
@@ -135,6 +137,8 @@ enum p9_msg_t {
 	P9_RSTATFS,
 	P9_TSYMLINK = 16,
 	P9_RSYMLINK,
+	P9_TMKNOD = 18,
+	P9_RMKNOD,
 	P9_TRENAME = 20,
 	P9_RRENAME,
 	P9_TGETATTR = 24,
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 2e039730920..6e70358c71d 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -245,6 +245,8 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *attr);
 struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
 							u64 request_mask);
 
+int p9_client_mknod_dotl(struct p9_fid *oldfid, char *name, int mode,
+			dev_t rdev, gid_t gid, struct p9_qid *);
 struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req);
 
diff --git a/net/9p/client.c b/net/9p/client.c
index e37e64cb939..cdfbd674079 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1622,3 +1622,34 @@ error:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_readdir);
+
+/* Send TMKNOD to create special file @name under directory @fid; the qid
+ * from the reply is stored in @qid.  Returns 0 or an error code.
+ */
+int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode,
+			dev_t rdev, gid_t gid, struct p9_qid *qid)
+{
+	struct p9_client *client = fid->clnt;
+	struct p9_req_t *request;
+	int err;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d "
+		"minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev));
+	request = p9_client_rpc(client, P9_TMKNOD, "dsdddd", fid->fid, name,
+		mode, MAJOR(rdev), MINOR(rdev), gid);
+	if (IS_ERR(request))
+		return PTR_ERR(request);
+
+	err = p9pdu_readf(request->rc, client->proto_version, "Q", qid);
+	if (err) {
+		p9pdu_dump(1, request->rc);
+	} else {
+		P9_DPRINTK(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n",
+			qid->type, (unsigned long long)qid->path,
+			qid->version);
+	}
+	p9_free_req(client, request);
+	return err;
+
+}
+EXPORT_SYMBOL(p9_client_mknod_dotl);
-- 
cgit v1.2.3-70-g09d2


From 01a622bd7409bb7af38e784cff814e5e723f7951 Mon Sep 17 00:00:00 2001
From: "M. Mohan Kumar" <mohan@in.ibm.com>
Date: Wed, 16 Jun 2010 14:27:22 +0530
Subject: 9p: Implement TMKDIR

Implement TMKDIR as part of 2000.L Work

Synopsis

    size[4] Tmkdir tag[2] fid[4] name[s] mode[4] gid[4]

    size[4] Rmkdir tag[2] qid[13]

Description

    mkdir asks the file server to create a directory with given name,
    mode and gid. The qid for the new directory is returned with
    the mkdir reply message.

Note: 72 is selected as the opcode for TMKDIR from the reserved list.

Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c       | 83 +++++++++++++++++++++++++++++++++++++++++++++++--
 include/net/9p/9p.h     |  4 +++
 include/net/9p/client.h |  2 ++
 net/9p/client.c         | 31 ++++++++++++++++++
 4 files changed, 117 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 4d9f45ec612..39dc7956732 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -731,6 +731,83 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	return err;
 }
 
+
+/**
+ * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
+ * @dir:  inode of the directory in which the new directory is created
+ * @dentry: dentry for the new directory
+ * @mode: mode for new directory
+ * Returns the error of the first step that fails, else the last step's status.
+ */
+
+static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
+					int mode)
+{
+	int err;
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *fid = NULL, *dfid = NULL;
+	gid_t gid;
+	char *name;
+	struct inode *inode;
+	struct p9_qid qid;
+	struct dentry *dir_dentry;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+	err = 0;
+	v9ses = v9fs_inode2v9ses(dir);
+
+	mode |= S_IFDIR;
+	dir_dentry = v9fs_dentry_from_dir_inode(dir);
+	dfid = v9fs_fid_lookup(dir_dentry);
+	if (IS_ERR(dfid)) {
+		err = PTR_ERR(dfid);
+		P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+		dfid = NULL;
+		goto error;
+	}
+
+	/*
+	 * No failure test on the result: gid_t is an unsigned type, so a
+	 * "gid < 0" comparison is always false and is therefore not made.
+	 */
+	gid = v9fs_get_fsgid_for_create(dir);
+
+	name = (char *) dentry->d_name.name;
+	err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
+	if (err < 0)
+		goto error;
+
+	/* instantiate inode and assign the unopened fid to the dentry */
+	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+		fid = p9_client_walk(dfid, 1, &name, 1);
+		if (IS_ERR(fid)) {
+			err = PTR_ERR(fid);
+			P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+				err);
+			fid = NULL;
+			goto error;
+		}
+
+		inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+		if (IS_ERR(inode)) {
+			err = PTR_ERR(inode);
+			P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+				err);
+			goto error;
+		}
+		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_instantiate(dentry, inode);
+		err = v9fs_fid_add(dentry, fid);
+		if (err < 0)
+			goto error;
+		fid = NULL;
+	}
+error:
+	if (fid)
+		p9_client_clunk(fid);
+	return err;
+}
+
 /**
  * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
  * @dir:  inode that is being walked from
@@ -1641,7 +1718,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
 	struct inode *inode;
 	gid_t gid;
 	struct p9_qid qid;
-	struct dentry *dir_entry;
+	struct dentry *dir_dentry;
 
 	P9_DPRINTK(P9_DEBUG_VFS,
 		" %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
@@ -1652,7 +1729,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
 
 	v9ses = v9fs_inode2v9ses(dir);
 	dir_dentry = v9fs_dentry_from_dir_inode(dir);
-	dfid = v9fs_fid_lookup(dir_entry);
+	dfid = v9fs_fid_lookup(dir_dentry);
 	if (IS_ERR(dfid)) {
 		err = PTR_ERR(dfid);
 		P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
@@ -1736,7 +1813,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotl = {
 	.link = v9fs_vfs_link_dotl,
 	.symlink = v9fs_vfs_symlink_dotl,
 	.unlink = v9fs_vfs_unlink,
-	.mkdir = v9fs_vfs_mkdir,
+	.mkdir = v9fs_vfs_mkdir_dotl,
 	.rmdir = v9fs_vfs_rmdir,
 	.mknod = v9fs_vfs_mknod_dotl,
 	.rename = v9fs_vfs_rename,
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index ff32091d806..091b471d8f0 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -94,6 +94,8 @@ do { \
  * @P9_RMKNOD: create a special file object response
  * @P9_TRENAME: rename request
  * @P9_RRENAME: rename response
+ * @P9_TMKDIR: create a directory request
+ * @P9_RMKDIR: create a directory response
  * @P9_TVERSION: version handshake request
  * @P9_RVERSION: version handshake response
  * @P9_TAUTH: request to establish authentication channel
@@ -149,6 +151,8 @@ enum p9_msg_t {
 	P9_RREADDIR,
 	P9_TLINK = 70,
 	P9_RLINK,
+	P9_TMKDIR = 72,
+	P9_RMKDIR,
 	P9_TVERSION = 100,
 	P9_RVERSION,
 	P9_TAUTH = 102,
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 6e70358c71d..55d913a9b79 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -247,6 +247,8 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
 
 int p9_client_mknod_dotl(struct p9_fid *oldfid, char *name, int mode,
 			dev_t rdev, gid_t gid, struct p9_qid *);
+int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode,
+				gid_t gid, struct p9_qid *);
 struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req);
 
diff --git a/net/9p/client.c b/net/9p/client.c
index cdfbd674079..a3bdd341f2a 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1653,3 +1653,34 @@ error:
 
 }
 EXPORT_SYMBOL(p9_client_mknod_dotl);
+
+int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode,
+				gid_t gid, struct p9_qid *qid)
+{
+	int err;
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+
+	err = 0;
+	clnt = fid->clnt;
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n",
+		 fid->fid, name, mode, gid);
+	req = p9_client_rpc(clnt, P9_TMKDIR, "dsdd", fid->fid, name, mode,
+		gid);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto error;
+	}
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
+				(unsigned long long)qid->path, qid->version);
+
+error:
+	p9_free_req(clnt, req);
+	return err;
+
+}
+EXPORT_SYMBOL(p9_client_mkdir_dotl);
-- 
cgit v1.2.3-70-g09d2


From 5643135a28464e7c19d8d23a9e0804697a62c84b Mon Sep 17 00:00:00 2001
From: "Venkateswararao Jujjuri (JV)" <jvrao@linux.vnet.ibm.com>
Date: Thu, 17 Jun 2010 18:27:46 -0700
Subject: fs/9p: This patch implements TLCREATE for 9p2000.L protocol.

SYNOPSIS

    size[4] Tlcreate tag[2] fid[4] name[s] flags[4] mode[4] gid[4]

    size[4] Rlcreate tag[2] qid[13] iounit[4]

DESCRIPTION

The Tlcreate request asks the file server to create a new regular file with the
name supplied, in the directory (dir) represented by fid.
The mode argument specifies the permissions to use. The new file is created with
the uid of the fid and with the supplied gid.

The flags argument represents the Linux access mode flags with which the caller
is requesting to open the file. The protocol allows all the Linux access
modes, but it is up to the server to allow/disallow any of these access modes.
If the server doesn't support a requested access mode, it is expected to
return an error.

Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c       | 114 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/net/9p/9p.h     |   4 ++
 include/net/9p/client.h |   2 +
 net/9p/client.c         |  44 +++++++++++++++++++
 4 files changed, 163 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 39dc7956732..2ac245902a4 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -641,6 +641,118 @@ error:
 	return ERR_PTR(err);
 }
 
+/**
+ * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
+ * @dir: inode of the directory in which the file is being created
+ * @dentry:  dentry of the file that is being created
+ * @mode: create permissions
+ * @nd: path information
+ *
+ */
+
+static int
+v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
+		struct nameidata *nd)
+{
+	int err = 0;
+	char *name = NULL;
+	gid_t gid;
+	int flags;
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *fid = NULL;
+	struct p9_fid *dfid, *ofid;
+	struct file *filp;
+	struct p9_qid qid;
+	struct inode *inode;
+
+	v9ses = v9fs_inode2v9ses(dir);
+	if (nd && nd->flags & LOOKUP_OPEN)
+		flags = nd->intent.open.flags - 1;
+	else
+		flags = O_RDWR;
+
+	name = (char *) dentry->d_name.name;
+	P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
+			"mode:0x%x\n", name, flags, mode);
+
+	dfid = v9fs_fid_lookup(dentry->d_parent);
+	if (IS_ERR(dfid)) {
+		err = PTR_ERR(dfid);
+		P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+		return err;
+	}
+
+	/* clone a fid to use for creation */
+	ofid = p9_client_walk(dfid, 0, NULL, 1);
+	if (IS_ERR(ofid)) {
+		err = PTR_ERR(ofid);
+		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+		return err;
+	}
+
+	gid = v9fs_get_fsgid_for_create(dir);
+	err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
+	if (err < 0) {
+		P9_DPRINTK(P9_DEBUG_VFS,
+				"p9_client_open_dotl failed in creat %d\n",
+				err);
+		goto error;
+	}
+
+	/* No need to populate the inode if we are not opening the file AND
+	 * not in cached mode.
+	 */
+	if (!v9ses->cache && !(nd && nd->flags & LOOKUP_OPEN)) {
+		/* Not in cached mode. No need to populate inode with stat */
+		dentry->d_op = &v9fs_dentry_operations;
+		p9_client_clunk(ofid);
+		d_instantiate(dentry, NULL);
+		return 0;
+	}
+
+	/* Now walk from the parent so we can get an unopened fid. */
+	fid = p9_client_walk(dfid, 1, &name, 1);
+	if (IS_ERR(fid)) {
+		err = PTR_ERR(fid);
+		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+		fid = NULL;
+		goto error;
+	}
+
+	/* instantiate inode and assign the unopened fid to dentry */
+	inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
+		goto error;
+	}
+	dentry->d_op = &v9fs_cached_dentry_operations;
+	d_instantiate(dentry, inode);
+	err = v9fs_fid_add(dentry, fid);
+	if (err < 0)
+		goto error;
+
+	/* if we are opening a file, assign the open fid to the file */
+	if (nd && nd->flags & LOOKUP_OPEN) {
+		filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created);
+		if (IS_ERR(filp)) {
+			p9_client_clunk(ofid);
+			return PTR_ERR(filp);
+		}
+		filp->private_data = ofid;
+	} else
+		p9_client_clunk(ofid);
+
+	return 0;
+
+error:
+	if (ofid)
+		p9_client_clunk(ofid);
+	if (fid)
+		p9_client_clunk(fid);
+	return err;
+}
+
 /**
  * v9fs_vfs_create - VFS hook to create files
  * @dir: directory inode that is being created
@@ -1808,7 +1920,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
 };
 
 static const struct inode_operations v9fs_dir_inode_operations_dotl = {
-	.create = v9fs_vfs_create,
+	.create = v9fs_vfs_create_dotl,
 	.lookup = v9fs_vfs_lookup,
 	.link = v9fs_vfs_link_dotl,
 	.symlink = v9fs_vfs_symlink_dotl,
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 091b471d8f0..06d111d6103 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -92,6 +92,8 @@ do { \
  * @P9_RSYMLINK: make symlink response
  * @P9_TMKNOD: create a special file object request
  * @P9_RMKNOD: create a special file object response
+ * @P9_TLCREATE: prepare a handle for I/O on an new file for 9P2000.L
+ * @P9_RLCREATE: response with file access information for 9P2000.L
  * @P9_TRENAME: rename request
  * @P9_RRENAME: rename response
  * @P9_TMKDIR: create a directory request
@@ -137,6 +139,8 @@ do { \
 enum p9_msg_t {
 	P9_TSTATFS = 8,
 	P9_RSTATFS,
+	P9_TLCREATE = 14,
+	P9_RLCREATE,
 	P9_TSYMLINK = 16,
 	P9_RSYMLINK,
 	P9_TMKNOD = 18,
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 55d913a9b79..d755c0ed675 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -229,6 +229,8 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
 int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, char *newname);
 int p9_client_symlink(struct p9_fid *fid, char *name, char *symname, gid_t gid,
 							struct p9_qid *qid);
+int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode,
+		gid_t gid, struct p9_qid *qid);
 int p9_client_clunk(struct p9_fid *fid);
 int p9_client_remove(struct p9_fid *fid);
 int p9_client_read(struct p9_fid *fid, char *data, char __user *udata,
diff --git a/net/9p/client.c b/net/9p/client.c
index a3bdd341f2a..e580409b105 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1050,6 +1050,50 @@ error:
 }
 EXPORT_SYMBOL(p9_client_open);
 
+int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode,
+		gid_t gid, struct p9_qid *qid)
+{
+	int err = 0;
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int iounit;
+
+	P9_DPRINTK(P9_DEBUG_9P,
+			">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n",
+			ofid->fid, name, flags, mode, gid);
+	clnt = ofid->clnt;
+
+	if (ofid->mode != -1)
+		return -EINVAL;
+
+	req = p9_client_rpc(clnt, P9_TLCREATE, "dsddd", ofid->fid, name, flags,
+			mode, gid);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
+
+	err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto free_and_error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n",
+			qid->type,
+			(unsigned long long)qid->path,
+			qid->version, iounit);
+
+	ofid->mode = mode;
+	ofid->iounit = iounit;
+
+free_and_error:
+	p9_free_req(clnt, req);
+error:
+	return err;
+}
+EXPORT_SYMBOL(p9_client_create_dotl);
+
 int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
 		     char *extension)
 {
-- 
cgit v1.2.3-70-g09d2


From ef56547efa3c88609069e2a91f46e25c31dd536e Mon Sep 17 00:00:00 2001
From: "M. Mohan Kumar" <mohan@in.ibm.com>
Date: Tue, 22 Jun 2010 19:47:50 +0530
Subject: 9p: Implement LOPEN

Implement 9p2000.L version of open(LOPEN) interface in 9p client.

For LOPEN, no need to convert the flags to and from 9p mode to VFS mode.

Synopsis:

    size[4] Tlopen tag[2] fid[4] mode[4]

    size[4] Rlopen tag[2] qid[13] iounit[4]

[Fix mode bit format - jvrao@linux.vnet.ibm.com]

Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbegren <ericvh@gmail.com>
---
 fs/9p/vfs_file.c    | 13 +++++++++----
 include/net/9p/9p.h |  2 ++
 net/9p/client.c     | 17 ++++++++++-------
 3 files changed, 21 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 2d686ec322a..e97c92bd6f1 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -59,9 +59,13 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 	struct p9_fid *fid;
 	int omode;
 
-	P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file);
+	P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
 	v9ses = v9fs_inode2v9ses(inode);
-	omode = v9fs_uflags2omode(file->f_flags, v9fs_proto_dotu(v9ses));
+	if (v9fs_proto_dotl(v9ses))
+		omode = file->f_flags;
+	else
+		omode = v9fs_uflags2omode(file->f_flags,
+					v9fs_proto_dotu(v9ses));
 	fid = file->private_data;
 	if (!fid) {
 		fid = v9fs_fid_clone(file->f_path.dentry);
@@ -73,11 +77,12 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 			p9_client_clunk(fid);
 			return err;
 		}
-		if (omode & P9_OTRUNC) {
+		if (file->f_flags & O_TRUNC) {
 			i_size_write(inode, 0);
 			inode->i_blocks = 0;
 		}
-		if ((file->f_flags & O_APPEND) && (!v9fs_proto_dotu(v9ses)))
+		if ((file->f_flags & O_APPEND) &&
+			(!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)))
 			generic_file_llseek(file, 0, SEEK_END);
 	}
 
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 06d111d6103..cf580a40e29 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -139,6 +139,8 @@ do { \
 enum p9_msg_t {
 	P9_TSTATFS = 8,
 	P9_RSTATFS,
+	P9_TLOPEN = 12,
+	P9_RLOPEN,
 	P9_TLCREATE = 14,
 	P9_RLCREATE,
 	P9_TSYMLINK = 16,
diff --git a/net/9p/client.c b/net/9p/client.c
index e580409b105..c458e042d38 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1016,14 +1016,18 @@ int p9_client_open(struct p9_fid *fid, int mode)
 	struct p9_qid qid;
 	int iounit;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TOPEN fid %d mode %d\n", fid->fid, mode);
-	err = 0;
 	clnt = fid->clnt;
+	P9_DPRINTK(P9_DEBUG_9P, ">>> %s fid %d mode %d\n",
+		p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode);
+	err = 0;
 
 	if (fid->mode != -1)
 		return -EINVAL;
 
-	req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode);
+	if (p9_is_proto_dotl(clnt))
+		req = p9_client_rpc(clnt, P9_TLOPEN, "dd", fid->fid, mode);
+	else
+		req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto error;
@@ -1035,10 +1039,9 @@ int p9_client_open(struct p9_fid *fid, int mode)
 		goto free_and_error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n",
-				qid.type,
-				(unsigned long long)qid.path,
-				qid.version, iounit);
+	P9_DPRINTK(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n",
+		p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN",  qid.type,
+		(unsigned long long)qid.path, qid.version, iounit);
 
 	fid->mode = mode;
 	fid->iounit = iounit;
-- 
cgit v1.2.3-70-g09d2


From 0ef63f345c48afe5896c5cffcba57f0457d409b9 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 31 May 2010 13:22:45 +0530
Subject: net/9p: Implement attrwalk 9p call

TXATTRWALK: Descend an ATTR namespace

 size[4] TXATTRWALK tag[2] fid[4] newfid[4] name[s]
 size[4] RXATTRWALK tag[2] size[8]

txattrwalk gets a fid pointing to an xattr. This fid can later be
used to read the xattr value. If name is NULL, the fid returned
can be used to get the list of extended attributes associated with
the file system object.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/net/9p/9p.h     |  2 ++
 include/net/9p/client.h |  1 +
 net/9p/client.c         | 50 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+)

(limited to 'include')

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index cf580a40e29..6fabb5e559b 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -153,6 +153,8 @@ enum p9_msg_t {
 	P9_RGETATTR,
 	P9_TSETATTR = 26,
 	P9_RSETATTR,
+	P9_TXATTRWALK = 30,
+	P9_RXATTRWALK,
 	P9_TREADDIR = 40,
 	P9_RREADDIR,
 	P9_TLINK = 70,
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index d755c0ed675..60398b1a3f7 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -260,5 +260,6 @@ void p9stat_free(struct p9_wstat *);
 
 int p9_is_proto_dotu(struct p9_client *clnt);
 int p9_is_proto_dotl(struct p9_client *clnt);
+struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const char *, u64 *);
 
 #endif /* NET_9P_CLIENT_H */
diff --git a/net/9p/client.c b/net/9p/client.c
index c458e042d38..ec80ee71d45 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1622,6 +1622,56 @@ error:
 }
 EXPORT_SYMBOL(p9_client_rename);
 
+/*
+ * An xattrwalk without @attr_name gives the fid for the listxattr namespace
+ */
+struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
+				const char *attr_name, u64 *attr_size)
+{
+	int err;
+	struct p9_req_t *req;
+	struct p9_client *clnt;
+	struct p9_fid *attr_fid;
+
+	err = 0;
+	clnt = file_fid->clnt;
+	attr_fid = p9_fid_create(clnt);
+	if (IS_ERR(attr_fid)) {
+		err = PTR_ERR(attr_fid);
+		attr_fid = NULL;
+		goto error;
+	}
+	P9_DPRINTK(P9_DEBUG_9P,
+		">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n",
+		file_fid->fid, attr_fid->fid, attr_name);
+
+	req = p9_client_rpc(clnt, P9_TXATTRWALK, "dds",
+			file_fid->fid, attr_fid->fid, attr_name);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
+	err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		p9_free_req(clnt, req);
+		goto clunk_fid;
+	}
+	p9_free_req(clnt, req);
+	P9_DPRINTK(P9_DEBUG_9P, "<<<  RXATTRWALK fid %d size %llu\n",
+		attr_fid->fid, *attr_size);
+	return attr_fid;
+clunk_fid:
+	p9_client_clunk(attr_fid);
+	attr_fid = NULL;
+error:
+	if (attr_fid && (attr_fid != file_fid))
+		p9_fid_destroy(attr_fid);
+
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(p9_client_xattrwalk);
+
 int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
 {
 	int err, rsize, total;
-- 
cgit v1.2.3-70-g09d2


From eda25e46161527845572131b37706a458d9270ef Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 31 May 2010 13:22:50 +0530
Subject: net/9p: Implement TXATTRCREATE 9p call

TXATTRCREATE:  Prepare a fid for setting xattr value on a file system object.

 size[4] TXATTRCREATE tag[2] fid[4] name[s] attr_size[8] flags[4]
 size[4] RXATTRCREATE tag[2]

txattrcreate gets a fid pointing to xattr. This fid can later be
used to set the xattr value.

The flag value is derived from Linux setxattr. The manpage says
"The flags parameter can be used to refine the semantics of the operation.
XATTR_CREATE specifies a pure create, which fails if the named attribute
exists already. XATTR_REPLACE specifies a pure replace operation, which
fails if the named attribute does not already exist. By default (no flags),
the extended attribute will be created if need be, or will simply replace
the value if the attribute exists."

The actual setxattr operation happens when the fid is clunked. At that point
the written byte count and the attr_size specified in TXATTRCREATE should be
the same; otherwise an error will be returned.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/net/9p/9p.h     |  2 ++
 include/net/9p/client.h |  1 +
 net/9p/client.c         | 25 +++++++++++++++++++++++++
 3 files changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 6fabb5e559b..a8de812ccbc 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -155,6 +155,8 @@ enum p9_msg_t {
 	P9_RSETATTR,
 	P9_TXATTRWALK = 30,
 	P9_RXATTRWALK,
+	P9_TXATTRCREATE = 32,
+	P9_RXATTRCREATE,
 	P9_TREADDIR = 40,
 	P9_RREADDIR,
 	P9_TLINK = 70,
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 60398b1a3f7..d1aa2cfb30f 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -261,5 +261,6 @@ void p9stat_free(struct p9_wstat *);
 int p9_is_proto_dotu(struct p9_client *clnt);
 int p9_is_proto_dotl(struct p9_client *clnt);
 struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const char *, u64 *);
+int p9_client_xattrcreate(struct p9_fid *, const char *, u64, int);
 
 #endif /* NET_9P_CLIENT_H */
diff --git a/net/9p/client.c b/net/9p/client.c
index ec80ee71d45..43396acd714 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1672,6 +1672,31 @@ error:
 }
 EXPORT_SYMBOL_GPL(p9_client_xattrwalk);
 
+int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
+			u64 attr_size, int flags)
+{
+	int err;
+	struct p9_req_t *req;
+	struct p9_client *clnt;
+
+	P9_DPRINTK(P9_DEBUG_9P,
+		">>> TXATTRCREATE fid %d name  %s size %lld flag %d\n",
+		fid->fid, name, (long long)attr_size, flags);
+	err = 0;
+	clnt = fid->clnt;
+	req = p9_client_rpc(clnt, P9_TXATTRCREATE, "dsqd",
+			fid->fid, name, attr_size, flags);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid);
+	p9_free_req(clnt, req);
+error:
+	return err;
+}
+EXPORT_SYMBOL_GPL(p9_client_xattrcreate);
+
 int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
 {
 	int err, rsize, total;
-- 
cgit v1.2.3-70-g09d2


From 5690085e7ba7f3081c6ab6db3a3b543444ad8a21 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Fri, 16 Jul 2010 14:25:33 -0300
Subject: V4L/DVB: IR/lirc: make lirc userspace and staging modules buildable

The lirc userspace needs all the current ioctls defined, and we need to
put the header files in places that out-of-tree and/or staging lirc drivers
(which I plan to prep soon) can easily build against. I've actually tested this
in a tree w/all the lirc drivers queued up to be submitted for staging. I'm
also reasonably sure that Andy Walls is going to need most of the ioctls
anyway for his cx23888 IR driver work.

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/IR/ir-lirc-codec.c |   2 +-
 drivers/media/IR/lirc_dev.c      |   2 +-
 drivers/media/IR/lirc_dev.h      | 225 ---------------------------------------
 include/media/lirc.h             |  34 +++---
 include/media/lirc_dev.h         | 225 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 245 insertions(+), 243 deletions(-)
 delete mode 100644 drivers/media/IR/lirc_dev.h
 create mode 100644 include/media/lirc_dev.h

(limited to 'include')

diff --git a/drivers/media/IR/ir-lirc-codec.c b/drivers/media/IR/ir-lirc-codec.c
index 178bc5baab7..afb1ada36c7 100644
--- a/drivers/media/IR/ir-lirc-codec.c
+++ b/drivers/media/IR/ir-lirc-codec.c
@@ -15,9 +15,9 @@
 #include <linux/sched.h>
 #include <linux/wait.h>
 #include <media/lirc.h>
+#include <media/lirc_dev.h>
 #include <media/ir-core.h>
 #include "ir-core-priv.h"
-#include "lirc_dev.h"
 
 #define LIRCBUF_SIZE 256
 
diff --git a/drivers/media/IR/lirc_dev.c b/drivers/media/IR/lirc_dev.c
index c11b8f70625..899891bec35 100644
--- a/drivers/media/IR/lirc_dev.c
+++ b/drivers/media/IR/lirc_dev.c
@@ -37,7 +37,7 @@
 #include <linux/cdev.h>
 
 #include <media/lirc.h>
-#include "lirc_dev.h"
+#include <media/lirc_dev.h>
 
 static int debug;
 
diff --git a/drivers/media/IR/lirc_dev.h b/drivers/media/IR/lirc_dev.h
deleted file mode 100644
index b1f60663cb3..00000000000
--- a/drivers/media/IR/lirc_dev.h
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * LIRC base driver
- *
- * by Artur Lipowski <alipowski@interia.pl>
- *        This code is licensed under GNU GPL
- *
- */
-
-#ifndef _LINUX_LIRC_DEV_H
-#define _LINUX_LIRC_DEV_H
-
-#define MAX_IRCTL_DEVICES 4
-#define BUFLEN            16
-
-#define mod(n, div) ((n) % (div))
-
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/ioctl.h>
-#include <linux/poll.h>
-#include <linux/kfifo.h>
-#include <media/lirc.h>
-
-struct lirc_buffer {
-	wait_queue_head_t wait_poll;
-	spinlock_t fifo_lock;
-	unsigned int chunk_size;
-	unsigned int size; /* in chunks */
-	/* Using chunks instead of bytes pretends to simplify boundary checking
-	 * And should allow for some performance fine tunning later */
-	struct kfifo fifo;
-	u8 fifo_initialized;
-};
-
-static inline void lirc_buffer_clear(struct lirc_buffer *buf)
-{
-	unsigned long flags;
-
-	if (buf->fifo_initialized) {
-		spin_lock_irqsave(&buf->fifo_lock, flags);
-		kfifo_reset(&buf->fifo);
-		spin_unlock_irqrestore(&buf->fifo_lock, flags);
-	} else
-		WARN(1, "calling %s on an uninitialized lirc_buffer\n",
-		     __func__);
-}
-
-static inline int lirc_buffer_init(struct lirc_buffer *buf,
-				    unsigned int chunk_size,
-				    unsigned int size)
-{
-	int ret;
-
-	init_waitqueue_head(&buf->wait_poll);
-	spin_lock_init(&buf->fifo_lock);
-	buf->chunk_size = chunk_size;
-	buf->size = size;
-	ret = kfifo_alloc(&buf->fifo, size * chunk_size, GFP_KERNEL);
-	if (ret == 0)
-		buf->fifo_initialized = 1;
-
-	return ret;
-}
-
-static inline void lirc_buffer_free(struct lirc_buffer *buf)
-{
-	if (buf->fifo_initialized) {
-		kfifo_free(&buf->fifo);
-		buf->fifo_initialized = 0;
-	} else
-		WARN(1, "calling %s on an uninitialized lirc_buffer\n",
-		     __func__);
-}
-
-static inline int lirc_buffer_len(struct lirc_buffer *buf)
-{
-	int len;
-	unsigned long flags;
-
-	spin_lock_irqsave(&buf->fifo_lock, flags);
-	len = kfifo_len(&buf->fifo);
-	spin_unlock_irqrestore(&buf->fifo_lock, flags);
-
-	return len;
-}
-
-static inline int lirc_buffer_full(struct lirc_buffer *buf)
-{
-	return lirc_buffer_len(buf) == buf->size * buf->chunk_size;
-}
-
-static inline int lirc_buffer_empty(struct lirc_buffer *buf)
-{
-	return !lirc_buffer_len(buf);
-}
-
-static inline int lirc_buffer_available(struct lirc_buffer *buf)
-{
-	return buf->size - (lirc_buffer_len(buf) / buf->chunk_size);
-}
-
-static inline unsigned int lirc_buffer_read(struct lirc_buffer *buf,
-					    unsigned char *dest)
-{
-	unsigned int ret = 0;
-
-	if (lirc_buffer_len(buf) >= buf->chunk_size)
-		ret = kfifo_out_locked(&buf->fifo, dest, buf->chunk_size,
-				       &buf->fifo_lock);
-	return ret;
-
-}
-
-static inline unsigned int lirc_buffer_write(struct lirc_buffer *buf,
-					     unsigned char *orig)
-{
-	unsigned int ret;
-
-	ret = kfifo_in_locked(&buf->fifo, orig, buf->chunk_size,
-			      &buf->fifo_lock);
-
-	return ret;
-}
-
-struct lirc_driver {
-	char name[40];
-	int minor;
-	unsigned long code_length;
-	unsigned int buffer_size; /* in chunks holding one code each */
-	int sample_rate;
-	unsigned long features;
-
-	unsigned int chunk_size;
-
-	void *data;
-	int min_timeout;
-	int max_timeout;
-	int (*add_to_buf) (void *data, struct lirc_buffer *buf);
-	struct lirc_buffer *rbuf;
-	int (*set_use_inc) (void *data);
-	void (*set_use_dec) (void *data);
-	struct file_operations *fops;
-	struct device *dev;
-	struct module *owner;
-};
-
-/* name:
- * this string will be used for logs
- *
- * minor:
- * indicates minor device (/dev/lirc) number for registered driver
- * if caller fills it with negative value, then the first free minor
- * number will be used (if available)
- *
- * code_length:
- * length of the remote control key code expressed in bits
- *
- * sample_rate:
- *
- * data:
- * it may point to any driver data and this pointer will be passed to
- * all callback functions
- *
- * add_to_buf:
- * add_to_buf will be called after specified period of the time or
- * triggered by the external event, this behavior depends on value of
- * the sample_rate this function will be called in user context. This
- * routine should return 0 if data was added to the buffer and
- * -ENODATA if none was available. This should add some number of bits
- * evenly divisible by code_length to the buffer
- *
- * rbuf:
- * if not NULL, it will be used as a read buffer, you will have to
- * write to the buffer by other means, like irq's (see also
- * lirc_serial.c).
- *
- * set_use_inc:
- * set_use_inc will be called after device is opened
- *
- * set_use_dec:
- * set_use_dec will be called after device is closed
- *
- * fops:
- * file_operations for drivers which don't fit the current driver model.
- *
- * Some ioctl's can be directly handled by lirc_dev if the driver's
- * ioctl function is NULL or if it returns -ENOIOCTLCMD (see also
- * lirc_serial.c).
- *
- * owner:
- * the module owning this struct
- *
- */
-
-
-/* following functions can be called ONLY from user context
- *
- * returns negative value on error or minor number
- * of the registered device if success
- * contents of the structure pointed by p is copied
- */
-extern int lirc_register_driver(struct lirc_driver *d);
-
-/* returns negative value on error or 0 if success
-*/
-extern int lirc_unregister_driver(int minor);
-
-/* Returns the private data stored in the lirc_driver
- * associated with the given device file pointer.
- */
-void *lirc_get_pdata(struct file *file);
-
-/* default file operations
- * used by drivers if they override only some operations
- */
-int lirc_dev_fop_open(struct inode *inode, struct file *file);
-int lirc_dev_fop_close(struct inode *inode, struct file *file);
-unsigned int lirc_dev_fop_poll(struct file *file, poll_table *wait);
-long lirc_dev_fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-ssize_t lirc_dev_fop_read(struct file *file, char *buffer, size_t length,
-			  loff_t *ppos);
-ssize_t lirc_dev_fop_write(struct file *file, const char *buffer, size_t length,
-			   loff_t *ppos);
-
-#endif
diff --git a/include/media/lirc.h b/include/media/lirc.h
index 8dffd4f47bf..42c467c5051 100644
--- a/include/media/lirc.h
+++ b/include/media/lirc.h
@@ -1,6 +1,6 @@
 /*
  * lirc.h - linux infrared remote control header file
- * last modified 2010/06/03 by Jarod Wilson
+ * last modified 2010/07/13 by Jarod Wilson
  */
 
 #ifndef _LINUX_LIRC_H
@@ -33,6 +33,9 @@
 #define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY)
 #define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT)
 
+/* used heavily by lirc userspace */
+#define lirc_t int
+
 /*** lirc compatible hardware features ***/
 
 #define LIRC_MODE2SEND(x) (x)
@@ -95,12 +98,10 @@
 #define LIRC_GET_MIN_TIMEOUT           _IOR('i', 0x00000008, __u32)
 #define LIRC_GET_MAX_TIMEOUT           _IOR('i', 0x00000009, __u32)
 
-#if 0	/* these ioctls are not used at the moment */
 #define LIRC_GET_MIN_FILTER_PULSE      _IOR('i', 0x0000000a, __u32)
 #define LIRC_GET_MAX_FILTER_PULSE      _IOR('i', 0x0000000b, __u32)
 #define LIRC_GET_MIN_FILTER_SPACE      _IOR('i', 0x0000000c, __u32)
 #define LIRC_GET_MAX_FILTER_SPACE      _IOR('i', 0x0000000d, __u32)
-#endif
 
 /* code length in bits, currently only for LIRC_MODE_LIRCCODE */
 #define LIRC_GET_LENGTH                _IOR('i', 0x0000000f, __u32)
@@ -121,23 +122,30 @@
  */
 #define LIRC_SET_REC_TIMEOUT           _IOW('i', 0x00000018, __u32)
 
-#if 0	/* these ioctls are not used at the moment */
+/* 1 enables, 0 disables timeout reports in MODE2 */
+#define LIRC_SET_REC_TIMEOUT_REPORTS   _IOW('i', 0x00000019, __u32)
+
 /*
  * pulses shorter than this are filtered out by hardware (software
  * emulation in lirc_dev?)
  */
-#define LIRC_SET_REC_FILTER_PULSE      _IOW('i', 0x00000019, __u32)
+#define LIRC_SET_REC_FILTER_PULSE      _IOW('i', 0x0000001a, __u32)
 /*
  * spaces shorter than this are filtered out by hardware (software
  * emulation in lirc_dev?)
  */
-#define LIRC_SET_REC_FILTER_SPACE      _IOW('i', 0x0000001a, __u32)
+#define LIRC_SET_REC_FILTER_SPACE      _IOW('i', 0x0000001b, __u32)
 /*
  * if filter cannot be set independantly for pulse/space, this should
  * be used
  */
-#define LIRC_SET_REC_FILTER            _IOW('i', 0x0000001b, __u32)
-#endif
+#define LIRC_SET_REC_FILTER            _IOW('i', 0x0000001c, __u32)
+
+/*
+ * if enabled from the next key press on the driver will send
+ * LIRC_MODE2_FREQUENCY packets
+ */
+#define LIRC_SET_MEASURE_CARRIER_MODE  _IOW('i', 0x0000001d, __u32)
 
 /*
  * to set a range use
@@ -151,13 +159,7 @@
 
 #define LIRC_NOTIFY_DECODE             _IO('i', 0x00000020)
 
-#if 0	/* these ioctls are not used at the moment */
-/*
- * from the next key press on the driver will send
- * LIRC_MODE2_FREQUENCY packets
- */
-#define LIRC_MEASURE_CARRIER_ENABLE    _IO('i', 0x00000021)
-#define LIRC_MEASURE_CARRIER_DISABLE   _IO('i', 0x00000022)
-#endif
+#define LIRC_SETUP_START               _IO('i', 0x00000021)
+#define LIRC_SETUP_END                 _IO('i', 0x00000022)
 
 #endif
diff --git a/include/media/lirc_dev.h b/include/media/lirc_dev.h
new file mode 100644
index 00000000000..b1f60663cb3
--- /dev/null
+++ b/include/media/lirc_dev.h
@@ -0,0 +1,225 @@
+/*
+ * LIRC base driver
+ *
+ * by Artur Lipowski <alipowski@interia.pl>
+ *        This code is licensed under GNU GPL
+ *
+ */
+
+#ifndef _LINUX_LIRC_DEV_H
+#define _LINUX_LIRC_DEV_H
+
+#define MAX_IRCTL_DEVICES 4
+#define BUFLEN            16
+
+#define mod(n, div) ((n) % (div))
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/ioctl.h>
+#include <linux/poll.h>
+#include <linux/kfifo.h>
+#include <media/lirc.h>
+
+struct lirc_buffer {
+	wait_queue_head_t wait_poll;
+	spinlock_t fifo_lock;
+	unsigned int chunk_size;
+	unsigned int size; /* in chunks */
+	/* Using chunks instead of bytes is intended to simplify boundary
+	 * checking and should allow for some performance fine-tuning later */
+	struct kfifo fifo;
+	u8 fifo_initialized;
+};
+
+static inline void lirc_buffer_clear(struct lirc_buffer *buf)
+{
+	unsigned long flags;
+
+	if (buf->fifo_initialized) {
+		spin_lock_irqsave(&buf->fifo_lock, flags);
+		kfifo_reset(&buf->fifo);
+		spin_unlock_irqrestore(&buf->fifo_lock, flags);
+	} else
+		WARN(1, "calling %s on an uninitialized lirc_buffer\n",
+		     __func__);
+}
+
+static inline int lirc_buffer_init(struct lirc_buffer *buf,
+				    unsigned int chunk_size,
+				    unsigned int size)
+{
+	int ret;
+
+	init_waitqueue_head(&buf->wait_poll);
+	spin_lock_init(&buf->fifo_lock);
+	buf->chunk_size = chunk_size;
+	buf->size = size;
+	ret = kfifo_alloc(&buf->fifo, size * chunk_size, GFP_KERNEL);
+	if (ret == 0)
+		buf->fifo_initialized = 1;
+
+	return ret;
+}
+
+static inline void lirc_buffer_free(struct lirc_buffer *buf)
+{
+	if (buf->fifo_initialized) {
+		kfifo_free(&buf->fifo);
+		buf->fifo_initialized = 0;
+	} else
+		WARN(1, "calling %s on an uninitialized lirc_buffer\n",
+		     __func__);
+}
+
+static inline int lirc_buffer_len(struct lirc_buffer *buf)
+{
+	int len;
+	unsigned long flags;
+
+	spin_lock_irqsave(&buf->fifo_lock, flags);
+	len = kfifo_len(&buf->fifo);
+	spin_unlock_irqrestore(&buf->fifo_lock, flags);
+
+	return len;
+}
+
+static inline int lirc_buffer_full(struct lirc_buffer *buf)
+{
+	return lirc_buffer_len(buf) == buf->size * buf->chunk_size;
+}
+
+static inline int lirc_buffer_empty(struct lirc_buffer *buf)
+{
+	return !lirc_buffer_len(buf);
+}
+
+static inline int lirc_buffer_available(struct lirc_buffer *buf)
+{
+	return buf->size - (lirc_buffer_len(buf) / buf->chunk_size);
+}
+
+static inline unsigned int lirc_buffer_read(struct lirc_buffer *buf,
+					    unsigned char *dest)
+{
+	unsigned int ret = 0;
+
+	if (lirc_buffer_len(buf) >= buf->chunk_size)
+		ret = kfifo_out_locked(&buf->fifo, dest, buf->chunk_size,
+				       &buf->fifo_lock);
+	return ret;
+
+}
+
+static inline unsigned int lirc_buffer_write(struct lirc_buffer *buf,
+					     unsigned char *orig)
+{
+	unsigned int ret;
+
+	ret = kfifo_in_locked(&buf->fifo, orig, buf->chunk_size,
+			      &buf->fifo_lock);
+
+	return ret;
+}
+
+struct lirc_driver {
+	char name[40];
+	int minor;
+	unsigned long code_length;
+	unsigned int buffer_size; /* in chunks holding one code each */
+	int sample_rate;
+	unsigned long features;
+
+	unsigned int chunk_size;
+
+	void *data;
+	int min_timeout;
+	int max_timeout;
+	int (*add_to_buf) (void *data, struct lirc_buffer *buf);
+	struct lirc_buffer *rbuf;
+	int (*set_use_inc) (void *data);
+	void (*set_use_dec) (void *data);
+	struct file_operations *fops;
+	struct device *dev;
+	struct module *owner;
+};
+
+/* name:
+ * this string will be used for logs
+ *
+ * minor:
+ * indicates minor device (/dev/lirc) number for registered driver
+ * if caller fills it with negative value, then the first free minor
+ * number will be used (if available)
+ *
+ * code_length:
+ * length of the remote control key code expressed in bits
+ *
+ * sample_rate:
+ *
+ * data:
+ * it may point to any driver data and this pointer will be passed to
+ * all callback functions
+ *
+ * add_to_buf:
+ * add_to_buf will be called after a specified period of time or when
+ * triggered by an external event; which one depends on the value of
+ * sample_rate. This function will be called in user context. This
+ * routine should return 0 if data was added to the buffer and
+ * -ENODATA if none was available. It should add some number of bits
+ * evenly divisible by code_length to the buffer.
+ *
+ * rbuf:
+ * if not NULL, it will be used as a read buffer; you will have to
+ * write to the buffer by other means, like irq's (see also
+ * lirc_serial.c).
+ *
+ * set_use_inc:
+ * set_use_inc will be called after device is opened
+ *
+ * set_use_dec:
+ * set_use_dec will be called after device is closed
+ *
+ * fops:
+ * file_operations for drivers which don't fit the current driver model.
+ *
+ * Some ioctl's can be directly handled by lirc_dev if the driver's
+ * ioctl function is NULL or if it returns -ENOIOCTLCMD (see also
+ * lirc_serial.c).
+ *
+ * owner:
+ * the module owning this struct
+ *
+ */
+
+
+/* The following functions can be called ONLY from user context.
+ *
+ * Returns a negative value on error, or the minor number
+ * of the registered device on success.
+ * The contents of the structure pointed to by d are copied.
+ */
+extern int lirc_register_driver(struct lirc_driver *d);
+
+/* Returns a negative value on error, or 0 on success.
+*/
+extern int lirc_unregister_driver(int minor);
+
+/* Returns the private data stored in the lirc_driver
+ * associated with the given device file pointer.
+ */
+void *lirc_get_pdata(struct file *file);
+
+/* default file operations
+ * used by drivers if they override only some operations
+ */
+int lirc_dev_fop_open(struct inode *inode, struct file *file);
+int lirc_dev_fop_close(struct inode *inode, struct file *file);
+unsigned int lirc_dev_fop_poll(struct file *file, poll_table *wait);
+long lirc_dev_fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+ssize_t lirc_dev_fop_read(struct file *file, char *buffer, size_t length,
+			  loff_t *ppos);
+ssize_t lirc_dev_fop_write(struct file *file, const char *buffer, size_t length,
+			   loff_t *ppos);
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From bbafc0cb6c52c40647f561854db5fbac4d608186 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Sat, 10 Jul 2010 15:03:20 -0300
Subject: V4L/DVB: uvc: Move constants and structures definitions to
 linux/usb/video.h

The UVC host and gadget drivers both define constants and structures in
private header files. Move all those definitions to linux/usb/video.h
where they can be shared by the two drivers (and be available for
userspace applications).

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/uvc/uvcvideo.h |  19 --
 drivers/usb/gadget/f_uvc.c         |  16 +-
 drivers/usb/gadget/f_uvc.h         | 352 +-------------------------------
 drivers/usb/gadget/uvc.h           |  36 ----
 drivers/usb/gadget/webcam.c        |  24 +--
 include/linux/usb/video.h          | 397 +++++++++++++++++++++++++++++++++++++
 6 files changed, 418 insertions(+), 426 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/uvc/uvcvideo.h b/drivers/media/video/uvc/uvcvideo.h
index 47b20e7e378..ac272456fbf 100644
--- a/drivers/media/video/uvc/uvcvideo.h
+++ b/drivers/media/video/uvc/uvcvideo.h
@@ -196,25 +196,6 @@ struct uvc_device;
 /* TODO: Put the most frequently accessed fields at the beginning of
  * structures to maximize cache efficiency.
  */
-struct uvc_streaming_control {
-	__u16 bmHint;
-	__u8  bFormatIndex;
-	__u8  bFrameIndex;
-	__u32 dwFrameInterval;
-	__u16 wKeyFrameRate;
-	__u16 wPFrameRate;
-	__u16 wCompQuality;
-	__u16 wCompWindowSize;
-	__u16 wDelay;
-	__u32 dwMaxVideoFrameSize;
-	__u32 dwMaxPayloadTransferSize;
-	__u32 dwClockFrequency;
-	__u8  bmFramingInfo;
-	__u8  bPreferedVersion;
-	__u8  bMinVersion;
-	__u8  bMaxVersion;
-};
-
 struct uvc_control_info {
 	struct list_head list;
 	struct list_head mappings;
diff --git a/drivers/usb/gadget/f_uvc.c b/drivers/usb/gadget/f_uvc.c
index dbe6db0184f..be446b7e7ea 100644
--- a/drivers/usb/gadget/f_uvc.c
+++ b/drivers/usb/gadget/f_uvc.c
@@ -61,12 +61,12 @@ static struct usb_gadget_strings *uvc_function_strings[] = {
 #define UVC_INTF_VIDEO_STREAMING		1
 
 static struct usb_interface_assoc_descriptor uvc_iad __initdata = {
-	.bLength		= USB_DT_INTERFACE_ASSOCIATION_SIZE,
+	.bLength		= sizeof(uvc_iad),
 	.bDescriptorType	= USB_DT_INTERFACE_ASSOCIATION,
 	.bFirstInterface	= 0,
 	.bInterfaceCount	= 2,
 	.bFunctionClass		= USB_CLASS_VIDEO,
-	.bFunctionSubClass	= 0x03,
+	.bFunctionSubClass	= UVC_SC_VIDEO_INTERFACE_COLLECTION,
 	.bFunctionProtocol	= 0x00,
 	.iFunction		= 0,
 };
@@ -78,7 +78,7 @@ static struct usb_interface_descriptor uvc_control_intf __initdata = {
 	.bAlternateSetting	= 0,
 	.bNumEndpoints		= 1,
 	.bInterfaceClass	= USB_CLASS_VIDEO,
-	.bInterfaceSubClass	= 0x01,
+	.bInterfaceSubClass	= UVC_SC_VIDEOCONTROL,
 	.bInterfaceProtocol	= 0x00,
 	.iInterface		= 0,
 };
@@ -106,7 +106,7 @@ static struct usb_interface_descriptor uvc_streaming_intf_alt0 __initdata = {
 	.bAlternateSetting	= 0,
 	.bNumEndpoints		= 0,
 	.bInterfaceClass	= USB_CLASS_VIDEO,
-	.bInterfaceSubClass	= 0x02,
+	.bInterfaceSubClass	= UVC_SC_VIDEOSTREAMING,
 	.bInterfaceProtocol	= 0x00,
 	.iInterface		= 0,
 };
@@ -118,7 +118,7 @@ static struct usb_interface_descriptor uvc_streaming_intf_alt1 __initdata = {
 	.bAlternateSetting	= 1,
 	.bNumEndpoints		= 1,
 	.bInterfaceClass	= USB_CLASS_VIDEO,
-	.bInterfaceSubClass	= 0x02,
+	.bInterfaceSubClass	= UVC_SC_VIDEOSTREAMING,
 	.bInterfaceProtocol	= 0x00,
 	.iInterface		= 0,
 };
@@ -603,15 +603,15 @@ uvc_bind_config(struct usb_configuration *c,
 
 	/* Validate the descriptors. */
 	if (control == NULL || control[0] == NULL ||
-	    control[0]->bDescriptorSubType != UVC_DT_HEADER)
+	    control[0]->bDescriptorSubType != UVC_VC_HEADER)
 		goto error;
 
 	if (fs_streaming == NULL || fs_streaming[0] == NULL ||
-	    fs_streaming[0]->bDescriptorSubType != UVC_DT_INPUT_HEADER)
+	    fs_streaming[0]->bDescriptorSubType != UVC_VS_INPUT_HEADER)
 		goto error;
 
 	if (hs_streaming == NULL || hs_streaming[0] == NULL ||
-	    hs_streaming[0]->bDescriptorSubType != UVC_DT_INPUT_HEADER)
+	    hs_streaming[0]->bDescriptorSubType != UVC_VS_INPUT_HEADER)
 		goto error;
 
 	uvc->desc.control = control;
diff --git a/drivers/usb/gadget/f_uvc.h b/drivers/usb/gadget/f_uvc.h
index 8a5db7c4fe7..e18a6636c28 100644
--- a/drivers/usb/gadget/f_uvc.h
+++ b/drivers/usb/gadget/f_uvc.h
@@ -15,357 +15,7 @@
 #define _F_UVC_H_
 
 #include <linux/usb/composite.h>
-
-#define USB_CLASS_VIDEO_CONTROL		1
-#define USB_CLASS_VIDEO_STREAMING	2
-
-struct uvc_descriptor_header {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-} __attribute__ ((packed));
-
-struct uvc_header_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u16 bcdUVC;
-	__u16 wTotalLength;
-	__u32 dwClockFrequency;
-	__u8  bInCollection;
-	__u8  baInterfaceNr[];
-} __attribute__((__packed__));
-
-#define UVC_HEADER_DESCRIPTOR(n)	uvc_header_descriptor_##n
-
-#define DECLARE_UVC_HEADER_DESCRIPTOR(n) 			\
-struct UVC_HEADER_DESCRIPTOR(n) {				\
-	__u8  bLength;						\
-	__u8  bDescriptorType;					\
-	__u8  bDescriptorSubType;				\
-	__u16 bcdUVC;						\
-	__u16 wTotalLength;					\
-	__u32 dwClockFrequency;					\
-	__u8  bInCollection;					\
-	__u8  baInterfaceNr[n];					\
-} __attribute__ ((packed))
-
-struct uvc_input_terminal_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bTerminalID;
-	__u16 wTerminalType;
-	__u8  bAssocTerminal;
-	__u8  iTerminal;
-} __attribute__((__packed__));
-
-struct uvc_output_terminal_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bTerminalID;
-	__u16 wTerminalType;
-	__u8  bAssocTerminal;
-	__u8  bSourceID;
-	__u8  iTerminal;
-} __attribute__((__packed__));
-
-struct uvc_camera_terminal_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bTerminalID;
-	__u16 wTerminalType;
-	__u8  bAssocTerminal;
-	__u8  iTerminal;
-	__u16 wObjectiveFocalLengthMin;
-	__u16 wObjectiveFocalLengthMax;
-	__u16 wOcularFocalLength;
-	__u8  bControlSize;
-	__u8  bmControls[3];
-} __attribute__((__packed__));
-
-struct uvc_selector_unit_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bUnitID;
-	__u8  bNrInPins;
-	__u8  baSourceID[0];
-	__u8  iSelector;
-} __attribute__((__packed__));
-
-#define UVC_SELECTOR_UNIT_DESCRIPTOR(n)	\
-	uvc_selector_unit_descriptor_##n
-
-#define DECLARE_UVC_SELECTOR_UNIT_DESCRIPTOR(n) 		\
-struct UVC_SELECTOR_UNIT_DESCRIPTOR(n) {			\
-	__u8  bLength;						\
-	__u8  bDescriptorType;					\
-	__u8  bDescriptorSubType;				\
-	__u8  bUnitID;						\
-	__u8  bNrInPins;					\
-	__u8  baSourceID[n];					\
-	__u8  iSelector;					\
-} __attribute__ ((packed))
-
-struct uvc_processing_unit_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bUnitID;
-	__u8  bSourceID;
-	__u16 wMaxMultiplier;
-	__u8  bControlSize;
-	__u8  bmControls[2];
-	__u8  iProcessing;
-} __attribute__((__packed__));
-
-struct uvc_extension_unit_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bUnitID;
-	__u8  guidExtensionCode[16];
-	__u8  bNumControls;
-	__u8  bNrInPins;
-	__u8  baSourceID[0];
-	__u8  bControlSize;
-	__u8  bmControls[0];
-	__u8  iExtension;
-} __attribute__((__packed__));
-
-#define UVC_EXTENSION_UNIT_DESCRIPTOR(p, n) \
-	uvc_extension_unit_descriptor_##p_##n
-
-#define DECLARE_UVC_EXTENSION_UNIT_DESCRIPTOR(p, n) 		\
-struct UVC_EXTENSION_UNIT_DESCRIPTOR(p, n) {			\
-	__u8  bLength;						\
-	__u8  bDescriptorType;					\
-	__u8  bDescriptorSubType;				\
-	__u8  bUnitID;						\
-	__u8  guidExtensionCode[16];				\
-	__u8  bNumControls;					\
-	__u8  bNrInPins;					\
-	__u8  baSourceID[p];					\
-	__u8  bControlSize;					\
-	__u8  bmControls[n];					\
-	__u8  iExtension;					\
-} __attribute__ ((packed))
-
-struct uvc_control_endpoint_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u16 wMaxTransferSize;
-} __attribute__((__packed__));
-
-#define UVC_DT_HEADER				1
-#define UVC_DT_INPUT_TERMINAL			2
-#define UVC_DT_OUTPUT_TERMINAL			3
-#define UVC_DT_SELECTOR_UNIT			4
-#define UVC_DT_PROCESSING_UNIT			5
-#define UVC_DT_EXTENSION_UNIT			6
-
-#define UVC_DT_HEADER_SIZE(n)			(12+(n))
-#define UVC_DT_INPUT_TERMINAL_SIZE		8
-#define UVC_DT_OUTPUT_TERMINAL_SIZE		9
-#define UVC_DT_CAMERA_TERMINAL_SIZE(n)		(15+(n))
-#define UVC_DT_SELECTOR_UNIT_SIZE(n)		(6+(n))
-#define UVC_DT_PROCESSING_UNIT_SIZE(n)		(9+(n))
-#define UVC_DT_EXTENSION_UNIT_SIZE(p,n)		(24+(p)+(n))
-#define UVC_DT_CONTROL_ENDPOINT_SIZE		5
-
-struct uvc_input_header_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bNumFormats;
-	__u16 wTotalLength;
-	__u8  bEndpointAddress;
-	__u8  bmInfo;
-	__u8  bTerminalLink;
-	__u8  bStillCaptureMethod;
-	__u8  bTriggerSupport;
-	__u8  bTriggerUsage;
-	__u8  bControlSize;
-	__u8  bmaControls[];
-} __attribute__((__packed__));
-
-#define UVC_INPUT_HEADER_DESCRIPTOR(n, p) \
-	uvc_input_header_descriptor_##n_##p
-
-#define DECLARE_UVC_INPUT_HEADER_DESCRIPTOR(n, p)		\
-struct UVC_INPUT_HEADER_DESCRIPTOR(n, p) {			\
-	__u8  bLength;						\
-	__u8  bDescriptorType;					\
-	__u8  bDescriptorSubType;				\
-	__u8  bNumFormats;					\
-	__u16 wTotalLength;					\
-	__u8  bEndpointAddress;					\
-	__u8  bmInfo;						\
-	__u8  bTerminalLink;					\
-	__u8  bStillCaptureMethod;				\
-	__u8  bTriggerSupport;					\
-	__u8  bTriggerUsage;					\
-	__u8  bControlSize;					\
-	__u8  bmaControls[p][n];				\
-} __attribute__ ((packed))
-
-struct uvc_output_header_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bNumFormats;
-	__u16 wTotalLength;
-	__u8  bEndpointAddress;
-	__u8  bTerminalLink;
-	__u8  bControlSize;
-	__u8  bmaControls[];
-} __attribute__((__packed__));
-
-#define UVC_OUTPUT_HEADER_DESCRIPTOR(n, p) \
-	uvc_output_header_descriptor_##n_##p
-
-#define DECLARE_UVC_OUTPUT_HEADER_DESCRIPTOR(n, p)		\
-struct UVC_OUTPUT_HEADER_DESCRIPTOR(n, p) {			\
-	__u8  bLength;						\
-	__u8  bDescriptorType;					\
-	__u8  bDescriptorSubType;				\
-	__u8  bNumFormats;					\
-	__u16 wTotalLength;					\
-	__u8  bEndpointAddress;					\
-	__u8  bTerminalLink;					\
-	__u8  bControlSize;					\
-	__u8  bmaControls[p][n];				\
-} __attribute__ ((packed))
-
-struct uvc_format_uncompressed {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bFormatIndex;
-	__u8  bNumFrameDescriptors;
-	__u8  guidFormat[16];
-	__u8  bBitsPerPixel;
-	__u8  bDefaultFrameIndex;
-	__u8  bAspectRatioX;
-	__u8  bAspectRatioY;
-	__u8  bmInterfaceFlags;
-	__u8  bCopyProtect;
-} __attribute__((__packed__));
-
-struct uvc_frame_uncompressed {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bFrameIndex;
-	__u8  bmCapabilities;
-	__u16 wWidth;
-	__u16 wHeight;
-	__u32 dwMinBitRate;
-	__u32 dwMaxBitRate;
-	__u32 dwMaxVideoFrameBufferSize;
-	__u32 dwDefaultFrameInterval;
-	__u8  bFrameIntervalType;
-	__u32 dwFrameInterval[];
-} __attribute__((__packed__));
-
-#define UVC_FRAME_UNCOMPRESSED(n) \
-	uvc_frame_uncompressed_##n
-
-#define DECLARE_UVC_FRAME_UNCOMPRESSED(n) 			\
-struct UVC_FRAME_UNCOMPRESSED(n) {				\
-	__u8  bLength;						\
-	__u8  bDescriptorType;					\
-	__u8  bDescriptorSubType;				\
-	__u8  bFrameIndex;					\
-	__u8  bmCapabilities;					\
-	__u16 wWidth;						\
-	__u16 wHeight;						\
-	__u32 dwMinBitRate;					\
-	__u32 dwMaxBitRate;					\
-	__u32 dwMaxVideoFrameBufferSize;			\
-	__u32 dwDefaultFrameInterval;				\
-	__u8  bFrameIntervalType;				\
-	__u32 dwFrameInterval[n];				\
-} __attribute__ ((packed))
-
-struct uvc_format_mjpeg {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bFormatIndex;
-	__u8  bNumFrameDescriptors;
-	__u8  bmFlags;
-	__u8  bDefaultFrameIndex;
-	__u8  bAspectRatioX;
-	__u8  bAspectRatioY;
-	__u8  bmInterfaceFlags;
-	__u8  bCopyProtect;
-} __attribute__((__packed__));
-
-struct uvc_frame_mjpeg {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bFrameIndex;
-	__u8  bmCapabilities;
-	__u16 wWidth;
-	__u16 wHeight;
-	__u32 dwMinBitRate;
-	__u32 dwMaxBitRate;
-	__u32 dwMaxVideoFrameBufferSize;
-	__u32 dwDefaultFrameInterval;
-	__u8  bFrameIntervalType;
-	__u32 dwFrameInterval[];
-} __attribute__((__packed__));
-
-#define UVC_FRAME_MJPEG(n) \
-	uvc_frame_mjpeg_##n
-
-#define DECLARE_UVC_FRAME_MJPEG(n) 				\
-struct UVC_FRAME_MJPEG(n) {					\
-	__u8  bLength;						\
-	__u8  bDescriptorType;					\
-	__u8  bDescriptorSubType;				\
-	__u8  bFrameIndex;					\
-	__u8  bmCapabilities;					\
-	__u16 wWidth;						\
-	__u16 wHeight;						\
-	__u32 dwMinBitRate;					\
-	__u32 dwMaxBitRate;					\
-	__u32 dwMaxVideoFrameBufferSize;			\
-	__u32 dwDefaultFrameInterval;				\
-	__u8  bFrameIntervalType;				\
-	__u32 dwFrameInterval[n];				\
-} __attribute__ ((packed))
-
-struct uvc_color_matching_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bColorPrimaries;
-	__u8  bTransferCharacteristics;
-	__u8  bMatrixCoefficients;
-} __attribute__((__packed__));
-
-#define UVC_DT_INPUT_HEADER			1
-#define UVC_DT_OUTPUT_HEADER			2
-#define UVC_DT_FORMAT_UNCOMPRESSED		4
-#define UVC_DT_FRAME_UNCOMPRESSED		5
-#define UVC_DT_FORMAT_MJPEG			6
-#define UVC_DT_FRAME_MJPEG			7
-#define UVC_DT_COLOR_MATCHING			13
-
-#define UVC_DT_INPUT_HEADER_SIZE(n, p)		(13+(n*p))
-#define UVC_DT_OUTPUT_HEADER_SIZE(n, p)		(9+(n*p))
-#define UVC_DT_FORMAT_UNCOMPRESSED_SIZE		27
-#define UVC_DT_FRAME_UNCOMPRESSED_SIZE(n)	(26+4*(n))
-#define UVC_DT_FORMAT_MJPEG_SIZE		11
-#define UVC_DT_FRAME_MJPEG_SIZE(n)		(26+4*(n))
-#define UVC_DT_COLOR_MATCHING_SIZE		6
+#include <linux/usb/video.h>
 
 extern int uvc_bind_config(struct usb_configuration *c,
 			   const struct uvc_descriptor_header * const *control,
diff --git a/drivers/usb/gadget/uvc.h b/drivers/usb/gadget/uvc.h
index e92454cddd7..5b7919460fd 100644
--- a/drivers/usb/gadget/uvc.h
+++ b/drivers/usb/gadget/uvc.h
@@ -47,39 +47,6 @@ struct uvc_event
 #define UVC_INTF_CONTROL		0
 #define UVC_INTF_STREAMING		1
 
-/* ------------------------------------------------------------------------
- * UVC constants & structures
- */
-
-/* Values for bmHeaderInfo (Video and Still Image Payload Headers, 2.4.3.3) */
-#define UVC_STREAM_EOH				(1 << 7)
-#define UVC_STREAM_ERR				(1 << 6)
-#define UVC_STREAM_STI				(1 << 5)
-#define UVC_STREAM_RES				(1 << 4)
-#define UVC_STREAM_SCR				(1 << 3)
-#define UVC_STREAM_PTS				(1 << 2)
-#define UVC_STREAM_EOF				(1 << 1)
-#define UVC_STREAM_FID				(1 << 0)
-
-struct uvc_streaming_control {
-	__u16 bmHint;
-	__u8  bFormatIndex;
-	__u8  bFrameIndex;
-	__u32 dwFrameInterval;
-	__u16 wKeyFrameRate;
-	__u16 wPFrameRate;
-	__u16 wCompQuality;
-	__u16 wCompWindowSize;
-	__u16 wDelay;
-	__u32 dwMaxVideoFrameSize;
-	__u32 dwMaxPayloadTransferSize;
-	__u32 dwClockFrequency;
-	__u8  bmFramingInfo;
-	__u8  bPreferedVersion;
-	__u8  bMinVersion;
-	__u8  bMaxVersion;
-} __attribute__((__packed__));
-
 /* ------------------------------------------------------------------------
  * Debugging, printing and logging
  */
@@ -137,9 +104,6 @@ extern unsigned int uvc_gadget_trace_param;
 #define UVC_MAX_REQUEST_SIZE			64
 #define UVC_MAX_EVENTS				4
 
-#define USB_DT_INTERFACE_ASSOCIATION_SIZE	8
-#define USB_CLASS_MISC				0xef
-
 /* ------------------------------------------------------------------------
  * Structures
  */
diff --git a/drivers/usb/gadget/webcam.c b/drivers/usb/gadget/webcam.c
index f5f3030cc41..288d21155ab 100644
--- a/drivers/usb/gadget/webcam.c
+++ b/drivers/usb/gadget/webcam.c
@@ -90,7 +90,7 @@ DECLARE_UVC_HEADER_DESCRIPTOR(1);
 static const struct UVC_HEADER_DESCRIPTOR(1) uvc_control_header = {
 	.bLength		= UVC_DT_HEADER_SIZE(1),
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
-	.bDescriptorSubType	= UVC_DT_HEADER,
+	.bDescriptorSubType	= UVC_VC_HEADER,
 	.bcdUVC			= cpu_to_le16(0x0100),
 	.wTotalLength		= 0, /* dynamic */
 	.dwClockFrequency	= cpu_to_le32(48000000),
@@ -101,7 +101,7 @@ static const struct UVC_HEADER_DESCRIPTOR(1) uvc_control_header = {
 static const struct uvc_camera_terminal_descriptor uvc_camera_terminal = {
 	.bLength		= UVC_DT_CAMERA_TERMINAL_SIZE(3),
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
-	.bDescriptorSubType	= UVC_DT_INPUT_TERMINAL,
+	.bDescriptorSubType	= UVC_VC_INPUT_TERMINAL,
 	.bTerminalID		= 1,
 	.wTerminalType		= cpu_to_le16(0x0201),
 	.bAssocTerminal		= 0,
@@ -118,7 +118,7 @@ static const struct uvc_camera_terminal_descriptor uvc_camera_terminal = {
 static const struct uvc_processing_unit_descriptor uvc_processing = {
 	.bLength		= UVC_DT_PROCESSING_UNIT_SIZE(2),
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
-	.bDescriptorSubType	= UVC_DT_PROCESSING_UNIT,
+	.bDescriptorSubType	= UVC_VC_PROCESSING_UNIT,
 	.bUnitID		= 2,
 	.bSourceID		= 1,
 	.wMaxMultiplier		= cpu_to_le16(16*1024),
@@ -131,7 +131,7 @@ static const struct uvc_processing_unit_descriptor uvc_processing = {
 static const struct uvc_output_terminal_descriptor uvc_output_terminal = {
 	.bLength		= UVC_DT_OUTPUT_TERMINAL_SIZE,
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
-	.bDescriptorSubType	= UVC_DT_OUTPUT_TERMINAL,
+	.bDescriptorSubType	= UVC_VC_OUTPUT_TERMINAL,
 	.bTerminalID		= 3,
 	.wTerminalType		= cpu_to_le16(0x0101),
 	.bAssocTerminal		= 0,
@@ -144,7 +144,7 @@ DECLARE_UVC_INPUT_HEADER_DESCRIPTOR(1, 2);
 static const struct UVC_INPUT_HEADER_DESCRIPTOR(1, 2) uvc_input_header = {
 	.bLength		= UVC_DT_INPUT_HEADER_SIZE(1, 2),
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
-	.bDescriptorSubType	= UVC_DT_INPUT_HEADER,
+	.bDescriptorSubType	= UVC_VS_INPUT_HEADER,
 	.bNumFormats		= 2,
 	.wTotalLength		= 0, /* dynamic */
 	.bEndpointAddress	= 0, /* dynamic */
@@ -161,7 +161,7 @@ static const struct UVC_INPUT_HEADER_DESCRIPTOR(1, 2) uvc_input_header = {
 static const struct uvc_format_uncompressed uvc_format_yuv = {
 	.bLength		= UVC_DT_FORMAT_UNCOMPRESSED_SIZE,
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
-	.bDescriptorSubType	= UVC_DT_FORMAT_UNCOMPRESSED,
+	.bDescriptorSubType	= UVC_VS_FORMAT_UNCOMPRESSED,
 	.bFormatIndex		= 1,
 	.bNumFrameDescriptors	= 2,
 	.guidFormat		=
@@ -181,7 +181,7 @@ DECLARE_UVC_FRAME_UNCOMPRESSED(3);
 static const struct UVC_FRAME_UNCOMPRESSED(3) uvc_frame_yuv_360p = {
 	.bLength		= UVC_DT_FRAME_UNCOMPRESSED_SIZE(3),
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
-	.bDescriptorSubType	= UVC_DT_FRAME_UNCOMPRESSED,
+	.bDescriptorSubType	= UVC_VS_FRAME_UNCOMPRESSED,
 	.bFrameIndex		= 1,
 	.bmCapabilities		= 0,
 	.wWidth			= cpu_to_le16(640),
@@ -199,7 +199,7 @@ static const struct UVC_FRAME_UNCOMPRESSED(3) uvc_frame_yuv_360p = {
 static const struct UVC_FRAME_UNCOMPRESSED(1) uvc_frame_yuv_720p = {
 	.bLength		= UVC_DT_FRAME_UNCOMPRESSED_SIZE(1),
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
-	.bDescriptorSubType	= UVC_DT_FRAME_UNCOMPRESSED,
+	.bDescriptorSubType	= UVC_VS_FRAME_UNCOMPRESSED,
 	.bFrameIndex		= 2,
 	.bmCapabilities		= 0,
 	.wWidth			= cpu_to_le16(1280),
@@ -215,7 +215,7 @@ static const struct UVC_FRAME_UNCOMPRESSED(1) uvc_frame_yuv_720p = {
 static const struct uvc_format_mjpeg uvc_format_mjpg = {
 	.bLength		= UVC_DT_FORMAT_MJPEG_SIZE,
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
-	.bDescriptorSubType	= UVC_DT_FORMAT_MJPEG,
+	.bDescriptorSubType	= UVC_VS_FORMAT_MJPEG,
 	.bFormatIndex		= 2,
 	.bNumFrameDescriptors	= 2,
 	.bmFlags		= 0,
@@ -232,7 +232,7 @@ DECLARE_UVC_FRAME_MJPEG(3);
 static const struct UVC_FRAME_MJPEG(3) uvc_frame_mjpg_360p = {
 	.bLength		= UVC_DT_FRAME_MJPEG_SIZE(3),
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
-	.bDescriptorSubType	= UVC_DT_FRAME_MJPEG,
+	.bDescriptorSubType	= UVC_VS_FRAME_MJPEG,
 	.bFrameIndex		= 1,
 	.bmCapabilities		= 0,
 	.wWidth			= cpu_to_le16(640),
@@ -250,7 +250,7 @@ static const struct UVC_FRAME_MJPEG(3) uvc_frame_mjpg_360p = {
 static const struct UVC_FRAME_MJPEG(1) uvc_frame_mjpg_720p = {
 	.bLength		= UVC_DT_FRAME_MJPEG_SIZE(1),
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
-	.bDescriptorSubType	= UVC_DT_FRAME_MJPEG,
+	.bDescriptorSubType	= UVC_VS_FRAME_MJPEG,
 	.bFrameIndex		= 2,
 	.bmCapabilities		= 0,
 	.wWidth			= cpu_to_le16(1280),
@@ -266,7 +266,7 @@ static const struct UVC_FRAME_MJPEG(1) uvc_frame_mjpg_720p = {
 static const struct uvc_color_matching_descriptor uvc_color_matching = {
 	.bLength		= UVC_DT_COLOR_MATCHING_SIZE,
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
-	.bDescriptorSubType	= UVC_DT_COLOR_MATCHING,
+	.bDescriptorSubType	= UVC_VS_COLORFORMAT,
 	.bColorPrimaries	= 1,
 	.bTransferCharacteristics	= 1,
 	.bMatrixCoefficients	= 4,
diff --git a/include/linux/usb/video.h b/include/linux/usb/video.h
index 2d5b7fc6a26..3b3b95e01f7 100644
--- a/include/linux/usb/video.h
+++ b/include/linux/usb/video.h
@@ -160,6 +160,16 @@
 #define UVC_STATUS_TYPE_CONTROL				1
 #define UVC_STATUS_TYPE_STREAMING			2
 
+/* 2.4.3.3. Payload Header Information */
+#define UVC_STREAM_EOH					(1 << 7)
+#define UVC_STREAM_ERR					(1 << 6)
+#define UVC_STREAM_STI					(1 << 5)
+#define UVC_STREAM_RES					(1 << 4)
+#define UVC_STREAM_SCR					(1 << 3)
+#define UVC_STREAM_PTS					(1 << 2)
+#define UVC_STREAM_EOF					(1 << 1)
+#define UVC_STREAM_FID					(1 << 0)
+
 /* 4.1.2. Control Capabilities */
 #define UVC_CONTROL_CAP_GET				(1 << 0)
 #define UVC_CONTROL_CAP_SET				(1 << 1)
@@ -167,5 +177,392 @@
 #define UVC_CONTROL_CAP_AUTOUPDATE			(1 << 3)
 #define UVC_CONTROL_CAP_ASYNCHRONOUS			(1 << 4)
 
+/* ------------------------------------------------------------------------
+ * UVC structures
+ */
+
+/* All UVC descriptors have these 3 fields at the beginning */
+struct uvc_descriptor_header {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+} __attribute__((packed));
+
+/* 3.7.2. Video Control Interface Header Descriptor */
+struct uvc_header_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u16 bcdUVC;
+	__u16 wTotalLength;
+	__u32 dwClockFrequency;
+	__u8  bInCollection;
+	__u8  baInterfaceNr[];
+} __attribute__((__packed__));
+
+#define UVC_DT_HEADER_SIZE(n)				(12+(n))
+
+#define UVC_HEADER_DESCRIPTOR(n) \
+	uvc_header_descriptor_##n
+
+#define DECLARE_UVC_HEADER_DESCRIPTOR(n)		\
+struct UVC_HEADER_DESCRIPTOR(n) {			\
+	__u8  bLength;					\
+	__u8  bDescriptorType;				\
+	__u8  bDescriptorSubType;			\
+	__u16 bcdUVC;					\
+	__u16 wTotalLength;				\
+	__u32 dwClockFrequency;				\
+	__u8  bInCollection;				\
+	__u8  baInterfaceNr[n];				\
+} __attribute__ ((packed))
+
+/* 3.7.2.1. Input Terminal Descriptor */
+struct uvc_input_terminal_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u8  bTerminalID;
+	__u16 wTerminalType;
+	__u8  bAssocTerminal;
+	__u8  iTerminal;
+} __attribute__((__packed__));
+
+#define UVC_DT_INPUT_TERMINAL_SIZE			8
+
+/* 3.7.2.2. Output Terminal Descriptor */
+struct uvc_output_terminal_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u8  bTerminalID;
+	__u16 wTerminalType;
+	__u8  bAssocTerminal;
+	__u8  bSourceID;
+	__u8  iTerminal;
+} __attribute__((__packed__));
+
+#define UVC_DT_OUTPUT_TERMINAL_SIZE			9
+
+/* 3.7.2.3. Camera Terminal Descriptor */
+struct uvc_camera_terminal_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u8  bTerminalID;
+	__u16 wTerminalType;
+	__u8  bAssocTerminal;
+	__u8  iTerminal;
+	__u16 wObjectiveFocalLengthMin;
+	__u16 wObjectiveFocalLengthMax;
+	__u16 wOcularFocalLength;
+	__u8  bControlSize;
+	__u8  bmControls[3];
+} __attribute__((__packed__));
+
+#define UVC_DT_CAMERA_TERMINAL_SIZE(n)			(15+(n))
+
+/* 3.7.2.4. Selector Unit Descriptor */
+struct uvc_selector_unit_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u8  bUnitID;
+	__u8  bNrInPins;
+	__u8  baSourceID[0];
+	__u8  iSelector;
+} __attribute__((__packed__));
+
+#define UVC_DT_SELECTOR_UNIT_SIZE(n)			(6+(n))
+
+#define UVC_SELECTOR_UNIT_DESCRIPTOR(n)	\
+	uvc_selector_unit_descriptor_##n
+
+#define DECLARE_UVC_SELECTOR_UNIT_DESCRIPTOR(n)	\
+struct UVC_SELECTOR_UNIT_DESCRIPTOR(n) {		\
+	__u8  bLength;					\
+	__u8  bDescriptorType;				\
+	__u8  bDescriptorSubType;			\
+	__u8  bUnitID;					\
+	__u8  bNrInPins;				\
+	__u8  baSourceID[n];				\
+	__u8  iSelector;				\
+} __attribute__ ((packed))
+
+/* 3.7.2.5. Processing Unit Descriptor */
+struct uvc_processing_unit_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u8  bUnitID;
+	__u8  bSourceID;
+	__u16 wMaxMultiplier;
+	__u8  bControlSize;
+	__u8  bmControls[2];
+	__u8  iProcessing;
+} __attribute__((__packed__));
+
+#define UVC_DT_PROCESSING_UNIT_SIZE(n)			(9+(n))
+
+/* 3.7.2.6. Extension Unit Descriptor */
+struct uvc_extension_unit_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u8  bUnitID;
+	__u8  guidExtensionCode[16];
+	__u8  bNumControls;
+	__u8  bNrInPins;
+	__u8  baSourceID[0];
+	__u8  bControlSize;
+	__u8  bmControls[0];
+	__u8  iExtension;
+} __attribute__((__packed__));
+
+#define UVC_DT_EXTENSION_UNIT_SIZE(p, n)		(24+(p)+(n))
+
+#define UVC_EXTENSION_UNIT_DESCRIPTOR(p, n) \
+	uvc_extension_unit_descriptor_##p##_##n	/* paste p and n separately */
+
+#define DECLARE_UVC_EXTENSION_UNIT_DESCRIPTOR(p, n)	\
+struct UVC_EXTENSION_UNIT_DESCRIPTOR(p, n) {		\
+	__u8  bLength;					\
+	__u8  bDescriptorType;				\
+	__u8  bDescriptorSubType;			\
+	__u8  bUnitID;					\
+	__u8  guidExtensionCode[16];			\
+	__u8  bNumControls;				\
+	__u8  bNrInPins;				\
+	__u8  baSourceID[p];				\
+	__u8  bControlSize;				\
+	__u8  bmControls[n];				\
+	__u8  iExtension;				\
+} __attribute__ ((packed))
+
+/* 3.8.2.2. Video Control Interrupt Endpoint Descriptor */
+struct uvc_control_endpoint_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u16 wMaxTransferSize;
+} __attribute__((__packed__));
+
+#define UVC_DT_CONTROL_ENDPOINT_SIZE			5
+
+/* 3.9.2.1. Input Header Descriptor */
+struct uvc_input_header_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u8  bNumFormats;
+	__u16 wTotalLength;
+	__u8  bEndpointAddress;
+	__u8  bmInfo;
+	__u8  bTerminalLink;
+	__u8  bStillCaptureMethod;
+	__u8  bTriggerSupport;
+	__u8  bTriggerUsage;
+	__u8  bControlSize;
+	__u8  bmaControls[];
+} __attribute__((__packed__));
+
+#define UVC_DT_INPUT_HEADER_SIZE(n, p)			(13+((n)*(p)))
+
+#define UVC_INPUT_HEADER_DESCRIPTOR(n, p) \
+	uvc_input_header_descriptor_##n##_##p	/* paste n and p separately */
+
+#define DECLARE_UVC_INPUT_HEADER_DESCRIPTOR(n, p)	\
+struct UVC_INPUT_HEADER_DESCRIPTOR(n, p) {		\
+	__u8  bLength;					\
+	__u8  bDescriptorType;				\
+	__u8  bDescriptorSubType;			\
+	__u8  bNumFormats;				\
+	__u16 wTotalLength;				\
+	__u8  bEndpointAddress;				\
+	__u8  bmInfo;					\
+	__u8  bTerminalLink;				\
+	__u8  bStillCaptureMethod;			\
+	__u8  bTriggerSupport;				\
+	__u8  bTriggerUsage;				\
+	__u8  bControlSize;				\
+	__u8  bmaControls[p][n];			\
+} __attribute__ ((packed))
+
+/* 3.9.2.2. Output Header Descriptor */
+struct uvc_output_header_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u8  bNumFormats;
+	__u16 wTotalLength;
+	__u8  bEndpointAddress;
+	__u8  bTerminalLink;
+	__u8  bControlSize;
+	__u8  bmaControls[];
+} __attribute__((__packed__));
+
+#define UVC_DT_OUTPUT_HEADER_SIZE(n, p)			(9+((n)*(p)))
+
+#define UVC_OUTPUT_HEADER_DESCRIPTOR(n, p) \
+	uvc_output_header_descriptor_##n##_##p	/* paste n and p separately */
+
+#define DECLARE_UVC_OUTPUT_HEADER_DESCRIPTOR(n, p)	\
+struct UVC_OUTPUT_HEADER_DESCRIPTOR(n, p) {		\
+	__u8  bLength;					\
+	__u8  bDescriptorType;				\
+	__u8  bDescriptorSubType;			\
+	__u8  bNumFormats;				\
+	__u16 wTotalLength;				\
+	__u8  bEndpointAddress;				\
+	__u8  bTerminalLink;				\
+	__u8  bControlSize;				\
+	__u8  bmaControls[p][n];			\
+} __attribute__ ((packed))
+
+/* 3.9.2.6. Color matching descriptor */
+struct uvc_color_matching_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u8  bColorPrimaries;
+	__u8  bTransferCharacteristics;
+	__u8  bMatrixCoefficients;
+} __attribute__((__packed__));
+
+#define UVC_DT_COLOR_MATCHING_SIZE			6
+
+/* 4.3.1.1. Video Probe and Commit Controls */
+struct uvc_streaming_control {
+	__u16 bmHint;
+	__u8  bFormatIndex;
+	__u8  bFrameIndex;
+	__u32 dwFrameInterval;
+	__u16 wKeyFrameRate;
+	__u16 wPFrameRate;
+	__u16 wCompQuality;
+	__u16 wCompWindowSize;
+	__u16 wDelay;
+	__u32 dwMaxVideoFrameSize;
+	__u32 dwMaxPayloadTransferSize;
+	__u32 dwClockFrequency;
+	__u8  bmFramingInfo;
+	__u8  bPreferedVersion;
+	__u8  bMinVersion;
+	__u8  bMaxVersion;
+} __attribute__((__packed__));
+
+/* Uncompressed Payload - 3.1.1. Uncompressed Video Format Descriptor */
+struct uvc_format_uncompressed {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u8  bFormatIndex;
+	__u8  bNumFrameDescriptors;
+	__u8  guidFormat[16];
+	__u8  bBitsPerPixel;
+	__u8  bDefaultFrameIndex;
+	__u8  bAspectRatioX;
+	__u8  bAspectRatioY;
+	__u8  bmInterfaceFlags;
+	__u8  bCopyProtect;
+} __attribute__((__packed__));
+
+#define UVC_DT_FORMAT_UNCOMPRESSED_SIZE			27
+
+/* Uncompressed Payload - 3.1.2. Uncompressed Video Frame Descriptor */
+struct uvc_frame_uncompressed {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u8  bFrameIndex;
+	__u8  bmCapabilities;
+	__u16 wWidth;
+	__u16 wHeight;
+	__u32 dwMinBitRate;
+	__u32 dwMaxBitRate;
+	__u32 dwMaxVideoFrameBufferSize;
+	__u32 dwDefaultFrameInterval;
+	__u8  bFrameIntervalType;
+	__u32 dwFrameInterval[];
+} __attribute__((__packed__));
+
+#define UVC_DT_FRAME_UNCOMPRESSED_SIZE(n)		(26+4*(n))
+
+#define UVC_FRAME_UNCOMPRESSED(n) \
+	uvc_frame_uncompressed_##n
+
+#define DECLARE_UVC_FRAME_UNCOMPRESSED(n)		\
+struct UVC_FRAME_UNCOMPRESSED(n) {			\
+	__u8  bLength;					\
+	__u8  bDescriptorType;				\
+	__u8  bDescriptorSubType;			\
+	__u8  bFrameIndex;				\
+	__u8  bmCapabilities;				\
+	__u16 wWidth;					\
+	__u16 wHeight;					\
+	__u32 dwMinBitRate;				\
+	__u32 dwMaxBitRate;				\
+	__u32 dwMaxVideoFrameBufferSize;		\
+	__u32 dwDefaultFrameInterval;			\
+	__u8  bFrameIntervalType;			\
+	__u32 dwFrameInterval[n];			\
+} __attribute__ ((packed))
+
+/* MJPEG Payload - 3.1.1. MJPEG Video Format Descriptor */
+struct uvc_format_mjpeg {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u8  bFormatIndex;
+	__u8  bNumFrameDescriptors;
+	__u8  bmFlags;
+	__u8  bDefaultFrameIndex;
+	__u8  bAspectRatioX;
+	__u8  bAspectRatioY;
+	__u8  bmInterfaceFlags;
+	__u8  bCopyProtect;
+} __attribute__((__packed__));
+
+#define UVC_DT_FORMAT_MJPEG_SIZE			11
+
+/* MJPEG Payload - 3.1.2. MJPEG Video Frame Descriptor */
+struct uvc_frame_mjpeg {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubType;
+	__u8  bFrameIndex;
+	__u8  bmCapabilities;
+	__u16 wWidth;
+	__u16 wHeight;
+	__u32 dwMinBitRate;
+	__u32 dwMaxBitRate;
+	__u32 dwMaxVideoFrameBufferSize;
+	__u32 dwDefaultFrameInterval;
+	__u8  bFrameIntervalType;
+	__u32 dwFrameInterval[];
+} __attribute__((__packed__));
+
+#define UVC_DT_FRAME_MJPEG_SIZE(n)			(26+4*(n))
+
+#define UVC_FRAME_MJPEG(n) \
+	uvc_frame_mjpeg_##n
+
+#define DECLARE_UVC_FRAME_MJPEG(n)			\
+struct UVC_FRAME_MJPEG(n) {				\
+	__u8  bLength;					\
+	__u8  bDescriptorType;				\
+	__u8  bDescriptorSubType;			\
+	__u8  bFrameIndex;				\
+	__u8  bmCapabilities;				\
+	__u16 wWidth;					\
+	__u16 wHeight;					\
+	__u32 dwMinBitRate;				\
+	__u32 dwMaxBitRate;				\
+	__u32 dwMaxVideoFrameBufferSize;		\
+	__u32 dwDefaultFrameInterval;			\
+	__u8  bFrameIntervalType;			\
+	__u32 dwFrameInterval[n];			\
+} __attribute__ ((packed))
+
 #endif /* __LINUX_USB_VIDEO_H */
 
-- 
cgit v1.2.3-70-g09d2


From ace6e9799f585994c92ac3c0696bc336e50077e6 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <lyakh@axis700.grange>
Date: Thu, 22 Jul 2010 16:52:51 -0300
Subject: V4L/DVB: mediabus: fix ambiguous pixel code names

Endianness notation is meaningless for 8 bit YUYV codes. Switch pixel code
names to explicitly state the order of colour components in the data
stream.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 arch/sh/boards/mach-ap325rxa/setup.c       |  2 +-
 drivers/media/video/ak881x.c               |  6 +++---
 drivers/media/video/mt9m111.c              | 16 ++++++++--------
 drivers/media/video/mt9t112.c              | 12 ++++++------
 drivers/media/video/ov772x.c               |  8 ++++----
 drivers/media/video/ov9640.c               | 14 +++++++-------
 drivers/media/video/pxa_camera.c           |  8 ++++----
 drivers/media/video/rj54n1cb0c.c           |  8 ++++----
 drivers/media/video/sh_mobile_ceu_camera.c | 16 ++++++++--------
 drivers/media/video/sh_vou.c               |  8 ++++----
 drivers/media/video/soc_mediabus.c         |  8 ++++----
 drivers/media/video/tw9910.c               |  8 ++++----
 include/media/v4l2-mediabus.h              |  8 ++++----
 13 files changed, 61 insertions(+), 61 deletions(-)

(limited to 'include')

diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c
index 3a170bd3f3d..de375b64e41 100644
--- a/arch/sh/boards/mach-ap325rxa/setup.c
+++ b/arch/sh/boards/mach-ap325rxa/setup.c
@@ -316,7 +316,7 @@ static struct soc_camera_platform_info camera_info = {
 	.format_name = "UYVY",
 	.format_depth = 16,
 	.format = {
-		.code = V4L2_MBUS_FMT_YUYV8_2X8_BE,
+		.code = V4L2_MBUS_FMT_UYVY8_2X8,
 		.colorspace = V4L2_COLORSPACE_SMPTE170M,
 		.field = V4L2_FIELD_NONE,
 		.width = 640,
diff --git a/drivers/media/video/ak881x.c b/drivers/media/video/ak881x.c
index 1573392f74b..b388654d48c 100644
--- a/drivers/media/video/ak881x.c
+++ b/drivers/media/video/ak881x.c
@@ -126,7 +126,7 @@ static int ak881x_try_g_mbus_fmt(struct v4l2_subdev *sd,
 	v4l_bound_align_image(&mf->width, 0, 720, 2,
 			      &mf->height, 0, ak881x->lines, 1, 0);
 	mf->field	= V4L2_FIELD_INTERLACED;
-	mf->code	= V4L2_MBUS_FMT_YUYV8_2X8_LE;
+	mf->code	= V4L2_MBUS_FMT_YUYV8_2X8;
 	mf->colorspace	= V4L2_COLORSPACE_SMPTE170M;
 
 	return 0;
@@ -136,7 +136,7 @@ static int ak881x_s_mbus_fmt(struct v4l2_subdev *sd,
 			     struct v4l2_mbus_framefmt *mf)
 {
 	if (mf->field != V4L2_FIELD_INTERLACED ||
-	    mf->code != V4L2_MBUS_FMT_YUYV8_2X8_LE)
+	    mf->code != V4L2_MBUS_FMT_YUYV8_2X8)
 		return -EINVAL;
 
 	return ak881x_try_g_mbus_fmt(sd, mf);
@@ -148,7 +148,7 @@ static int ak881x_enum_mbus_fmt(struct v4l2_subdev *sd, unsigned int index,
 	if (index)
 		return -EINVAL;
 
-	*code = V4L2_MBUS_FMT_YUYV8_2X8_LE;
+	*code = V4L2_MBUS_FMT_YUYV8_2X8;
 	return 0;
 }
 
diff --git a/drivers/media/video/mt9m111.c b/drivers/media/video/mt9m111.c
index fbd0fc79472..31cc3d04bcc 100644
--- a/drivers/media/video/mt9m111.c
+++ b/drivers/media/video/mt9m111.c
@@ -143,10 +143,10 @@ static const struct mt9m111_datafmt *mt9m111_find_datafmt(
 }
 
 static const struct mt9m111_datafmt mt9m111_colour_fmts[] = {
-	{V4L2_MBUS_FMT_YUYV8_2X8_LE, V4L2_COLORSPACE_JPEG},
-	{V4L2_MBUS_FMT_YVYU8_2X8_LE, V4L2_COLORSPACE_JPEG},
-	{V4L2_MBUS_FMT_YUYV8_2X8_BE, V4L2_COLORSPACE_JPEG},
-	{V4L2_MBUS_FMT_YVYU8_2X8_BE, V4L2_COLORSPACE_JPEG},
+	{V4L2_MBUS_FMT_YUYV8_2X8, V4L2_COLORSPACE_JPEG},
+	{V4L2_MBUS_FMT_YVYU8_2X8, V4L2_COLORSPACE_JPEG},
+	{V4L2_MBUS_FMT_UYVY8_2X8, V4L2_COLORSPACE_JPEG},
+	{V4L2_MBUS_FMT_VYUY8_2X8, V4L2_COLORSPACE_JPEG},
 	{V4L2_MBUS_FMT_RGB555_2X8_PADHI_LE, V4L2_COLORSPACE_SRGB},
 	{V4L2_MBUS_FMT_RGB565_2X8_LE, V4L2_COLORSPACE_SRGB},
 	{V4L2_MBUS_FMT_SBGGR8_1X8, V4L2_COLORSPACE_SRGB},
@@ -505,22 +505,22 @@ static int mt9m111_set_pixfmt(struct i2c_client *client,
 	case V4L2_MBUS_FMT_RGB565_2X8_LE:
 		ret = mt9m111_setfmt_rgb565(client);
 		break;
-	case V4L2_MBUS_FMT_YUYV8_2X8_BE:
+	case V4L2_MBUS_FMT_UYVY8_2X8:
 		mt9m111->swap_yuv_y_chromas = 0;
 		mt9m111->swap_yuv_cb_cr = 0;
 		ret = mt9m111_setfmt_yuv(client);
 		break;
-	case V4L2_MBUS_FMT_YVYU8_2X8_BE:
+	case V4L2_MBUS_FMT_VYUY8_2X8:
 		mt9m111->swap_yuv_y_chromas = 0;
 		mt9m111->swap_yuv_cb_cr = 1;
 		ret = mt9m111_setfmt_yuv(client);
 		break;
-	case V4L2_MBUS_FMT_YUYV8_2X8_LE:
+	case V4L2_MBUS_FMT_YUYV8_2X8:
 		mt9m111->swap_yuv_y_chromas = 1;
 		mt9m111->swap_yuv_cb_cr = 0;
 		ret = mt9m111_setfmt_yuv(client);
 		break;
-	case V4L2_MBUS_FMT_YVYU8_2X8_LE:
+	case V4L2_MBUS_FMT_YVYU8_2X8:
 		mt9m111->swap_yuv_y_chromas = 1;
 		mt9m111->swap_yuv_cb_cr = 1;
 		ret = mt9m111_setfmt_yuv(client);
diff --git a/drivers/media/video/mt9t112.c b/drivers/media/video/mt9t112.c
index e4bf1db9a87..8ec47e42d4d 100644
--- a/drivers/media/video/mt9t112.c
+++ b/drivers/media/video/mt9t112.c
@@ -121,22 +121,22 @@ struct mt9t112_priv {
 
 static const struct mt9t112_format mt9t112_cfmts[] = {
 	{
-		.code		= V4L2_MBUS_FMT_YUYV8_2X8_BE,
+		.code		= V4L2_MBUS_FMT_UYVY8_2X8,
 		.colorspace	= V4L2_COLORSPACE_JPEG,
 		.fmt		= 1,
 		.order		= 0,
 	}, {
-		.code		= V4L2_MBUS_FMT_YVYU8_2X8_BE,
+		.code		= V4L2_MBUS_FMT_VYUY8_2X8,
 		.colorspace	= V4L2_COLORSPACE_JPEG,
 		.fmt		= 1,
 		.order		= 1,
 	}, {
-		.code		= V4L2_MBUS_FMT_YUYV8_2X8_LE,
+		.code		= V4L2_MBUS_FMT_YUYV8_2X8,
 		.colorspace	= V4L2_COLORSPACE_JPEG,
 		.fmt		= 1,
 		.order		= 2,
 	}, {
-		.code		= V4L2_MBUS_FMT_YVYU8_2X8_LE,
+		.code		= V4L2_MBUS_FMT_YVYU8_2X8,
 		.colorspace	= V4L2_COLORSPACE_JPEG,
 		.fmt		= 1,
 		.order		= 3,
@@ -972,7 +972,7 @@ static int mt9t112_s_crop(struct v4l2_subdev *sd, struct v4l2_crop *a)
 	struct v4l2_rect *rect = &a->c;
 
 	return mt9t112_set_params(client, rect->width, rect->height,
-				 V4L2_MBUS_FMT_YUYV8_2X8_BE);
+				 V4L2_MBUS_FMT_UYVY8_2X8);
 }
 
 static int mt9t112_g_fmt(struct v4l2_subdev *sd,
@@ -983,7 +983,7 @@ static int mt9t112_g_fmt(struct v4l2_subdev *sd,
 
 	if (!priv->format) {
 		int ret = mt9t112_set_params(client, VGA_WIDTH, VGA_HEIGHT,
-					     V4L2_MBUS_FMT_YUYV8_2X8_BE);
+					     V4L2_MBUS_FMT_UYVY8_2X8);
 		if (ret < 0)
 			return ret;
 	}
diff --git a/drivers/media/video/ov772x.c b/drivers/media/video/ov772x.c
index 34034a71021..25eb5d637ee 100644
--- a/drivers/media/video/ov772x.c
+++ b/drivers/media/video/ov772x.c
@@ -440,21 +440,21 @@ static const struct regval_list ov772x_vga_regs[] = {
  */
 static const struct ov772x_color_format ov772x_cfmts[] = {
 	{
-		.code		= V4L2_MBUS_FMT_YUYV8_2X8_LE,
+		.code		= V4L2_MBUS_FMT_YUYV8_2X8,
 		.colorspace	= V4L2_COLORSPACE_JPEG,
 		.dsp3		= 0x0,
 		.com3		= SWAP_YUV,
 		.com7		= OFMT_YUV,
 	},
 	{
-		.code		= V4L2_MBUS_FMT_YVYU8_2X8_LE,
+		.code		= V4L2_MBUS_FMT_YVYU8_2X8,
 		.colorspace	= V4L2_COLORSPACE_JPEG,
 		.dsp3		= UV_ON,
 		.com3		= SWAP_YUV,
 		.com7		= OFMT_YUV,
 	},
 	{
-		.code		= V4L2_MBUS_FMT_YUYV8_2X8_BE,
+		.code		= V4L2_MBUS_FMT_UYVY8_2X8,
 		.colorspace	= V4L2_COLORSPACE_JPEG,
 		.dsp3		= 0x0,
 		.com3		= 0x0,
@@ -960,7 +960,7 @@ static int ov772x_g_fmt(struct v4l2_subdev *sd,
 	if (!priv->win || !priv->cfmt) {
 		u32 width = VGA_WIDTH, height = VGA_HEIGHT;
 		int ret = ov772x_set_params(client, &width, &height,
-					    V4L2_MBUS_FMT_YUYV8_2X8_LE);
+					    V4L2_MBUS_FMT_YUYV8_2X8);
 		if (ret < 0)
 			return ret;
 	}
diff --git a/drivers/media/video/ov9640.c b/drivers/media/video/ov9640.c
index 7ce9e05b478..40cdfab74cc 100644
--- a/drivers/media/video/ov9640.c
+++ b/drivers/media/video/ov9640.c
@@ -155,7 +155,7 @@ static const struct ov9640_reg ov9640_regs_rgb[] = {
 };
 
 static enum v4l2_mbus_pixelcode ov9640_codes[] = {
-	V4L2_MBUS_FMT_YUYV8_2X8_BE,
+	V4L2_MBUS_FMT_UYVY8_2X8,
 	V4L2_MBUS_FMT_RGB555_2X8_PADHI_LE,
 	V4L2_MBUS_FMT_RGB565_2X8_LE,
 };
@@ -430,7 +430,7 @@ static void ov9640_alter_regs(enum v4l2_mbus_pixelcode code,
 {
 	switch (code) {
 	default:
-	case V4L2_MBUS_FMT_YUYV8_2X8_BE:
+	case V4L2_MBUS_FMT_UYVY8_2X8:
 		alt->com12	= OV9640_COM12_YUV_AVG;
 		alt->com13	= OV9640_COM13_Y_DELAY_EN |
 					OV9640_COM13_YUV_DLY(0x01);
@@ -493,7 +493,7 @@ static int ov9640_write_regs(struct i2c_client *client, u32 width,
 	}
 
 	/* select color matrix configuration for given color encoding */
-	if (code == V4L2_MBUS_FMT_YUYV8_2X8_BE) {
+	if (code == V4L2_MBUS_FMT_UYVY8_2X8) {
 		matrix_regs	= ov9640_regs_yuv;
 		matrix_regs_len	= ARRAY_SIZE(ov9640_regs_yuv);
 	} else {
@@ -579,8 +579,8 @@ static int ov9640_s_fmt(struct v4l2_subdev *sd,
 		cspace = V4L2_COLORSPACE_SRGB;
 		break;
 	default:
-		code = V4L2_MBUS_FMT_YUYV8_2X8_BE;
-	case V4L2_MBUS_FMT_YUYV8_2X8_BE:
+		code = V4L2_MBUS_FMT_UYVY8_2X8;
+	case V4L2_MBUS_FMT_UYVY8_2X8:
 		cspace = V4L2_COLORSPACE_JPEG;
 	}
 
@@ -606,8 +606,8 @@ static int ov9640_try_fmt(struct v4l2_subdev *sd,
 		mf->colorspace = V4L2_COLORSPACE_SRGB;
 		break;
 	default:
-		mf->code = V4L2_MBUS_FMT_YUYV8_2X8_BE;
-	case V4L2_MBUS_FMT_YUYV8_2X8_BE:
+		mf->code = V4L2_MBUS_FMT_UYVY8_2X8;
+	case V4L2_MBUS_FMT_UYVY8_2X8:
 		mf->colorspace = V4L2_COLORSPACE_JPEG;
 	}
 
diff --git a/drivers/media/video/pxa_camera.c b/drivers/media/video/pxa_camera.c
index 5835acf7fa7..9de7d59916b 100644
--- a/drivers/media/video/pxa_camera.c
+++ b/drivers/media/video/pxa_camera.c
@@ -1284,7 +1284,7 @@ static int pxa_camera_get_formats(struct soc_camera_device *icd, unsigned int id
 	}
 
 	switch (code) {
-	case V4L2_MBUS_FMT_YUYV8_2X8_BE:
+	case V4L2_MBUS_FMT_UYVY8_2X8:
 		formats++;
 		if (xlate) {
 			xlate->host_fmt	= &pxa_camera_formats[0];
@@ -1293,9 +1293,9 @@ static int pxa_camera_get_formats(struct soc_camera_device *icd, unsigned int id
 			dev_dbg(dev, "Providing format %s using code %d\n",
 				pxa_camera_formats[0].name, code);
 		}
-	case V4L2_MBUS_FMT_YVYU8_2X8_BE:
-	case V4L2_MBUS_FMT_YUYV8_2X8_LE:
-	case V4L2_MBUS_FMT_YVYU8_2X8_LE:
+	case V4L2_MBUS_FMT_VYUY8_2X8:
+	case V4L2_MBUS_FMT_YUYV8_2X8:
+	case V4L2_MBUS_FMT_YVYU8_2X8:
 	case V4L2_MBUS_FMT_RGB565_2X8_LE:
 	case V4L2_MBUS_FMT_RGB555_2X8_PADHI_LE:
 		if (xlate)
diff --git a/drivers/media/video/rj54n1cb0c.c b/drivers/media/video/rj54n1cb0c.c
index 47fd207ba3b..d319aef7527 100644
--- a/drivers/media/video/rj54n1cb0c.c
+++ b/drivers/media/video/rj54n1cb0c.c
@@ -127,8 +127,8 @@ static const struct rj54n1_datafmt *rj54n1_find_datafmt(
 }
 
 static const struct rj54n1_datafmt rj54n1_colour_fmts[] = {
-	{V4L2_MBUS_FMT_YUYV8_2X8_LE, V4L2_COLORSPACE_JPEG},
-	{V4L2_MBUS_FMT_YVYU8_2X8_LE, V4L2_COLORSPACE_JPEG},
+	{V4L2_MBUS_FMT_YUYV8_2X8, V4L2_COLORSPACE_JPEG},
+	{V4L2_MBUS_FMT_YVYU8_2X8, V4L2_COLORSPACE_JPEG},
 	{V4L2_MBUS_FMT_RGB565_2X8_LE, V4L2_COLORSPACE_SRGB},
 	{V4L2_MBUS_FMT_RGB565_2X8_BE, V4L2_COLORSPACE_SRGB},
 	{V4L2_MBUS_FMT_SBGGR10_2X8_PADHI_LE, V4L2_COLORSPACE_SRGB},
@@ -1046,12 +1046,12 @@ static int rj54n1_s_fmt(struct v4l2_subdev *sd,
 
 	/* RA_SEL_UL is only relevant for raw modes, ignored otherwise. */
 	switch (mf->code) {
-	case V4L2_MBUS_FMT_YUYV8_2X8_LE:
+	case V4L2_MBUS_FMT_YUYV8_2X8:
 		ret = reg_write(client, RJ54N1_OUT_SEL, 0);
 		if (!ret)
 			ret = reg_set(client, RJ54N1_BYTE_SWAP, 8, 8);
 		break;
-	case V4L2_MBUS_FMT_YVYU8_2X8_LE:
+	case V4L2_MBUS_FMT_YVYU8_2X8:
 		ret = reg_write(client, RJ54N1_OUT_SEL, 0);
 		if (!ret)
 			ret = reg_set(client, RJ54N1_BYTE_SWAP, 0, 8);
diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c
index d40b1e08bce..86869dbcbab 100644
--- a/drivers/media/video/sh_mobile_ceu_camera.c
+++ b/drivers/media/video/sh_mobile_ceu_camera.c
@@ -743,16 +743,16 @@ static int sh_mobile_ceu_set_bus_param(struct soc_camera_device *icd,
 	case V4L2_PIX_FMT_NV16:
 	case V4L2_PIX_FMT_NV61:
 		switch (cam->code) {
-		case V4L2_MBUS_FMT_YUYV8_2X8_BE:
+		case V4L2_MBUS_FMT_UYVY8_2X8:
 			value = 0x00000000; /* Cb0, Y0, Cr0, Y1 */
 			break;
-		case V4L2_MBUS_FMT_YVYU8_2X8_BE:
+		case V4L2_MBUS_FMT_VYUY8_2X8:
 			value = 0x00000100; /* Cr0, Y0, Cb0, Y1 */
 			break;
-		case V4L2_MBUS_FMT_YUYV8_2X8_LE:
+		case V4L2_MBUS_FMT_YUYV8_2X8:
 			value = 0x00000200; /* Y0, Cb0, Y1, Cr0 */
 			break;
-		case V4L2_MBUS_FMT_YVYU8_2X8_LE:
+		case V4L2_MBUS_FMT_YVYU8_2X8:
 			value = 0x00000300; /* Y0, Cr0, Y1, Cb0 */
 			break;
 		default:
@@ -965,10 +965,10 @@ static int sh_mobile_ceu_get_formats(struct soc_camera_device *icd, unsigned int
 		cam->extra_fmt = NULL;
 
 	switch (code) {
-	case V4L2_MBUS_FMT_YUYV8_2X8_BE:
-	case V4L2_MBUS_FMT_YVYU8_2X8_BE:
-	case V4L2_MBUS_FMT_YUYV8_2X8_LE:
-	case V4L2_MBUS_FMT_YVYU8_2X8_LE:
+	case V4L2_MBUS_FMT_UYVY8_2X8:
+	case V4L2_MBUS_FMT_VYUY8_2X8:
+	case V4L2_MBUS_FMT_YUYV8_2X8:
+	case V4L2_MBUS_FMT_YVYU8_2X8:
 		if (cam->extra_fmt)
 			break;
 
diff --git a/drivers/media/video/sh_vou.c b/drivers/media/video/sh_vou.c
index 5f73a017961..3869d515feb 100644
--- a/drivers/media/video/sh_vou.c
+++ b/drivers/media/video/sh_vou.c
@@ -678,7 +678,7 @@ static int sh_vou_s_fmt_vid_out(struct file *file, void *priv,
 	struct sh_vou_geometry geo;
 	struct v4l2_mbus_framefmt mbfmt = {
 		/* Revisit: is this the correct code? */
-		.code = V4L2_MBUS_FMT_YUYV8_2X8_LE,
+		.code = V4L2_MBUS_FMT_YUYV8_2X8,
 		.field = V4L2_FIELD_INTERLACED,
 		.colorspace = V4L2_COLORSPACE_SMPTE170M,
 	};
@@ -726,7 +726,7 @@ static int sh_vou_s_fmt_vid_out(struct file *file, void *priv,
 	/* Sanity checks */
 	if ((unsigned)mbfmt.width > VOU_MAX_IMAGE_WIDTH ||
 	    (unsigned)mbfmt.height > VOU_MAX_IMAGE_HEIGHT ||
-	    mbfmt.code != V4L2_MBUS_FMT_YUYV8_2X8_LE)
+	    mbfmt.code != V4L2_MBUS_FMT_YUYV8_2X8)
 		return -EIO;
 
 	if (mbfmt.width != geo.output.width ||
@@ -937,7 +937,7 @@ static int sh_vou_s_crop(struct file *file, void *fh, struct v4l2_crop *a)
 	struct sh_vou_geometry geo;
 	struct v4l2_mbus_framefmt mbfmt = {
 		/* Revisit: is this the correct code? */
-		.code = V4L2_MBUS_FMT_YUYV8_2X8_LE,
+		.code = V4L2_MBUS_FMT_YUYV8_2X8,
 		.field = V4L2_FIELD_INTERLACED,
 		.colorspace = V4L2_COLORSPACE_SMPTE170M,
 	};
@@ -982,7 +982,7 @@ static int sh_vou_s_crop(struct file *file, void *fh, struct v4l2_crop *a)
 	/* Sanity checks */
 	if ((unsigned)mbfmt.width > VOU_MAX_IMAGE_WIDTH ||
 	    (unsigned)mbfmt.height > VOU_MAX_IMAGE_HEIGHT ||
-	    mbfmt.code != V4L2_MBUS_FMT_YUYV8_2X8_LE)
+	    mbfmt.code != V4L2_MBUS_FMT_YUYV8_2X8)
 		return -EIO;
 
 	geo.output.width = mbfmt.width;
diff --git a/drivers/media/video/soc_mediabus.c b/drivers/media/video/soc_mediabus.c
index 8b63b6545e7..91391214c68 100644
--- a/drivers/media/video/soc_mediabus.c
+++ b/drivers/media/video/soc_mediabus.c
@@ -18,28 +18,28 @@
 #define MBUS_IDX(f) (V4L2_MBUS_FMT_ ## f - V4L2_MBUS_FMT_FIXED - 1)
 
 static const struct soc_mbus_pixelfmt mbus_fmt[] = {
-	[MBUS_IDX(YUYV8_2X8_LE)] = {
+	[MBUS_IDX(YUYV8_2X8)] = {
 		.fourcc			= V4L2_PIX_FMT_YUYV,
 		.name			= "YUYV",
 		.bits_per_sample	= 8,
 		.packing		= SOC_MBUS_PACKING_2X8_PADHI,
 		.order			= SOC_MBUS_ORDER_LE,
 	},
-	[MBUS_IDX(YVYU8_2X8_LE)] = {
+	[MBUS_IDX(YVYU8_2X8)] = {
 		.fourcc			= V4L2_PIX_FMT_YVYU,
 		.name			= "YVYU",
 		.bits_per_sample	= 8,
 		.packing		= SOC_MBUS_PACKING_2X8_PADHI,
 		.order			= SOC_MBUS_ORDER_LE,
 	},
-	[MBUS_IDX(YUYV8_2X8_BE)] = {
+	[MBUS_IDX(UYVY8_2X8)] = {
 		.fourcc			= V4L2_PIX_FMT_UYVY,
 		.name			= "UYVY",
 		.bits_per_sample	= 8,
 		.packing		= SOC_MBUS_PACKING_2X8_PADHI,
 		.order			= SOC_MBUS_ORDER_LE,
 	},
-	[MBUS_IDX(YVYU8_2X8_BE)] = {
+	[MBUS_IDX(VYUY8_2X8)] = {
 		.fourcc			= V4L2_PIX_FMT_VYUY,
 		.name			= "VYUY",
 		.bits_per_sample	= 8,
diff --git a/drivers/media/video/tw9910.c b/drivers/media/video/tw9910.c
index 445dc93413e..a727962781a 100644
--- a/drivers/media/video/tw9910.c
+++ b/drivers/media/video/tw9910.c
@@ -768,7 +768,7 @@ static int tw9910_g_fmt(struct v4l2_subdev *sd,
 
 	mf->width	= priv->scale->width;
 	mf->height	= priv->scale->height;
-	mf->code	= V4L2_MBUS_FMT_YUYV8_2X8_BE;
+	mf->code	= V4L2_MBUS_FMT_UYVY8_2X8;
 	mf->colorspace	= V4L2_COLORSPACE_JPEG;
 	mf->field	= V4L2_FIELD_INTERLACED_BT;
 
@@ -797,7 +797,7 @@ static int tw9910_s_fmt(struct v4l2_subdev *sd,
 	/*
 	 * check color format
 	 */
-	if (mf->code != V4L2_MBUS_FMT_YUYV8_2X8_BE)
+	if (mf->code != V4L2_MBUS_FMT_UYVY8_2X8)
 		return -EINVAL;
 
 	mf->colorspace = V4L2_COLORSPACE_JPEG;
@@ -824,7 +824,7 @@ static int tw9910_try_fmt(struct v4l2_subdev *sd,
 		return -EINVAL;
 	}
 
-	mf->code = V4L2_MBUS_FMT_YUYV8_2X8_BE;
+	mf->code = V4L2_MBUS_FMT_UYVY8_2X8;
 	mf->colorspace = V4L2_COLORSPACE_JPEG;
 
 	/*
@@ -909,7 +909,7 @@ static int tw9910_enum_fmt(struct v4l2_subdev *sd, unsigned int index,
 	if (index)
 		return -EINVAL;
 
-	*code = V4L2_MBUS_FMT_YUYV8_2X8_BE;
+	*code = V4L2_MBUS_FMT_UYVY8_2X8;
 	return 0;
 }
 
diff --git a/include/media/v4l2-mediabus.h b/include/media/v4l2-mediabus.h
index 865cda7cd61..a8709659841 100644
--- a/include/media/v4l2-mediabus.h
+++ b/include/media/v4l2-mediabus.h
@@ -24,10 +24,10 @@
  */
 enum v4l2_mbus_pixelcode {
 	V4L2_MBUS_FMT_FIXED = 1,
-	V4L2_MBUS_FMT_YUYV8_2X8_LE,
-	V4L2_MBUS_FMT_YVYU8_2X8_LE,
-	V4L2_MBUS_FMT_YUYV8_2X8_BE,
-	V4L2_MBUS_FMT_YVYU8_2X8_BE,
+	V4L2_MBUS_FMT_YUYV8_2X8,
+	V4L2_MBUS_FMT_YVYU8_2X8,
+	V4L2_MBUS_FMT_UYVY8_2X8,
+	V4L2_MBUS_FMT_VYUY8_2X8,
 	V4L2_MBUS_FMT_RGB555_2X8_PADHI_LE,
 	V4L2_MBUS_FMT_RGB555_2X8_PADHI_BE,
 	V4L2_MBUS_FMT_RGB565_2X8_LE,
-- 
cgit v1.2.3-70-g09d2


From c6b65ab78bebf5ceaa8de53d8a9c4f5e34e45e57 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Mon, 26 Jul 2010 10:41:55 -0300
Subject: V4L/DVB: V4L2: mediabus: add 12-bit Bayer and YUV420 pixel formats

These formats belong to the standard format set, defined by the MIPI CSI-2
specification.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/v4l2-mediabus.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/media/v4l2-mediabus.h b/include/media/v4l2-mediabus.h
index a8709659841..f0cf2e7def0 100644
--- a/include/media/v4l2-mediabus.h
+++ b/include/media/v4l2-mediabus.h
@@ -41,6 +41,11 @@ enum v4l2_mbus_pixelcode {
 	V4L2_MBUS_FMT_SBGGR10_2X8_PADHI_BE,
 	V4L2_MBUS_FMT_SBGGR10_2X8_PADLO_BE,
 	V4L2_MBUS_FMT_SGRBG8_1X8,
+	V4L2_MBUS_FMT_SBGGR12_1X12,
+	V4L2_MBUS_FMT_YUYV8_1_5X8,
+	V4L2_MBUS_FMT_YVYU8_1_5X8,
+	V4L2_MBUS_FMT_UYVY8_1_5X8,
+	V4L2_MBUS_FMT_VYUY8_1_5X8,
 };
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 52d268a36246ee4156cc719036522616bb4d73fa Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Mon, 26 Jul 2010 11:37:13 -0300
Subject: V4L/DVB: V4L2: soc-camera: export soc-camera bus type for
 notifications

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/soc_camera.c | 3 ++-
 include/media/soc_camera.h       | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index 475757bfd7b..f2032939fd4 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -1107,13 +1107,14 @@ static int soc_camera_resume(struct device *dev)
 	return ret;
 }
 
-static struct bus_type soc_camera_bus_type = {
+struct bus_type soc_camera_bus_type = {
 	.name		= "soc-camera",
 	.probe		= soc_camera_probe,
 	.remove		= soc_camera_remove,
 	.suspend	= soc_camera_suspend,
 	.resume		= soc_camera_resume,
 };
+EXPORT_SYMBOL_GPL(soc_camera_bus_type);
 
 static struct device_driver ic_drv = {
 	.name	= "camera",
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index b8289c2f609..2ce957301f7 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -12,12 +12,15 @@
 #ifndef SOC_CAMERA_H
 #define SOC_CAMERA_H
 
+#include <linux/device.h>
 #include <linux/mutex.h>
 #include <linux/pm.h>
 #include <linux/videodev2.h>
 #include <media/videobuf-core.h>
 #include <media/v4l2-device.h>
 
+extern struct bus_type soc_camera_bus_type;
+
 struct soc_camera_device {
 	struct list_head list;
 	struct device dev;
-- 
cgit v1.2.3-70-g09d2


From 077e2c10c9cb618d571bf16475db696610bdb24a Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Mon, 26 Jul 2010 11:12:43 -0300
Subject: V4L/DVB: V4L2: soc-camera: add a MIPI CSI-2 driver for SH-Mobile
 platforms

Some SH-Mobile SoCs implement a MIPI CSI-2 controller that can interface to
several video clients and send data to the CEU or to the Image Signal
Processor.  This patch implements a v4l2-subdevice driver for CSI-2 to be used
within the soc-camera framework, implementing the second subdevice in addition
to the actual video clients.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/Kconfig          |   6 +
 drivers/media/video/Makefile         |   1 +
 drivers/media/video/sh_mobile_csi2.c | 354 +++++++++++++++++++++++++++++++++++
 include/media/sh_mobile_csi2.h       |  46 +++++
 4 files changed, 407 insertions(+)
 create mode 100644 drivers/media/video/sh_mobile_csi2.c
 create mode 100644 include/media/sh_mobile_csi2.h

(limited to 'include')

diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index c627f776c1e..0f1ac401ded 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -876,6 +876,12 @@ config VIDEO_PXA27x
 	---help---
 	  This is a v4l2 driver for the PXA27x Quick Capture Interface
 
+config VIDEO_SH_MOBILE_CSI2
+	tristate "SuperH Mobile MIPI CSI-2 Interface driver"
+	depends on VIDEO_DEV && SOC_CAMERA && HAVE_CLK
+	---help---
+	  This is a v4l2 driver for the SuperH MIPI CSI-2 Interface
+
 config VIDEO_SH_MOBILE_CEU
 	tristate "SuperH Mobile CEU Interface driver"
 	depends on VIDEO_DEV && SOC_CAMERA && HAS_DMA && HAVE_CLK
diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile
index eba259fbf7e..88478630e85 100644
--- a/drivers/media/video/Makefile
+++ b/drivers/media/video/Makefile
@@ -160,6 +160,7 @@ obj-$(CONFIG_SOC_CAMERA_PLATFORM)	+= soc_camera_platform.o
 obj-$(CONFIG_VIDEO_MX1)			+= mx1_camera.o
 obj-$(CONFIG_VIDEO_MX3)			+= mx3_camera.o
 obj-$(CONFIG_VIDEO_PXA27x)		+= pxa_camera.o
+obj-$(CONFIG_VIDEO_SH_MOBILE_CSI2)	+= sh_mobile_csi2.o
 obj-$(CONFIG_VIDEO_SH_MOBILE_CEU)	+= sh_mobile_ceu_camera.o
 
 obj-$(CONFIG_ARCH_DAVINCI)		+= davinci/
diff --git a/drivers/media/video/sh_mobile_csi2.c b/drivers/media/video/sh_mobile_csi2.c
new file mode 100644
index 00000000000..84a64681931
--- /dev/null
+++ b/drivers/media/video/sh_mobile_csi2.c
@@ -0,0 +1,354 @@
+/*
+ * Driver for the SH-Mobile MIPI CSI-2 unit
+ *
+ * Copyright (C) 2010, Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/videodev2.h>
+
+#include <media/sh_mobile_csi2.h>
+#include <media/soc_camera.h>
+#include <media/v4l2-common.h>
+#include <media/v4l2-dev.h>
+#include <media/v4l2-device.h>
+#include <media/v4l2-mediabus.h>
+#include <media/v4l2-subdev.h>
+
+#define SH_CSI2_TREF	0x00
+#define SH_CSI2_SRST	0x04
+#define SH_CSI2_PHYCNT	0x08
+#define SH_CSI2_CHKSUM	0x0C
+#define SH_CSI2_VCDT	0x10
+
+struct sh_csi2 {
+	struct v4l2_subdev		subdev;
+	struct list_head		list;
+	struct notifier_block		notifier;
+	unsigned int			irq;
+	void __iomem			*base;
+	struct platform_device		*pdev;
+	struct sh_csi2_client_config	*client;
+};
+
+static int sh_csi2_try_fmt(struct v4l2_subdev *sd,
+			   struct v4l2_mbus_framefmt *mf)
+{
+	struct sh_csi2 *priv = container_of(sd, struct sh_csi2, subdev);
+	struct sh_csi2_pdata *pdata = priv->pdev->dev.platform_data;
+
+	if (mf->width > 8188)
+		mf->width = 8188;
+	else if (mf->width & 1)
+		mf->width &= ~1;
+
+	switch (pdata->type) {
+	case SH_CSI2C:
+		switch (mf->code) {
+		case V4L2_MBUS_FMT_UYVY8_2X8:		/* YUV422 */
+		case V4L2_MBUS_FMT_YUYV8_1_5X8:		/* YUV420 */
+		case V4L2_MBUS_FMT_GREY8_1X8:		/* RAW8 */
+		case V4L2_MBUS_FMT_SBGGR8_1X8:
+		case V4L2_MBUS_FMT_SGRBG8_1X8:
+			break;
+		default:
+			/* All MIPI CSI-2 devices must support one of primary formats */
+			mf->code = V4L2_MBUS_FMT_YUYV8_2X8;
+		}
+		break;
+	case SH_CSI2I:
+		switch (mf->code) {
+		case V4L2_MBUS_FMT_GREY8_1X8:		/* RAW8 */
+		case V4L2_MBUS_FMT_SBGGR8_1X8:
+		case V4L2_MBUS_FMT_SGRBG8_1X8:
+		case V4L2_MBUS_FMT_SBGGR10_1X10:	/* RAW10 */
+		case V4L2_MBUS_FMT_SBGGR12_1X12:	/* RAW12 */
+			break;
+		default:
+			/* All MIPI CSI-2 devices must support one of primary formats */
+			mf->code = V4L2_MBUS_FMT_SBGGR8_1X8;
+		}
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * We have done our best in try_fmt to try and tell the sensor, which formats
+ * we support. If now the configuration is unsuitable for us we can only
+ * error out.
+ */
+static int sh_csi2_s_fmt(struct v4l2_subdev *sd,
+			 struct v4l2_mbus_framefmt *mf)
+{
+	struct sh_csi2 *priv = container_of(sd, struct sh_csi2, subdev);
+	u32 tmp = (priv->client->channel & 3) << 8;
+
+	dev_dbg(sd->v4l2_dev->dev, "%s(%u)\n", __func__, mf->code);
+	if (mf->width > 8188 || mf->width & 1)
+		return -EINVAL;
+
+	switch (mf->code) {
+	case V4L2_MBUS_FMT_UYVY8_2X8:
+		tmp |= 0x1e;	/* YUV422 8 bit */
+		break;
+	case V4L2_MBUS_FMT_YUYV8_1_5X8:
+		tmp |= 0x18;	/* YUV420 8 bit */
+		break;
+	case V4L2_MBUS_FMT_RGB555_2X8_PADHI_BE:
+		tmp |= 0x21;	/* RGB555 */
+		break;
+	case V4L2_MBUS_FMT_RGB565_2X8_BE:
+		tmp |= 0x22;	/* RGB565 */
+		break;
+	case V4L2_MBUS_FMT_GREY8_1X8:
+	case V4L2_MBUS_FMT_SBGGR8_1X8:
+	case V4L2_MBUS_FMT_SGRBG8_1X8:
+		tmp |= 0x2a;	/* RAW8 */
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	iowrite32(tmp, priv->base + SH_CSI2_VCDT);
+
+	return 0;
+}
+
+static struct v4l2_subdev_video_ops sh_csi2_subdev_video_ops = {
+	.s_mbus_fmt	= sh_csi2_s_fmt,
+	.try_mbus_fmt	= sh_csi2_try_fmt,
+};
+
+static struct v4l2_subdev_core_ops sh_csi2_subdev_core_ops;
+
+static struct v4l2_subdev_ops sh_csi2_subdev_ops = {
+	.core	= &sh_csi2_subdev_core_ops,
+	.video	= &sh_csi2_subdev_video_ops,
+};
+
+static void sh_csi2_hwinit(struct sh_csi2 *priv)
+{
+	struct sh_csi2_pdata *pdata = priv->pdev->dev.platform_data;
+	__u32 tmp = 0x10; /* Enable MIPI CSI clock lane */
+
+	/* Reflect registers immediately */
+	iowrite32(0x00000001, priv->base + SH_CSI2_TREF);
+	/* reset CSI2 hardware */
+	iowrite32(0x00000001, priv->base + SH_CSI2_SRST);
+	udelay(5);
+	iowrite32(0x00000000, priv->base + SH_CSI2_SRST);
+
+	if (priv->client->lanes & 3)
+		tmp |= priv->client->lanes & 3;
+	else
+		/* Default - both lanes */
+		tmp |= 3;
+
+	if (priv->client->phy == SH_CSI2_PHY_MAIN)
+		tmp |= 0x8000;
+
+	iowrite32(tmp, priv->base + SH_CSI2_PHYCNT);
+
+	tmp = 0;
+	if (pdata->flags & SH_CSI2_ECC)
+		tmp |= 2;
+	if (pdata->flags & SH_CSI2_CRC)
+		tmp |= 1;
+	iowrite32(tmp, priv->base + SH_CSI2_CHKSUM);
+}
+
+static int sh_csi2_set_bus_param(struct soc_camera_device *icd,
+				 unsigned long flags)
+{
+	return 0;
+}
+
+static unsigned long sh_csi2_query_bus_param(struct soc_camera_device *icd)
+{
+	struct soc_camera_link *icl = to_soc_camera_link(icd);
+	const unsigned long flags = SOCAM_PCLK_SAMPLE_RISING |
+		SOCAM_HSYNC_ACTIVE_HIGH | SOCAM_VSYNC_ACTIVE_HIGH |
+		SOCAM_MASTER | SOCAM_DATAWIDTH_8 | SOCAM_DATA_ACTIVE_HIGH;
+
+	return soc_camera_apply_sensor_flags(icl, flags);
+}
+
+static int sh_csi2_notify(struct notifier_block *nb,
+			  unsigned long action, void *data)
+{
+	struct device *dev = data;
+	struct soc_camera_device *icd = to_soc_camera_dev(dev);
+	struct v4l2_device *v4l2_dev = dev_get_drvdata(dev->parent);
+	struct sh_csi2 *priv =
+		container_of(nb, struct sh_csi2, notifier);
+	struct sh_csi2_pdata *pdata = priv->pdev->dev.platform_data;
+	int ret, i;
+
+	for (i = 0; i < pdata->num_clients; i++)
+		if (&pdata->clients[i].pdev->dev == icd->pdev)
+			break;
+
+	dev_dbg(dev, "%s(%p): action = %lu, found #%d\n", __func__, dev, action, i);
+
+	if (i == pdata->num_clients)
+		return NOTIFY_DONE;
+
+	switch (action) {
+	case BUS_NOTIFY_BOUND_DRIVER:
+		snprintf(priv->subdev.name, V4L2_SUBDEV_NAME_SIZE, "%s%s",
+			 dev_name(v4l2_dev->dev), ".mipi-csi");
+		ret = v4l2_device_register_subdev(v4l2_dev, &priv->subdev);
+		dev_dbg(dev, "%s(%p): ret(register_subdev) = %d\n", __func__, priv, ret);
+		if (ret < 0)
+			return NOTIFY_DONE;
+
+		priv->client = pdata->clients + i;
+
+		icd->ops->set_bus_param		= sh_csi2_set_bus_param;
+		icd->ops->query_bus_param	= sh_csi2_query_bus_param;
+
+		pm_runtime_get_sync(v4l2_get_subdevdata(&priv->subdev));
+
+		sh_csi2_hwinit(priv);
+		break;
+	case BUS_NOTIFY_UNBIND_DRIVER:
+		priv->client = NULL;
+
+		/* Driver is about to be unbound */
+		icd->ops->set_bus_param		= NULL;
+		icd->ops->query_bus_param	= NULL;
+
+		v4l2_device_unregister_subdev(&priv->subdev);
+
+		pm_runtime_put(v4l2_get_subdevdata(&priv->subdev));
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static __devinit int sh_csi2_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	unsigned int irq;
+	int ret;
+	struct sh_csi2 *priv;
+	/* Platform data specify the PHY, lanes, ECC, CRC */
+	struct sh_csi2_pdata *pdata = pdev->dev.platform_data;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	/* Interrupt unused so far */
+	irq = platform_get_irq(pdev, 0);
+
+	if (!res || (int)irq <= 0 || !pdata) {
+		dev_err(&pdev->dev, "Not enough CSI2 platform resources.\n");
+		return -ENODEV;
+	}
+
+	/* TODO: Add support for CSI2I. Careful: different register layout! */
+	if (pdata->type != SH_CSI2C) {
+		dev_err(&pdev->dev, "Only CSI2C supported ATM.\n");
+		return -EINVAL;
+	}
+
+	priv = kzalloc(sizeof(struct sh_csi2), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->irq = irq;
+	priv->notifier.notifier_call = sh_csi2_notify;
+
+	/* We MUST attach after the MIPI sensor */
+	ret = bus_register_notifier(&soc_camera_bus_type, &priv->notifier);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "CSI2 cannot register notifier\n");
+		goto ernotify;
+	}
+
+	if (!request_mem_region(res->start, resource_size(res), pdev->name)) {
+		dev_err(&pdev->dev, "CSI2 register region already claimed\n");
+		ret = -EBUSY;
+		goto ereqreg;
+	}
+
+	priv->base = ioremap(res->start, resource_size(res));
+	if (!priv->base) {
+		ret = -ENXIO;
+		dev_err(&pdev->dev, "Unable to ioremap CSI2 registers.\n");
+		goto eremap;
+	}
+
+	priv->pdev = pdev;
+
+	v4l2_subdev_init(&priv->subdev, &sh_csi2_subdev_ops);
+	v4l2_set_subdevdata(&priv->subdev, &pdev->dev);
+
+	platform_set_drvdata(pdev, priv);
+
+	pm_runtime_enable(&pdev->dev);
+
+	dev_dbg(&pdev->dev, "CSI2 probed.\n");
+
+	return 0;
+
+eremap:
+	release_mem_region(res->start, resource_size(res));
+ereqreg:
+	bus_unregister_notifier(&soc_camera_bus_type, &priv->notifier);
+ernotify:
+	kfree(priv);
+
+	return ret;
+}
+
+static __devexit int sh_csi2_remove(struct platform_device *pdev)
+{
+	struct sh_csi2 *priv = platform_get_drvdata(pdev);
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	bus_unregister_notifier(&soc_camera_bus_type, &priv->notifier);
+	pm_runtime_disable(&pdev->dev);
+	iounmap(priv->base);
+	release_mem_region(res->start, resource_size(res));
+	platform_set_drvdata(pdev, NULL);
+	kfree(priv);
+
+	return 0;
+}
+
+static struct platform_driver __refdata sh_csi2_pdrv = {
+	.remove  = __devexit_p(sh_csi2_remove),
+	.driver  = {
+		.name	= "sh-mobile-csi2",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init sh_csi2_init(void)
+{
+	return platform_driver_probe(&sh_csi2_pdrv, sh_csi2_probe);
+}
+
+static void __exit sh_csi2_exit(void)
+{
+	platform_driver_unregister(&sh_csi2_pdrv);
+}
+
+module_init(sh_csi2_init);
+module_exit(sh_csi2_exit);
+
+MODULE_DESCRIPTION("SH-Mobile MIPI CSI-2 driver");
+MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:sh-mobile-csi2");
diff --git a/include/media/sh_mobile_csi2.h b/include/media/sh_mobile_csi2.h
new file mode 100644
index 00000000000..4d261517446
--- /dev/null
+++ b/include/media/sh_mobile_csi2.h
@@ -0,0 +1,46 @@
+/*
+ * Driver header for the SH-Mobile MIPI CSI-2 unit
+ *
+ * Copyright (C) 2010, Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef SH_MIPI_CSI
+#define SH_MIPI_CSI
+
+enum sh_csi2_phy {
+	SH_CSI2_PHY_MAIN,
+	SH_CSI2_PHY_SUB,
+};
+
+enum sh_csi2_type {
+	SH_CSI2C,
+	SH_CSI2I,
+};
+
+#define SH_CSI2_CRC	(1 << 0)
+#define SH_CSI2_ECC	(1 << 1)
+
+struct platform_device;
+
+struct sh_csi2_client_config {
+	enum sh_csi2_phy phy;
+	unsigned char lanes;		/* bitmask[3:0] */
+	unsigned char channel;		/* 0..3 */
+	struct platform_device *pdev;	/* client platform device */
+};
+
+struct sh_csi2_pdata {
+	enum sh_csi2_type type;
+	unsigned int flags;
+	struct sh_csi2_client_config *clients;
+	int num_clients;
+};
+
+struct device;
+struct v4l2_device;
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From b3b5020d8c12037f030242aab8e272148bf1f472 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Mon, 26 Jul 2010 12:13:34 -0300
Subject: V4L/DVB: V4L2: sh_mobile_camera_ceu: add support for CSI2

Using CEU with CSI2 on SH-Mobile requires some special configuration of the
former. We also have to switch from calling only one subdev .s_mbus_fmt and
.try_mbus_fmt to calling all subdevices. Take care to increment CSI2 driver
use count to prevent it from unloading, while in use.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/sh_mobile_ceu_camera.c | 131 +++++++++++++++++++++++++----
 include/media/sh_mobile_ceu.h              |   3 +
 2 files changed, 119 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c
index 86869dbcbab..2b24bd0de3a 100644
--- a/drivers/media/video/sh_mobile_ceu_camera.c
+++ b/drivers/media/video/sh_mobile_ceu_camera.c
@@ -633,6 +633,12 @@ static void sh_mobile_ceu_set_rect(struct soc_camera_device *icd)
 		cdwdr_width *= 2;
 	}
 
+	/* CSI2 special configuration */
+	if (pcdev->pdata->csi2_dev) {
+		in_width = ((in_width - 2) * 2);
+		left_offset *= 2;
+	}
+
 	/* Set CAMOR, CAPWR, CFSZR, take care of CDWDR */
 	camor = left_offset | (top_offset << 16);
 
@@ -767,6 +773,11 @@ static int sh_mobile_ceu_set_bus_param(struct soc_camera_device *icd,
 	value |= common_flags & SOCAM_VSYNC_ACTIVE_LOW ? 1 << 1 : 0;
 	value |= common_flags & SOCAM_HSYNC_ACTIVE_LOW ? 1 << 0 : 0;
 	value |= pcdev->is_16bit ? 1 << 12 : 0;
+
+	/* CSI2 mode */
+	if (pcdev->pdata->csi2_dev)
+		value |= 3 << 12;
+
 	ceu_write(pcdev, CAMCR, value);
 
 	ceu_write(pcdev, CAPCR, 0x00300000);
@@ -883,6 +894,8 @@ static int sh_mobile_ceu_get_formats(struct soc_camera_device *icd, unsigned int
 {
 	struct v4l2_subdev *sd = soc_camera_to_subdev(icd);
 	struct device *dev = icd->dev.parent;
+	struct soc_camera_host *ici = to_soc_camera_host(dev);
+	struct sh_mobile_ceu_dev *pcdev = ici->priv;
 	int ret, k, n;
 	int formats = 0;
 	struct sh_mobile_ceu_cam *cam;
@@ -896,19 +909,19 @@ static int sh_mobile_ceu_get_formats(struct soc_camera_device *icd, unsigned int
 
 	fmt = soc_mbus_get_fmtdesc(code);
 	if (!fmt) {
-		dev_err(icd->dev.parent,
-			"Invalid format code #%u: %d\n", idx, code);
+		dev_err(dev, "Invalid format code #%u: %d\n", idx, code);
 		return -EINVAL;
 	}
 
-	ret = sh_mobile_ceu_try_bus_param(icd, fmt->bits_per_sample);
-	if (ret < 0)
-		return 0;
+	if (!pcdev->pdata->csi2_dev) {
+		ret = sh_mobile_ceu_try_bus_param(icd, fmt->bits_per_sample);
+		if (ret < 0)
+			return 0;
+	}
 
 	if (!icd->host_priv) {
 		struct v4l2_mbus_framefmt mf;
 		struct v4l2_rect rect;
-		struct device *dev = icd->dev.parent;
 		int shift = 0;
 
 		/* FIXME: subwindow is lost between close / open */
@@ -927,7 +940,8 @@ static int sh_mobile_ceu_get_formats(struct soc_camera_device *icd, unsigned int
 			/* Try 2560x1920, 1280x960, 640x480, 320x240 */
 			mf.width	= 2560 >> shift;
 			mf.height	= 1920 >> shift;
-			ret = v4l2_subdev_call(sd, video, s_mbus_fmt, &mf);
+			ret = v4l2_device_call_until_err(sd->v4l2_dev, 0, video,
+							 s_mbus_fmt, &mf);
 			if (ret < 0)
 				return ret;
 			shift++;
@@ -1228,7 +1242,8 @@ static int client_s_fmt(struct soc_camera_device *icd,
 	struct v4l2_cropcap cap;
 	int ret;
 
-	ret = v4l2_subdev_call(sd, video, s_mbus_fmt, mf);
+	ret = v4l2_device_call_until_err(sd->v4l2_dev, 0, video,
+					 s_mbus_fmt, mf);
 	if (ret < 0)
 		return ret;
 
@@ -1257,7 +1272,8 @@ static int client_s_fmt(struct soc_camera_device *icd,
 		tmp_h = min(2 * tmp_h, max_height);
 		mf->width = tmp_w;
 		mf->height = tmp_h;
-		ret = v4l2_subdev_call(sd, video, s_mbus_fmt, mf);
+		ret = v4l2_device_call_until_err(sd->v4l2_dev, 0, video,
+						 s_mbus_fmt, mf);
 		dev_geo(dev, "Camera scaled to %ux%u\n",
 			mf->width, mf->height);
 		if (ret < 0) {
@@ -1514,7 +1530,8 @@ static int sh_mobile_ceu_set_fmt(struct soc_camera_device *icd,
 	struct device *dev = icd->dev.parent;
 	__u32 pixfmt = pix->pixelformat;
 	const struct soc_camera_format_xlate *xlate;
-	unsigned int ceu_sub_width, ceu_sub_height;
+	/* Keep Compiler Happy */
+	unsigned int ceu_sub_width = 0, ceu_sub_height = 0;
 	u16 scale_v, scale_h;
 	int ret;
 	bool image_mode;
@@ -1569,8 +1586,8 @@ static int sh_mobile_ceu_set_fmt(struct soc_camera_device *icd,
 
 	/* Done with the camera. Now see if we can improve the result */
 
-	dev_geo(dev, "Camera %d fmt %ux%u, requested %ux%u\n",
-		ret, mf.width, mf.height, pix->width, pix->height);
+	dev_geo(dev, "fmt %ux%u, requested %ux%u\n",
+		mf.width, mf.height, pix->width, pix->height);
 	if (ret < 0)
 		return ret;
 
@@ -1634,6 +1651,9 @@ static int sh_mobile_ceu_try_fmt(struct soc_camera_device *icd,
 	int width, height;
 	int ret;
 
+	dev_geo(icd->dev.parent, "TRY_FMT(pix=0x%x, %ux%u)\n",
+		 pixfmt, pix->width, pix->height);
+
 	xlate = soc_camera_xlate_by_fourcc(icd, pixfmt);
 	if (!xlate) {
 		dev_warn(icd->dev.parent, "Format %x not found\n", pixfmt);
@@ -1660,7 +1680,7 @@ static int sh_mobile_ceu_try_fmt(struct soc_camera_device *icd,
 	mf.code		= xlate->code;
 	mf.colorspace	= pix->colorspace;
 
-	ret = v4l2_subdev_call(sd, video, try_mbus_fmt, &mf);
+	ret = v4l2_device_call_until_err(sd->v4l2_dev, 0, video, try_mbus_fmt, &mf);
 	if (ret < 0)
 		return ret;
 
@@ -1684,7 +1704,8 @@ static int sh_mobile_ceu_try_fmt(struct soc_camera_device *icd,
 			 */
 			mf.width = 2560;
 			mf.height = 1920;
-			ret = v4l2_subdev_call(sd, video, try_mbus_fmt, &mf);
+			ret = v4l2_device_call_until_err(sd->v4l2_dev, 0, video,
+							 try_mbus_fmt, &mf);
 			if (ret < 0) {
 				/* Shouldn't actually happen... */
 				dev_err(icd->dev.parent,
@@ -1699,6 +1720,9 @@ static int sh_mobile_ceu_try_fmt(struct soc_camera_device *icd,
 			pix->height = height;
 	}
 
+	dev_geo(icd->dev.parent, "%s(): return %d, fmt 0x%x, %ux%u\n",
+		__func__, ret, pix->pixelformat, pix->width, pix->height);
+
 	return ret;
 }
 
@@ -1853,6 +1877,30 @@ static struct soc_camera_host_ops sh_mobile_ceu_host_ops = {
 	.num_controls	= ARRAY_SIZE(sh_mobile_ceu_controls),
 };
 
+struct bus_wait {
+	struct notifier_block	notifier;
+	struct completion	completion;
+	struct device		*dev;
+};
+
+static int bus_notify(struct notifier_block *nb,
+		      unsigned long action, void *data)
+{
+	struct device *dev = data;
+	struct bus_wait *wait = container_of(nb, struct bus_wait, notifier);
+
+	if (wait->dev != dev)
+		return NOTIFY_DONE;
+
+	switch (action) {
+	case BUS_NOTIFY_UNBOUND_DRIVER:
+		/* Protect from module unloading */
+		wait_for_completion(&wait->completion);
+		return NOTIFY_OK;
+	}
+	return NOTIFY_DONE;
+}
+
 static int __devinit sh_mobile_ceu_probe(struct platform_device *pdev)
 {
 	struct sh_mobile_ceu_dev *pcdev;
@@ -1860,6 +1908,11 @@ static int __devinit sh_mobile_ceu_probe(struct platform_device *pdev)
 	void __iomem *base;
 	unsigned int irq;
 	int err = 0;
+	struct bus_wait wait = {
+		.completion = COMPLETION_INITIALIZER_ONSTACK(wait.completion),
+		.notifier.notifier_call = bus_notify,
+	};
+	struct device *csi2;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq = platform_get_irq(pdev, 0);
@@ -1931,12 +1984,54 @@ static int __devinit sh_mobile_ceu_probe(struct platform_device *pdev)
 	pcdev->ici.drv_name = dev_name(&pdev->dev);
 	pcdev->ici.ops = &sh_mobile_ceu_host_ops;
 
+	/* CSI2 interfacing */
+	csi2 = pcdev->pdata->csi2_dev;
+	if (csi2) {
+		wait.dev = csi2;
+
+		err = bus_register_notifier(&platform_bus_type, &wait.notifier);
+		if (err < 0)
+			goto exit_free_clk;
+
+		/*
+		 * From this point the driver module will not unload, until
+		 * we complete the completion.
+		 */
+
+		if (!csi2->driver || !csi2->driver->owner) {
+			complete(&wait.completion);
+			/* Either too late, or probing failed */
+			bus_unregister_notifier(&platform_bus_type, &wait.notifier);
+			err = -ENXIO;
+			goto exit_free_clk;
+		}
+
+		/*
+		 * The module is still loaded, in the worst case it is hanging
+		 * in device release on our completion. So, _now_ dereferencing
+		 * the "owner" is safe!
+		 */
+
+		err = try_module_get(csi2->driver->owner);
+
+		/* Let notifier complete, if it has been locked */
+		complete(&wait.completion);
+		bus_unregister_notifier(&platform_bus_type, &wait.notifier);
+		if (!err) {
+			err = -ENODEV;
+			goto exit_free_clk;
+		}
+	}
+
 	err = soc_camera_host_register(&pcdev->ici);
 	if (err)
-		goto exit_free_clk;
+		goto exit_module_put;
 
 	return 0;
 
+exit_module_put:
+	if (csi2 && csi2->driver)
+		module_put(csi2->driver->owner);
 exit_free_clk:
 	pm_runtime_disable(&pdev->dev);
 	free_irq(pcdev->irq, pcdev);
@@ -1956,6 +2051,7 @@ static int __devexit sh_mobile_ceu_remove(struct platform_device *pdev)
 	struct soc_camera_host *soc_host = to_soc_camera_host(&pdev->dev);
 	struct sh_mobile_ceu_dev *pcdev = container_of(soc_host,
 					struct sh_mobile_ceu_dev, ici);
+	struct device *csi2 = pcdev->pdata->csi2_dev;
 
 	soc_camera_host_unregister(soc_host);
 	pm_runtime_disable(&pdev->dev);
@@ -1963,7 +2059,10 @@ static int __devexit sh_mobile_ceu_remove(struct platform_device *pdev)
 	if (platform_get_resource(pdev, IORESOURCE_MEM, 1))
 		dma_release_declared_memory(&pdev->dev);
 	iounmap(pcdev->base);
+	if (csi2 && csi2->driver)
+		module_put(csi2->driver->owner);
 	kfree(pcdev);
+
 	return 0;
 }
 
@@ -1995,6 +2094,8 @@ static struct platform_driver sh_mobile_ceu_driver = {
 
 static int __init sh_mobile_ceu_init(void)
 {
+	/* Whatever return code */
+	request_module("sh_mobile_csi2");
 	return platform_driver_register(&sh_mobile_ceu_driver);
 }
 
diff --git a/include/media/sh_mobile_ceu.h b/include/media/sh_mobile_ceu.h
index b6774783687..80346a6d28a 100644
--- a/include/media/sh_mobile_ceu.h
+++ b/include/media/sh_mobile_ceu.h
@@ -6,8 +6,11 @@
 #define SH_CEU_FLAG_HSYNC_LOW		(1 << 2) /* default High if possible */
 #define SH_CEU_FLAG_VSYNC_LOW		(1 << 3) /* default High if possible */
 
+struct device;
+
 struct sh_mobile_ceu_info {
 	unsigned long flags;
+	struct device *csi2_dev;
 };
 
 #endif /* __ASM_SH_MOBILE_CEU_H__ */
-- 
cgit v1.2.3-70-g09d2


From d700226902a62a3b6f3563782d569c0e2af74397 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Sat, 31 Jul 2010 19:24:49 -0300
Subject: V4L/DVB: Add a keymap file with dib0700 table

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/IR/keymaps/Makefile         |   1 +
 drivers/media/IR/keymaps/rc-dib0700-big.c | 314 ++++++++++++++++++++++++++++++
 include/media/rc-map.h                    |   4 +
 3 files changed, 319 insertions(+)
 create mode 100644 drivers/media/IR/keymaps/rc-dib0700-big.c

(limited to 'include')

diff --git a/drivers/media/IR/keymaps/Makefile b/drivers/media/IR/keymaps/Makefile
index 86d3d1f2eaa..85330d171c4 100644
--- a/drivers/media/IR/keymaps/Makefile
+++ b/drivers/media/IR/keymaps/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \
 			rc-budget-ci-old.o \
 			rc-cinergy-1400.o \
 			rc-cinergy.o \
+			rc-dib0700-big.o \
 			rc-dm1105-nec.o \
 			rc-dntv-live-dvb-t.o \
 			rc-dntv-live-dvbt-pro.o \
diff --git a/drivers/media/IR/keymaps/rc-dib0700-big.c b/drivers/media/IR/keymaps/rc-dib0700-big.c
new file mode 100644
index 00000000000..2e83820d3e5
--- /dev/null
+++ b/drivers/media/IR/keymaps/rc-dib0700-big.c
@@ -0,0 +1,314 @@
+/* rc-dib0700-big.c - Keytable for devices in dib0700
+ *
+ * Copyright (c) 2010 by Mauro Carvalho Chehab <mchehab@redhat.com>
+ *
+ * TODO: This table is a real mess, as it merges RC codes from several
+ * devices into a big table. It also has both RC-5 and NEC codes inside.
+ * It should be broken into small tables, and the protocols should be properly
+ * identified.
+ *
+ * The table was imported from dib0700_devices.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <media/rc-map.h>
+
+static struct ir_scancode dib0700_table[] = {
+	/* Key codes for the tiny Pinnacle remote*/
+	{ 0x0700, KEY_MUTE },
+	{ 0x0701, KEY_MENU }, /* Pinnacle logo */
+	{ 0x0739, KEY_POWER },
+	{ 0x0703, KEY_VOLUMEUP },
+	{ 0x0709, KEY_VOLUMEDOWN },
+	{ 0x0706, KEY_CHANNELUP },
+	{ 0x070c, KEY_CHANNELDOWN },
+	{ 0x070f, KEY_1 },
+	{ 0x0715, KEY_2 },
+	{ 0x0710, KEY_3 },
+	{ 0x0718, KEY_4 },
+	{ 0x071b, KEY_5 },
+	{ 0x071e, KEY_6 },
+	{ 0x0711, KEY_7 },
+	{ 0x0721, KEY_8 },
+	{ 0x0712, KEY_9 },
+	{ 0x0727, KEY_0 },
+	{ 0x0724, KEY_SCREEN }, /* 'Square' key */
+	{ 0x072a, KEY_TEXT },   /* 'T' key */
+	{ 0x072d, KEY_REWIND },
+	{ 0x0730, KEY_PLAY },
+	{ 0x0733, KEY_FASTFORWARD },
+	{ 0x0736, KEY_RECORD },
+	{ 0x073c, KEY_STOP },
+	{ 0x073f, KEY_CANCEL }, /* '?' key */
+	/* Key codes for the Terratec Cinergy DT XS Diversity, similar to cinergyT2.c */
+	{ 0xeb01, KEY_POWER },
+	{ 0xeb02, KEY_1 },
+	{ 0xeb03, KEY_2 },
+	{ 0xeb04, KEY_3 },
+	{ 0xeb05, KEY_4 },
+	{ 0xeb06, KEY_5 },
+	{ 0xeb07, KEY_6 },
+	{ 0xeb08, KEY_7 },
+	{ 0xeb09, KEY_8 },
+	{ 0xeb0a, KEY_9 },
+	{ 0xeb0b, KEY_VIDEO },
+	{ 0xeb0c, KEY_0 },
+	{ 0xeb0d, KEY_REFRESH },
+	{ 0xeb0f, KEY_EPG },
+	{ 0xeb10, KEY_UP },
+	{ 0xeb11, KEY_LEFT },
+	{ 0xeb12, KEY_OK },
+	{ 0xeb13, KEY_RIGHT },
+	{ 0xeb14, KEY_DOWN },
+	{ 0xeb16, KEY_INFO },
+	{ 0xeb17, KEY_RED },
+	{ 0xeb18, KEY_GREEN },
+	{ 0xeb19, KEY_YELLOW },
+	{ 0xeb1a, KEY_BLUE },
+	{ 0xeb1b, KEY_CHANNELUP },
+	{ 0xeb1c, KEY_VOLUMEUP },
+	{ 0xeb1d, KEY_MUTE },
+	{ 0xeb1e, KEY_VOLUMEDOWN },
+	{ 0xeb1f, KEY_CHANNELDOWN },
+	{ 0xeb40, KEY_PAUSE },
+	{ 0xeb41, KEY_HOME },
+	{ 0xeb42, KEY_MENU }, /* DVD Menu */
+	{ 0xeb43, KEY_SUBTITLE },
+	{ 0xeb44, KEY_TEXT }, /* Teletext */
+	{ 0xeb45, KEY_DELETE },
+	{ 0xeb46, KEY_TV },
+	{ 0xeb47, KEY_DVD },
+	{ 0xeb48, KEY_STOP },
+	{ 0xeb49, KEY_VIDEO },
+	{ 0xeb4a, KEY_AUDIO }, /* Music */
+	{ 0xeb4b, KEY_SCREEN }, /* Pic */
+	{ 0xeb4c, KEY_PLAY },
+	{ 0xeb4d, KEY_BACK },
+	{ 0xeb4e, KEY_REWIND },
+	{ 0xeb4f, KEY_FASTFORWARD },
+	{ 0xeb54, KEY_PREVIOUS },
+	{ 0xeb58, KEY_RECORD },
+	{ 0xeb5c, KEY_NEXT },
+
+	/* Key codes for the Hauppauge WinTV Nova-TD, copied from nova-t-usb2.c (Nova-T USB2) */
+	{ 0x1e00, KEY_0 },
+	{ 0x1e01, KEY_1 },
+	{ 0x1e02, KEY_2 },
+	{ 0x1e03, KEY_3 },
+	{ 0x1e04, KEY_4 },
+	{ 0x1e05, KEY_5 },
+	{ 0x1e06, KEY_6 },
+	{ 0x1e07, KEY_7 },
+	{ 0x1e08, KEY_8 },
+	{ 0x1e09, KEY_9 },
+	{ 0x1e0a, KEY_KPASTERISK },
+	{ 0x1e0b, KEY_RED },
+	{ 0x1e0c, KEY_RADIO },
+	{ 0x1e0d, KEY_MENU },
+	{ 0x1e0e, KEY_GRAVE }, /* # */
+	{ 0x1e0f, KEY_MUTE },
+	{ 0x1e10, KEY_VOLUMEUP },
+	{ 0x1e11, KEY_VOLUMEDOWN },
+	{ 0x1e12, KEY_CHANNEL },
+	{ 0x1e14, KEY_UP },
+	{ 0x1e15, KEY_DOWN },
+	{ 0x1e16, KEY_LEFT },
+	{ 0x1e17, KEY_RIGHT },
+	{ 0x1e18, KEY_VIDEO },
+	{ 0x1e19, KEY_AUDIO },
+	{ 0x1e1a, KEY_MEDIA },
+	{ 0x1e1b, KEY_EPG },
+	{ 0x1e1c, KEY_TV },
+	{ 0x1e1e, KEY_NEXT },
+	{ 0x1e1f, KEY_BACK },
+	{ 0x1e20, KEY_CHANNELUP },
+	{ 0x1e21, KEY_CHANNELDOWN },
+	{ 0x1e24, KEY_LAST }, /* Skip backwards */
+	{ 0x1e25, KEY_OK },
+	{ 0x1e29, KEY_BLUE},
+	{ 0x1e2e, KEY_GREEN },
+	{ 0x1e30, KEY_PAUSE },
+	{ 0x1e32, KEY_REWIND },
+	{ 0x1e34, KEY_FASTFORWARD },
+	{ 0x1e35, KEY_PLAY },
+	{ 0x1e36, KEY_STOP },
+	{ 0x1e37, KEY_RECORD },
+	{ 0x1e38, KEY_YELLOW },
+	{ 0x1e3b, KEY_GOTO },
+	{ 0x1e3d, KEY_POWER },
+
+	/* Key codes for the Leadtek Winfast DTV Dongle */
+	{ 0x0042, KEY_POWER },
+	{ 0x077c, KEY_TUNER },
+	{ 0x0f4e, KEY_PRINT }, /* PREVIEW */
+	{ 0x0840, KEY_SCREEN }, /* full screen toggle*/
+	{ 0x0f71, KEY_DOT }, /* frequency */
+	{ 0x0743, KEY_0 },
+	{ 0x0c41, KEY_1 },
+	{ 0x0443, KEY_2 },
+	{ 0x0b7f, KEY_3 },
+	{ 0x0e41, KEY_4 },
+	{ 0x0643, KEY_5 },
+	{ 0x097f, KEY_6 },
+	{ 0x0d7e, KEY_7 },
+	{ 0x057c, KEY_8 },
+	{ 0x0a40, KEY_9 },
+	{ 0x0e4e, KEY_CLEAR },
+	{ 0x047c, KEY_CHANNEL }, /* show channel number */
+	{ 0x0f41, KEY_LAST }, /* recall */
+	{ 0x0342, KEY_MUTE },
+	{ 0x064c, KEY_RESERVED }, /* PIP button*/
+	{ 0x0172, KEY_SHUFFLE }, /* SNAPSHOT */
+	{ 0x0c4e, KEY_PLAYPAUSE }, /* TIMESHIFT */
+	{ 0x0b70, KEY_RECORD },
+	{ 0x037d, KEY_VOLUMEUP },
+	{ 0x017d, KEY_VOLUMEDOWN },
+	{ 0x0242, KEY_CHANNELUP },
+	{ 0x007d, KEY_CHANNELDOWN },
+
+	/* Key codes for Nova-TD "credit card" remote control. */
+	{ 0x1d00, KEY_0 },
+	{ 0x1d01, KEY_1 },
+	{ 0x1d02, KEY_2 },
+	{ 0x1d03, KEY_3 },
+	{ 0x1d04, KEY_4 },
+	{ 0x1d05, KEY_5 },
+	{ 0x1d06, KEY_6 },
+	{ 0x1d07, KEY_7 },
+	{ 0x1d08, KEY_8 },
+	{ 0x1d09, KEY_9 },
+	{ 0x1d0a, KEY_TEXT },
+	{ 0x1d0d, KEY_MENU },
+	{ 0x1d0f, KEY_MUTE },
+	{ 0x1d10, KEY_VOLUMEUP },
+	{ 0x1d11, KEY_VOLUMEDOWN },
+	{ 0x1d12, KEY_CHANNEL },
+	{ 0x1d14, KEY_UP },
+	{ 0x1d15, KEY_DOWN },
+	{ 0x1d16, KEY_LEFT },
+	{ 0x1d17, KEY_RIGHT },
+	{ 0x1d1c, KEY_TV },
+	{ 0x1d1e, KEY_NEXT },
+	{ 0x1d1f, KEY_BACK },
+	{ 0x1d20, KEY_CHANNELUP },
+	{ 0x1d21, KEY_CHANNELDOWN },
+	{ 0x1d24, KEY_LAST },
+	{ 0x1d25, KEY_OK },
+	{ 0x1d30, KEY_PAUSE },
+	{ 0x1d32, KEY_REWIND },
+	{ 0x1d34, KEY_FASTFORWARD },
+	{ 0x1d35, KEY_PLAY },
+	{ 0x1d36, KEY_STOP },
+	{ 0x1d37, KEY_RECORD },
+	{ 0x1d3b, KEY_GOTO },
+	{ 0x1d3d, KEY_POWER },
+
+	/* Key codes for the Pixelview SBTVD remote (proto NEC) */
+	{ 0x8613, KEY_MUTE },
+	{ 0x8612, KEY_POWER },
+	{ 0x8601, KEY_1 },
+	{ 0x8602, KEY_2 },
+	{ 0x8603, KEY_3 },
+	{ 0x8604, KEY_4 },
+	{ 0x8605, KEY_5 },
+	{ 0x8606, KEY_6 },
+	{ 0x8607, KEY_7 },
+	{ 0x8608, KEY_8 },
+	{ 0x8609, KEY_9 },
+	{ 0x8600, KEY_0 },
+	{ 0x860d, KEY_CHANNELUP },
+	{ 0x8619, KEY_CHANNELDOWN },
+	{ 0x8610, KEY_VOLUMEUP },
+	{ 0x860c, KEY_VOLUMEDOWN },
+
+	{ 0x860a, KEY_CAMERA },
+	{ 0x860b, KEY_ZOOM },
+	{ 0x861b, KEY_BACKSPACE },
+	{ 0x8615, KEY_ENTER },
+
+	{ 0x861d, KEY_UP },
+	{ 0x861e, KEY_DOWN },
+	{ 0x860e, KEY_LEFT },
+	{ 0x860f, KEY_RIGHT },
+
+	{ 0x8618, KEY_RECORD },
+	{ 0x861a, KEY_STOP },
+
+	/* Key codes for the EvolutePC TVWay+ remote (proto NEC) */
+	{ 0x7a00, KEY_MENU },
+	{ 0x7a01, KEY_RECORD },
+	{ 0x7a02, KEY_PLAY },
+	{ 0x7a03, KEY_STOP },
+	{ 0x7a10, KEY_CHANNELUP },
+	{ 0x7a11, KEY_CHANNELDOWN },
+	{ 0x7a12, KEY_VOLUMEUP },
+	{ 0x7a13, KEY_VOLUMEDOWN },
+	{ 0x7a40, KEY_POWER },
+	{ 0x7a41, KEY_MUTE },
+
+	/* Key codes for the Elgato EyeTV Diversity silver remote,
+	   set dvb_usb_dib0700_ir_proto=0 */
+	{ 0x4501, KEY_POWER },
+	{ 0x4502, KEY_MUTE },
+	{ 0x4503, KEY_1 },
+	{ 0x4504, KEY_2 },
+	{ 0x4505, KEY_3 },
+	{ 0x4506, KEY_4 },
+	{ 0x4507, KEY_5 },
+	{ 0x4508, KEY_6 },
+	{ 0x4509, KEY_7 },
+	{ 0x450a, KEY_8 },
+	{ 0x450b, KEY_9 },
+	{ 0x450c, KEY_LAST },
+	{ 0x450d, KEY_0 },
+	{ 0x450e, KEY_ENTER },
+	{ 0x450f, KEY_RED },
+	{ 0x4510, KEY_CHANNELUP },
+	{ 0x4511, KEY_GREEN },
+	{ 0x4512, KEY_VOLUMEDOWN },
+	{ 0x4513, KEY_OK },
+	{ 0x4514, KEY_VOLUMEUP },
+	{ 0x4515, KEY_YELLOW },
+	{ 0x4516, KEY_CHANNELDOWN },
+	{ 0x4517, KEY_BLUE },
+	{ 0x4518, KEY_LEFT }, /* Skip backwards */
+	{ 0x4519, KEY_PLAYPAUSE },
+	{ 0x451a, KEY_RIGHT }, /* Skip forward */
+	{ 0x451b, KEY_REWIND },
+	{ 0x451c, KEY_L }, /* Live */
+	{ 0x451d, KEY_FASTFORWARD },
+	{ 0x451e, KEY_STOP }, /* 'Reveal' for Teletext */
+	{ 0x451f, KEY_MENU }, /* KEY_TEXT for Teletext */
+	{ 0x4540, KEY_RECORD }, /* Font 'Size' for Teletext */
+	{ 0x4541, KEY_SCREEN }, /*  Full screen toggle, 'Hold' for Teletext */
+	{ 0x4542, KEY_SELECT }, /* Select video input, 'Select' for Teletext */
+};
+
+static struct rc_keymap dib0700_map = {
+	.map = {
+		.scan    = dib0700_table,
+		.size    = ARRAY_SIZE(dib0700_table),
+		.ir_type = IR_TYPE_UNKNOWN,	/* Legacy IR type */
+		.name    = RC_MAP_DIB0700_BIG_TABLE,
+	}
+};
+
+static int __init init_rc_map(void)
+{
+	return ir_register_map(&dib0700_map);
+}
+
+static void __exit exit_rc_map(void)
+{
+	ir_unregister_map(&dib0700_map);
+}
+
+module_init(init_rc_map)
+module_exit(exit_rc_map)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index a329858c4b4..adbcccb54c8 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -69,6 +69,9 @@ void rc_map_init(void);
 #define RC_MAP_BUDGET_CI_OLD             "rc-budget-ci-old"
 #define RC_MAP_CINERGY_1400              "rc-cinergy-1400"
 #define RC_MAP_CINERGY                   "rc-cinergy"
+/* Temporary table - should be broken into smaller tables */
+#define RC_MAP_DIB0700_BIG_TABLE         "rc-dib0700-big"
+
 #define RC_MAP_DM1105_NEC                "rc-dm1105-nec"
 #define RC_MAP_DNTV_LIVE_DVBT_PRO        "rc-dntv-live-dvbt-pro"
 #define RC_MAP_DNTV_LIVE_DVB_T           "rc-dntv-live-dvb-t"
@@ -123,6 +126,7 @@ void rc_map_init(void);
 #define RC_MAP_VIDEOMATE_TV_PVR          "rc-videomate-tv-pvr"
 #define RC_MAP_WINFAST                   "rc-winfast"
 #define RC_MAP_WINFAST_USBII_DELUXE      "rc-winfast-usbii-deluxe"
+
 /*
  * Please, do not just append newer Remote Controller names at the end.
  * The names should be ordered in alphabetical order
-- 
cgit v1.2.3-70-g09d2


From 5af935cc96a291f90799bf6a2587d87329a91699 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Sun, 1 Aug 2010 08:02:35 -0300
Subject: V4L/DVB: dib0700: break keytable into NEC and RC-5 variants

Instead of having one big keytable with 2 protocols inside, break it
into two separate tables, being one for NEC and another for RC-5 variants,
and properly identify what variant should be used at the boards entries.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/IR/keymaps/Makefile           |   3 +-
 drivers/media/IR/keymaps/rc-dib0700-big.c   | 314 ----------------------------
 drivers/media/IR/keymaps/rc-dib0700-nec.c   | 124 +++++++++++
 drivers/media/IR/keymaps/rc-dib0700-rc5.c   | 235 +++++++++++++++++++++
 drivers/media/dvb/dvb-usb/dib0700_devices.c |  67 ++++--
 include/media/rc-map.h                      |   5 +-
 6 files changed, 416 insertions(+), 332 deletions(-)
 delete mode 100644 drivers/media/IR/keymaps/rc-dib0700-big.c
 create mode 100644 drivers/media/IR/keymaps/rc-dib0700-nec.c
 create mode 100644 drivers/media/IR/keymaps/rc-dib0700-rc5.c

(limited to 'include')

diff --git a/drivers/media/IR/keymaps/Makefile b/drivers/media/IR/keymaps/Makefile
index 85330d171c4..cbee06243b5 100644
--- a/drivers/media/IR/keymaps/Makefile
+++ b/drivers/media/IR/keymaps/Makefile
@@ -14,7 +14,8 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \
 			rc-budget-ci-old.o \
 			rc-cinergy-1400.o \
 			rc-cinergy.o \
-			rc-dib0700-big.o \
+			rc-dib0700-nec.o \
+			rc-dib0700-rc5.o \
 			rc-dm1105-nec.o \
 			rc-dntv-live-dvb-t.o \
 			rc-dntv-live-dvbt-pro.o \
diff --git a/drivers/media/IR/keymaps/rc-dib0700-big.c b/drivers/media/IR/keymaps/rc-dib0700-big.c
deleted file mode 100644
index 2e83820d3e5..00000000000
--- a/drivers/media/IR/keymaps/rc-dib0700-big.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/* rc-dvb0700-big.c - Keytable for devices in dvb0700
- *
- * Copyright (c) 2010 by Mauro Carvalho Chehab <mchehab@redhat.com>
- *
- * TODO: This table is a real mess, as it merges RC codes from several
- * devices into a big table. It also has both RC-5 and NEC codes inside.
- * It should be broken into small tables, and the protocols should properly
- * be indentificated.
- *
- * The table were imported from dib0700_devices.c.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <media/rc-map.h>
-
-static struct ir_scancode dib0700_table[] = {
-	/* Key codes for the tiny Pinnacle remote*/
-	{ 0x0700, KEY_MUTE },
-	{ 0x0701, KEY_MENU }, /* Pinnacle logo */
-	{ 0x0739, KEY_POWER },
-	{ 0x0703, KEY_VOLUMEUP },
-	{ 0x0709, KEY_VOLUMEDOWN },
-	{ 0x0706, KEY_CHANNELUP },
-	{ 0x070c, KEY_CHANNELDOWN },
-	{ 0x070f, KEY_1 },
-	{ 0x0715, KEY_2 },
-	{ 0x0710, KEY_3 },
-	{ 0x0718, KEY_4 },
-	{ 0x071b, KEY_5 },
-	{ 0x071e, KEY_6 },
-	{ 0x0711, KEY_7 },
-	{ 0x0721, KEY_8 },
-	{ 0x0712, KEY_9 },
-	{ 0x0727, KEY_0 },
-	{ 0x0724, KEY_SCREEN }, /* 'Square' key */
-	{ 0x072a, KEY_TEXT },   /* 'T' key */
-	{ 0x072d, KEY_REWIND },
-	{ 0x0730, KEY_PLAY },
-	{ 0x0733, KEY_FASTFORWARD },
-	{ 0x0736, KEY_RECORD },
-	{ 0x073c, KEY_STOP },
-	{ 0x073f, KEY_CANCEL }, /* '?' key */
-	/* Key codes for the Terratec Cinergy DT XS Diversity, similar to cinergyT2.c */
-	{ 0xeb01, KEY_POWER },
-	{ 0xeb02, KEY_1 },
-	{ 0xeb03, KEY_2 },
-	{ 0xeb04, KEY_3 },
-	{ 0xeb05, KEY_4 },
-	{ 0xeb06, KEY_5 },
-	{ 0xeb07, KEY_6 },
-	{ 0xeb08, KEY_7 },
-	{ 0xeb09, KEY_8 },
-	{ 0xeb0a, KEY_9 },
-	{ 0xeb0b, KEY_VIDEO },
-	{ 0xeb0c, KEY_0 },
-	{ 0xeb0d, KEY_REFRESH },
-	{ 0xeb0f, KEY_EPG },
-	{ 0xeb10, KEY_UP },
-	{ 0xeb11, KEY_LEFT },
-	{ 0xeb12, KEY_OK },
-	{ 0xeb13, KEY_RIGHT },
-	{ 0xeb14, KEY_DOWN },
-	{ 0xeb16, KEY_INFO },
-	{ 0xeb17, KEY_RED },
-	{ 0xeb18, KEY_GREEN },
-	{ 0xeb19, KEY_YELLOW },
-	{ 0xeb1a, KEY_BLUE },
-	{ 0xeb1b, KEY_CHANNELUP },
-	{ 0xeb1c, KEY_VOLUMEUP },
-	{ 0xeb1d, KEY_MUTE },
-	{ 0xeb1e, KEY_VOLUMEDOWN },
-	{ 0xeb1f, KEY_CHANNELDOWN },
-	{ 0xeb40, KEY_PAUSE },
-	{ 0xeb41, KEY_HOME },
-	{ 0xeb42, KEY_MENU }, /* DVD Menu */
-	{ 0xeb43, KEY_SUBTITLE },
-	{ 0xeb44, KEY_TEXT }, /* Teletext */
-	{ 0xeb45, KEY_DELETE },
-	{ 0xeb46, KEY_TV },
-	{ 0xeb47, KEY_DVD },
-	{ 0xeb48, KEY_STOP },
-	{ 0xeb49, KEY_VIDEO },
-	{ 0xeb4a, KEY_AUDIO }, /* Music */
-	{ 0xeb4b, KEY_SCREEN }, /* Pic */
-	{ 0xeb4c, KEY_PLAY },
-	{ 0xeb4d, KEY_BACK },
-	{ 0xeb4e, KEY_REWIND },
-	{ 0xeb4f, KEY_FASTFORWARD },
-	{ 0xeb54, KEY_PREVIOUS },
-	{ 0xeb58, KEY_RECORD },
-	{ 0xeb5c, KEY_NEXT },
-
-	/* Key codes for the Haupauge WinTV Nova-TD, copied from nova-t-usb2.c (Nova-T USB2) */
-	{ 0x1e00, KEY_0 },
-	{ 0x1e01, KEY_1 },
-	{ 0x1e02, KEY_2 },
-	{ 0x1e03, KEY_3 },
-	{ 0x1e04, KEY_4 },
-	{ 0x1e05, KEY_5 },
-	{ 0x1e06, KEY_6 },
-	{ 0x1e07, KEY_7 },
-	{ 0x1e08, KEY_8 },
-	{ 0x1e09, KEY_9 },
-	{ 0x1e0a, KEY_KPASTERISK },
-	{ 0x1e0b, KEY_RED },
-	{ 0x1e0c, KEY_RADIO },
-	{ 0x1e0d, KEY_MENU },
-	{ 0x1e0e, KEY_GRAVE }, /* # */
-	{ 0x1e0f, KEY_MUTE },
-	{ 0x1e10, KEY_VOLUMEUP },
-	{ 0x1e11, KEY_VOLUMEDOWN },
-	{ 0x1e12, KEY_CHANNEL },
-	{ 0x1e14, KEY_UP },
-	{ 0x1e15, KEY_DOWN },
-	{ 0x1e16, KEY_LEFT },
-	{ 0x1e17, KEY_RIGHT },
-	{ 0x1e18, KEY_VIDEO },
-	{ 0x1e19, KEY_AUDIO },
-	{ 0x1e1a, KEY_MEDIA },
-	{ 0x1e1b, KEY_EPG },
-	{ 0x1e1c, KEY_TV },
-	{ 0x1e1e, KEY_NEXT },
-	{ 0x1e1f, KEY_BACK },
-	{ 0x1e20, KEY_CHANNELUP },
-	{ 0x1e21, KEY_CHANNELDOWN },
-	{ 0x1e24, KEY_LAST }, /* Skip backwards */
-	{ 0x1e25, KEY_OK },
-	{ 0x1e29, KEY_BLUE},
-	{ 0x1e2e, KEY_GREEN },
-	{ 0x1e30, KEY_PAUSE },
-	{ 0x1e32, KEY_REWIND },
-	{ 0x1e34, KEY_FASTFORWARD },
-	{ 0x1e35, KEY_PLAY },
-	{ 0x1e36, KEY_STOP },
-	{ 0x1e37, KEY_RECORD },
-	{ 0x1e38, KEY_YELLOW },
-	{ 0x1e3b, KEY_GOTO },
-	{ 0x1e3d, KEY_POWER },
-
-	/* Key codes for the Leadtek Winfast DTV Dongle */
-	{ 0x0042, KEY_POWER },
-	{ 0x077c, KEY_TUNER },
-	{ 0x0f4e, KEY_PRINT }, /* PREVIEW */
-	{ 0x0840, KEY_SCREEN }, /* full screen toggle*/
-	{ 0x0f71, KEY_DOT }, /* frequency */
-	{ 0x0743, KEY_0 },
-	{ 0x0c41, KEY_1 },
-	{ 0x0443, KEY_2 },
-	{ 0x0b7f, KEY_3 },
-	{ 0x0e41, KEY_4 },
-	{ 0x0643, KEY_5 },
-	{ 0x097f, KEY_6 },
-	{ 0x0d7e, KEY_7 },
-	{ 0x057c, KEY_8 },
-	{ 0x0a40, KEY_9 },
-	{ 0x0e4e, KEY_CLEAR },
-	{ 0x047c, KEY_CHANNEL }, /* show channel number */
-	{ 0x0f41, KEY_LAST }, /* recall */
-	{ 0x0342, KEY_MUTE },
-	{ 0x064c, KEY_RESERVED }, /* PIP button*/
-	{ 0x0172, KEY_SHUFFLE }, /* SNAPSHOT */
-	{ 0x0c4e, KEY_PLAYPAUSE }, /* TIMESHIFT */
-	{ 0x0b70, KEY_RECORD },
-	{ 0x037d, KEY_VOLUMEUP },
-	{ 0x017d, KEY_VOLUMEDOWN },
-	{ 0x0242, KEY_CHANNELUP },
-	{ 0x007d, KEY_CHANNELDOWN },
-
-	/* Key codes for Nova-TD "credit card" remote control. */
-	{ 0x1d00, KEY_0 },
-	{ 0x1d01, KEY_1 },
-	{ 0x1d02, KEY_2 },
-	{ 0x1d03, KEY_3 },
-	{ 0x1d04, KEY_4 },
-	{ 0x1d05, KEY_5 },
-	{ 0x1d06, KEY_6 },
-	{ 0x1d07, KEY_7 },
-	{ 0x1d08, KEY_8 },
-	{ 0x1d09, KEY_9 },
-	{ 0x1d0a, KEY_TEXT },
-	{ 0x1d0d, KEY_MENU },
-	{ 0x1d0f, KEY_MUTE },
-	{ 0x1d10, KEY_VOLUMEUP },
-	{ 0x1d11, KEY_VOLUMEDOWN },
-	{ 0x1d12, KEY_CHANNEL },
-	{ 0x1d14, KEY_UP },
-	{ 0x1d15, KEY_DOWN },
-	{ 0x1d16, KEY_LEFT },
-	{ 0x1d17, KEY_RIGHT },
-	{ 0x1d1c, KEY_TV },
-	{ 0x1d1e, KEY_NEXT },
-	{ 0x1d1f, KEY_BACK },
-	{ 0x1d20, KEY_CHANNELUP },
-	{ 0x1d21, KEY_CHANNELDOWN },
-	{ 0x1d24, KEY_LAST },
-	{ 0x1d25, KEY_OK },
-	{ 0x1d30, KEY_PAUSE },
-	{ 0x1d32, KEY_REWIND },
-	{ 0x1d34, KEY_FASTFORWARD },
-	{ 0x1d35, KEY_PLAY },
-	{ 0x1d36, KEY_STOP },
-	{ 0x1d37, KEY_RECORD },
-	{ 0x1d3b, KEY_GOTO },
-	{ 0x1d3d, KEY_POWER },
-
-	/* Key codes for the Pixelview SBTVD remote (proto NEC) */
-	{ 0x8613, KEY_MUTE },
-	{ 0x8612, KEY_POWER },
-	{ 0x8601, KEY_1 },
-	{ 0x8602, KEY_2 },
-	{ 0x8603, KEY_3 },
-	{ 0x8604, KEY_4 },
-	{ 0x8605, KEY_5 },
-	{ 0x8606, KEY_6 },
-	{ 0x8607, KEY_7 },
-	{ 0x8608, KEY_8 },
-	{ 0x8609, KEY_9 },
-	{ 0x8600, KEY_0 },
-	{ 0x860d, KEY_CHANNELUP },
-	{ 0x8619, KEY_CHANNELDOWN },
-	{ 0x8610, KEY_VOLUMEUP },
-	{ 0x860c, KEY_VOLUMEDOWN },
-
-	{ 0x860a, KEY_CAMERA },
-	{ 0x860b, KEY_ZOOM },
-	{ 0x861b, KEY_BACKSPACE },
-	{ 0x8615, KEY_ENTER },
-
-	{ 0x861d, KEY_UP },
-	{ 0x861e, KEY_DOWN },
-	{ 0x860e, KEY_LEFT },
-	{ 0x860f, KEY_RIGHT },
-
-	{ 0x8618, KEY_RECORD },
-	{ 0x861a, KEY_STOP },
-
-	/* Key codes for the EvolutePC TVWay+ remote (proto NEC) */
-	{ 0x7a00, KEY_MENU },
-	{ 0x7a01, KEY_RECORD },
-	{ 0x7a02, KEY_PLAY },
-	{ 0x7a03, KEY_STOP },
-	{ 0x7a10, KEY_CHANNELUP },
-	{ 0x7a11, KEY_CHANNELDOWN },
-	{ 0x7a12, KEY_VOLUMEUP },
-	{ 0x7a13, KEY_VOLUMEDOWN },
-	{ 0x7a40, KEY_POWER },
-	{ 0x7a41, KEY_MUTE },
-
-	/* Key codes for the Elgato EyeTV Diversity silver remote,
-	   set dvb_usb_dib0700_ir_proto=0 */
-	{ 0x4501, KEY_POWER },
-	{ 0x4502, KEY_MUTE },
-	{ 0x4503, KEY_1 },
-	{ 0x4504, KEY_2 },
-	{ 0x4505, KEY_3 },
-	{ 0x4506, KEY_4 },
-	{ 0x4507, KEY_5 },
-	{ 0x4508, KEY_6 },
-	{ 0x4509, KEY_7 },
-	{ 0x450a, KEY_8 },
-	{ 0x450b, KEY_9 },
-	{ 0x450c, KEY_LAST },
-	{ 0x450d, KEY_0 },
-	{ 0x450e, KEY_ENTER },
-	{ 0x450f, KEY_RED },
-	{ 0x4510, KEY_CHANNELUP },
-	{ 0x4511, KEY_GREEN },
-	{ 0x4512, KEY_VOLUMEDOWN },
-	{ 0x4513, KEY_OK },
-	{ 0x4514, KEY_VOLUMEUP },
-	{ 0x4515, KEY_YELLOW },
-	{ 0x4516, KEY_CHANNELDOWN },
-	{ 0x4517, KEY_BLUE },
-	{ 0x4518, KEY_LEFT }, /* Skip backwards */
-	{ 0x4519, KEY_PLAYPAUSE },
-	{ 0x451a, KEY_RIGHT }, /* Skip forward */
-	{ 0x451b, KEY_REWIND },
-	{ 0x451c, KEY_L }, /* Live */
-	{ 0x451d, KEY_FASTFORWARD },
-	{ 0x451e, KEY_STOP }, /* 'Reveal' for Teletext */
-	{ 0x451f, KEY_MENU }, /* KEY_TEXT for Teletext */
-	{ 0x4540, KEY_RECORD }, /* Font 'Size' for Teletext */
-	{ 0x4541, KEY_SCREEN }, /*  Full screen toggle, 'Hold' for Teletext */
-	{ 0x4542, KEY_SELECT }, /* Select video input, 'Select' for Teletext */
-};
-
-static struct rc_keymap dib0700_map = {
-	.map = {
-		.scan    = dib0700_table,
-		.size    = ARRAY_SIZE(dib0700_table),
-		.ir_type = IR_TYPE_UNKNOWN,	/* Legacy IR type */
-		.name    = RC_MAP_DIB0700_BIG_TABLE,
-	}
-};
-
-static int __init init_rc_map(void)
-{
-	return ir_register_map(&dib0700_map);
-}
-
-static void __exit exit_rc_map(void)
-{
-	ir_unregister_map(&dib0700_map);
-}
-
-module_init(init_rc_map)
-module_exit(exit_rc_map)
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
diff --git a/drivers/media/IR/keymaps/rc-dib0700-nec.c b/drivers/media/IR/keymaps/rc-dib0700-nec.c
new file mode 100644
index 00000000000..f5809f4757f
--- /dev/null
+++ b/drivers/media/IR/keymaps/rc-dib0700-nec.c
@@ -0,0 +1,124 @@
+/* rc-dib0700-nec.c - NEC keytable for devices in dib0700
+ *
+ * Copyright (c) 2010 by Mauro Carvalho Chehab <mchehab@redhat.com>
+ *
+ * TODO: This table is still a mess, as it merges NEC codes from several
+ * devices into one big table. The RC-5 codes now live in a separate table
+ * (rc-dib0700-rc5.c). It should be broken into smaller per-device tables,
+ * and each remote should be properly identified.
+ *
+ * The table was imported from dib0700_devices.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <media/rc-map.h>
+
+static struct ir_scancode dib0700_table[] = {
+	/* Key codes for the Pixelview SBTVD remote */
+	{ 0x8613, KEY_MUTE },
+	{ 0x8612, KEY_POWER },
+	{ 0x8601, KEY_1 },
+	{ 0x8602, KEY_2 },
+	{ 0x8603, KEY_3 },
+	{ 0x8604, KEY_4 },
+	{ 0x8605, KEY_5 },
+	{ 0x8606, KEY_6 },
+	{ 0x8607, KEY_7 },
+	{ 0x8608, KEY_8 },
+	{ 0x8609, KEY_9 },
+	{ 0x8600, KEY_0 },
+	{ 0x860d, KEY_CHANNELUP },
+	{ 0x8619, KEY_CHANNELDOWN },
+	{ 0x8610, KEY_VOLUMEUP },
+	{ 0x860c, KEY_VOLUMEDOWN },
+
+	{ 0x860a, KEY_CAMERA },
+	{ 0x860b, KEY_ZOOM },
+	{ 0x861b, KEY_BACKSPACE },
+	{ 0x8615, KEY_ENTER },
+
+	{ 0x861d, KEY_UP },
+	{ 0x861e, KEY_DOWN },
+	{ 0x860e, KEY_LEFT },
+	{ 0x860f, KEY_RIGHT },
+
+	{ 0x8618, KEY_RECORD },
+	{ 0x861a, KEY_STOP },
+
+	/* Key codes for the EvolutePC TVWay+ remote */
+	{ 0x7a00, KEY_MENU },
+	{ 0x7a01, KEY_RECORD },
+	{ 0x7a02, KEY_PLAY },
+	{ 0x7a03, KEY_STOP },
+	{ 0x7a10, KEY_CHANNELUP },
+	{ 0x7a11, KEY_CHANNELDOWN },
+	{ 0x7a12, KEY_VOLUMEUP },
+	{ 0x7a13, KEY_VOLUMEDOWN },
+	{ 0x7a40, KEY_POWER },
+	{ 0x7a41, KEY_MUTE },
+
+	/* Key codes for the Elgato EyeTV Diversity silver remote */
+	{ 0x4501, KEY_POWER },
+	{ 0x4502, KEY_MUTE },
+	{ 0x4503, KEY_1 },
+	{ 0x4504, KEY_2 },
+	{ 0x4505, KEY_3 },
+	{ 0x4506, KEY_4 },
+	{ 0x4507, KEY_5 },
+	{ 0x4508, KEY_6 },
+	{ 0x4509, KEY_7 },
+	{ 0x450a, KEY_8 },
+	{ 0x450b, KEY_9 },
+	{ 0x450c, KEY_LAST },
+	{ 0x450d, KEY_0 },
+	{ 0x450e, KEY_ENTER },
+	{ 0x450f, KEY_RED },
+	{ 0x4510, KEY_CHANNELUP },
+	{ 0x4511, KEY_GREEN },
+	{ 0x4512, KEY_VOLUMEDOWN },
+	{ 0x4513, KEY_OK },
+	{ 0x4514, KEY_VOLUMEUP },
+	{ 0x4515, KEY_YELLOW },
+	{ 0x4516, KEY_CHANNELDOWN },
+	{ 0x4517, KEY_BLUE },
+	{ 0x4518, KEY_LEFT }, /* Skip backwards */
+	{ 0x4519, KEY_PLAYPAUSE },
+	{ 0x451a, KEY_RIGHT }, /* Skip forward */
+	{ 0x451b, KEY_REWIND },
+	{ 0x451c, KEY_L }, /* Live */
+	{ 0x451d, KEY_FASTFORWARD },
+	{ 0x451e, KEY_STOP }, /* 'Reveal' for Teletext */
+	{ 0x451f, KEY_MENU }, /* KEY_TEXT for Teletext */
+	{ 0x4540, KEY_RECORD }, /* Font 'Size' for Teletext */
+	{ 0x4541, KEY_SCREEN }, /*  Full screen toggle, 'Hold' for Teletext */
+	{ 0x4542, KEY_SELECT }, /* Select video input, 'Select' for Teletext */
+};
+
+static struct rc_keymap dib0700_map = {
+	.map = {
+		.scan    = dib0700_table,
+		.size    = ARRAY_SIZE(dib0700_table),
+		.ir_type = IR_TYPE_NEC,
+		.name    = RC_MAP_DIB0700_NEC_TABLE,
+	}
+};
+
+static int __init init_rc_map(void)
+{
+	return ir_register_map(&dib0700_map);
+}
+
+static void __exit exit_rc_map(void)
+{
+	ir_unregister_map(&dib0700_map);
+}
+
+module_init(init_rc_map)
+module_exit(exit_rc_map)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
diff --git a/drivers/media/IR/keymaps/rc-dib0700-rc5.c b/drivers/media/IR/keymaps/rc-dib0700-rc5.c
new file mode 100644
index 00000000000..e2d0fd2bbaf
--- /dev/null
+++ b/drivers/media/IR/keymaps/rc-dib0700-rc5.c
@@ -0,0 +1,235 @@
+/* rc-dib0700-rc5.c - RC-5 keytable for devices in dib0700
+ *
+ * Copyright (c) 2010 by Mauro Carvalho Chehab <mchehab@redhat.com>
+ *
+ * TODO: This table is still a mess, as it merges RC-5 codes from several
+ * devices into one big table. The NEC codes now live in a separate table
+ * (rc-dib0700-nec.c). It should be broken into smaller per-device tables,
+ * and each remote should be properly identified.
+ *
+ * The table was imported from dib0700_devices.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <media/rc-map.h>
+
+static struct ir_scancode dib0700_table[] = {
+	/* Key codes for the tiny Pinnacle remote*/
+	{ 0x0700, KEY_MUTE },
+	{ 0x0701, KEY_MENU }, /* Pinnacle logo */
+	{ 0x0739, KEY_POWER },
+	{ 0x0703, KEY_VOLUMEUP },
+	{ 0x0709, KEY_VOLUMEDOWN },
+	{ 0x0706, KEY_CHANNELUP },
+	{ 0x070c, KEY_CHANNELDOWN },
+	{ 0x070f, KEY_1 },
+	{ 0x0715, KEY_2 },
+	{ 0x0710, KEY_3 },
+	{ 0x0718, KEY_4 },
+	{ 0x071b, KEY_5 },
+	{ 0x071e, KEY_6 },
+	{ 0x0711, KEY_7 },
+	{ 0x0721, KEY_8 },
+	{ 0x0712, KEY_9 },
+	{ 0x0727, KEY_0 },
+	{ 0x0724, KEY_SCREEN }, /* 'Square' key */
+	{ 0x072a, KEY_TEXT },   /* 'T' key */
+	{ 0x072d, KEY_REWIND },
+	{ 0x0730, KEY_PLAY },
+	{ 0x0733, KEY_FASTFORWARD },
+	{ 0x0736, KEY_RECORD },
+	{ 0x073c, KEY_STOP },
+	{ 0x073f, KEY_CANCEL }, /* '?' key */
+
+	/* Key codes for the Terratec Cinergy DT XS Diversity, similar to cinergyT2.c */
+	{ 0xeb01, KEY_POWER },
+	{ 0xeb02, KEY_1 },
+	{ 0xeb03, KEY_2 },
+	{ 0xeb04, KEY_3 },
+	{ 0xeb05, KEY_4 },
+	{ 0xeb06, KEY_5 },
+	{ 0xeb07, KEY_6 },
+	{ 0xeb08, KEY_7 },
+	{ 0xeb09, KEY_8 },
+	{ 0xeb0a, KEY_9 },
+	{ 0xeb0b, KEY_VIDEO },
+	{ 0xeb0c, KEY_0 },
+	{ 0xeb0d, KEY_REFRESH },
+	{ 0xeb0f, KEY_EPG },
+	{ 0xeb10, KEY_UP },
+	{ 0xeb11, KEY_LEFT },
+	{ 0xeb12, KEY_OK },
+	{ 0xeb13, KEY_RIGHT },
+	{ 0xeb14, KEY_DOWN },
+	{ 0xeb16, KEY_INFO },
+	{ 0xeb17, KEY_RED },
+	{ 0xeb18, KEY_GREEN },
+	{ 0xeb19, KEY_YELLOW },
+	{ 0xeb1a, KEY_BLUE },
+	{ 0xeb1b, KEY_CHANNELUP },
+	{ 0xeb1c, KEY_VOLUMEUP },
+	{ 0xeb1d, KEY_MUTE },
+	{ 0xeb1e, KEY_VOLUMEDOWN },
+	{ 0xeb1f, KEY_CHANNELDOWN },
+	{ 0xeb40, KEY_PAUSE },
+	{ 0xeb41, KEY_HOME },
+	{ 0xeb42, KEY_MENU }, /* DVD Menu */
+	{ 0xeb43, KEY_SUBTITLE },
+	{ 0xeb44, KEY_TEXT }, /* Teletext */
+	{ 0xeb45, KEY_DELETE },
+	{ 0xeb46, KEY_TV },
+	{ 0xeb47, KEY_DVD },
+	{ 0xeb48, KEY_STOP },
+	{ 0xeb49, KEY_VIDEO },
+	{ 0xeb4a, KEY_AUDIO }, /* Music */
+	{ 0xeb4b, KEY_SCREEN }, /* Pic */
+	{ 0xeb4c, KEY_PLAY },
+	{ 0xeb4d, KEY_BACK },
+	{ 0xeb4e, KEY_REWIND },
+	{ 0xeb4f, KEY_FASTFORWARD },
+	{ 0xeb54, KEY_PREVIOUS },
+	{ 0xeb58, KEY_RECORD },
+	{ 0xeb5c, KEY_NEXT },
+
+	/* Key codes for the Hauppauge WinTV Nova-TD, copied from nova-t-usb2.c (Nova-T USB2) */
+	{ 0x1e00, KEY_0 },
+	{ 0x1e01, KEY_1 },
+	{ 0x1e02, KEY_2 },
+	{ 0x1e03, KEY_3 },
+	{ 0x1e04, KEY_4 },
+	{ 0x1e05, KEY_5 },
+	{ 0x1e06, KEY_6 },
+	{ 0x1e07, KEY_7 },
+	{ 0x1e08, KEY_8 },
+	{ 0x1e09, KEY_9 },
+	{ 0x1e0a, KEY_KPASTERISK },
+	{ 0x1e0b, KEY_RED },
+	{ 0x1e0c, KEY_RADIO },
+	{ 0x1e0d, KEY_MENU },
+	{ 0x1e0e, KEY_GRAVE }, /* # */
+	{ 0x1e0f, KEY_MUTE },
+	{ 0x1e10, KEY_VOLUMEUP },
+	{ 0x1e11, KEY_VOLUMEDOWN },
+	{ 0x1e12, KEY_CHANNEL },
+	{ 0x1e14, KEY_UP },
+	{ 0x1e15, KEY_DOWN },
+	{ 0x1e16, KEY_LEFT },
+	{ 0x1e17, KEY_RIGHT },
+	{ 0x1e18, KEY_VIDEO },
+	{ 0x1e19, KEY_AUDIO },
+	{ 0x1e1a, KEY_MEDIA },
+	{ 0x1e1b, KEY_EPG },
+	{ 0x1e1c, KEY_TV },
+	{ 0x1e1e, KEY_NEXT },
+	{ 0x1e1f, KEY_BACK },
+	{ 0x1e20, KEY_CHANNELUP },
+	{ 0x1e21, KEY_CHANNELDOWN },
+	{ 0x1e24, KEY_LAST }, /* Skip backwards */
+	{ 0x1e25, KEY_OK },
+	{ 0x1e29, KEY_BLUE},
+	{ 0x1e2e, KEY_GREEN },
+	{ 0x1e30, KEY_PAUSE },
+	{ 0x1e32, KEY_REWIND },
+	{ 0x1e34, KEY_FASTFORWARD },
+	{ 0x1e35, KEY_PLAY },
+	{ 0x1e36, KEY_STOP },
+	{ 0x1e37, KEY_RECORD },
+	{ 0x1e38, KEY_YELLOW },
+	{ 0x1e3b, KEY_GOTO },
+	{ 0x1e3d, KEY_POWER },
+
+	/* Key codes for the Leadtek Winfast DTV Dongle */
+	{ 0x0042, KEY_POWER },
+	{ 0x077c, KEY_TUNER },
+	{ 0x0f4e, KEY_PRINT }, /* PREVIEW */
+	{ 0x0840, KEY_SCREEN }, /* full screen toggle*/
+	{ 0x0f71, KEY_DOT }, /* frequency */
+	{ 0x0743, KEY_0 },
+	{ 0x0c41, KEY_1 },
+	{ 0x0443, KEY_2 },
+	{ 0x0b7f, KEY_3 },
+	{ 0x0e41, KEY_4 },
+	{ 0x0643, KEY_5 },
+	{ 0x097f, KEY_6 },
+	{ 0x0d7e, KEY_7 },
+	{ 0x057c, KEY_8 },
+	{ 0x0a40, KEY_9 },
+	{ 0x0e4e, KEY_CLEAR },
+	{ 0x047c, KEY_CHANNEL }, /* show channel number */
+	{ 0x0f41, KEY_LAST }, /* recall */
+	{ 0x0342, KEY_MUTE },
+	{ 0x064c, KEY_RESERVED }, /* PIP button*/
+	{ 0x0172, KEY_SHUFFLE }, /* SNAPSHOT */
+	{ 0x0c4e, KEY_PLAYPAUSE }, /* TIMESHIFT */
+	{ 0x0b70, KEY_RECORD },
+	{ 0x037d, KEY_VOLUMEUP },
+	{ 0x017d, KEY_VOLUMEDOWN },
+	{ 0x0242, KEY_CHANNELUP },
+	{ 0x007d, KEY_CHANNELDOWN },
+
+	/* Key codes for Nova-TD "credit card" remote control. */
+	{ 0x1d00, KEY_0 },
+	{ 0x1d01, KEY_1 },
+	{ 0x1d02, KEY_2 },
+	{ 0x1d03, KEY_3 },
+	{ 0x1d04, KEY_4 },
+	{ 0x1d05, KEY_5 },
+	{ 0x1d06, KEY_6 },
+	{ 0x1d07, KEY_7 },
+	{ 0x1d08, KEY_8 },
+	{ 0x1d09, KEY_9 },
+	{ 0x1d0a, KEY_TEXT },
+	{ 0x1d0d, KEY_MENU },
+	{ 0x1d0f, KEY_MUTE },
+	{ 0x1d10, KEY_VOLUMEUP },
+	{ 0x1d11, KEY_VOLUMEDOWN },
+	{ 0x1d12, KEY_CHANNEL },
+	{ 0x1d14, KEY_UP },
+	{ 0x1d15, KEY_DOWN },
+	{ 0x1d16, KEY_LEFT },
+	{ 0x1d17, KEY_RIGHT },
+	{ 0x1d1c, KEY_TV },
+	{ 0x1d1e, KEY_NEXT },
+	{ 0x1d1f, KEY_BACK },
+	{ 0x1d20, KEY_CHANNELUP },
+	{ 0x1d21, KEY_CHANNELDOWN },
+	{ 0x1d24, KEY_LAST },
+	{ 0x1d25, KEY_OK },
+	{ 0x1d30, KEY_PAUSE },
+	{ 0x1d32, KEY_REWIND },
+	{ 0x1d34, KEY_FASTFORWARD },
+	{ 0x1d35, KEY_PLAY },
+	{ 0x1d36, KEY_STOP },
+	{ 0x1d37, KEY_RECORD },
+	{ 0x1d3b, KEY_GOTO },
+	{ 0x1d3d, KEY_POWER },
+};
+
+static struct rc_keymap dib0700_map = {
+	.map = {
+		.scan    = dib0700_table,
+		.size    = ARRAY_SIZE(dib0700_table),
+		.ir_type = IR_TYPE_RC5,
+		.name    = RC_MAP_DIB0700_RC5_TABLE,
+	}
+};
+
+static int __init init_rc_map(void)
+{
+	return ir_register_map(&dib0700_map);
+}
+
+static void __exit exit_rc_map(void)
+{
+	ir_unregister_map(&dib0700_map);
+}
+
+module_init(init_rc_map)
+module_exit(exit_rc_map)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
diff --git a/drivers/media/dvb/dvb-usb/dib0700_devices.c b/drivers/media/dvb/dvb-usb/dib0700_devices.c
index 6e587cd1f51..ee2a84beb55 100644
--- a/drivers/media/dvb/dvb-usb/dib0700_devices.c
+++ b/drivers/media/dvb/dvb-usb/dib0700_devices.c
@@ -1872,7 +1872,7 @@ struct dvb_usb_device_properties dib0700_devices[] = {
 
 		.rc.core = {
 			.rc_interval      = DEFAULT_RC_INTERVAL,
-			.rc_codes         = RC_MAP_DIB0700_BIG_TABLE,
+			.rc_codes         = RC_MAP_DIB0700_RC5_TABLE,
 			.rc_query         = dib0700_rc_query_old_firmware
 		},
 	}, { DIB0700_DEFAULT_DEVICE_PROPERTIES,
@@ -1902,7 +1902,7 @@ struct dvb_usb_device_properties dib0700_devices[] = {
 
 		.rc.core = {
 			.rc_interval      = DEFAULT_RC_INTERVAL,
-			.rc_codes         = RC_MAP_DIB0700_BIG_TABLE,
+			.rc_codes         = RC_MAP_DIB0700_RC5_TABLE,
 			.rc_query         = dib0700_rc_query_old_firmware
 		},
 	}, { DIB0700_DEFAULT_DEVICE_PROPERTIES,
@@ -1957,7 +1957,7 @@ struct dvb_usb_device_properties dib0700_devices[] = {
 
 		.rc.core = {
 			.rc_interval      = DEFAULT_RC_INTERVAL,
-			.rc_codes         = RC_MAP_DIB0700_BIG_TABLE,
+			.rc_codes         = RC_MAP_DIB0700_RC5_TABLE,
 			.rc_query         = dib0700_rc_query_old_firmware
 		},
 	}, { DIB0700_DEFAULT_DEVICE_PROPERTIES,
@@ -1994,7 +1994,7 @@ struct dvb_usb_device_properties dib0700_devices[] = {
 
 		.rc.core = {
 			.rc_interval      = DEFAULT_RC_INTERVAL,
-			.rc_codes         = RC_MAP_DIB0700_BIG_TABLE,
+			.rc_codes         = RC_MAP_DIB0700_RC5_TABLE,
 			.module_name	  = "dib0700",
 			.rc_query         = dib0700_rc_query_old_firmware
 		},
@@ -2066,7 +2066,7 @@ struct dvb_usb_device_properties dib0700_devices[] = {
 
 		.rc.core = {
 			.rc_interval      = DEFAULT_RC_INTERVAL,
-			.rc_codes         = RC_MAP_DIB0700_BIG_TABLE,
+			.rc_codes         = RC_MAP_DIB0700_RC5_TABLE,
 			.module_name	  = "dib0700",
 			.rc_query         = dib0700_rc_query_old_firmware
 		},
@@ -2106,7 +2106,7 @@ struct dvb_usb_device_properties dib0700_devices[] = {
 
 		.rc.core = {
 			.rc_interval      = DEFAULT_RC_INTERVAL,
-			.rc_codes         = RC_MAP_DIB0700_BIG_TABLE,
+			.rc_codes         = RC_MAP_DIB0700_RC5_TABLE,
 			.module_name	  = "dib0700",
 			.rc_query         = dib0700_rc_query_old_firmware
 		},
@@ -2139,7 +2139,7 @@ struct dvb_usb_device_properties dib0700_devices[] = {
 			}
 		},
 
-		.num_device_descs = 7,
+		.num_device_descs = 6,
 		.devices = {
 			{   "DiBcom STK7070PD reference design",
 				{ &dib0700_usb_id_table[17], NULL },
@@ -2166,6 +2166,45 @@ struct dvb_usb_device_properties dib0700_devices[] = {
 				{ &dib0700_usb_id_table[44], NULL },
 				{ NULL },
 			},
+		},
+
+		.rc.core = {
+			.rc_interval      = DEFAULT_RC_INTERVAL,
+			.rc_codes         = RC_MAP_DIB0700_RC5_TABLE,
+			.module_name	  = "dib0700",
+			.rc_query         = dib0700_rc_query_old_firmware
+		},
+	}, { DIB0700_DEFAULT_DEVICE_PROPERTIES,
+
+		.num_adapters = 2,
+		.adapter = {
+			{
+				.caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF,
+				.pid_filter_count = 32,
+				.pid_filter       = stk70x0p_pid_filter,
+				.pid_filter_ctrl  = stk70x0p_pid_filter_ctrl,
+				.frontend_attach  = stk7070pd_frontend_attach0,
+				.tuner_attach     = dib7070p_tuner_attach,
+
+				DIB0700_DEFAULT_STREAMING_CONFIG(0x02),
+
+				.size_of_priv     = sizeof(struct dib0700_adapter_state),
+			}, {
+				.caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF,
+				.pid_filter_count = 32,
+				.pid_filter       = stk70x0p_pid_filter,
+				.pid_filter_ctrl  = stk70x0p_pid_filter_ctrl,
+				.frontend_attach  = stk7070pd_frontend_attach1,
+				.tuner_attach     = dib7070p_tuner_attach,
+
+				DIB0700_DEFAULT_STREAMING_CONFIG(0x03),
+
+				.size_of_priv     = sizeof(struct dib0700_adapter_state),
+			}
+		},
+
+		.num_device_descs = 1,
+		.devices = {
 			{   "Elgato EyeTV Diversity",
 				{ &dib0700_usb_id_table[68], NULL },
 				{ NULL },
@@ -2174,7 +2213,7 @@ struct dvb_usb_device_properties dib0700_devices[] = {
 
 		.rc.core = {
 			.rc_interval      = DEFAULT_RC_INTERVAL,
-			.rc_codes         = RC_MAP_DIB0700_BIG_TABLE,
+			.rc_codes         = RC_MAP_DIB0700_NEC_TABLE,
 			.module_name	  = "dib0700",
 			.rc_query         = dib0700_rc_query_old_firmware
 		},
@@ -2239,7 +2278,7 @@ struct dvb_usb_device_properties dib0700_devices[] = {
 
 		.rc.core = {
 			.rc_interval      = DEFAULT_RC_INTERVAL,
-			.rc_codes         = RC_MAP_DIB0700_BIG_TABLE,
+			.rc_codes         = RC_MAP_DIB0700_RC5_TABLE,
 			.module_name	  = "dib0700",
 			.rc_query         = dib0700_rc_query_old_firmware
 		},
@@ -2271,7 +2310,7 @@ struct dvb_usb_device_properties dib0700_devices[] = {
 
 		.rc.core = {
 			.rc_interval      = DEFAULT_RC_INTERVAL,
-			.rc_codes         = RC_MAP_DIB0700_BIG_TABLE,
+			.rc_codes         = RC_MAP_DIB0700_RC5_TABLE,
 			.module_name	  = "dib0700",
 			.rc_query         = dib0700_rc_query_old_firmware
 		},
@@ -2335,7 +2374,7 @@ struct dvb_usb_device_properties dib0700_devices[] = {
 
 		.rc.core = {
 			.rc_interval      = DEFAULT_RC_INTERVAL,
-			.rc_codes         = RC_MAP_DIB0700_BIG_TABLE,
+			.rc_codes         = RC_MAP_DIB0700_RC5_TABLE,
 			.module_name	  = "dib0700",
 			.rc_query         = dib0700_rc_query_old_firmware
 		},
@@ -2375,7 +2414,7 @@ struct dvb_usb_device_properties dib0700_devices[] = {
 
 		.rc.core = {
 			.rc_interval      = DEFAULT_RC_INTERVAL,
-			.rc_codes         = RC_MAP_DIB0700_BIG_TABLE,
+			.rc_codes         = RC_MAP_DIB0700_NEC_TABLE,
 			.module_name	  = "dib0700",
 			.rc_query         = dib0700_rc_query_old_firmware
 		},
@@ -2420,7 +2459,7 @@ struct dvb_usb_device_properties dib0700_devices[] = {
 
 		.rc.core = {
 			.rc_interval      = DEFAULT_RC_INTERVAL,
-			.rc_codes         = RC_MAP_DIB0700_BIG_TABLE,
+			.rc_codes         = RC_MAP_DIB0700_RC5_TABLE,
 			.module_name	  = "dib0700",
 			.rc_query         = dib0700_rc_query_old_firmware
 		},
@@ -2453,7 +2492,7 @@ struct dvb_usb_device_properties dib0700_devices[] = {
 
 		.rc.core = {
 			.rc_interval      = DEFAULT_RC_INTERVAL,
-			.rc_codes         = RC_MAP_DIB0700_BIG_TABLE,
+			.rc_codes         = RC_MAP_DIB0700_RC5_TABLE,
 			.module_name	  = "dib0700",
 			.rc_query         = dib0700_rc_query_old_firmware
 		},
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index adbcccb54c8..9569d0863f8 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -69,9 +69,8 @@ void rc_map_init(void);
 #define RC_MAP_BUDGET_CI_OLD             "rc-budget-ci-old"
 #define RC_MAP_CINERGY_1400              "rc-cinergy-1400"
 #define RC_MAP_CINERGY                   "rc-cinergy"
-/* Temporary table - should be broken into smaller tables */
-#define RC_MAP_DIB0700_BIG_TABLE         "rc-dib0700-big"
-
+#define RC_MAP_DIB0700_NEC_TABLE         "rc-dib0700-nec"
+#define RC_MAP_DIB0700_RC5_TABLE         "rc-dib0700-rc5"
 #define RC_MAP_DM1105_NEC                "rc-dm1105-nec"
 #define RC_MAP_DNTV_LIVE_DVBT_PRO        "rc-dntv-live-dvbt-pro"
 #define RC_MAP_DNTV_LIVE_DVB_T           "rc-dntv-live-dvb-t"
-- 
cgit v1.2.3-70-g09d2


From 67b284d476bcb3d100e946da23d6cf9acfd0465c Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Mon, 2 Aug 2010 11:26:02 +0000
Subject: tg3: Remove 5720, 5750, and 5750M

These devices were never released to the public.

Reviewed-by: Benjamin Li <benli@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tg3.c       | 3 ---
 include/linux/pci_ids.h | 3 ---
 2 files changed, 6 deletions(-)

(limited to 'include')

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index a52f52fbb47..32e3a3de4c6 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -221,12 +221,9 @@ static DEFINE_PCI_DEVICE_TABLE(tg3_pci_tbl) = {
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901_2)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S_2)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705F)},
-	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5720)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5721)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5722)},
-	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751)},
-	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750M)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751M)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751F)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752)},
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index ae66851870b..9ac60dabb6f 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2053,7 +2053,6 @@
 #define PCI_DEVICE_ID_NX2_57711E	0x1650
 #define PCI_DEVICE_ID_TIGON3_5705	0x1653
 #define PCI_DEVICE_ID_TIGON3_5705_2	0x1654
-#define PCI_DEVICE_ID_TIGON3_5720	0x1658
 #define PCI_DEVICE_ID_TIGON3_5721	0x1659
 #define PCI_DEVICE_ID_TIGON3_5722	0x165a
 #define PCI_DEVICE_ID_TIGON3_5723	0x165b
@@ -2067,13 +2066,11 @@
 #define PCI_DEVICE_ID_TIGON3_5754M	0x1672
 #define PCI_DEVICE_ID_TIGON3_5755M	0x1673
 #define PCI_DEVICE_ID_TIGON3_5756	0x1674
-#define PCI_DEVICE_ID_TIGON3_5750	0x1676
 #define PCI_DEVICE_ID_TIGON3_5751	0x1677
 #define PCI_DEVICE_ID_TIGON3_5715	0x1678
 #define PCI_DEVICE_ID_TIGON3_5715S	0x1679
 #define PCI_DEVICE_ID_TIGON3_5754	0x167a
 #define PCI_DEVICE_ID_TIGON3_5755	0x167b
-#define PCI_DEVICE_ID_TIGON3_5750M	0x167c
 #define PCI_DEVICE_ID_TIGON3_5751M	0x167d
 #define PCI_DEVICE_ID_TIGON3_5751F	0x167e
 #define PCI_DEVICE_ID_TIGON3_5787F	0x167f
-- 
cgit v1.2.3-70-g09d2


From 9292d8f20ff3c034c99c2adfe27496957b3defe3 Mon Sep 17 00:00:00 2001
From: Krzysztof Hałasa <khc@pm.waw.pl>
Date: Mon, 2 Aug 2010 16:03:29 -0700
Subject: Tulip: don't initialize SBE xT3E3 WAN ports.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SBE 2T3E3 cards use DECchips 21143 but they need a different driver.
Don't even try to use a normal tulip driver with them.

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tulip/tulip_core.c | 6 ++++++
 include/linux/pci_ids.h        | 3 +++
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c
index 14e5312e906..3a8d7efa2ac 100644
--- a/drivers/net/tulip/tulip_core.c
+++ b/drivers/net/tulip/tulip_core.c
@@ -1341,6 +1341,12 @@ static int __devinit tulip_init_one (struct pci_dev *pdev,
         if (pdev->subsystem_vendor == PCI_VENDOR_ID_LMC) {
 		pr_err(PFX "skipping LMC card\n");
 		return -ENODEV;
+	} else if (pdev->subsystem_vendor == PCI_VENDOR_ID_SBE &&
+		   (pdev->subsystem_device == PCI_SUBDEVICE_ID_SBE_T3E3 ||
+		    pdev->subsystem_device == PCI_SUBDEVICE_ID_SBE_2T3E3_P0 ||
+		    pdev->subsystem_device == PCI_SUBDEVICE_ID_SBE_2T3E3_P1)) {
+		pr_err(PFX "skipping SBE T3E3 port\n");
+		return -ENODEV;
 	}
 
 	/*
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 9ac60dabb6f..384c2a25db1 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1493,6 +1493,9 @@
 #define PCI_DEVICE_ID_SBE_WANXL100	0x0301
 #define PCI_DEVICE_ID_SBE_WANXL200	0x0302
 #define PCI_DEVICE_ID_SBE_WANXL400	0x0104
+#define PCI_SUBDEVICE_ID_SBE_T3E3	0x0009
+#define PCI_SUBDEVICE_ID_SBE_2T3E3_P0	0x0901
+#define PCI_SUBDEVICE_ID_SBE_2T3E3_P1	0x0902
 
 #define PCI_VENDOR_ID_TOSHIBA		0x1179
 #define PCI_DEVICE_ID_TOSHIBA_PICCOLO_1	0x0101
-- 
cgit v1.2.3-70-g09d2


From 1d5cc192d431bce2ebe9fde64054ce903200e179 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sat, 24 Jul 2010 12:23:21 +0200
Subject: pcmcia: use pcmcia_{read,write}_config_byte

Use pcmcia_read_config_byte and pcmcia_write_config_byte instead
of pcmcia_access_configuration_register.

CC: netdev@vger.kernel.org
CC: linux-wireless@vger.kernel.org
CC: linux-serial@vger.kernel.org
CC: Michael Buesch <mb@bu3sch.de>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/net/pcmcia/axnet_cs.c              |  3 +-
 drivers/net/pcmcia/nmclan_cs.c             | 21 ++-----
 drivers/net/pcmcia/xirc2ps_cs.c            | 16 +++---
 drivers/net/wireless/hostap/hostap_cs.c    | 91 ++++++++----------------------
 drivers/net/wireless/orinoco/spectrum_cs.c | 32 ++++-------
 drivers/pcmcia/cistpl.c                    |  7 ++-
 drivers/pcmcia/cs_internal.h               |  4 +-
 drivers/pcmcia/pcmcia_resource.c           | 70 ++++++++++++++---------
 drivers/serial/serial_cs.c                 |  8 +--
 drivers/ssb/pcmcia.c                       | 14 +----
 include/pcmcia/cs.h                        | 12 ----
 include/pcmcia/ds.h                        |  4 +-
 12 files changed, 104 insertions(+), 178 deletions(-)

(limited to 'include')

diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
index 467fd4bfb2b..ee0a6d036f9 100644
--- a/drivers/net/pcmcia/axnet_cs.c
+++ b/drivers/net/pcmcia/axnet_cs.c
@@ -378,8 +378,7 @@ static int axnet_config(struct pcmcia_device *link)
     /* Maybe PHY is in power down mode. (PPD_SET = 1) 
        Bit 2 of CCSR is active low. */ 
     if (i == 32) {
-	conf_reg_t reg = { 0, CS_WRITE, CISREG_CCSR, 0x04 };
- 	pcmcia_access_configuration_register(link, &reg);
+	pcmcia_write_config_byte(link, CISREG_CCSR, 0x04);
 	for (i = 0; i < 32; i++) {
 	    j = mdio_read(dev->base_addr + AXNET_MII_EEP, i, 1);
 	    j2 = mdio_read(dev->base_addr + AXNET_MII_EEP, i, 2);
diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c
index c0eacfae151..c0d85af3e94 100644
--- a/drivers/net/pcmcia/nmclan_cs.c
+++ b/drivers/net/pcmcia/nmclan_cs.c
@@ -757,29 +757,20 @@ static void nmclan_reset(struct net_device *dev)
 
 #if RESET_XILINX
   struct pcmcia_device *link = &lp->link;
-  conf_reg_t reg;
-  u_long OrigCorValue; 
+  u8 OrigCorValue;
 
   /* Save original COR value */
-  reg.Function = 0;
-  reg.Action = CS_READ;
-  reg.Offset = CISREG_COR;
-  reg.Value = 0;
-  pcmcia_access_configuration_register(link, &reg);
-  OrigCorValue = reg.Value;
+  pcmcia_read_config_byte(link, CISREG_COR, &OrigCorValue);
 
   /* Reset Xilinx */
-  reg.Action = CS_WRITE;
-  reg.Offset = CISREG_COR;
-  dev_dbg(&link->dev, "nmclan_reset: OrigCorValue=0x%lX, resetting...\n",
+  dev_dbg(&link->dev, "nmclan_reset: OrigCorValue=0x%x, resetting...\n",
 	OrigCorValue);
-  reg.Value = COR_SOFT_RESET;
-  pcmcia_access_configuration_register(link, &reg);
+  pcmcia_write_config_byte(link, CISREG_COR, COR_SOFT_RESET);
   /* Need to wait for 20 ms for PCMCIA to finish reset. */
 
   /* Restore original COR configuration index */
-  reg.Value = COR_LEVEL_REQ | (OrigCorValue & COR_CONFIG_MASK);
-  pcmcia_access_configuration_register(link, &reg);
+  pcmcia_write_config_byte(link, CISREG_COR,
+			  (COR_LEVEL_REQ | (OrigCorValue & COR_CONFIG_MASK)));
   /* Xilinx is now completely reset along with the MACE chip. */
   lp->tx_free_frames=AM2150_MAX_TX_FRAMES;
 
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index a7662f0832e..e3a85ce8988 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -869,7 +869,6 @@ xirc2ps_config(struct pcmcia_device * link)
 	goto config_error;
 
     if (local->dingo) {
-	conf_reg_t reg;
 	win_req_t req;
 	memreq_t mem;
 
@@ -878,15 +877,14 @@ xirc2ps_config(struct pcmcia_device * link)
 	 * the base address of the ethernet port (BasePort1) is written
 	 * to the BAR registers of the modem.
 	 */
-	reg.Action = CS_WRITE;
-	reg.Offset = CISREG_IOBASE_0;
-	reg.Value = link->io.BasePort2 & 0xff;
-	if ((err = pcmcia_access_configuration_register(link, &reg)))
+	err = pcmcia_write_config_byte(link, CISREG_IOBASE_0,
+				link->io.BasePort2 & 0xff);
+	if (err)
 	    goto config_error;
-	reg.Action = CS_WRITE;
-	reg.Offset = CISREG_IOBASE_1;
-	reg.Value = (link->io.BasePort2 >> 8) & 0xff;
-	if ((err = pcmcia_access_configuration_register(link, &reg)))
+
+	err = pcmcia_write_config_byte(link, CISREG_IOBASE_1,
+				(link->io.BasePort2 >> 8) & 0xff);
+	if (err)
 	    goto config_error;
 
 	/* There is no config entry for the Ethernet part which
diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c
index 2f4b6d4350a..691293675a9 100644
--- a/drivers/net/wireless/hostap/hostap_cs.c
+++ b/drivers/net/wireless/hostap/hostap_cs.c
@@ -224,27 +224,18 @@ static int prism2_pccard_card_present(local_info_t *local)
 static void sandisk_set_iobase(local_info_t *local)
 {
 	int res;
-	conf_reg_t reg;
 	struct hostap_cs_priv *hw_priv = local->hw_priv;
 
-	reg.Function = 0;
-	reg.Action = CS_WRITE;
-	reg.Offset = 0x10; /* 0x3f0 IO base 1 */
-	reg.Value = hw_priv->link->io.BasePort1 & 0x00ff;
-	res = pcmcia_access_configuration_register(hw_priv->link,
-						   &reg);
+	res = pcmcia_write_config_byte(hw_priv->link, 0x10,
+				hw_priv->link->io.BasePort1 & 0x00ff);
 	if (res != 0) {
 		printk(KERN_DEBUG "Prism3 SanDisk - failed to set I/O base 0 -"
 		       " res=%d\n", res);
 	}
 	udelay(10);
 
-	reg.Function = 0;
-	reg.Action = CS_WRITE;
-	reg.Offset = 0x12; /* 0x3f2 IO base 2 */
-	reg.Value = (hw_priv->link->io.BasePort1 & 0xff00) >> 8;
-	res = pcmcia_access_configuration_register(hw_priv->link,
-						   &reg);
+	res = pcmcia_write_config_byte(hw_priv->link, 0x12,
+				(hw_priv->link->io.BasePort1 >> 8) & 0x00ff);
 	if (res != 0) {
 		printk(KERN_DEBUG "Prism3 SanDisk - failed to set I/O base 1 -"
 		       " res=%d\n", res);
@@ -270,7 +261,6 @@ static void sandisk_write_hcr(local_info_t *local, int hcr)
 static int sandisk_enable_wireless(struct net_device *dev)
 {
 	int res, ret = 0;
-	conf_reg_t reg;
 	struct hostap_interface *iface = netdev_priv(dev);
 	local_info_t *local = iface->local;
 	struct hostap_cs_priv *hw_priv = local->hw_priv;
@@ -297,12 +287,8 @@ static int sandisk_enable_wireless(struct net_device *dev)
 	       " - using vendor-specific initialization\n", dev->name);
 	hw_priv->sandisk_connectplus = 1;
 
-	reg.Function = 0;
-	reg.Action = CS_WRITE;
-	reg.Offset = CISREG_COR;
-	reg.Value = COR_SOFT_RESET;
-	res = pcmcia_access_configuration_register(hw_priv->link,
-						   &reg);
+	res = pcmcia_write_config_byte(hw_priv->link, CISREG_COR,
+				COR_SOFT_RESET);
 	if (res != 0) {
 		printk(KERN_DEBUG "%s: SanDisk - COR sreset failed (%d)\n",
 		       dev->name, res);
@@ -310,16 +296,13 @@ static int sandisk_enable_wireless(struct net_device *dev)
 	}
 	mdelay(5);
 
-	reg.Function = 0;
-	reg.Action = CS_WRITE;
-	reg.Offset = CISREG_COR;
 	/*
 	 * Do not enable interrupts here to avoid some bogus events. Interrupts
 	 * will be enabled during the first cor_sreset call.
 	 */
-	reg.Value = COR_LEVEL_REQ | 0x8 | COR_ADDR_DECODE | COR_FUNC_ENA;
-	res = pcmcia_access_configuration_register(hw_priv->link,
-						   &reg);
+	res = pcmcia_write_config_byte(hw_priv->link, CISREG_COR,
+				(COR_LEVEL_REQ | 0x8 | COR_ADDR_DECODE |
+					COR_FUNC_ENA));
 	if (res != 0) {
 		printk(KERN_DEBUG "%s: SanDisk - COR sreset failed (%d)\n",
 		       dev->name, res);
@@ -342,30 +325,23 @@ done:
 static void prism2_pccard_cor_sreset(local_info_t *local)
 {
 	int res;
-	conf_reg_t reg;
+	u8 val;
 	struct hostap_cs_priv *hw_priv = local->hw_priv;
 
 	if (!prism2_pccard_card_present(local))
 	       return;
 
-	reg.Function = 0;
-	reg.Action = CS_READ;
-	reg.Offset = CISREG_COR;
-	reg.Value = 0;
-	res = pcmcia_access_configuration_register(hw_priv->link,
-						   &reg);
+	res = pcmcia_read_config_byte(hw_priv->link, CISREG_COR, &val);
 	if (res != 0) {
 		printk(KERN_DEBUG "prism2_pccard_cor_sreset failed 1 (%d)\n",
 		       res);
 		return;
 	}
 	printk(KERN_DEBUG "prism2_pccard_cor_sreset: original COR %02x\n",
-	       reg.Value);
+		val);
 
-	reg.Action = CS_WRITE;
-	reg.Value |= COR_SOFT_RESET;
-	res = pcmcia_access_configuration_register(hw_priv->link,
-						   &reg);
+	val |= COR_SOFT_RESET;
+	res = pcmcia_write_config_byte(hw_priv->link, CISREG_COR, val);
 	if (res != 0) {
 		printk(KERN_DEBUG "prism2_pccard_cor_sreset failed 2 (%d)\n",
 		       res);
@@ -374,11 +350,10 @@ static void prism2_pccard_cor_sreset(local_info_t *local)
 
 	mdelay(hw_priv->sandisk_connectplus ? 5 : 2);
 
-	reg.Value &= ~COR_SOFT_RESET;
+	val &= ~COR_SOFT_RESET;
 	if (hw_priv->sandisk_connectplus)
-		reg.Value |= COR_IREQ_ENA;
-	res = pcmcia_access_configuration_register(hw_priv->link,
-						   &reg);
+		val |= COR_IREQ_ENA;
+	res = pcmcia_write_config_byte(hw_priv->link, CISREG_COR, val);
 	if (res != 0) {
 		printk(KERN_DEBUG "prism2_pccard_cor_sreset failed 3 (%d)\n",
 		       res);
@@ -395,8 +370,7 @@ static void prism2_pccard_cor_sreset(local_info_t *local)
 static void prism2_pccard_genesis_reset(local_info_t *local, int hcr)
 {
 	int res;
-	conf_reg_t reg;
-	int old_cor;
+	u8 old_cor;
 	struct hostap_cs_priv *hw_priv = local->hw_priv;
 
 	if (!prism2_pccard_card_present(local))
@@ -407,25 +381,17 @@ static void prism2_pccard_genesis_reset(local_info_t *local, int hcr)
 		return;
 	}
 
-	reg.Function = 0;
-	reg.Action = CS_READ;
-	reg.Offset = CISREG_COR;
-	reg.Value = 0;
-	res = pcmcia_access_configuration_register(hw_priv->link,
-						   &reg);
+	res = pcmcia_read_config_byte(hw_priv->link, CISREG_COR, &old_cor);
 	if (res != 0) {
 		printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 1 "
 		       "(%d)\n", res);
 		return;
 	}
 	printk(KERN_DEBUG "prism2_pccard_genesis_sreset: original COR %02x\n",
-	       reg.Value);
-	old_cor = reg.Value;
+		old_cor);
 
-	reg.Action = CS_WRITE;
-	reg.Value |= COR_SOFT_RESET;
-	res = pcmcia_access_configuration_register(hw_priv->link,
-						   &reg);
+	res = pcmcia_write_config_byte(hw_priv->link, CISREG_COR,
+				old_cor | COR_SOFT_RESET);
 	if (res != 0) {
 		printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 2 "
 		       "(%d)\n", res);
@@ -435,11 +401,7 @@ static void prism2_pccard_genesis_reset(local_info_t *local, int hcr)
 	mdelay(10);
 
 	/* Setup Genesis mode */
-	reg.Action = CS_WRITE;
-	reg.Value = hcr;
-	reg.Offset = CISREG_CCSR;
-	res = pcmcia_access_configuration_register(hw_priv->link,
-						   &reg);
+	res = pcmcia_write_config_byte(hw_priv->link, CISREG_CCSR, hcr);
 	if (res != 0) {
 		printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 3 "
 		       "(%d)\n", res);
@@ -447,11 +409,8 @@ static void prism2_pccard_genesis_reset(local_info_t *local, int hcr)
 	}
 	mdelay(10);
 
-	reg.Action = CS_WRITE;
-	reg.Offset = CISREG_COR;
-	reg.Value = old_cor & ~COR_SOFT_RESET;
-	res = pcmcia_access_configuration_register(hw_priv->link,
-						   &reg);
+	res = pcmcia_write_config_byte(hw_priv->link, CISREG_COR,
+				old_cor & ~COR_SOFT_RESET);
 	if (res != 0) {
 		printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 4 "
 		       "(%d)\n", res);
diff --git a/drivers/net/wireless/orinoco/spectrum_cs.c b/drivers/net/wireless/orinoco/spectrum_cs.c
index cad30e499db..39399cd2e68 100644
--- a/drivers/net/wireless/orinoco/spectrum_cs.c
+++ b/drivers/net/wireless/orinoco/spectrum_cs.c
@@ -79,35 +79,27 @@ static int
 spectrum_reset(struct pcmcia_device *link, int idle)
 {
 	int ret;
-	conf_reg_t reg;
-	u_int save_cor;
+	u8 save_cor;
+	u8 ccsr;
 
 	/* Doing it if hardware is gone is guaranteed crash */
 	if (!pcmcia_dev_present(link))
 		return -ENODEV;
 
 	/* Save original COR value */
-	reg.Function = 0;
-	reg.Action = CS_READ;
-	reg.Offset = CISREG_COR;
-	ret = pcmcia_access_configuration_register(link, &reg);
+	ret = pcmcia_read_config_byte(link, CISREG_COR, &save_cor);
 	if (ret)
 		goto failed;
-	save_cor = reg.Value;
 
 	/* Soft-Reset card */
-	reg.Action = CS_WRITE;
-	reg.Offset = CISREG_COR;
-	reg.Value = (save_cor | COR_SOFT_RESET);
-	ret = pcmcia_access_configuration_register(link, &reg);
+	ret = pcmcia_write_config_byte(link, CISREG_COR,
+				(save_cor | COR_SOFT_RESET));
 	if (ret)
 		goto failed;
 	udelay(1000);
 
 	/* Read CCSR */
-	reg.Action = CS_READ;
-	reg.Offset = CISREG_CCSR;
-	ret = pcmcia_access_configuration_register(link, &reg);
+	ret = pcmcia_read_config_byte(link, CISREG_CCSR, &ccsr);
 	if (ret)
 		goto failed;
 
@@ -115,19 +107,15 @@ spectrum_reset(struct pcmcia_device *link, int idle)
 	 * Start or stop the firmware.  Memory width bit should be
 	 * preserved from the value we've just read.
 	 */
-	reg.Action = CS_WRITE;
-	reg.Offset = CISREG_CCSR;
-	reg.Value = (idle ? HCR_IDLE : HCR_RUN) | (reg.Value & HCR_MEM16);
-	ret = pcmcia_access_configuration_register(link, &reg);
+	ccsr = (idle ? HCR_IDLE : HCR_RUN) | (ccsr & HCR_MEM16);
+	ret = pcmcia_write_config_byte(link, CISREG_CCSR, ccsr);
 	if (ret)
 		goto failed;
 	udelay(1000);
 
 	/* Restore original COR configuration index */
-	reg.Action = CS_WRITE;
-	reg.Offset = CISREG_COR;
-	reg.Value = (save_cor & ~COR_SOFT_RESET);
-	ret = pcmcia_access_configuration_register(link, &reg);
+	ret = pcmcia_write_config_byte(link, CISREG_COR,
+				(save_cor & ~COR_SOFT_RESET));
 	if (ret)
 		goto failed;
 	udelay(1000);
diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c
index ba4a5acc2e9..1733fab469a 100644
--- a/drivers/pcmcia/cistpl.c
+++ b/drivers/pcmcia/cistpl.c
@@ -209,7 +209,7 @@ int pcmcia_read_cis_mem(struct pcmcia_socket *s, int attr, u_int addr,
  * Probably only useful for writing one-byte registers. Must be called
  * with ops_mutex held.
  */
-void pcmcia_write_cis_mem(struct pcmcia_socket *s, int attr, u_int addr,
+int pcmcia_write_cis_mem(struct pcmcia_socket *s, int attr, u_int addr,
 		   u_int len, void *ptr)
 {
 	void __iomem *sys, *end;
@@ -231,7 +231,7 @@ void pcmcia_write_cis_mem(struct pcmcia_socket *s, int attr, u_int addr,
 				((cis_width) ? MAP_16BIT : 0));
 		if (!sys) {
 			dev_dbg(&s->dev, "could not map memory\n");
-			return; /* FIXME: Error */
+			return -EINVAL;
 		}
 
 		writeb(flags, sys+CISREG_ICTRL0);
@@ -256,7 +256,7 @@ void pcmcia_write_cis_mem(struct pcmcia_socket *s, int attr, u_int addr,
 			sys = set_cis_map(s, card_offset, flags);
 			if (!sys) {
 				dev_dbg(&s->dev, "could not map memory\n");
-				return; /* FIXME: error */
+				return -EINVAL;
 			}
 
 			end = sys + s->map_size;
@@ -270,6 +270,7 @@ void pcmcia_write_cis_mem(struct pcmcia_socket *s, int attr, u_int addr,
 			addr = 0;
 		}
 	}
+	return 0;
 }
 
 
diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h
index 45e7fd1aa0b..cebd40da8b9 100644
--- a/drivers/pcmcia/cs_internal.h
+++ b/drivers/pcmcia/cs_internal.h
@@ -158,8 +158,8 @@ extern struct bin_attribute pccard_cis_attr;
 
 int pcmcia_read_cis_mem(struct pcmcia_socket *s, int attr,
 			u_int addr, u_int len, void *ptr);
-void pcmcia_write_cis_mem(struct pcmcia_socket *s, int attr,
-			  u_int addr, u_int len, void *ptr);
+int pcmcia_write_cis_mem(struct pcmcia_socket *s, int attr,
+			u_int addr, u_int len, void *ptr);
 void release_cis_mem(struct pcmcia_socket *s);
 void destroy_cis_cache(struct pcmcia_socket *s);
 int pccard_read_tuple(struct pcmcia_socket *s, unsigned int function,
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index 2394de46860..563750e77ea 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -108,25 +108,25 @@ static void release_io_space(struct pcmcia_socket *s, unsigned int base,
 } /* release_io_space */
 
 
-/** pccard_access_configuration_register
+/**
+ * pcmcia_access_config() - read or write card configuration registers
  *
- * Access_configuration_register() reads and writes configuration
- * registers in attribute memory.  Memory window 0 is reserved for
- * this and the tuple reading services.
+ * pcmcia_access_config() reads and writes configuration registers in
+ * attribute memory.  Memory window 0 is reserved for this and the tuple
+ * reading services. Drivers must use pcmcia_read_config_byte() or
+ * pcmcia_write_config_byte().
  */
-
-int pcmcia_access_configuration_register(struct pcmcia_device *p_dev,
-					 conf_reg_t *reg)
+static int pcmcia_access_config(struct pcmcia_device *p_dev,
+				off_t where, u8 *val,
+				int (*accessf) (struct pcmcia_socket *s,
+						int attr, unsigned int addr,
+						unsigned int len, void *ptr))
 {
 	struct pcmcia_socket *s;
 	config_t *c;
 	int addr;
-	u_char val;
 	int ret = 0;
 
-	if (!p_dev || !p_dev->function_config)
-		return -EINVAL;
-
 	s = p_dev->socket;
 
 	mutex_lock(&s->ops_mutex);
@@ -138,26 +138,40 @@ int pcmcia_access_configuration_register(struct pcmcia_device *p_dev,
 		return -EACCES;
 	}
 
-	addr = (c->ConfigBase + reg->Offset) >> 1;
+	addr = (c->ConfigBase + where) >> 1;
+
+	ret = accessf(s, 1, addr, 1, val);
 
-	switch (reg->Action) {
-	case CS_READ:
-		ret = pcmcia_read_cis_mem(s, 1, addr, 1, &val);
-		reg->Value = val;
-		break;
-	case CS_WRITE:
-		val = reg->Value;
-		pcmcia_write_cis_mem(s, 1, addr, 1, &val);
-		break;
-	default:
-		dev_dbg(&s->dev, "Invalid conf register request\n");
-		ret = -EINVAL;
-		break;
-	}
 	mutex_unlock(&s->ops_mutex);
+
 	return ret;
-} /* pcmcia_access_configuration_register */
-EXPORT_SYMBOL(pcmcia_access_configuration_register);
+} /* pcmcia_access_config */
+
+
+/**
+ * pcmcia_read_config_byte() - read a byte from a card configuration register
+ *
+ * pcmcia_read_config_byte() reads a byte from a configuration register in
+ * attribute memory.
+ */
+int pcmcia_read_config_byte(struct pcmcia_device *p_dev, off_t where, u8 *val)
+{
+	return pcmcia_access_config(p_dev, where, val, pcmcia_read_cis_mem);
+}
+EXPORT_SYMBOL(pcmcia_read_config_byte);
+
+
+/**
+ * pcmcia_write_config_byte() - write a byte to a card configuration register
+ *
+ * pcmcia_write_config_byte() writes a byte to a configuration register in
+ * attribute memory.
+ */
+int pcmcia_write_config_byte(struct pcmcia_device *p_dev, off_t where, u8 val)
+{
+	return pcmcia_access_config(p_dev, where, &val, pcmcia_write_cis_mem);
+}
+EXPORT_SYMBOL(pcmcia_write_config_byte);
 
 
 int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t wh,
diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c
index 2b99c7baf35..2be8b107ed5 100644
--- a/drivers/serial/serial_cs.c
+++ b/drivers/serial/serial_cs.c
@@ -114,16 +114,14 @@ static void quirk_setup_brainboxes_0104(struct pcmcia_device *link, struct uart_
 
 static int quirk_post_ibm(struct pcmcia_device *link)
 {
-	conf_reg_t reg = { 0, CS_READ, 0x800, 0 };
+	u8 val;
 	int ret;
 
-	ret = pcmcia_access_configuration_register(link, &reg);
+	ret = pcmcia_read_config_byte(link, 0x800, &val);
 	if (ret)
 		goto failed;
 
-	reg.Action = CS_WRITE;
-	reg.Value = reg.Value | 1;
-	ret = pcmcia_access_configuration_register(link, &reg);
+	ret = pcmcia_write_config_byte(link, 0x800, val | 1);
 	if (ret)
 		goto failed;
 	return 0;
diff --git a/drivers/ssb/pcmcia.c b/drivers/ssb/pcmcia.c
index 21520308178..526682d68de 100644
--- a/drivers/ssb/pcmcia.c
+++ b/drivers/ssb/pcmcia.c
@@ -71,14 +71,9 @@
 /* Write to a PCMCIA configuration register. */
 static int ssb_pcmcia_cfg_write(struct ssb_bus *bus, u8 offset, u8 value)
 {
-	conf_reg_t reg;
 	int res;
 
-	memset(&reg, 0, sizeof(reg));
-	reg.Offset = offset;
-	reg.Action = CS_WRITE;
-	reg.Value = value;
-	res = pcmcia_access_configuration_register(bus->host_pcmcia, &reg);
+	res = pcmcia_write_config_byte(bus->host_pcmcia, offset, value);
 	if (unlikely(res != 0))
 		return -EBUSY;
 
@@ -88,16 +83,11 @@ static int ssb_pcmcia_cfg_write(struct ssb_bus *bus, u8 offset, u8 value)
 /* Read from a PCMCIA configuration register. */
 static int ssb_pcmcia_cfg_read(struct ssb_bus *bus, u8 offset, u8 *value)
 {
-	conf_reg_t reg;
 	int res;
 
-	memset(&reg, 0, sizeof(reg));
-	reg.Offset = offset;
-	reg.Action = CS_READ;
-	res = pcmcia_access_configuration_register(bus->host_pcmcia, &reg);
+	res = pcmcia_read_config_byte(bus->host_pcmcia, offset, value);
 	if (unlikely(res != 0))
 		return -EBUSY;
-	*value = reg.Value;
 
 	return 0;
 }
diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h
index c78d9b11208..64e853d58c3 100644
--- a/include/pcmcia/cs.h
+++ b/include/pcmcia/cs.h
@@ -19,18 +19,6 @@
 #include <linux/interrupt.h>
 #endif
 
-/* For AccessConfigurationRegister */
-typedef struct conf_reg_t {
-    u_char	Function;
-    u_int	Action;
-    off_t	Offset;
-    u_int	Value;
-} conf_reg_t;
-
-/* Actions */
-#define CS_READ		1
-#define CS_WRITE	2
-
 /* for AdjustResourceInfo */
 /* Action field */
 #define REMOVE_MANAGED_RESOURCE		1
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index e614aa0ca2a..d494ce417b4 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -174,8 +174,8 @@ struct pcmcia_device *pcmcia_dev_present(struct pcmcia_device *p_dev);
 int pcmcia_reset_card(struct pcmcia_socket *skt);
 
 /* CIS config */
-int pcmcia_access_configuration_register(struct pcmcia_device *p_dev,
-					 conf_reg_t *reg);
+int pcmcia_read_config_byte(struct pcmcia_device *p_dev, off_t where, u8 *val);
+int pcmcia_write_config_byte(struct pcmcia_device *p_dev, off_t where, u8 val);
 
 /* device configuration */
 int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req);
-- 
cgit v1.2.3-70-g09d2


From 3dace8cf15ae1dd7c9384758b3a29556b441a90a Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sat, 24 Jul 2010 12:33:29 +0200
Subject: pcmcia: clean up cs.h

Remove some obsolete definitions from cs.h

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/pcmcia/rsrc_nonstatic.c |  3 +++
 include/pcmcia/cs.h             | 12 ------------
 2 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c
index 0cca08ff65a..c17a17d9f9b 100644
--- a/drivers/pcmcia/rsrc_nonstatic.c
+++ b/drivers/pcmcia/rsrc_nonstatic.c
@@ -63,6 +63,9 @@ struct socket_data {
 #define MEM_PROBE_LOW	(1 << 0)
 #define MEM_PROBE_HIGH	(1 << 1)
 
+/* Action field */
+#define REMOVE_MANAGED_RESOURCE		1
+#define ADD_MANAGED_RESOURCE		2
 
 /*======================================================================
 
diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h
index 64e853d58c3..7be0fcf7850 100644
--- a/include/pcmcia/cs.h
+++ b/include/pcmcia/cs.h
@@ -19,18 +19,6 @@
 #include <linux/interrupt.h>
 #endif
 
-/* for AdjustResourceInfo */
-/* Action field */
-#define REMOVE_MANAGED_RESOURCE		1
-#define ADD_MANAGED_RESOURCE		2
-
-/* For CardValues field */
-#define CV_OPTION_VALUE		0x01
-#define CV_STATUS_VALUE		0x02
-#define CV_PIN_REPLACEMENT	0x04
-#define CV_COPY_VALUE		0x08
-#define CV_EXT_STATUS		0x10
-
 /* ModifyConfiguration */
 typedef struct modconf_t {
     u_int	Attributes;
-- 
cgit v1.2.3-70-g09d2


From 2ce4905e4da9f512b38f56a53ece9da2072dd164 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sat, 24 Jul 2010 13:14:44 +0200
Subject: pcmcia: use struct resource for PCMCIA devices

Introduce a new field into struct pcmcia_device named "resource" and of
type struct resource *, which contains the IO port ranges allocated for
this device. Memory window ranges and registration with the resource
trees will follow at a later date.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/pcmcia/cs_internal.h     |   4 +-
 drivers/pcmcia/ds.c              |  17 +++--
 drivers/pcmcia/pcmcia_resource.c | 141 ++++++++++++++++++++++-----------------
 include/pcmcia/cs.h              |   3 -
 include/pcmcia/ds.h              |   3 +-
 5 files changed, 95 insertions(+), 73 deletions(-)

(limited to 'include')

diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h
index cebd40da8b9..a85558fc71f 100644
--- a/drivers/pcmcia/cs_internal.h
+++ b/drivers/pcmcia/cs_internal.h
@@ -35,7 +35,9 @@ typedef struct config_t {
 	unsigned int	ConfigBase;
 	unsigned char	Status, Pin, Copy, Option, ExtStatus;
 	unsigned int	CardValues;
-	io_req_t	io;
+
+	struct resource io[MAX_IO_WIN]; /* io ports */
+
 	struct {
 		u_int	Attributes;
 	} irq;
diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index bacfc55f202..7ddd19a4033 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -531,7 +531,6 @@ static struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s,
 	list_for_each_entry(tmp_dev, &s->devices_list, socket_device_list)
 		if (p_dev->func == tmp_dev->func) {
 			p_dev->function_config = tmp_dev->function_config;
-			p_dev->io = tmp_dev->io;
 			p_dev->irq = tmp_dev->irq;
 			kref_get(&p_dev->function_config->ref);
 		}
@@ -544,15 +543,23 @@ static struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s,
 			"IRQ setup failed -- device might not work\n");
 
 	if (!p_dev->function_config) {
+		config_t *c;
 		dev_dbg(&p_dev->dev, "creating config_t\n");
-		p_dev->function_config = kzalloc(sizeof(struct config_t),
-						 GFP_KERNEL);
-		if (!p_dev->function_config) {
+		c = kzalloc(sizeof(struct config_t), GFP_KERNEL);
+		if (!c) {
 			mutex_unlock(&s->ops_mutex);
 			goto err_unreg;
 		}
-		kref_init(&p_dev->function_config->ref);
+		p_dev->function_config = c;
+		kref_init(&c->ref);
+		for (i = 0; i < MAX_IO_WIN; i++) {
+			c->io[i].name = dev_name(&p_dev->dev);
+			c->io[i].flags = IORESOURCE_IO;
+		}
 	}
+	for (i = 0; i < MAX_IO_WIN; i++)
+		p_dev->resource[i] = &p_dev->function_config->io[i];
+
 	mutex_unlock(&s->ops_mutex);
 
 	dev_printk(KERN_NOTICE, &p_dev->dev,
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index 563750e77ea..fcd48dae79b 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -60,43 +60,60 @@ struct resource *pcmcia_find_mem_region(u_long base, u_long num, u_long align,
  *
  * Special stuff for managing IO windows, because they are scarce
  */
-
-static int alloc_io_space(struct pcmcia_socket *s, u_int attr,
-			  unsigned int *base, unsigned int num, u_int lines)
+static int alloc_io_space(struct pcmcia_socket *s, struct resource *res,
+			unsigned int lines)
 {
 	unsigned int align;
+	unsigned int base = res->start;
+	unsigned int num = res->end;
+	int ret;
+
+	res->flags |= IORESOURCE_IO;
 
-	align = (*base) ? (lines ? 1<<lines : 0) : 1;
+	dev_dbg(&s->dev, "alloc_io_space request for %pR\n", res);
+
+	align = base ? (lines ? 1<<lines : 0) : 1;
 	if (align && (align < num)) {
-		if (*base) {
-			dev_dbg(&s->dev, "odd IO request: num %#x align %#x\n",
-			       num, align);
+		if (base) {
+			dev_dbg(&s->dev, "odd IO request\n");
 			align = 0;
 		} else
 			while (align && (align < num))
 				align <<= 1;
 	}
-	if (*base & ~(align-1)) {
-		dev_dbg(&s->dev, "odd IO request: base %#x align %#x\n",
-		       *base, align);
+	if (base & ~(align-1)) {
+		dev_dbg(&s->dev, "odd IO request\n");
 		align = 0;
 	}
 
-	return s->resource_ops->find_io(s, attr, base, num, align);
+	ret = s->resource_ops->find_io(s, res->flags, &base, num, align);
+	if (ret) {
+		dev_dbg(&s->dev, "alloc_io_space request returned %d", ret);
+		return -EINVAL;
+	}
+
+	res->start = base;
+	res->end = res->start + num - 1;
+	dev_dbg(&s->dev, "alloc_io_space request returned %pR, %d\n", res, ret);
+	return 0;
 } /* alloc_io_space */
 
 
-static void release_io_space(struct pcmcia_socket *s, unsigned int base,
-			     unsigned int num)
+static void release_io_space(struct pcmcia_socket *s, struct resource *res)
 {
+	resource_size_t num = resource_size(res);
 	int i;
 
+	dev_dbg(&s->dev, "release_io_space for %pR\n", res);
+
 	for (i = 0; i < MAX_IO_WIN; i++) {
 		if (!s->io[i].res)
 			continue;
-		if ((s->io[i].res->start <= base) &&
-		    (s->io[i].res->end >= base+num-1)) {
+		if ((s->io[i].res->start <= res->start) &&
+		    (s->io[i].res->end >= res->end)) {
 			s->io[i].InUse -= num;
+			res->start = res->end = 0;
+			res->flags = IORESOURCE_IO;
 			/* Free the window if no one else is using it */
 			if (s->io[i].InUse == 0) {
 				release_resource(s->io[i].res);
@@ -329,31 +346,25 @@ int pcmcia_release_configuration(struct pcmcia_device *p_dev)
  * don't bother checking the port ranges against the current socket
  * values.
  */
-static int pcmcia_release_io(struct pcmcia_device *p_dev, io_req_t *req)
+static int pcmcia_release_io(struct pcmcia_device *p_dev)
 {
 	struct pcmcia_socket *s = p_dev->socket;
 	int ret = -EINVAL;
 	config_t *c;
 
 	mutex_lock(&s->ops_mutex);
-	c = p_dev->function_config;
-
 	if (!p_dev->_io)
 		goto out;
 
-	p_dev->_io = 0;
+	c = p_dev->function_config;
 
-	if ((c->io.BasePort1 != req->BasePort1) ||
-	    (c->io.NumPorts1 != req->NumPorts1) ||
-	    (c->io.BasePort2 != req->BasePort2) ||
-	    (c->io.NumPorts2 != req->NumPorts2))
-		goto out;
+	release_io_space(s, &c->io[0]);
 
-	c->state &= ~CONFIG_IO_REQ;
+	if (c->io[1].end)
+		release_io_space(s, &c->io[1]);
 
-	release_io_space(s, req->BasePort1, req->NumPorts1);
-	if (req->NumPorts2)
-		release_io_space(s, req->BasePort2, req->NumPorts2);
+	p_dev->_io = 0;
+	c->state &= ~CONFIG_IO_REQ;
 
 out:
 	mutex_unlock(&s->ops_mutex);
@@ -486,13 +497,13 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev,
 		pcmcia_write_cis_mem(s, 1, (base + CISREG_ESR)>>1, 1, &c->ExtStatus);
 	}
 	if (req->Present & PRESENT_IOBASE_0) {
-		u_char b = c->io.BasePort1 & 0xff;
+		u8 b = c->io[0].start & 0xff;
 		pcmcia_write_cis_mem(s, 1, (base + CISREG_IOBASE_0)>>1, 1, &b);
-		b = (c->io.BasePort1 >> 8) & 0xff;
+		b = (c->io[0].start >> 8) & 0xff;
 		pcmcia_write_cis_mem(s, 1, (base + CISREG_IOBASE_1)>>1, 1, &b);
 	}
 	if (req->Present & PRESENT_IOSIZE) {
-		u_char b = c->io.NumPorts1 + c->io.NumPorts2 - 1;
+		u8 b = resource_size(&c->io[0]) + resource_size(&c->io[1]) - 1;
 		pcmcia_write_cis_mem(s, 1, (base + CISREG_IOSIZE)>>1, 1, &b);
 	}
 
@@ -526,28 +537,42 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev,
 EXPORT_SYMBOL(pcmcia_request_configuration);
 
 
-/** pcmcia_request_io
+/**
+ * pcmcia_request_io() - attempt to reserve port ranges for PCMCIA devices
+ *
+ * pcmcia_request_io() attepts to reserve the IO port ranges specified in
+ * struct pcmcia_device *p_dev->resource[0] and *p_dev->resource[1]. The
+ * "start" value is the requested start of the IO port resource; "end"
+ * relfects the number of ports requested.
  *
- * Request_io() reserves ranges of port addresses for a socket.
- * I have not implemented range sharing or alias addressing.
+ * If io_req_t is passed, those values are converted automatically.
  */
 int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req)
 {
 	struct pcmcia_socket *s = p_dev->socket;
 	config_t *c;
 	int ret = -EINVAL;
+	unsigned int lines = req->IOAddrLines;
 
 	mutex_lock(&s->ops_mutex);
 
 	if (!(s->state & SOCKET_PRESENT)) {
-		dev_dbg(&s->dev, "No card present\n");
+		dev_dbg(&s->dev, "pcmcia_request_io: No card present\n");
 		goto out;
 	}
 
-	if (!req)
-		goto out;
-
 	c = p_dev->function_config;
+	if (req) {
+		c->io[0].start = req->BasePort1;
+		c->io[0].end = req->NumPorts1;
+		c->io[0].flags |= req->Attributes1;
+		c->io[1].start = req->BasePort2;
+		c->io[1].end = req->NumPorts2;
+		c->io[1].flags |= req->Attributes2;
+	}
+
+	dev_dbg(&s->dev, "pcmcia_request_io: %pR , %pR", &c->io[0], &c->io[1]);
+
 	if (c->state & CONFIG_LOCKED) {
 		dev_dbg(&s->dev, "Configuration is locked\n");
 		goto out;
@@ -556,40 +581,30 @@ int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req)
 		dev_dbg(&s->dev, "IO already configured\n");
 		goto out;
 	}
-	if (req->Attributes1 & (IO_SHARED | IO_FORCE_ALIAS_ACCESS)) {
-		dev_dbg(&s->dev, "bad attribute setting for IO region 1\n");
-		goto out;
-	}
-	if ((req->NumPorts2 > 0) &&
-	    (req->Attributes2 & (IO_SHARED | IO_FORCE_ALIAS_ACCESS))) {
-		dev_dbg(&s->dev, "bad attribute setting for IO region 2\n");
-		goto out;
-	}
 
-	dev_dbg(&s->dev, "trying to allocate resource 1\n");
-	ret = alloc_io_space(s, req->Attributes1, &req->BasePort1,
-			     req->NumPorts1, req->IOAddrLines);
-	if (ret) {
-		dev_dbg(&s->dev, "allocation of resource 1 failed\n");
+	ret = alloc_io_space(s, &c->io[0], lines);
+	if (ret)
 		goto out;
-	}
 
-	if (req->NumPorts2) {
-		dev_dbg(&s->dev, "trying to allocate resource 2\n");
-		ret = alloc_io_space(s, req->Attributes2, &req->BasePort2,
-				     req->NumPorts2, req->IOAddrLines);
+	if (c->io[1].end) {
+		ret = alloc_io_space(s, &c->io[1], lines);
 		if (ret) {
-			dev_dbg(&s->dev, "allocation of resource 2 failed\n");
-			release_io_space(s, req->BasePort1, req->NumPorts1);
+			release_io_space(s, &c->io[0]);
 			goto out;
 		}
-	}
+	} else
+		c->io[1].start = 0;
 
-	c->io = *req;
 	c->state |= CONFIG_IO_REQ;
 	p_dev->_io = 1;
-	dev_dbg(&s->dev, "allocating resources succeeded: %d\n", ret);
 
+	if (!ret) {
+		req->BasePort1 = c->io[0].start;
+		req->BasePort2 = c->io[1].start;
+	}
+
+	dev_dbg(&s->dev, "pcmcia_request_io succeeded: %pR , %pR",
+		&c->io[0], &c->io[1]);
 out:
 	mutex_unlock(&s->ops_mutex);
 
@@ -869,7 +884,7 @@ EXPORT_SYMBOL(pcmcia_request_window);
 void pcmcia_disable_device(struct pcmcia_device *p_dev)
 {
 	pcmcia_release_configuration(p_dev);
-	pcmcia_release_io(p_dev, &p_dev->io);
+	pcmcia_release_io(p_dev);
 	if (p_dev->_irq) {
 		free_irq(p_dev->irq, p_dev->priv);
 		p_dev->_irq = 0;
diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h
index 7be0fcf7850..0cd8c70d8aa 100644
--- a/include/pcmcia/cs.h
+++ b/include/pcmcia/cs.h
@@ -68,9 +68,6 @@ typedef struct io_req_t {
 } io_req_t;
 
 /* Attributes for RequestIO and ReleaseIO */
-#define IO_SHARED		0x01
-#define IO_FIRST_SHARED		0x02
-#define IO_FORCE_ALIAS_ACCESS	0x04
 #define IO_DATA_PATH_WIDTH	0x18
 #define IO_DATA_PATH_WIDTH_8	0x00
 #define IO_DATA_PATH_WIDTH_16	0x08
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index d494ce417b4..3dafd7db34d 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -80,13 +80,13 @@ struct pcmcia_device {
 	struct list_head	socket_device_list;
 
 	/* deprecated, will be cleaned up soon */
-	u_int			open;
 	io_req_t		io;
 	config_req_t		conf;
 	window_handle_t		win;
 
 	/* device setup */
 	unsigned int		irq;
+	struct resource		*resource[MAX_IO_WIN];
 
 	/* Is the device suspended? */
 	u16			suspended:1;
@@ -120,6 +120,7 @@ struct pcmcia_device {
 
 	/* data private to drivers */
 	void			*priv;
+	unsigned int		open;
 };
 
 #define to_pcmcia_dev(n) container_of(n, struct pcmcia_device, dev)
-- 
cgit v1.2.3-70-g09d2


From 90abdc3b973229bae98dd96649d9f7106cc177a4 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sat, 24 Jul 2010 17:23:51 +0200
Subject: pcmcia: do not use io_req_t when calling pcmcia_request_io()

Instead of passing an io_req_t, drivers are now requested to fill out
struct pcmcia_device *p_dev->resource[0] and ->resource[1] for up to
two ioport ranges. After a call to pcmcia_request_io(), the ports found
there are reserved; after calling pcmcia_request_configuration(), they
may be used.

CC: netdev@vger.kernel.org
CC: linux-wireless@vger.kernel.org
CC: linux-ide@vger.kernel.org
CC: linux-usb@vger.kernel.org
CC: laforge@gnumonks.org
CC: linux-mtd@lists.infradead.org
CC: alsa-devel@alsa-project.org
CC: linux-serial@vger.kernel.org
CC: Michael Buesch <mb@bu3sch.de>
Acked-by: Marcel Holtmann <marcel@holtmann.org> (for drivers/bluetooth/)
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 Documentation/pcmcia/driver-changes.txt          |  7 ++++
 drivers/ata/pata_pcmcia.c                        | 29 ++++++++--------
 drivers/bluetooth/bluecard_cs.c                  | 13 ++++---
 drivers/bluetooth/bt3c_cs.c                      | 18 +++++-----
 drivers/bluetooth/btuart_cs.c                    | 18 +++++-----
 drivers/bluetooth/dtl1_cs.c                      | 21 ++++++------
 drivers/char/pcmcia/cm4000_cs.c                  | 17 ++++------
 drivers/char/pcmcia/cm4040_cs.c                  | 20 ++++-------
 drivers/char/pcmcia/ipwireless/main.c            | 10 +++---
 drivers/char/pcmcia/synclink_cs.c                | 21 +++++-------
 drivers/ide/ide-cs.c                             | 30 +++++++++--------
 drivers/isdn/hardware/avm/avm_cs.c               | 15 +++------
 drivers/isdn/hisax/avma1_cs.c                    | 20 +++++------
 drivers/isdn/hisax/elsa_cs.c                     | 15 +++++----
 drivers/isdn/hisax/sedlbauer_cs.c                | 28 +++++++--------
 drivers/isdn/hisax/teles_cs.c                    | 15 +++++----
 drivers/net/pcmcia/3c574_cs.c                    | 11 +++---
 drivers/net/pcmcia/3c589_cs.c                    | 11 +++---
 drivers/net/pcmcia/axnet_cs.c                    | 40 +++++++++++-----------
 drivers/net/pcmcia/com20020_cs.c                 | 19 ++++++-----
 drivers/net/pcmcia/fmvj18x_cs.c                  | 31 ++++++++---------
 drivers/net/pcmcia/ibmtr_cs.c                    | 14 ++++----
 drivers/net/pcmcia/nmclan_cs.c                   |  8 ++---
 drivers/net/pcmcia/pcnet_cs.c                    | 40 +++++++++++-----------
 drivers/net/pcmcia/smc91c92_cs.c                 | 43 ++++++++++++------------
 drivers/net/pcmcia/xirc2ps_cs.c                  | 34 +++++++++----------
 drivers/net/wireless/airo_cs.c                   | 22 ++++++------
 drivers/net/wireless/atmel_cs.c                  | 22 ++++++------
 drivers/net/wireless/b43/pcmcia.c                |  4 ---
 drivers/net/wireless/hostap/hostap_cs.c          | 28 ++++++---------
 drivers/net/wireless/libertas/if_cs.c            |  8 ++---
 drivers/net/wireless/orinoco/orinoco_cs.c        | 24 ++++++-------
 drivers/net/wireless/orinoco/spectrum_cs.c       | 24 ++++++-------
 drivers/net/wireless/ray_cs.c                    |  5 ++-
 drivers/net/wireless/wl3501_cs.c                 | 12 +++----
 drivers/parport/parport_cs.c                     | 16 ++++-----
 drivers/pcmcia/pcmcia_resource.c                 | 37 ++++++--------------
 drivers/scsi/pcmcia/aha152x_stub.c               | 14 ++++----
 drivers/scsi/pcmcia/fdomain_stub.c               | 10 +++---
 drivers/scsi/pcmcia/nsp_cs.c                     | 30 ++++++++---------
 drivers/scsi/pcmcia/qlogic_stub.c                | 14 ++++----
 drivers/scsi/pcmcia/sym53c500_cs.c               | 14 ++++----
 drivers/serial/serial_cs.c                       | 37 ++++++++++----------
 drivers/staging/comedi/drivers/cb_das16_cs.c     | 24 ++++++-------
 drivers/staging/comedi/drivers/das08_cs.c        | 23 ++++++-------
 drivers/staging/comedi/drivers/ni_daq_700.c      | 24 ++++++-------
 drivers/staging/comedi/drivers/ni_daq_dio24.c    | 24 ++++++-------
 drivers/staging/comedi/drivers/ni_labpc_cs.c     | 24 ++++++-------
 drivers/staging/comedi/drivers/ni_mio_cs.c       | 13 ++++---
 drivers/staging/comedi/drivers/quatech_daqp_cs.c | 24 ++++++-------
 drivers/staging/wlags49_h2/wl_cs.c               |  8 ++---
 drivers/telephony/ixj_pcmcia.c                   | 16 ++++-----
 drivers/usb/host/sl811_cs.c                      | 12 +++----
 include/pcmcia/cs.h                              | 17 ----------
 include/pcmcia/ds.h                              | 21 ++++++++++--
 sound/pcmcia/pdaudiocf/pdaudiocf.c               |  6 ++--
 sound/pcmcia/vx/vxpocket.c                       |  6 ++--
 57 files changed, 527 insertions(+), 584 deletions(-)

(limited to 'include')

diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt
index ff5f0be2470..26c0f9c0054 100644
--- a/Documentation/pcmcia/driver-changes.txt
+++ b/Documentation/pcmcia/driver-changes.txt
@@ -1,4 +1,11 @@
 This file details changes in 2.6 which affect PCMCIA card driver authors:
+* pcmcia_request_io changes (as of 2.6.36)
+   Instead of io_req_t, drivers are now requested to fill out
+   struct pcmcia_device *p_dev->resource[0,1] for up to two ioport
+   ranges. After a call to pcmcia_request_io(), the ports found there
+   are reserved, after calling pcmcia_request_configuration(), they may
+   be used.
+
 * No dev_info_t, no cs_types.h (as of 2.6.36)
    dev_info_t and a few other typedefs are removed. No longer use them
    in PCMCIA device drivers. Also, do not include pcmcia/cs_types.h, as
diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c
index 1fcd0659b3f..e944aa0c551 100644
--- a/drivers/ata/pata_pcmcia.c
+++ b/drivers/ata/pata_pcmcia.c
@@ -200,21 +200,23 @@ static int pcmcia_check_one_config(struct pcmcia_device *pdev,
 
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
-		pdev->io.BasePort1 = io->win[0].base;
-		pdev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
-		if (!(io->flags & CISTPL_IO_16BIT))
-			pdev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
+		pdev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
+		pdev->resource[0]->start = io->win[0].base;
+		if (!(io->flags & CISTPL_IO_16BIT)) {
+			pdev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+			pdev->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
+		}
 		if (io->nwin == 2) {
-			pdev->io.NumPorts1 = 8;
-			pdev->io.BasePort2 = io->win[1].base;
-			pdev->io.NumPorts2 = (stk->is_kme) ? 2 : 1;
-			if (pcmcia_request_io(pdev, &pdev->io) != 0)
+			pdev->resource[0]->end = 8;
+			pdev->resource[1]->start = io->win[1].base;
+			pdev->resource[1]->end = (stk->is_kme) ? 2 : 1;
+			if (pcmcia_request_io(pdev) != 0)
 				return -ENODEV;
 			stk->ctl_base = pdev->resource[1]->start;
 		} else if ((io->nwin == 1) && (io->win[0].len >= 16)) {
-			pdev->io.NumPorts1 = io->win[0].len;
-			pdev->io.NumPorts2 = 0;
-			if (pcmcia_request_io(pdev, &pdev->io) != 0)
+			pdev->resource[0]->end = io->win[0].len;
+			pdev->resource[1]->end = 0;
+			if (pcmcia_request_io(pdev) != 0)
 				return -ENODEV;
 			stk->ctl_base = pdev->resource[0]->start + 0x0e;
 		} else
@@ -245,9 +247,8 @@ static int pcmcia_init_one(struct pcmcia_device *pdev)
 	struct ata_port_operations *ops = &pcmcia_port_ops;
 
 	/* Set up attributes in order to probe card and get resources */
-	pdev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-	pdev->io.Attributes2 = IO_DATA_PATH_WIDTH_8;
-	pdev->io.IOAddrLines = 3;
+	pdev->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
+	pdev->resource[1]->flags |= IO_DATA_PATH_WIDTH_8;
 	pdev->conf.Attributes = CONF_ENABLE_IRQ;
 	pdev->conf.IntType = INT_MEMORY_AND_IO;
 
diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c
index 24d2007139e..d52e90a5a61 100644
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -865,9 +865,6 @@ static int bluecard_probe(struct pcmcia_device *link)
 	info->p_dev = link;
 	link->priv = info;
 
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-	link->io.NumPorts1 = 8;
-
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
 
@@ -890,12 +887,14 @@ static int bluecard_config(struct pcmcia_device *link)
 	int i, n;
 
 	link->conf.ConfigIndex = 0x20;
-	link->io.NumPorts1 = 64;
-	link->io.IOAddrLines = 6;
+
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
+	link->resource[0]->end = 64;
+	link->io_lines = 6;
 
 	for (n = 0; n < 0x400; n += 0x40) {
-		link->io.BasePort1 = n ^ 0x300;
-		i = pcmcia_request_io(link, &link->io);
+		link->resource[0]->start = n ^ 0x300;
+		i = pcmcia_request_io(link);
 		if (i == 0)
 			break;
 	}
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c
index 8ab494c0c17..7ab8f29d5e0 100644
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -657,8 +657,8 @@ static int bt3c_probe(struct pcmcia_device *link)
 	info->p_dev = link;
 	link->priv = info;
 
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-	link->io.NumPorts1 = 8;
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
+	link->resource[0]->end = 8;
 
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
@@ -683,14 +683,14 @@ static int bt3c_check_config(struct pcmcia_device *p_dev,
 {
 	unsigned long try = (unsigned long) priv_data;
 
+	p_dev->io_lines = (try == 0) ? 16 : cf->io.flags & CISTPL_IO_LINES_MASK;
+
 	if (cf->vpp1.present & (1 << CISTPL_POWER_VNOM))
 		p_dev->conf.Vpp = cf->vpp1.param[CISTPL_POWER_VNOM] / 10000;
 	if ((cf->io.nwin > 0) && (cf->io.win[0].len == 8) &&
 	    (cf->io.win[0].base != 0)) {
-		p_dev->io.BasePort1 = cf->io.win[0].base;
-		p_dev->io.IOAddrLines = (try == 0) ? 16 :
-			cf->io.flags & CISTPL_IO_LINES_MASK;
-		if (!pcmcia_request_io(p_dev, &p_dev->io))
+		p_dev->resource[0]->start = cf->io.win[0].base;
+		if (!pcmcia_request_io(p_dev))
 			return 0;
 	}
 	return -ENODEV;
@@ -707,9 +707,9 @@ static int bt3c_check_config_notpicky(struct pcmcia_device *p_dev,
 
 	if ((cf->io.nwin > 0) && ((cf->io.flags & CISTPL_IO_LINES_MASK) <= 3)) {
 		for (j = 0; j < 5; j++) {
-			p_dev->io.BasePort1 = base[j];
-			p_dev->io.IOAddrLines = base[j] ? 16 : 3;
-			if (!pcmcia_request_io(p_dev, &p_dev->io))
+			p_dev->resource[0]->start = base[j];
+			p_dev->io_lines = base[j] ? 16 : 3;
+			if (!pcmcia_request_io(p_dev))
 				return 0;
 		}
 	}
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c
index 7e770d40368..1c4f5e863b0 100644
--- a/drivers/bluetooth/btuart_cs.c
+++ b/drivers/bluetooth/btuart_cs.c
@@ -586,8 +586,8 @@ static int btuart_probe(struct pcmcia_device *link)
 	info->p_dev = link;
 	link->priv = info;
 
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-	link->io.NumPorts1 = 8;
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
+	link->resource[0]->end = 8;
 
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
@@ -612,14 +612,14 @@ static int btuart_check_config(struct pcmcia_device *p_dev,
 {
 	int *try = priv_data;
 
+	p_dev->io_lines = (try == 0) ? 16 : cf->io.flags & CISTPL_IO_LINES_MASK;
+
 	if (cf->vpp1.present & (1 << CISTPL_POWER_VNOM))
 		p_dev->conf.Vpp = cf->vpp1.param[CISTPL_POWER_VNOM] / 10000;
 	if ((cf->io.nwin > 0) && (cf->io.win[0].len == 8) &&
 	    (cf->io.win[0].base != 0)) {
-		p_dev->io.BasePort1 = cf->io.win[0].base;
-		p_dev->io.IOAddrLines = (*try == 0) ? 16 :
-			cf->io.flags & CISTPL_IO_LINES_MASK;
-		if (!pcmcia_request_io(p_dev, &p_dev->io))
+		p_dev->resource[0]->start = cf->io.win[0].base;
+		if (!pcmcia_request_io(p_dev))
 			return 0;
 	}
 	return -ENODEV;
@@ -636,9 +636,9 @@ static int btuart_check_config_notpicky(struct pcmcia_device *p_dev,
 
 	if ((cf->io.nwin > 0) && ((cf->io.flags & CISTPL_IO_LINES_MASK) <= 3)) {
 		for (j = 0; j < 5; j++) {
-			p_dev->io.BasePort1 = base[j];
-			p_dev->io.IOAddrLines = base[j] ? 16 : 3;
-			if (!pcmcia_request_io(p_dev, &p_dev->io))
+			p_dev->resource[0]->start = base[j];
+			p_dev->io_lines = base[j] ? 16 : 3;
+			if (!pcmcia_request_io(p_dev))
 				return 0;
 		}
 	}
diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c
index bfe9313516f..18ecc5734e9 100644
--- a/drivers/bluetooth/dtl1_cs.c
+++ b/drivers/bluetooth/dtl1_cs.c
@@ -572,8 +572,8 @@ static int dtl1_probe(struct pcmcia_device *link)
 	info->p_dev = link;
 	link->priv = info;
 
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-	link->io.NumPorts1 = 8;
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
+	link->resource[0]->end = 8;
 
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
@@ -597,14 +597,13 @@ static int dtl1_confcheck(struct pcmcia_device *p_dev,
 			  unsigned int vcc,
 			  void *priv_data)
 {
-	if ((cf->io.nwin == 1) && (cf->io.win[0].len > 8)) {
-		p_dev->io.BasePort1 = cf->io.win[0].base;
-		p_dev->io.NumPorts1 = cf->io.win[0].len;	/*yo */
-		p_dev->io.IOAddrLines = cf->io.flags & CISTPL_IO_LINES_MASK;
-		if (!pcmcia_request_io(p_dev, &p_dev->io))
-			return 0;
-	}
-	return -ENODEV;
+	if ((cf->io.nwin != 1) || (cf->io.win[0].len <= 8))
+		return -ENODEV;
+
+	p_dev->resource[0]->start = cf->io.win[0].base;
+	p_dev->resource[0]->end = cf->io.win[0].len;	/*yo */
+	p_dev->io_lines = cf->io.flags & CISTPL_IO_LINES_MASK;
+	return pcmcia_request_io(p_dev);
 }
 
 static int dtl1_config(struct pcmcia_device *link)
@@ -613,7 +612,7 @@ static int dtl1_config(struct pcmcia_device *link)
 	int i;
 
 	/* Look for a generic full-sized window */
-	link->io.NumPorts1 = 8;
+	link->resource[0]->end = 8;
 	if (pcmcia_loop_config(link, dtl1_confcheck, NULL) < 0)
 		goto failed;
 
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index 18484edc125..ec73d9f6d9e 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -1751,17 +1751,12 @@ static int cm4000_config_check(struct pcmcia_device *p_dev,
 	if (!cfg->io.nwin)
 		return -ENODEV;
 
-	/* Get the IOaddr */
-	p_dev->io.BasePort1 = cfg->io.win[0].base;
-	p_dev->io.NumPorts1 = cfg->io.win[0].len;
-	p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-	if (!(cfg->io.flags & CISTPL_IO_8BIT))
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-	if (!(cfg->io.flags & CISTPL_IO_16BIT))
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-	p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK;
-
-	return pcmcia_request_io(p_dev, &p_dev->io);
+	p_dev->resource[0]->start = cfg->io.win[0].base;
+	p_dev->resource[0]->end = cfg->io.win[0].len;
+	p_dev->resource[0]->flags |= pcmcia_io_cfg_data_width(cfg->io.flags);
+	p_dev->io_lines = cfg->io.flags & CISTPL_IO_LINES_MASK;
+
+	return pcmcia_request_io(p_dev);
 }
 
 static int cm4000_config(struct pcmcia_device * link, int devno)
diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c
index a6bbf199dde..815cde1d057 100644
--- a/drivers/char/pcmcia/cm4040_cs.c
+++ b/drivers/char/pcmcia/cm4040_cs.c
@@ -527,16 +527,12 @@ static int cm4040_config_check(struct pcmcia_device *p_dev,
 		return -ENODEV;
 
 	/* Get the IOaddr */
-	p_dev->io.BasePort1 = cfg->io.win[0].base;
-	p_dev->io.NumPorts1 = cfg->io.win[0].len;
-	p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-	if (!(cfg->io.flags & CISTPL_IO_8BIT))
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-	if (!(cfg->io.flags & CISTPL_IO_16BIT))
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-	p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK;
-
-	rc = pcmcia_request_io(p_dev, &p_dev->io);
+	p_dev->resource[0]->start = cfg->io.win[0].base;
+	p_dev->resource[0]->end = cfg->io.win[0].len;
+	p_dev->resource[0]->flags |= pcmcia_io_cfg_data_width(cfg->io.flags);
+	p_dev->io_lines = cfg->io.flags & CISTPL_IO_LINES_MASK;
+	rc = pcmcia_request_io(p_dev);
+
 	dev_printk(KERN_INFO, &p_dev->dev,
 		   "pcmcia_request_io returned 0x%x\n", rc);
 	return rc;
@@ -548,10 +544,6 @@ static int reader_config(struct pcmcia_device *link, int devno)
 	struct reader_dev *dev;
 	int fail_rc;
 
-	link->io.BasePort2 = 0;
-	link->io.NumPorts2 = 0;
-	link->io.Attributes2 = 0;
-
 	if (pcmcia_loop_config(link, cm4040_config_check, NULL))
 		goto cs_release;
 
diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c
index 9467994d556..5f87b9f7b6d 100644
--- a/drivers/char/pcmcia/ipwireless/main.c
+++ b/drivers/char/pcmcia/ipwireless/main.c
@@ -88,15 +88,15 @@ static int ipwireless_probe(struct pcmcia_device *p_dev,
 	memreq_t memreq_common_memory;
 	int ret;
 
-	p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-	p_dev->io.BasePort1 = cfg->io.win[0].base;
-	p_dev->io.NumPorts1 = cfg->io.win[0].len;
-	p_dev->io.IOAddrLines = 16;
+	p_dev->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
+	p_dev->resource[0]->start = cfg->io.win[0].base;
+	p_dev->resource[0]->end = cfg->io.win[0].len;
 
 	/* 0x40 causes it to generate level mode interrupts. */
 	/* 0x04 enables IREQ pin. */
 	p_dev->conf.ConfigIndex = cfg->index | 0x44;
-	ret = pcmcia_request_io(p_dev, &p_dev->io);
+	p_dev->io_lines = 16;
+	ret = pcmcia_request_io(p_dev);
 	if (ret)
 		return ret;
 
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 8ded9b02b9b..9ecd6bef5d3 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -571,18 +571,15 @@ static int mgslpc_ioprobe(struct pcmcia_device *p_dev,
 			  unsigned int vcc,
 			  void *priv_data)
 {
-	if (cfg->io.nwin > 0) {
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-		if (!(cfg->io.flags & CISTPL_IO_8BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-		if (!(cfg->io.flags & CISTPL_IO_16BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-		p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK;
-		p_dev->io.BasePort1 = cfg->io.win[0].base;
-		p_dev->io.NumPorts1 = cfg->io.win[0].len;
-		return pcmcia_request_io(p_dev, &p_dev->io);
-	}
-	return -ENODEV;
+	if (!cfg->io.nwin)
+		return -ENODEV;
+
+	p_dev->resource[0]->start = cfg->io.win[0].base;
+	p_dev->resource[0]->end = cfg->io.win[0].len;
+	p_dev->resource[0]->flags |= pcmcia_io_cfg_data_width(cfg->io.flags);
+	p_dev->io_lines = cfg->io.flags & CISTPL_IO_LINES_MASK;
+
+	return pcmcia_request_io(p_dev);
 }
 
 static int mgslpc_config(struct pcmcia_device *link)
diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c
index 6be0e5f108b..2a4cb9c18f0 100644
--- a/drivers/ide/ide-cs.c
+++ b/drivers/ide/ide-cs.c
@@ -97,9 +97,8 @@ static int ide_probe(struct pcmcia_device *link)
     info->p_dev = link;
     link->priv = info;
 
-    link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-    link->io.Attributes2 = IO_DATA_PATH_WIDTH_8;
-    link->io.IOAddrLines = 3;
+    link->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
+    link->resource[1]->flags |= IO_DATA_PATH_WIDTH_8;
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
 
@@ -228,22 +227,25 @@ static int pcmcia_check_one_config(struct pcmcia_device *pdev,
 
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
+		pdev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
+
 		pdev->conf.ConfigIndex = cfg->index;
-		pdev->io.BasePort1 = io->win[0].base;
-		pdev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
-		if (!(io->flags & CISTPL_IO_16BIT))
-			pdev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
+		pdev->resource[0]->start = io->win[0].base;
+		if (!(io->flags & CISTPL_IO_16BIT)) {
+			pdev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+			pdev->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
+		}
 		if (io->nwin == 2) {
-			pdev->io.NumPorts1 = 8;
-			pdev->io.BasePort2 = io->win[1].base;
-			pdev->io.NumPorts2 = (stk->is_kme) ? 2 : 1;
-			if (pcmcia_request_io(pdev, &pdev->io) != 0)
+			pdev->resource[0]->end = 8;
+			pdev->resource[1]->start = io->win[1].base;
+			pdev->resource[1]->end = (stk->is_kme) ? 2 : 1;
+			if (pcmcia_request_io(pdev) != 0)
 				return -ENODEV;
 			stk->ctl_base = pdev->resource[1]->start;
 		} else if ((io->nwin == 1) && (io->win[0].len >= 16)) {
-			pdev->io.NumPorts1 = io->win[0].len;
-			pdev->io.NumPorts2 = 0;
-			if (pcmcia_request_io(pdev, &pdev->io) != 0)
+			pdev->resource[0]->end = io->win[0].len;
+			pdev->resource[1]->end = 0;
+			if (pcmcia_request_io(pdev) != 0)
 				return -ENODEV;
 			stk->ctl_base = pdev->resource[0]->start + 0x0e;
 		} else
diff --git a/drivers/isdn/hardware/avm/avm_cs.c b/drivers/isdn/hardware/avm/avm_cs.c
index 7c8c51f2200..09b1795516f 100644
--- a/drivers/isdn/hardware/avm/avm_cs.c
+++ b/drivers/isdn/hardware/avm/avm_cs.c
@@ -75,9 +75,8 @@ static int avmcs_probe(struct pcmcia_device *p_dev)
 {
 
     /* The io structure describes IO port mapping */
-    p_dev->io.NumPorts1 = 16;
-    p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-    p_dev->io.NumPorts2 = 0;
+    p_dev->resource[0]->end = 16;
+    p_dev->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
 
     /* General socket configuration */
     p_dev->conf.Attributes = CONF_ENABLE_IRQ;
@@ -119,13 +118,9 @@ static int avmcs_configcheck(struct pcmcia_device *p_dev,
 	if (cf->io.nwin <= 0)
 		return -ENODEV;
 
-	p_dev->io.BasePort1 = cf->io.win[0].base;
-	p_dev->io.NumPorts1 = cf->io.win[0].len;
-	p_dev->io.NumPorts2 = 0;
-	printk(KERN_INFO "avm_cs: testing i/o %#x-%#x\n",
-	       p_dev->io.BasePort1,
-	       p_dev->io.BasePort1+p_dev->io.NumPorts1-1);
-	return pcmcia_request_io(p_dev, &p_dev->io);
+	p_dev->resource[0]->start = cf->io.win[0].base;
+	p_dev->resource[0]->end = cf->io.win[0].len;
+	return pcmcia_request_io(p_dev);
 }
 
 static int avmcs_config(struct pcmcia_device *link)
diff --git a/drivers/isdn/hisax/avma1_cs.c b/drivers/isdn/hisax/avma1_cs.c
index 88899638f83..94263c22b87 100644
--- a/drivers/isdn/hisax/avma1_cs.c
+++ b/drivers/isdn/hisax/avma1_cs.c
@@ -78,11 +78,10 @@ static int __devinit avma1cs_probe(struct pcmcia_device *p_dev)
     dev_dbg(&p_dev->dev, "avma1cs_attach()\n");
 
     /* The io structure describes IO port mapping */
-    p_dev->io.NumPorts1 = 16;
-    p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-    p_dev->io.NumPorts2 = 16;
-    p_dev->io.Attributes2 = IO_DATA_PATH_WIDTH_16;
-    p_dev->io.IOAddrLines = 5;
+    p_dev->resource[0]->end = 16;
+    p_dev->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
+    p_dev->resource[1]->end = 16;
+    p_dev->resource[1]->flags |= IO_DATA_PATH_WIDTH_16;
 
     /* General socket configuration */
     p_dev->conf.Attributes = CONF_ENABLE_IRQ;
@@ -126,13 +125,10 @@ static int avma1cs_configcheck(struct pcmcia_device *p_dev,
 	if (cf->io.nwin <= 0)
 		return -ENODEV;
 
-	p_dev->io.BasePort1 = cf->io.win[0].base;
-	p_dev->io.NumPorts1 = cf->io.win[0].len;
-	p_dev->io.NumPorts2 = 0;
-	printk(KERN_INFO "avma1_cs: testing i/o %#x-%#x\n",
-	       p_dev->io.BasePort1,
-	       p_dev->io.BasePort1+p_dev->io.NumPorts1-1);
-	return pcmcia_request_io(p_dev, &p_dev->io);
+	p_dev->resource[0]->start = cf->io.win[0].base;
+	p_dev->resource[0]->end = cf->io.win[0].len;
+	p_dev->io_lines = 5;
+	return pcmcia_request_io(p_dev);
 }
 
 
diff --git a/drivers/isdn/hisax/elsa_cs.c b/drivers/isdn/hisax/elsa_cs.c
index c10bfd3f458..b3c08aaf41c 100644
--- a/drivers/isdn/hisax/elsa_cs.c
+++ b/drivers/isdn/hisax/elsa_cs.c
@@ -126,9 +126,8 @@ static int __devinit elsa_cs_probe(struct pcmcia_device *link)
       and attributes of IO windows) are fixed by the nature of the
       device, and can be hard-wired here.
     */
-    link->io.NumPorts1 = 8;
-    link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-    link->io.IOAddrLines = 3;
+    link->resource[0]->end = 8;
+    link->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
 
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
@@ -173,16 +172,18 @@ static int elsa_cs_configcheck(struct pcmcia_device *p_dev,
 {
 	int j;
 
+	p_dev->io_lines = 3;
+
 	if ((cf->io.nwin > 0) && cf->io.win[0].base) {
 		printk(KERN_INFO "(elsa_cs: looks like the 96 model)\n");
-		p_dev->io.BasePort1 = cf->io.win[0].base;
-		if (!pcmcia_request_io(p_dev, &p_dev->io))
+		p_dev->resource[0]->start = cf->io.win[0].base;
+		if (!pcmcia_request_io(p_dev))
 			return 0;
 	} else {
 		printk(KERN_INFO "(elsa_cs: looks like the 97 model)\n");
 		for (j = 0x2f0; j > 0x100; j -= 0x10) {
-			p_dev->io.BasePort1 = j;
-			if (!pcmcia_request_io(p_dev, &p_dev->io))
+			p_dev->resource[0]->start = j;
+			if (!pcmcia_request_io(p_dev))
 				return 0;
 		}
 	}
diff --git a/drivers/isdn/hisax/sedlbauer_cs.c b/drivers/isdn/hisax/sedlbauer_cs.c
index cecb35ab9d3..4755eb440f7 100644
--- a/drivers/isdn/hisax/sedlbauer_cs.c
+++ b/drivers/isdn/hisax/sedlbauer_cs.c
@@ -129,9 +129,8 @@ static int __devinit sedlbauer_probe(struct pcmcia_device *link)
     /* from old sedl_cs 
     */
     /* The io structure describes IO port mapping */
-    link->io.NumPorts1 = 8;
-    link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-    link->io.IOAddrLines = 3;
+    link->resource[0]->end = 8;
+    link->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
 
     link->conf.Attributes = 0;
     link->conf.IntType = INT_MEMORY_AND_IO;
@@ -201,23 +200,22 @@ static int sedlbauer_config_check(struct pcmcia_device *p_dev,
 	p_dev->conf.Attributes |= CONF_ENABLE_IRQ;
 
 	/* IO window settings */
-	p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0;
+	p_dev->resource[0]->end = p_dev->resource[1]->end = 0;
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-		if (!(io->flags & CISTPL_IO_8BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-		if (!(io->flags & CISTPL_IO_16BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-		p_dev->io.BasePort1 = io->win[0].base;
-		p_dev->io.NumPorts1 = io->win[0].len;
+		p_dev->resource[0]->start = io->win[0].base;
+		p_dev->resource[0]->end = io->win[0].len;
+		p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+		p_dev->resource[0]->flags |=
+					pcmcia_io_cfg_data_width(io->flags);
 		if (io->nwin > 1) {
-			p_dev->io.Attributes2 = p_dev->io.Attributes1;
-			p_dev->io.BasePort2 = io->win[1].base;
-			p_dev->io.NumPorts2 = io->win[1].len;
+			p_dev->resource[1]->flags = p_dev->resource[0]->flags;
+			p_dev->resource[1]->start = io->win[1].base;
+			p_dev->resource[1]->end = io->win[1].len;
 		}
 		/* This reserves IO space but doesn't actually enable it */
-		if (pcmcia_request_io(p_dev, &p_dev->io) != 0)
+		p_dev->io_lines = 3;
+		if (pcmcia_request_io(p_dev) != 0)
 			return -ENODEV;
 	}
 
diff --git a/drivers/isdn/hisax/teles_cs.c b/drivers/isdn/hisax/teles_cs.c
index 3787fc70cf8..7296102ca25 100644
--- a/drivers/isdn/hisax/teles_cs.c
+++ b/drivers/isdn/hisax/teles_cs.c
@@ -106,9 +106,8 @@ static int __devinit teles_probe(struct pcmcia_device *link)
       and attributes of IO windows) are fixed by the nature of the
       device, and can be hard-wired here.
     */
-    link->io.NumPorts1 = 96;
-    link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-    link->io.IOAddrLines = 5;
+    link->resource[0]->end = 96;
+    link->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
 
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
@@ -153,16 +152,18 @@ static int teles_cs_configcheck(struct pcmcia_device *p_dev,
 {
 	int j;
 
+	p_dev->io_lines = 5;
+
 	if ((cf->io.nwin > 0) && cf->io.win[0].base) {
 		printk(KERN_INFO "(teles_cs: looks like the 96 model)\n");
-		p_dev->io.BasePort1 = cf->io.win[0].base;
-		if (!pcmcia_request_io(p_dev, &p_dev->io))
+		p_dev->resource[0]->start = cf->io.win[0].base;
+		if (!pcmcia_request_io(p_dev))
 			return 0;
 	} else {
 		printk(KERN_INFO "(teles_cs: looks like the 97 model)\n");
 		for (j = 0x2f0; j > 0x100; j -= 0x10) {
-			p_dev->io.BasePort1 = j;
-			if (!pcmcia_request_io(p_dev, &p_dev->io))
+			p_dev->resource[0]->start = j;
+			if (!pcmcia_request_io(p_dev))
 				return 0;
 		}
 	}
diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c
index b5ea9b8cfd7..c683f77c6f4 100644
--- a/drivers/net/pcmcia/3c574_cs.c
+++ b/drivers/net/pcmcia/3c574_cs.c
@@ -278,8 +278,8 @@ static int tc574_probe(struct pcmcia_device *link)
 	lp->p_dev = link;
 
 	spin_lock_init(&lp->window_lock);
-	link->io.NumPorts1 = 32;
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
+	link->resource[0]->end = 32;
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_16;
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
 	link->conf.ConfigIndex = 1;
@@ -337,10 +337,11 @@ static int tc574_config(struct pcmcia_device *link)
 
 	dev_dbg(&link->dev, "3c574_config()\n");
 
-	link->io.IOAddrLines = 16;
+	link->io_lines = 16;
+
 	for (i = j = 0; j < 0x400; j += 0x20) {
-		link->io.BasePort1 = j ^ 0x300;
-		i = pcmcia_request_io(link, &link->io);
+		link->resource[0]->start = j ^ 0x300;
+		i = pcmcia_request_io(link);
 		if (i == 0)
 			break;
 	}
diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c
index 122ef4a9488..61f9cf2100f 100644
--- a/drivers/net/pcmcia/3c589_cs.c
+++ b/drivers/net/pcmcia/3c589_cs.c
@@ -213,8 +213,8 @@ static int tc589_probe(struct pcmcia_device *link)
     lp->p_dev = link;
 
     spin_lock_init(&lp->lock);
-    link->io.NumPorts1 = 16;
-    link->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
+    link->resource[0]->end = 16;
+    link->resource[0]->flags |= IO_DATA_PATH_WIDTH_16;
 
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
@@ -277,12 +277,13 @@ static int tc589_config(struct pcmcia_device *link)
 		   "3Com card??\n");
     multi = (link->card_id == PRODID_3COM_3C562);
 
+    link->io_lines = 16;
+
     /* For the 3c562, the base address must be xx00-xx7f */
-    link->io.IOAddrLines = 16;
     for (i = j = 0; j < 0x400; j += 0x10) {
 	if (multi && (j & 0x80)) continue;
-	link->io.BasePort1 = j ^ 0x300;
-	i = pcmcia_request_io(link, &link->io);
+	link->resource[0]->start = j ^ 0x300;
+	i = pcmcia_request_io(link);
 	if (i == 0)
 		break;
     }
diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
index c52fdf31cbf..5f05ffb240c 100644
--- a/drivers/net/pcmcia/axnet_cs.c
+++ b/drivers/net/pcmcia/axnet_cs.c
@@ -259,28 +259,30 @@ static int get_prom(struct pcmcia_device *link)
 static int try_io_port(struct pcmcia_device *link)
 {
     int j, ret;
-    if (link->io.NumPorts1 == 32) {
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
+    link->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+    link->resource[1]->flags &= ~IO_DATA_PATH_WIDTH;
+    if (link->resource[0]->end == 32) {
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
 	/* for master/slave multifunction cards */
-	if (link->io.NumPorts2 > 0)
-	    link->io.Attributes2 = IO_DATA_PATH_WIDTH_8;
+	if (link->resource[1]->end > 0)
+	    link->resource[1]->flags |= IO_DATA_PATH_WIDTH_8;
     } else {
 	/* This should be two 16-port windows */
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-	link->io.Attributes2 = IO_DATA_PATH_WIDTH_16;
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
+	link->resource[1]->flags |= IO_DATA_PATH_WIDTH_16;
     }
-    if (link->io.BasePort1 == 0) {
-	link->io.IOAddrLines = 16;
+    if (link->resource[0]->start == 0) {
 	for (j = 0; j < 0x400; j += 0x20) {
-	    link->io.BasePort1 = j ^ 0x300;
-	    link->io.BasePort2 = (j ^ 0x300) + 0x10;
-	    ret = pcmcia_request_io(link, &link->io);
+	    link->resource[0]->start = j ^ 0x300;
+	    link->resource[1]->start = (j ^ 0x300) + 0x10;
+	    link->io_lines = 16;
+	    ret = pcmcia_request_io(link);
 	    if (ret == 0)
 		    return ret;
 	}
 	return ret;
     } else {
-	return pcmcia_request_io(link, &link->io);
+	return pcmcia_request_io(link);
     }
 }
 
@@ -301,15 +303,15 @@ static int axnet_configcheck(struct pcmcia_device *p_dev,
 	   network function with window 0, and serial with window 1 */
 	if (io->nwin > 1) {
 		i = (io->win[1].len > io->win[0].len);
-		p_dev->io.BasePort2 = io->win[1-i].base;
-		p_dev->io.NumPorts2 = io->win[1-i].len;
+		p_dev->resource[1]->start = io->win[1-i].base;
+		p_dev->resource[1]->end = io->win[1-i].len;
 	} else {
-		i = p_dev->io.NumPorts2 = 0;
+		i = p_dev->resource[1]->end = 0;
 	}
-	p_dev->io.BasePort1 = io->win[i].base;
-	p_dev->io.NumPorts1 = io->win[i].len;
-	p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
-	if (p_dev->io.NumPorts1 + p_dev->io.NumPorts2 >= 32)
+	p_dev->resource[0]->start = io->win[i].base;
+	p_dev->resource[0]->end = io->win[i].len;
+	p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
+	if (p_dev->resource[0]->end + p_dev->resource[1]->end >= 32)
 		return try_io_port(p_dev);
 
 	return -ENODEV;
diff --git a/drivers/net/pcmcia/com20020_cs.c b/drivers/net/pcmcia/com20020_cs.c
index 3b53818e3ee..3c400cfa82a 100644
--- a/drivers/net/pcmcia/com20020_cs.c
+++ b/drivers/net/pcmcia/com20020_cs.c
@@ -158,9 +158,8 @@ static int com20020_probe(struct pcmcia_device *p_dev)
     /* fill in our module parameters as defaults */
     dev->dev_addr[0] = node;
 
-    p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-    p_dev->io.NumPorts1 = 16;
-    p_dev->io.IOAddrLines = 16;
+    p_dev->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
+    p_dev->resource[0]->end = 16;
     p_dev->conf.Attributes = CONF_ENABLE_IRQ;
     p_dev->conf.IntType = INT_MEMORY_AND_IO;
 
@@ -245,20 +244,24 @@ static int com20020_config(struct pcmcia_device *link)
 
     dev_dbg(&link->dev, "com20020_config\n");
 
-    dev_dbg(&link->dev, "baseport1 is %Xh\n", link->io.BasePort1);
+    dev_dbg(&link->dev, "baseport1 is %Xh\n",
+	    (unsigned int) link->resource[0]->start);
+
     i = -ENODEV;
-    if (!link->io.BasePort1)
+    link->io_lines = 16;
+
+    if (!link->resource[0]->start)
     {
 	for (ioaddr = 0x100; ioaddr < 0x400; ioaddr += 0x10)
 	{
-	    link->io.BasePort1 = ioaddr;
-	    i = pcmcia_request_io(link, &link->io);
+	    link->resource[0]->start = ioaddr;
+	    i = pcmcia_request_io(link);
 	    if (i == 0)
 		break;
 	}
     }
     else
-	i = pcmcia_request_io(link, &link->io);
+	i = pcmcia_request_io(link);
     
     if (i != 0)
     {
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index bba6369a028..699304480ae 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -248,9 +248,8 @@ static int fmvj18x_probe(struct pcmcia_device *link)
     lp->base = NULL;
 
     /* The io structure describes IO port mapping */
-    link->io.NumPorts1 = 32;
-    link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-    link->io.IOAddrLines = 5;
+    link->resource[0]->end = 32;
+    link->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
 
     /* General socket configuration */
     link->conf.Attributes = CONF_ENABLE_IRQ;
@@ -288,13 +287,13 @@ static int mfc_try_io_port(struct pcmcia_device *link)
 	{ 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 };
 
     for (i = 0; i < 5; i++) {
-	link->io.BasePort2 = serial_base[i];
-	link->io.Attributes2 = IO_DATA_PATH_WIDTH_8;
-	if (link->io.BasePort2 == 0) {
-	    link->io.NumPorts2 = 0;
+	link->resource[1]->start = serial_base[i];
+	link->resource[1]->flags |= IO_DATA_PATH_WIDTH_8;
+	if (link->resource[1]->start == 0) {
+	    link->resource[1]->end = 0;
 	    printk(KERN_NOTICE "fmvj18x_cs: out of resource for serial\n");
 	}
-	ret = pcmcia_request_io(link, &link->io);
+	ret = pcmcia_request_io(link);
 	if (ret == 0)
 		return ret;
     }
@@ -310,8 +309,8 @@ static int ungermann_try_io_port(struct pcmcia_device *link)
 	0x380,0x3c0 only for ioport.
     */
     for (ioaddr = 0x300; ioaddr < 0x3e0; ioaddr += 0x20) {
-	link->io.BasePort1 = ioaddr;
-	ret = pcmcia_request_io(link, &link->io);
+	link->resource[0]->start = ioaddr;
+	ret = pcmcia_request_io(link);
 	if (ret == 0) {
 	    /* calculate ConfigIndex value */
 	    link->conf.ConfigIndex = 
@@ -345,6 +344,8 @@ static int fmvj18x_config(struct pcmcia_device *link)
 
     dev_dbg(&link->dev, "fmvj18x_config\n");
 
+    link->io_lines = 5;
+
     len = pcmcia_get_tuple(link, CISTPL_FUNCE, &buf);
     kfree(buf);
 
@@ -363,20 +364,20 @@ static int fmvj18x_config(struct pcmcia_device *link)
 		/* MultiFunction Card */
 		link->conf.ConfigBase = 0x800;
 		link->conf.ConfigIndex = 0x47;
-		link->io.NumPorts2 = 8;
+		link->resource[1]->end = 8;
 	    }
 	    break;
 	case MANFID_NEC:
 	    cardtype = NEC; /* MultiFunction Card */
 	    link->conf.ConfigBase = 0x800;
 	    link->conf.ConfigIndex = 0x47;
-	    link->io.NumPorts2 = 8;
+	    link->resource[1]->end = 8;
 	    break;
 	case MANFID_KME:
 	    cardtype = KME; /* MultiFunction Card */
 	    link->conf.ConfigBase = 0x800;
 	    link->conf.ConfigIndex = 0x47;
-	    link->io.NumPorts2 = 8;
+	    link->resource[1]->end = 8;
 	    break;
 	case MANFID_CONTEC:
 	    cardtype = CONTEC;
@@ -417,14 +418,14 @@ static int fmvj18x_config(struct pcmcia_device *link)
 	}
     }
 
-    if (link->io.NumPorts2 != 0) {
+    if (link->resource[1]->end != 0) {
 	ret = mfc_try_io_port(link);
 	if (ret != 0) goto failed;
     } else if (cardtype == UNGERMANN) {
 	ret = ungermann_try_io_port(link);
 	if (ret != 0) goto failed;
     } else { 
-	    ret = pcmcia_request_io(link, &link->io);
+	    ret = pcmcia_request_io(link);
 	    if (ret)
 		    goto failed;
     }
diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c
index e99abaa92be..3fd859570db 100644
--- a/drivers/net/pcmcia/ibmtr_cs.c
+++ b/drivers/net/pcmcia/ibmtr_cs.c
@@ -151,9 +151,8 @@ static int __devinit ibmtr_attach(struct pcmcia_device *link)
     link->priv = info;
     info->ti = netdev_priv(dev);
 
-    link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-    link->io.NumPorts1 = 4;
-    link->io.IOAddrLines = 16;
+    link->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
+    link->resource[0]->end = 4;
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
     link->conf.Present = PRESENT_OPTION;
@@ -218,16 +217,17 @@ static int __devinit ibmtr_config(struct pcmcia_device *link)
     dev_dbg(&link->dev, "ibmtr_config\n");
 
     link->conf.ConfigIndex = 0x61;
+    link->io_lines = 16;
 
     /* Determine if this is PRIMARY or ALTERNATE. */
 
     /* Try PRIMARY card at 0xA20-0xA23 */
-    link->io.BasePort1 = 0xA20;
-    i = pcmcia_request_io(link, &link->io);
+    link->resource[0]->start = 0xA20;
+    i = pcmcia_request_io(link);
     if (i != 0) {
 	/* Couldn't get 0xA20-0xA23.  Try ALTERNATE at 0xA24-0xA27. */
-	link->io.BasePort1 = 0xA24;
-	ret = pcmcia_request_io(link, &link->io);
+	link->resource[0]->start = 0xA24;
+	ret = pcmcia_request_io(link);
 	if (ret)
 		goto failed;
     }
diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c
index 9980cbb81d3..68f2deeb3ad 100644
--- a/drivers/net/pcmcia/nmclan_cs.c
+++ b/drivers/net/pcmcia/nmclan_cs.c
@@ -458,9 +458,8 @@ static int nmclan_probe(struct pcmcia_device *link)
     link->priv = dev;
     
     spin_lock_init(&lp->bank_lock);
-    link->io.NumPorts1 = 32;
-    link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-    link->io.IOAddrLines = 5;
+    link->resource[0]->end = 32;
+    link->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
     link->conf.ConfigIndex = 1;
@@ -644,7 +643,8 @@ static int nmclan_config(struct pcmcia_device *link)
 
   dev_dbg(&link->dev, "nmclan_config\n");
 
-  ret = pcmcia_request_io(link, &link->io);
+  link->io_lines = 5;
+  ret = pcmcia_request_io(link);
   if (ret)
 	  goto failed;
   ret = pcmcia_request_exclusive_irq(link, mace_interrupt);
diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c
index c9cd2377ef9..9c5fc9dfc55 100644
--- a/drivers/net/pcmcia/pcnet_cs.c
+++ b/drivers/net/pcmcia/pcnet_cs.c
@@ -477,29 +477,31 @@ static hw_info_t *get_hwired(struct pcmcia_device *link)
 static int try_io_port(struct pcmcia_device *link)
 {
     int j, ret;
-    if (link->io.NumPorts1 == 32) {
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-	if (link->io.NumPorts2 > 0) {
+    link->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+    link->resource[1]->flags &= ~IO_DATA_PATH_WIDTH;
+    if (link->resource[0]->end == 32) {
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
+	if (link->resource[1]->end > 0) {
 	    /* for master/slave multifunction cards */
-	    link->io.Attributes2 = IO_DATA_PATH_WIDTH_8;
+	    link->resource[1]->flags |= IO_DATA_PATH_WIDTH_8;
 	}
     } else {
 	/* This should be two 16-port windows */
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-	link->io.Attributes2 = IO_DATA_PATH_WIDTH_16;
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
+	link->resource[1]->flags |= IO_DATA_PATH_WIDTH_16;
     }
-    if (link->io.BasePort1 == 0) {
-	link->io.IOAddrLines = 16;
+    if (link->resource[0]->start == 0) {
 	for (j = 0; j < 0x400; j += 0x20) {
-	    link->io.BasePort1 = j ^ 0x300;
-	    link->io.BasePort2 = (j ^ 0x300) + 0x10;
-	    ret = pcmcia_request_io(link, &link->io);
+	    link->resource[0]->start = j ^ 0x300;
+	    link->resource[1]->start = (j ^ 0x300) + 0x10;
+	    link->io_lines = 16;
+	    ret = pcmcia_request_io(link);
 	    if (ret == 0)
 		    return ret;
 	}
 	return ret;
     } else {
-	return pcmcia_request_io(link, &link->io);
+	return pcmcia_request_io(link);
     }
 }
 
@@ -520,18 +522,18 @@ static int pcnet_confcheck(struct pcmcia_device *p_dev,
 	   network function with window 0, and serial with window 1 */
 	if (io->nwin > 1) {
 		i = (io->win[1].len > io->win[0].len);
-		p_dev->io.BasePort2 = io->win[1-i].base;
-		p_dev->io.NumPorts2 = io->win[1-i].len;
+		p_dev->resource[1]->start = io->win[1-i].base;
+		p_dev->resource[1]->end = io->win[1-i].len;
 	} else {
-		i = p_dev->io.NumPorts2 = 0;
+		i = p_dev->resource[1]->end = 0;
 	}
 
 	*has_shmem = ((cfg->mem.nwin == 1) &&
 		      (cfg->mem.win[0].len >= 0x4000));
-	p_dev->io.BasePort1 = io->win[i].base;
-	p_dev->io.NumPorts1 = io->win[i].len;
-	p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
-	if (p_dev->io.NumPorts1 + p_dev->io.NumPorts2 >= 32)
+	p_dev->resource[0]->start = io->win[i].base;
+	p_dev->resource[0]->end = io->win[i].len;
+	p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
+	if (p_dev->resource[0]->end + p_dev->resource[1]->end >= 32)
 		return try_io_port(p_dev);
 
 	return 0;
diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c
index 1b0b3230dd7..a5e47796f6a 100644
--- a/drivers/net/pcmcia/smc91c92_cs.c
+++ b/drivers/net/pcmcia/smc91c92_cs.c
@@ -324,9 +324,8 @@ static int smc91c92_probe(struct pcmcia_device *link)
     link->priv = dev;
 
     spin_lock_init(&smc->lock);
-    link->io.NumPorts1 = 16;
-    link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-    link->io.IOAddrLines = 4;
+    link->resource[0]->end = 16;
+    link->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
 
@@ -427,12 +426,13 @@ static int mhz_mfc_config_check(struct pcmcia_device *p_dev,
 				void *priv_data)
 {
 	int k;
-	p_dev->io.BasePort2 = cf->io.win[0].base;
+	p_dev->resource[1]->start = cf->io.win[0].base;
 	for (k = 0; k < 0x400; k += 0x10) {
 		if (k & 0x80)
 			continue;
-		p_dev->io.BasePort1 = k ^ 0x300;
-		if (!pcmcia_request_io(p_dev, &p_dev->io))
+		p_dev->resource[0]->start = k ^ 0x300;
+		p_dev->io_lines = 16;
+		if (!pcmcia_request_io(p_dev))
 			return 0;
 	}
 	return -ENODEV;
@@ -448,9 +448,8 @@ static int mhz_mfc_config(struct pcmcia_device *link)
 
     link->conf.Attributes |= CONF_ENABLE_SPKR;
     link->conf.Status = CCSR_AUDIO_ENA;
-    link->io.IOAddrLines = 16;
-    link->io.Attributes2 = IO_DATA_PATH_WIDTH_8;
-    link->io.NumPorts2 = 8;
+    link->resource[1]->flags |= IO_DATA_PATH_WIDTH_8;
+    link->resource[1]->end = 8;
 
     /* The Megahertz combo cards have modem-like CIS entries, so
        we have to explicitly try a bunch of port combinations. */
@@ -601,9 +600,9 @@ static int smc_configcheck(struct pcmcia_device *p_dev,
 			   unsigned int vcc,
 			   void *priv_data)
 {
-	p_dev->io.BasePort1 = cf->io.win[0].base;
-	p_dev->io.IOAddrLines = cf->io.flags & CISTPL_IO_LINES_MASK;
-	return pcmcia_request_io(p_dev, &p_dev->io);
+	p_dev->resource[0]->start = cf->io.win[0].base;
+	p_dev->io_lines = cf->io.flags & CISTPL_IO_LINES_MASK;
+	return pcmcia_request_io(p_dev);
 }
 
 static int smc_config(struct pcmcia_device *link)
@@ -611,7 +610,7 @@ static int smc_config(struct pcmcia_device *link)
     struct net_device *dev = link->priv;
     int i;
 
-    link->io.NumPorts1 = 16;
+    link->resource[0]->end = 16;
     i = pcmcia_loop_config(link, smc_configcheck, NULL);
     if (!i)
 	    dev->base_addr = link->resource[0]->start;
@@ -646,25 +645,25 @@ static int osi_config(struct pcmcia_device *link)
 
     link->conf.Attributes |= CONF_ENABLE_SPKR;
     link->conf.Status = CCSR_AUDIO_ENA;
-    link->io.NumPorts1 = 64;
-    link->io.Attributes2 = IO_DATA_PATH_WIDTH_8;
-    link->io.NumPorts2 = 8;
-    link->io.IOAddrLines = 16;
+    link->resource[0]->end = 64;
+    link->resource[1]->flags |= IO_DATA_PATH_WIDTH_8;
+    link->resource[1]->end = 8;
 
     /* Enable Hard Decode, LAN, Modem */
     link->conf.ConfigIndex = 0x23;
+    link->io_lines = 16;
 
     for (i = j = 0; j < 4; j++) {
-	link->io.BasePort2 = com[j];
-	i = pcmcia_request_io(link, &link->io);
+	link->resource[1]->start = com[j];
+	i = pcmcia_request_io(link);
 	if (i == 0)
 		break;
     }
     if (i != 0) {
 	/* Fallback: turn off hard decode */
 	link->conf.ConfigIndex = 0x03;
-	link->io.NumPorts2 = 0;
-	i = pcmcia_request_io(link, &link->io);
+	link->resource[1]->end = 0;
+	i = pcmcia_request_io(link);
     }
     dev->base_addr = link->resource[0]->start + 0x10;
     return i;
@@ -803,7 +802,7 @@ static int check_sig(struct pcmcia_device *link)
     }
 
     /* Try setting bus width */
-    width = (link->io.Attributes1 == IO_DATA_PATH_WIDTH_AUTO);
+    width = (link->resource[0]->flags == IO_DATA_PATH_WIDTH_AUTO);
     s = inb(ioaddr + CONFIG);
     if (width)
 	s |= CFG_16BIT;
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index 034920b459d..8fb0eb1dc34 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -677,9 +677,9 @@ xirc2ps_config_modem(struct pcmcia_device *p_dev,
 
 	if (cf->io.nwin > 0  &&  (cf->io.win[0].base & 0xf) == 8) {
 		for (ioaddr = 0x300; ioaddr < 0x400; ioaddr += 0x10) {
-			p_dev->io.BasePort2 = cf->io.win[0].base;
-			p_dev->io.BasePort1 = ioaddr;
-			if (!pcmcia_request_io(p_dev, &p_dev->io))
+			p_dev->resource[1]->start = cf->io.win[0].base;
+			p_dev->resource[0]->start = ioaddr;
+			if (!pcmcia_request_io(p_dev))
 				return 0;
 		}
 	}
@@ -696,11 +696,11 @@ xirc2ps_config_check(struct pcmcia_device *p_dev,
 	int *pass = priv_data;
 
 	if (cf->io.nwin > 0 && (cf->io.win[0].base & 0xf) == 8) {
-		p_dev->io.BasePort2 = cf->io.win[0].base;
-		p_dev->io.BasePort1 = p_dev->io.BasePort2
+		p_dev->resource[1]->start = cf->io.win[0].base;
+		p_dev->resource[0]->start = p_dev->resource[1]->start
 			+ (*pass ? (cf->index & 0x20 ? -24:8)
 			   : (cf->index & 0x20 ?   8:-24));
-		if (!pcmcia_request_io(p_dev, &p_dev->io))
+		if (!pcmcia_request_io(p_dev))
 			return 0;
 	}
 	return -ENODEV;
@@ -807,8 +807,7 @@ xirc2ps_config(struct pcmcia_device * link)
 	goto failure;
     }
 
-    link->io.IOAddrLines =10;
-    link->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
+    link->resource[0]->flags |= IO_DATA_PATH_WIDTH_16;
     if (local->modem) {
 	int pass;
 
@@ -816,16 +815,16 @@ xirc2ps_config(struct pcmcia_device * link)
 	    link->conf.Attributes |= CONF_ENABLE_SPKR;
 	    link->conf.Status |= CCSR_AUDIO_ENA;
 	}
-	link->io.NumPorts2 = 8;
-	link->io.Attributes2 = IO_DATA_PATH_WIDTH_8;
+	link->resource[1]->end = 8;
+	link->resource[1]->flags |= IO_DATA_PATH_WIDTH_8;
 	if (local->dingo) {
 	    /* Take the Modem IO port from the CIS and scan for a free
 	     * Ethernet port */
-	    link->io.NumPorts1 = 16; /* no Mako stuff anymore */
+	    link->resource[0]->end = 16; /* no Mako stuff anymore */
 	    if (!pcmcia_loop_config(link, xirc2ps_config_modem, NULL))
 		    goto port_found;
 	} else {
-	    link->io.NumPorts1 = 18;
+	    link->resource[0]->end = 18;
 	    /* We do 2 passes here: The first one uses the regular mapping and
 	     * the second tries again, thereby considering that the 32 ports are
 	     * mirrored every 32 bytes. Actually we use a mirrored port for
@@ -840,14 +839,15 @@ xirc2ps_config(struct pcmcia_device * link)
 	}
 	printk(KNOT_XIRC "no ports available\n");
     } else {
-	link->io.NumPorts1 = 16;
+	link->io_lines = 10;
+	link->resource[0]->end = 16;
 	for (ioaddr = 0x300; ioaddr < 0x400; ioaddr += 0x10) {
-	    link->io.BasePort1 = ioaddr;
-	    if (!(err=pcmcia_request_io(link, &link->io)))
+	    link->resource[0]->start = ioaddr;
+	    if (!(err = pcmcia_request_io(link)))
 		goto port_found;
 	}
-	link->io.BasePort1 = 0; /* let CS decide */
-	if ((err=pcmcia_request_io(link, &link->io)))
+	link->resource[0]->start = 0; /* let CS decide */
+	if ((err = pcmcia_request_io(link)))
 	    goto config_error;
     }
   port_found:
diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c
index b7e7f5054e4..d241b4aed71 100644
--- a/drivers/net/wireless/airo_cs.c
+++ b/drivers/net/wireless/airo_cs.c
@@ -175,25 +175,23 @@ static int airo_cs_config_check(struct pcmcia_device *p_dev,
 	p_dev->conf.Attributes |= CONF_ENABLE_IRQ;
 
 	/* IO window settings */
-	p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0;
+	p_dev->resource[0]->end = p_dev->resource[1]->end = 0;
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-		if (!(io->flags & CISTPL_IO_8BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-		if (!(io->flags & CISTPL_IO_16BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-		p_dev->io.BasePort1 = io->win[0].base;
-		p_dev->io.NumPorts1 = io->win[0].len;
+		p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+		p_dev->resource[0]->flags |=
+					pcmcia_io_cfg_data_width(io->flags);
+		p_dev->resource[0]->start = io->win[0].base;
+		p_dev->resource[0]->end = io->win[0].len;
 		if (io->nwin > 1) {
-			p_dev->io.Attributes2 = p_dev->io.Attributes1;
-			p_dev->io.BasePort2 = io->win[1].base;
-			p_dev->io.NumPorts2 = io->win[1].len;
+			p_dev->resource[1]->flags = p_dev->resource[0]->flags;
+			p_dev->resource[1]->start = io->win[1].base;
+			p_dev->resource[1]->end = io->win[1].len;
 		}
 	}
 
 	/* This reserves IO space but doesn't actually enable it */
-	if (pcmcia_request_io(p_dev, &p_dev->io) != 0)
+	if (pcmcia_request_io(p_dev) != 0)
 		return -ENODEV;
 
 	/*
diff --git a/drivers/net/wireless/atmel_cs.c b/drivers/net/wireless/atmel_cs.c
index 65b3aed49e5..3b632161c10 100644
--- a/drivers/net/wireless/atmel_cs.c
+++ b/drivers/net/wireless/atmel_cs.c
@@ -190,25 +190,23 @@ static int atmel_config_check(struct pcmcia_device *p_dev,
 	p_dev->conf.Attributes |= CONF_ENABLE_IRQ;
 
 	/* IO window settings */
-	p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0;
+	p_dev->resource[0]->end = p_dev->resource[1]->end = 0;
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-		if (!(io->flags & CISTPL_IO_8BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-		if (!(io->flags & CISTPL_IO_16BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-		p_dev->io.BasePort1 = io->win[0].base;
-		p_dev->io.NumPorts1 = io->win[0].len;
+		p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+		p_dev->resource[0]->flags |=
+					pcmcia_io_cfg_data_width(io->flags);
+		p_dev->resource[0]->start = io->win[0].base;
+		p_dev->resource[0]->end = io->win[0].len;
 		if (io->nwin > 1) {
-			p_dev->io.Attributes2 = p_dev->io.Attributes1;
-			p_dev->io.BasePort2 = io->win[1].base;
-			p_dev->io.NumPorts2 = io->win[1].len;
+			p_dev->resource[1]->flags = p_dev->resource[0]->flags;
+			p_dev->resource[1]->start = io->win[1].base;
+			p_dev->resource[1]->end = io->win[1].len;
 		}
 	}
 
 	/* This reserves IO space but doesn't actually enable it */
-	return pcmcia_request_io(p_dev, &p_dev->io);
+	return pcmcia_request_io(p_dev);
 }
 
 static int atmel_config(struct pcmcia_device *link)
diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c
index f71bc782137..7c9af82fcf7 100644
--- a/drivers/net/wireless/b43/pcmcia.c
+++ b/drivers/net/wireless/b43/pcmcia.c
@@ -77,10 +77,6 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev)
 	dev->conf.Attributes = CONF_ENABLE_IRQ;
 	dev->conf.IntType = INT_MEMORY_AND_IO;
 
-	dev->io.BasePort2 = 0;
-	dev->io.NumPorts2 = 0;
-	dev->io.Attributes2 = 0;
-
 	win.Attributes = WIN_ADDR_SPACE_MEM | WIN_MEMORY_TYPE_CM |
 			 WIN_ENABLE | WIN_DATA_WIDTH_16 |
 			 WIN_USE_WAIT;
diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c
index 4e13cedb823..ba54d1b04d2 100644
--- a/drivers/net/wireless/hostap/hostap_cs.c
+++ b/drivers/net/wireless/hostap/hostap_cs.c
@@ -519,30 +519,24 @@ static int prism2_config_check(struct pcmcia_device *p_dev,
 	PDEBUG(DEBUG_EXTRA, "IO window settings: cfg->io.nwin=%d "
 	       "dflt->io.nwin=%d\n",
 	       cfg->io.nwin, dflt->io.nwin);
-	p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0;
+	p_dev->resource[0]->end = p_dev->resource[1]->end = 0;
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-		PDEBUG(DEBUG_EXTRA, "io->flags = 0x%04X, "
-		       "io.base=0x%04x, len=%d\n", io->flags,
-		       io->win[0].base, io->win[0].len);
-		if (!(io->flags & CISTPL_IO_8BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-		if (!(io->flags & CISTPL_IO_16BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-		p_dev->io.IOAddrLines = io->flags &
-			CISTPL_IO_LINES_MASK;
-		p_dev->io.BasePort1 = io->win[0].base;
-		p_dev->io.NumPorts1 = io->win[0].len;
+		p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+		p_dev->resource[0]->flags |=
+					pcmcia_io_cfg_data_width(io->flags);
+		p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
+		p_dev->resource[0]->start = io->win[0].base;
+		p_dev->resource[0]->end = io->win[0].len;
 		if (io->nwin > 1) {
-			p_dev->io.Attributes2 = p_dev->io.Attributes1;
-			p_dev->io.BasePort2 = io->win[1].base;
-			p_dev->io.NumPorts2 = io->win[1].len;
+			p_dev->resource[1]->flags = p_dev->resource[0]->flags;
+			p_dev->resource[1]->start = io->win[1].base;
+			p_dev->resource[1]->end = io->win[1].len;
 		}
 	}
 
 	/* This reserves IO space but doesn't actually enable it */
-	return pcmcia_request_io(p_dev, &p_dev->io);
+	return pcmcia_request_io(p_dev);
 }
 
 static int prism2_config(struct pcmcia_device *link)
diff --git a/drivers/net/wireless/libertas/if_cs.c b/drivers/net/wireless/libertas/if_cs.c
index be4c47594b5..9c298396be5 100644
--- a/drivers/net/wireless/libertas/if_cs.c
+++ b/drivers/net/wireless/libertas/if_cs.c
@@ -801,9 +801,9 @@ static int if_cs_ioprobe(struct pcmcia_device *p_dev,
 			 unsigned int vcc,
 			 void *priv_data)
 {
-	p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-	p_dev->io.BasePort1 = cfg->io.win[0].base;
-	p_dev->io.NumPorts1 = cfg->io.win[0].len;
+	p_dev->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
+	p_dev->resource[0]->start = cfg->io.win[0].base;
+	p_dev->resource[0]->end = cfg->io.win[0].len;
 
 	/* Do we need to allocate an interrupt? */
 	p_dev->conf.Attributes |= CONF_ENABLE_IRQ;
@@ -815,7 +815,7 @@ static int if_cs_ioprobe(struct pcmcia_device *p_dev,
 	}
 
 	/* This reserves IO space but doesn't actually enable it */
-	return pcmcia_request_io(p_dev, &p_dev->io);
+	return pcmcia_request_io(p_dev);
 }
 
 static int if_cs_probe(struct pcmcia_device *p_dev)
diff --git a/drivers/net/wireless/orinoco/orinoco_cs.c b/drivers/net/wireless/orinoco/orinoco_cs.c
index 6d514b5462f..ef46a2d8853 100644
--- a/drivers/net/wireless/orinoco/orinoco_cs.c
+++ b/drivers/net/wireless/orinoco/orinoco_cs.c
@@ -191,25 +191,23 @@ static int orinoco_cs_config_check(struct pcmcia_device *p_dev,
 	p_dev->conf.Attributes |= CONF_ENABLE_IRQ;
 
 	/* IO window settings */
-	p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0;
+	p_dev->resource[0]->end = p_dev->resource[1]->end = 0;
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-		if (!(io->flags & CISTPL_IO_8BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-		if (!(io->flags & CISTPL_IO_16BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-		p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
-		p_dev->io.BasePort1 = io->win[0].base;
-		p_dev->io.NumPorts1 = io->win[0].len;
+		p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
+		p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+		p_dev->resource[0]->flags |=
+			pcmcia_io_cfg_data_width(io->flags);
+		p_dev->resource[0]->start = io->win[0].base;
+		p_dev->resource[0]->end = io->win[0].len;
 		if (io->nwin > 1) {
-			p_dev->io.Attributes2 = p_dev->io.Attributes1;
-			p_dev->io.BasePort2 = io->win[1].base;
-			p_dev->io.NumPorts2 = io->win[1].len;
+			p_dev->resource[1]->flags = p_dev->resource[0]->flags;
+			p_dev->resource[1]->start = io->win[1].base;
+			p_dev->resource[1]->end = io->win[1].len;
 		}
 
 		/* This reserves IO space but doesn't actually enable it */
-		if (pcmcia_request_io(p_dev, &p_dev->io) != 0)
+		if (pcmcia_request_io(p_dev) != 0)
 			goto next_entry;
 	}
 	return 0;
diff --git a/drivers/net/wireless/orinoco/spectrum_cs.c b/drivers/net/wireless/orinoco/spectrum_cs.c
index 4f8f55eab95..873877e17e1 100644
--- a/drivers/net/wireless/orinoco/spectrum_cs.c
+++ b/drivers/net/wireless/orinoco/spectrum_cs.c
@@ -253,25 +253,23 @@ static int spectrum_cs_config_check(struct pcmcia_device *p_dev,
 	p_dev->conf.Attributes |= CONF_ENABLE_IRQ;
 
 	/* IO window settings */
-	p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0;
+	p_dev->resource[0]->end = p_dev->resource[1]->end = 0;
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-		if (!(io->flags & CISTPL_IO_8BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-		if (!(io->flags & CISTPL_IO_16BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-		p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
-		p_dev->io.BasePort1 = io->win[0].base;
-		p_dev->io.NumPorts1 = io->win[0].len;
+		p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
+		p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+		p_dev->resource[0]->flags |=
+			pcmcia_io_cfg_data_width(io->flags);
+		p_dev->resource[0]->start = io->win[0].base;
+		p_dev->resource[0]->end = io->win[0].len;
 		if (io->nwin > 1) {
-			p_dev->io.Attributes2 = p_dev->io.Attributes1;
-			p_dev->io.BasePort2 = io->win[1].base;
-			p_dev->io.NumPorts2 = io->win[1].len;
+			p_dev->resource[1]->flags = p_dev->resource[0]->flags;
+			p_dev->resource[1]->start = io->win[1].base;
+			p_dev->resource[1]->end = io->win[1].len;
 		}
 
 		/* This reserves IO space but doesn't actually enable it */
-		if (pcmcia_request_io(p_dev, &p_dev->io) != 0)
+		if (pcmcia_request_io(p_dev) != 0)
 			goto next_entry;
 	}
 	return 0;
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 165beb6af84..b83d5ef1dff 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -315,9 +315,8 @@ static int ray_probe(struct pcmcia_device *p_dev)
 	local->finder = p_dev;
 
 	/* The io structure describes IO port mapping. None used here */
-	p_dev->io.NumPorts1 = 0;
-	p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-	p_dev->io.IOAddrLines = 5;
+	p_dev->resource[0]->end = 0;
+	p_dev->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
 
 	/* General socket configuration */
 	p_dev->conf.Attributes = CONF_ENABLE_IRQ;
diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
index a32f220648c..a1cc2d498a1 100644
--- a/drivers/net/wireless/wl3501_cs.c
+++ b/drivers/net/wireless/wl3501_cs.c
@@ -1884,9 +1884,8 @@ static int wl3501_probe(struct pcmcia_device *p_dev)
 	struct wl3501_card *this;
 
 	/* The io structure describes IO port mapping */
-	p_dev->io.NumPorts1	= 16;
-	p_dev->io.Attributes1	= IO_DATA_PATH_WIDTH_8;
-	p_dev->io.IOAddrLines	= 5;
+	p_dev->resource[0]->end	= 16;
+	p_dev->resource[0]->flags	= IO_DATA_PATH_WIDTH_8;
 
 	/* General socket configuration */
 	p_dev->conf.Attributes	= CONF_ENABLE_IRQ;
@@ -1932,13 +1931,14 @@ static int wl3501_config(struct pcmcia_device *link)
 	/* Try allocating IO ports.  This tries a few fixed addresses.  If you
 	 * want, you can also read the card's config table to pick addresses --
 	 * see the serial driver for an example. */
+	link->io_lines = 5;
 
 	for (j = 0x280; j < 0x400; j += 0x20) {
 		/* The '^0x300' is so that we probe 0x300-0x3ff first, then
 		 * 0x200-0x2ff, and so on, because this seems safer */
-		link->io.BasePort1 = j;
-		link->io.BasePort2 = link->io.BasePort1 + 0x10;
-		i = pcmcia_request_io(link, &link->io);
+		link->resource[0]->start = j;
+		link->resource[1]->start = link->resource[0]->start + 0x10;
+		i = pcmcia_request_io(link);
 		if (i == 0)
 			break;
 	}
diff --git a/drivers/parport/parport_cs.c b/drivers/parport/parport_cs.c
index fc1639c5ada..23e50f4a27c 100644
--- a/drivers/parport/parport_cs.c
+++ b/drivers/parport/parport_cs.c
@@ -101,8 +101,8 @@ static int parport_probe(struct pcmcia_device *link)
     link->priv = info;
     info->p_dev = link;
 
-    link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-    link->io.Attributes2 = IO_DATA_PATH_WIDTH_8;
+    link->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
+    link->resource[1]->flags |= IO_DATA_PATH_WIDTH_8;
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
 
@@ -143,16 +143,16 @@ static int parport_config_check(struct pcmcia_device *p_dev,
 {
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
+		p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
 		if (epp_mode)
 			p_dev->conf.ConfigIndex |= FORCE_EPP_MODE;
-		p_dev->io.BasePort1 = io->win[0].base;
-		p_dev->io.NumPorts1 = io->win[0].len;
-		p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
+		p_dev->resource[0]->start = io->win[0].base;
+		p_dev->resource[0]->end = io->win[0].len;
 		if (io->nwin == 2) {
-			p_dev->io.BasePort2 = io->win[1].base;
-			p_dev->io.NumPorts2 = io->win[1].len;
+			p_dev->resource[1]->start = io->win[1].base;
+			p_dev->resource[1]->end = io->win[1].len;
 		}
-		if (pcmcia_request_io(p_dev, &p_dev->io) != 0)
+		if (pcmcia_request_io(p_dev) != 0)
 			return -ENODEV;
 		return 0;
 	}
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index fcd48dae79b..a48d4a91d44 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -70,7 +70,8 @@ static int alloc_io_space(struct pcmcia_socket *s, struct resource *res,
 
 	res->flags |= IORESOURCE_IO;
 
-	dev_dbg(&s->dev, "alloc_io_space request for %pR\n", res);
+	dev_dbg(&s->dev, "alloc_io_space request for %pR, %d lines\n",
+		res, lines);
 
 	align = base ? (lines ? 1<<lines : 0) : 1;
 	if (align && (align < num)) {
@@ -541,38 +542,25 @@ EXPORT_SYMBOL(pcmcia_request_configuration);
  * pcmcia_request_io() - attempt to reserve port ranges for PCMCIA devices
  *
  * pcmcia_request_io() attepts to reserve the IO port ranges specified in
- * struct pcmcia_device *p_dev->resource[0] and *p_dev->resource[1]. The
+ * &struct pcmcia_device @p_dev->resource[0] and @p_dev->resource[1]. The
  * "start" value is the requested start of the IO port resource; "end"
- * relfects the number of ports requested.
- *
- * If io_req_t is passed, those values are converted automatically.
+ * reflects the number of ports requested. The number of IO lines requested
+ * is specified in &struct pcmcia_device @p_dev->io_lines.
  */
-int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req)
+int pcmcia_request_io(struct pcmcia_device *p_dev)
 {
 	struct pcmcia_socket *s = p_dev->socket;
-	config_t *c;
+	config_t *c = p_dev->function_config;
 	int ret = -EINVAL;
-	unsigned int lines = req->IOAddrLines;
 
 	mutex_lock(&s->ops_mutex);
+	dev_dbg(&s->dev, "pcmcia_request_io: %pR , %pR", &c->io[0], &c->io[1]);
 
 	if (!(s->state & SOCKET_PRESENT)) {
 		dev_dbg(&s->dev, "pcmcia_request_io: No card present\n");
 		goto out;
 	}
 
-	c = p_dev->function_config;
-	if (req) {
-		c->io[0].start = req->BasePort1;
-		c->io[0].end = req->NumPorts1;
-		c->io[0].flags |= req->Attributes1;
-		c->io[1].start = req->BasePort2;
-		c->io[1].end = req->NumPorts2;
-		c->io[1].flags |= req->Attributes2;
-	}
-
-	dev_dbg(&s->dev, "pcmcia_request_io: %pR , %pR", &c->io[0], &c->io[1]);
-
 	if (c->state & CONFIG_LOCKED) {
 		dev_dbg(&s->dev, "Configuration is locked\n");
 		goto out;
@@ -582,12 +570,12 @@ int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req)
 		goto out;
 	}
 
-	ret = alloc_io_space(s, &c->io[0], lines);
+	ret = alloc_io_space(s, &c->io[0], p_dev->io_lines);
 	if (ret)
 		goto out;
 
 	if (c->io[1].end) {
-		ret = alloc_io_space(s, &c->io[1], lines);
+		ret = alloc_io_space(s, &c->io[1], p_dev->io_lines);
 		if (ret) {
 			release_io_space(s, &c->io[0]);
 			goto out;
@@ -598,11 +586,6 @@ int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req)
 	c->state |= CONFIG_IO_REQ;
 	p_dev->_io = 1;
 
-	if (!ret) {
-		req->BasePort1 = c->io[0].start;
-		req->BasePort2 = c->io[1].start;
-	}
-
 	dev_dbg(&s->dev, "pcmcia_request_io succeeded: %pR , %pR",
 		&c->io[0], &c->io[1]);
 out:
diff --git a/drivers/scsi/pcmcia/aha152x_stub.c b/drivers/scsi/pcmcia/aha152x_stub.c
index 3e040f503af..61f49bdcc0c 100644
--- a/drivers/scsi/pcmcia/aha152x_stub.c
+++ b/drivers/scsi/pcmcia/aha152x_stub.c
@@ -100,9 +100,8 @@ static int aha152x_probe(struct pcmcia_device *link)
     info->p_dev = link;
     link->priv = info;
 
-    link->io.NumPorts1 = 0x20;
-    link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-    link->io.IOAddrLines = 10;
+    link->resource[0]->end = 0x20;
+    link->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
     link->conf.Present = PRESENT_OPTION;
@@ -130,15 +129,16 @@ static int aha152x_config_check(struct pcmcia_device *p_dev,
 				unsigned int vcc,
 				void *priv_data)
 {
+	p_dev->io_lines = 10;
 	/* For New Media T&J, look for a SCSI window */
 	if (cfg->io.win[0].len >= 0x20)
-		p_dev->io.BasePort1 = cfg->io.win[0].base;
+		p_dev->resource[0]->start = cfg->io.win[0].base;
 	else if ((cfg->io.nwin > 1) &&
 		 (cfg->io.win[1].len >= 0x20))
-		p_dev->io.BasePort1 = cfg->io.win[1].base;
+		p_dev->resource[0]->start = cfg->io.win[1].base;
 	if ((cfg->io.nwin > 0) &&
-	    (p_dev->io.BasePort1 < 0xffff)) {
-		if (!pcmcia_request_io(p_dev, &p_dev->io))
+	    (p_dev->resource[0]->start < 0xffff)) {
+		if (!pcmcia_request_io(p_dev))
 			return 0;
 	}
 	return -EINVAL;
diff --git a/drivers/scsi/pcmcia/fdomain_stub.c b/drivers/scsi/pcmcia/fdomain_stub.c
index 49a9a0a60c8..13dbe5c4849 100644
--- a/drivers/scsi/pcmcia/fdomain_stub.c
+++ b/drivers/scsi/pcmcia/fdomain_stub.c
@@ -83,9 +83,8 @@ static int fdomain_probe(struct pcmcia_device *link)
 
 	info->p_dev = link;
 	link->priv = info;
-	link->io.NumPorts1 = 0x10;
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-	link->io.IOAddrLines = 10;
+	link->resource[0]->end = 0x10;
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
 	link->conf.Present = PRESENT_OPTION;
@@ -112,8 +111,9 @@ static int fdomain_config_check(struct pcmcia_device *p_dev,
 				unsigned int vcc,
 				void *priv_data)
 {
-	p_dev->io.BasePort1 = cfg->io.win[0].base;
-	return pcmcia_request_io(p_dev, &p_dev->io);
+	p_dev->io_lines = 10;
+	p_dev->resource[0]->start = cfg->io.win[0].base;
+	return pcmcia_request_io(p_dev);
 }
 
 
diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
index d929891809a..8bb598bb440 100644
--- a/drivers/scsi/pcmcia/nsp_cs.c
+++ b/drivers/scsi/pcmcia/nsp_cs.c
@@ -1558,9 +1558,8 @@ static int nsp_cs_probe(struct pcmcia_device *link)
 	nsp_dbg(NSP_DEBUG_INIT, "info=0x%p", info);
 
 	/* The io structure describes IO port mapping */
-	link->io.NumPorts1	 = 0x10;
-	link->io.Attributes1	 = IO_DATA_PATH_WIDTH_AUTO;
-	link->io.IOAddrLines	 = 10;	/* not used */
+	link->resource[0]->end	 = 0x10;
+	link->resource[0]->flags = IO_DATA_PATH_WIDTH_AUTO;
 
 	/* General socket configuration */
 	link->conf.Attributes	 = CONF_ENABLE_IRQ;
@@ -1641,24 +1640,23 @@ static int nsp_cs_config_check(struct pcmcia_device *p_dev,
 		p_dev->conf.Attributes |= CONF_ENABLE_IRQ;
 
 		/* IO window settings */
-		p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0;
+		p_dev->resource[0]->end = p_dev->resource[1]->end = 0;
 		if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 			cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-			if (!(io->flags & CISTPL_IO_8BIT))
-				p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-			if (!(io->flags & CISTPL_IO_16BIT))
-				p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-			p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
-			p_dev->io.BasePort1 = io->win[0].base;
-			p_dev->io.NumPorts1 = io->win[0].len;
+			p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
+			p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+			p_dev->resource[0]->flags |=
+				pcmcia_io_cfg_data_width(io->flags);
+			p_dev->resource[0]->start = io->win[0].base;
+			p_dev->resource[0]->end = io->win[0].len;
 			if (io->nwin > 1) {
-				p_dev->io.Attributes2 = p_dev->io.Attributes1;
-				p_dev->io.BasePort2 = io->win[1].base;
-				p_dev->io.NumPorts2 = io->win[1].len;
+				p_dev->resource[1]->flags =
+					p_dev->resource[0]->flags;
+				p_dev->resource[1]->start = io->win[1].base;
+				p_dev->resource[1]->end = io->win[1].len;
 			}
 			/* This reserves IO space but doesn't actually enable it */
-			if (pcmcia_request_io(p_dev, &p_dev->io) != 0)
+			if (pcmcia_request_io(p_dev) != 0)
 				goto next_entry;
 		}
 
diff --git a/drivers/scsi/pcmcia/qlogic_stub.c b/drivers/scsi/pcmcia/qlogic_stub.c
index 4e2b83f2623..eb775f1a523 100644
--- a/drivers/scsi/pcmcia/qlogic_stub.c
+++ b/drivers/scsi/pcmcia/qlogic_stub.c
@@ -156,9 +156,8 @@ static int qlogic_probe(struct pcmcia_device *link)
 		return -ENOMEM;
 	info->p_dev = link;
 	link->priv = info;
-	link->io.NumPorts1 = 16;
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-	link->io.IOAddrLines = 10;
+	link->resource[0]->end = 16;
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
 	link->conf.Present = PRESENT_OPTION;
@@ -185,13 +184,14 @@ static int qlogic_config_check(struct pcmcia_device *p_dev,
 			       unsigned int vcc,
 			       void *priv_data)
 {
-	p_dev->io.BasePort1 = cfg->io.win[0].base;
-	p_dev->io.NumPorts1 = cfg->io.win[0].len;
+	p_dev->io_lines = 10;
+	p_dev->resource[0]->start = cfg->io.win[0].base;
+	p_dev->resource[0]->end = cfg->io.win[0].len;
 
-	if (p_dev->io.BasePort1 == 0)
+	if (p_dev->resource[0]->start == 0)
 		return -ENODEV;
 
-	return pcmcia_request_io(p_dev, &p_dev->io);
+	return pcmcia_request_io(p_dev);
 }
 
 static int qlogic_config(struct pcmcia_device * link)
diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c
index d99c0cbad2d..321e390c912 100644
--- a/drivers/scsi/pcmcia/sym53c500_cs.c
+++ b/drivers/scsi/pcmcia/sym53c500_cs.c
@@ -690,13 +690,14 @@ static int SYM53C500_config_check(struct pcmcia_device *p_dev,
 				  unsigned int vcc,
 				  void *priv_data)
 {
-	p_dev->io.BasePort1 = cfg->io.win[0].base;
-	p_dev->io.NumPorts1 = cfg->io.win[0].len;
+	p_dev->io_lines = 10;
+	p_dev->resource[0]->start = cfg->io.win[0].base;
+	p_dev->resource[0]->end = cfg->io.win[0].len;
 
-	if (p_dev->io.BasePort1 == 0)
+	if (p_dev->resource[0]->start == 0)
 		return -ENODEV;
 
-	return pcmcia_request_io(p_dev, &p_dev->io);
+	return pcmcia_request_io(p_dev);
 }
 
 static int
@@ -858,9 +859,8 @@ SYM53C500_probe(struct pcmcia_device *link)
 		return -ENOMEM;
 	info->p_dev = link;
 	link->priv = info;
-	link->io.NumPorts1 = 16;
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-	link->io.IOAddrLines = 10;
+	link->resource[0]->end = 16;
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
 
diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c
index fe7adcdfde9..141c69554bd 100644
--- a/drivers/serial/serial_cs.c
+++ b/drivers/serial/serial_cs.c
@@ -335,8 +335,8 @@ static int serial_probe(struct pcmcia_device *link)
 	info->p_dev = link;
 	link->priv = info;
 
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-	link->io.NumPorts1 = 8;
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
+	link->resource[0]->end = 8;
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	if (do_sound) {
 		link->conf.Attributes |= CONF_ENABLE_SPKR;
@@ -424,12 +424,13 @@ static int simple_config_check(struct pcmcia_device *p_dev,
 		p_dev->conf.Vpp =
 			cf->vpp1.param[CISTPL_POWER_VNOM] / 10000;
 
+	p_dev->io_lines = ((*try & 0x1) == 0) ?
+			16 : cf->io.flags & CISTPL_IO_LINES_MASK;
+
 	if ((cf->io.nwin > 0) && (cf->io.win[0].len == size_table[(*try >> 1)])
 	    && (cf->io.win[0].base != 0)) {
-		p_dev->io.BasePort1 = cf->io.win[0].base;
-		p_dev->io.IOAddrLines = ((*try & 0x1) == 0) ?
-			16 : cf->io.flags & CISTPL_IO_LINES_MASK;
-		if (!pcmcia_request_io(p_dev, &p_dev->io))
+		p_dev->resource[0]->start = cf->io.win[0].base;
+		if (!pcmcia_request_io(p_dev))
 			return 0;
 	}
 	return -EINVAL;
@@ -446,9 +447,9 @@ static int simple_config_check_notpicky(struct pcmcia_device *p_dev,
 
 	if ((cf->io.nwin > 0) && ((cf->io.flags & CISTPL_IO_LINES_MASK) <= 3)) {
 		for (j = 0; j < 5; j++) {
-			p_dev->io.BasePort1 = base[j];
-			p_dev->io.IOAddrLines = base[j] ? 16 : 3;
-			if (!pcmcia_request_io(p_dev, &p_dev->io))
+			p_dev->resource[0]->start = base[j];
+			p_dev->io_lines = base[j] ? 16 : 3;
+			if (!pcmcia_request_io(p_dev))
 				return 0;
 		}
 	}
@@ -521,9 +522,9 @@ static int multi_config_check(struct pcmcia_device *p_dev,
 	/* The quad port cards have bad CIS's, so just look for a
 	   window larger than 8 ports and assume it will be right */
 	if ((cf->io.nwin == 1) && (cf->io.win[0].len > 8)) {
-		p_dev->io.BasePort1 = cf->io.win[0].base;
-		p_dev->io.IOAddrLines = cf->io.flags & CISTPL_IO_LINES_MASK;
-		if (!pcmcia_request_io(p_dev, &p_dev->io)) {
+		p_dev->resource[0]->start = cf->io.win[0].base;
+		p_dev->io_lines = cf->io.flags & CISTPL_IO_LINES_MASK;
+		if (!pcmcia_request_io(p_dev)) {
 			*base2 = p_dev->resource[0]->start + 8;
 			return 0;
 		}
@@ -540,10 +541,10 @@ static int multi_config_check_notpicky(struct pcmcia_device *p_dev,
 	int *base2 = priv_data;
 
 	if (cf->io.nwin == 2) {
-		p_dev->io.BasePort1 = cf->io.win[0].base;
-		p_dev->io.BasePort2 = cf->io.win[1].base;
-		p_dev->io.IOAddrLines = cf->io.flags & CISTPL_IO_LINES_MASK;
-		if (!pcmcia_request_io(p_dev, &p_dev->io)) {
+		p_dev->resource[0]->start = cf->io.win[0].base;
+		p_dev->resource[1]->start = cf->io.win[1].base;
+		p_dev->io_lines = cf->io.flags & CISTPL_IO_LINES_MASK;
+		if (!pcmcia_request_io(p_dev)) {
 			*base2 = p_dev->resource[1]->start;
 			return 0;
 		}
@@ -557,10 +558,10 @@ static int multi_config(struct pcmcia_device *link)
 	int i, base2 = 0;
 
 	/* First, look for a generic full-sized window */
-	link->io.NumPorts1 = info->multi * 8;
+	link->resource[0]->end = info->multi * 8;
 	if (pcmcia_loop_config(link, multi_config_check, &base2)) {
 		/* If that didn't work, look for two windows */
-		link->io.NumPorts1 = link->io.NumPorts2 = 8;
+		link->resource[0]->end = link->resource[1]->end = 8;
 		info->multi = 2;
 		if (pcmcia_loop_config(link, multi_config_check_notpicky,
 				       &base2)) {
diff --git a/drivers/staging/comedi/drivers/cb_das16_cs.c b/drivers/staging/comedi/drivers/cb_das16_cs.c
index 208f1b7a131..7cf0ccb5ade 100644
--- a/drivers/staging/comedi/drivers/cb_das16_cs.c
+++ b/drivers/staging/comedi/drivers/cb_das16_cs.c
@@ -736,24 +736,22 @@ static int das16cs_pcmcia_config_loop(struct pcmcia_device *p_dev,
 	p_dev->conf.Attributes |= CONF_ENABLE_IRQ;
 
 	/* IO window settings */
-	p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0;
+	p_dev->resource[0]->end = p_dev->resource[1]->end = 0;
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-		if (!(io->flags & CISTPL_IO_8BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-		if (!(io->flags & CISTPL_IO_16BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-		p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
-		p_dev->io.BasePort1 = io->win[0].base;
-		p_dev->io.NumPorts1 = io->win[0].len;
+		p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
+		p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+		p_dev->resource[0]->flags |=
+			pcmcia_io_cfg_data_width(io->flags);
+		p_dev->resource[0]->start = io->win[0].base;
+		p_dev->resource[0]->end = io->win[0].len;
 		if (io->nwin > 1) {
-			p_dev->io.Attributes2 = p_dev->io.Attributes1;
-			p_dev->io.BasePort2 = io->win[1].base;
-			p_dev->io.NumPorts2 = io->win[1].len;
+			p_dev->resource[1]->flags = p_dev->resource[0]->flags;
+			p_dev->resource[1]->start = io->win[1].base;
+			p_dev->resource[1]->end = io->win[1].len;
 		}
 		/* This reserves IO space but doesn't actually enable it */
-		return pcmcia_request_io(p_dev, &p_dev->io);
+		return pcmcia_request_io(p_dev);
 	}
 
 	return 0;
diff --git a/drivers/staging/comedi/drivers/das08_cs.c b/drivers/staging/comedi/drivers/das08_cs.c
index c4cfcffc633..9ee677f14b6 100644
--- a/drivers/staging/comedi/drivers/das08_cs.c
+++ b/drivers/staging/comedi/drivers/das08_cs.c
@@ -224,24 +224,22 @@ static int das08_pcmcia_config_loop(struct pcmcia_device *p_dev,
 	p_dev->conf.Attributes |= CONF_ENABLE_IRQ;
 
 	/* IO window settings */
-	p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0;
+	p_dev->resource[0]->end = p_dev->resource[1]->end = 0;
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-		if (!(io->flags & CISTPL_IO_8BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-		if (!(io->flags & CISTPL_IO_16BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
+		p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
+		p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+		p_dev->resource[0]->flags |=
+			pcmcia_io_cfg_data_width(io->flags);
-		p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
-		p_dev->io.BasePort1 = io->win[0].base;
-		p_dev->io.NumPorts1 = io->win[0].len;
+		p_dev->resource[0]->start = io->win[0].base;
+		p_dev->resource[0]->end = io->win[0].len;
 		if (io->nwin > 1) {
-			p_dev->io.Attributes2 = p_dev->io.Attributes1;
-			p_dev->io.BasePort2 = io->win[1].base;
-			p_dev->io.NumPorts2 = io->win[1].len;
+			p_dev->resource[1]->flags = p_dev->resource[0]->flags;
+			p_dev->resource[1]->start = io->win[1].base;
+			p_dev->resource[1]->end = io->win[1].len;
 		}
 		/* This reserves IO space but doesn't actually enable it */
-		return pcmcia_request_io(p_dev, &p_dev->io);
+		return pcmcia_request_io(p_dev);
 	}
 	return 0;
 }
diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c
index 6d569579d67..7e41ad93703 100644
--- a/drivers/staging/comedi/drivers/ni_daq_700.c
+++ b/drivers/staging/comedi/drivers/ni_daq_700.c
@@ -571,24 +571,22 @@ static int dio700_pcmcia_config_loop(struct pcmcia_device *p_dev,
 	p_dev->conf.Attributes |= CONF_ENABLE_IRQ;
 
 	/* IO window settings */
-	p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0;
+	p_dev->resource[0]->end = p_dev->resource[1]->end = 0;
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-		if (!(io->flags & CISTPL_IO_8BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-		if (!(io->flags & CISTPL_IO_16BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-		p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
-		p_dev->io.BasePort1 = io->win[0].base;
-		p_dev->io.NumPorts1 = io->win[0].len;
+		p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
+		p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+		p_dev->resource[0]->flags |=
+			pcmcia_io_cfg_data_width(io->flags);
+		p_dev->resource[0]->start = io->win[0].base;
+		p_dev->resource[0]->end = io->win[0].len;
 		if (io->nwin > 1) {
-			p_dev->io.Attributes2 = p_dev->io.Attributes1;
-			p_dev->io.BasePort2 = io->win[1].base;
-			p_dev->io.NumPorts2 = io->win[1].len;
+			p_dev->resource[1]->flags = p_dev->resource[0]->flags;
+			p_dev->resource[1]->start = io->win[1].base;
+			p_dev->resource[1]->end = io->win[1].len;
 		}
 		/* This reserves IO space but doesn't actually enable it */
-		if (pcmcia_request_io(p_dev, &p_dev->io) != 0)
+		if (pcmcia_request_io(p_dev) != 0)
 			return -ENODEV;
 	}
 
diff --git a/drivers/staging/comedi/drivers/ni_daq_dio24.c b/drivers/staging/comedi/drivers/ni_daq_dio24.c
index 29e1daf6a9e..b2483f86c24 100644
--- a/drivers/staging/comedi/drivers/ni_daq_dio24.c
+++ b/drivers/staging/comedi/drivers/ni_daq_dio24.c
@@ -323,24 +323,22 @@ static int dio24_pcmcia_config_loop(struct pcmcia_device *p_dev,
 	p_dev->conf.Attributes |= CONF_ENABLE_IRQ;
 
 	/* IO window settings */
-	p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0;
+	p_dev->resource[0]->end = p_dev->resource[1]->end = 0;
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-		if (!(io->flags & CISTPL_IO_8BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-		if (!(io->flags & CISTPL_IO_16BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-		p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
-		p_dev->io.BasePort1 = io->win[0].base;
-		p_dev->io.NumPorts1 = io->win[0].len;
+		p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
+		p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+		p_dev->resource[0]->flags |=
+			pcmcia_io_cfg_data_width(io->flags);
+		p_dev->resource[0]->start = io->win[0].base;
+		p_dev->resource[0]->end = io->win[0].len;
 		if (io->nwin > 1) {
-			p_dev->io.Attributes2 = p_dev->io.Attributes1;
-			p_dev->io.BasePort2 = io->win[1].base;
-			p_dev->io.NumPorts2 = io->win[1].len;
+			p_dev->resource[1]->flags = p_dev->resource[0]->flags;
+			p_dev->resource[1]->start = io->win[1].base;
+			p_dev->resource[1]->end = io->win[1].len;
 		}
 		/* This reserves IO space but doesn't actually enable it */
-		if (pcmcia_request_io(p_dev, &p_dev->io) != 0)
+		if (pcmcia_request_io(p_dev) != 0)
 			return -ENODEV;
 	}
 
diff --git a/drivers/staging/comedi/drivers/ni_labpc_cs.c b/drivers/staging/comedi/drivers/ni_labpc_cs.c
index fb10987a97b..c1444b4a5b4 100644
--- a/drivers/staging/comedi/drivers/ni_labpc_cs.c
+++ b/drivers/staging/comedi/drivers/ni_labpc_cs.c
@@ -301,24 +301,22 @@ static int labpc_pcmcia_config_loop(struct pcmcia_device *p_dev,
 	p_dev->conf.Attributes |= CONF_ENABLE_IRQ | CONF_ENABLE_PULSE_IRQ;
 
 	/* IO window settings */
-	p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0;
+	p_dev->resource[0]->end = p_dev->resource[1]->end = 0;
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-		if (!(io->flags & CISTPL_IO_8BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-		if (!(io->flags & CISTPL_IO_16BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-		p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
-		p_dev->io.BasePort1 = io->win[0].base;
-		p_dev->io.NumPorts1 = io->win[0].len;
+		p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
+		p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+		p_dev->resource[0]->flags |=
+			pcmcia_io_cfg_data_width(io->flags);
+		p_dev->resource[0]->start = io->win[0].base;
+		p_dev->resource[0]->end = io->win[0].len;
 		if (io->nwin > 1) {
-			p_dev->io.Attributes2 = p_dev->io.Attributes1;
-			p_dev->io.BasePort2 = io->win[1].base;
-			p_dev->io.NumPorts2 = io->win[1].len;
+			p_dev->resource[1]->flags = p_dev->resource[0]->flags;
+			p_dev->resource[1]->start = io->win[1].base;
+			p_dev->resource[1]->end = io->win[1].len;
 		}
 		/* This reserves IO space but doesn't actually enable it */
-		if (pcmcia_request_io(p_dev, &p_dev->io) != 0)
+		if (pcmcia_request_io(p_dev) != 0)
 			return -ENODEV;
 	}
 
diff --git a/drivers/staging/comedi/drivers/ni_mio_cs.c b/drivers/staging/comedi/drivers/ni_mio_cs.c
index f37dc22b4db..d50b6c409fe 100644
--- a/drivers/staging/comedi/drivers/ni_mio_cs.c
+++ b/drivers/staging/comedi/drivers/ni_mio_cs.c
@@ -264,8 +264,8 @@ static const dev_info_t dev_info = "ni_mio_cs";
 
 static int cs_attach(struct pcmcia_device *link)
 {
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-	link->io.NumPorts1 = 16;
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_16;
+	link->resource[0]->end = 16;
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
 
@@ -310,13 +310,12 @@ static int mio_pcmcia_config_loop(struct pcmcia_device *p_dev,
 {
 	int base, ret;
 
-	p_dev->io.NumPorts1 = cfg->io.win[0].len;
-	p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK;
-	p_dev->io.NumPorts2 = 0;
+	p_dev->resource[0]->end = cfg->io.win[0].len;
+	p_dev->io_lines = cfg->io.flags & CISTPL_IO_LINES_MASK;
 
 	for (base = 0x000; base < 0x400; base += 0x20) {
-		p_dev->io.BasePort1 = base;
-		ret = pcmcia_request_io(p_dev, &p_dev->io);
+		p_dev->resource[0]->start = base;
+		ret = pcmcia_request_io(p_dev);
 		if (!ret)
 			return 0;
 	}
diff --git a/drivers/staging/comedi/drivers/quatech_daqp_cs.c b/drivers/staging/comedi/drivers/quatech_daqp_cs.c
index 80b8d57c684..25f4e67e3e8 100644
--- a/drivers/staging/comedi/drivers/quatech_daqp_cs.c
+++ b/drivers/staging/comedi/drivers/quatech_daqp_cs.c
@@ -1102,26 +1102,24 @@ static int daqp_pcmcia_config_loop(struct pcmcia_device *p_dev,
 	p_dev->conf.Attributes |= CONF_ENABLE_IRQ;
 
 	/* IO window settings */
-	p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0;
+	p_dev->resource[0]->end = p_dev->resource[1]->end = 0;
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-		if (!(io->flags & CISTPL_IO_8BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-		if (!(io->flags & CISTPL_IO_16BIT))
-			p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-		p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
-		p_dev->io.BasePort1 = io->win[0].base;
-		p_dev->io.NumPorts1 = io->win[0].len;
+		p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
+		p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
+		p_dev->resource[0]->flags |=
+			pcmcia_io_cfg_data_width(io->flags);
+		p_dev->resource[0]->start = io->win[0].base;
+		p_dev->resource[0]->end = io->win[0].len;
 		if (io->nwin > 1) {
-			p_dev->io.Attributes2 = p_dev->io.Attributes1;
-			p_dev->io.BasePort2 = io->win[1].base;
-			p_dev->io.NumPorts2 = io->win[1].len;
+			p_dev->resource[1]->flags = p_dev->resource[0]->flags;
+			p_dev->resource[1]->start = io->win[1].base;
+			p_dev->resource[1]->end = io->win[1].len;
 		}
 	}
 
 	/* This reserves IO space but doesn't actually enable it */
-	return pcmcia_request_io(p_dev, &p_dev->io);
+	return pcmcia_request_io(p_dev);
 }
 
 static void daqp_cs_config(struct pcmcia_device *link)
diff --git a/drivers/staging/wlags49_h2/wl_cs.c b/drivers/staging/wlags49_h2/wl_cs.c
index 23615378acf..f15afd2050b 100644
--- a/drivers/staging/wlags49_h2/wl_cs.c
+++ b/drivers/staging/wlags49_h2/wl_cs.c
@@ -145,9 +145,8 @@ static int wl_adapter_attach(struct pcmcia_device *link)
 	return -ENOMEM;
     }
 
-    link->io.NumPorts1      = HCF_NUM_IO_PORTS;
-    link->io.Attributes1    = IO_DATA_PATH_WIDTH_16;
-    link->io.IOAddrLines    = 6;
+    link->resource[0]->end      = HCF_NUM_IO_PORTS;
+    link->resource[0]->flags    = IO_DATA_PATH_WIDTH_16;
     link->conf.Attributes   = CONF_ENABLE_IRQ;
     link->conf.IntType      = INT_MEMORY_AND_IO;
     link->conf.ConfigIndex  = 5;
@@ -305,8 +304,9 @@ void wl_adapter_insert( struct pcmcia_device *link )
 
     /* Do we need to allocate an interrupt? */
     link->conf.Attributes |= CONF_ENABLE_IRQ;
+    link->io_lines = 6;
 
-    ret = pcmcia_request_io(link, &link->io);
+    ret = pcmcia_request_io(link);
     if (ret != 0)
         goto failed;
 
diff --git a/drivers/telephony/ixj_pcmcia.c b/drivers/telephony/ixj_pcmcia.c
index a801036392c..a1900e50251 100644
--- a/drivers/telephony/ixj_pcmcia.c
+++ b/drivers/telephony/ixj_pcmcia.c
@@ -32,9 +32,8 @@ static int ixj_probe(struct pcmcia_device *p_dev)
 {
 	dev_dbg(&p_dev->dev, "ixj_attach()\n");
 	/* Create new ixj device */
-	p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-	p_dev->io.Attributes2 = IO_DATA_PATH_WIDTH_8;
-	p_dev->io.IOAddrLines = 3;
+	p_dev->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
+	p_dev->resource[1]->flags |= IO_DATA_PATH_WIDTH_8;
 	p_dev->conf.IntType = INT_MEMORY_AND_IO;
 	p_dev->priv = kzalloc(sizeof(struct ixj_info_t), GFP_KERNEL);
 	if (!p_dev->priv) {
@@ -120,13 +119,14 @@ static int ixj_config_check(struct pcmcia_device *p_dev,
 {
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
-		p_dev->io.BasePort1 = io->win[0].base;
-		p_dev->io.NumPorts1 = io->win[0].len;
+		p_dev->resource[0]->start = io->win[0].base;
+		p_dev->resource[0]->end = io->win[0].len;
+		p_dev->io_lines = 3;
 		if (io->nwin == 2) {
-			p_dev->io.BasePort2 = io->win[1].base;
-			p_dev->io.NumPorts2 = io->win[1].len;
+			p_dev->resource[1]->start = io->win[1].base;
+			p_dev->resource[1]->end = io->win[1].len;
 		}
-		if (!pcmcia_request_io(p_dev, &p_dev->io))
+		if (!pcmcia_request_io(p_dev))
 			return 0;
 	}
 	return -ENODEV;
diff --git a/drivers/usb/host/sl811_cs.c b/drivers/usb/host/sl811_cs.c
index 22e04f20630..0e13a00eb2e 100644
--- a/drivers/usb/host/sl811_cs.c
+++ b/drivers/usb/host/sl811_cs.c
@@ -162,16 +162,16 @@ static int sl811_cs_config_check(struct pcmcia_device *p_dev,
 	p_dev->conf.Attributes |= CONF_ENABLE_IRQ;
 
 	/* IO window settings */
-	p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0;
+	p_dev->resource[0]->end = p_dev->resource[1]->end = 0;
 	if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) {
 		cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io;
+		p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK;
 
-		p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
-		p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
-		p_dev->io.BasePort1 = io->win[0].base;
-		p_dev->io.NumPorts1 = io->win[0].len;
+		p_dev->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
+		p_dev->resource[0]->start = io->win[0].base;
+		p_dev->resource[0]->end = io->win[0].len;
 
-		return pcmcia_request_io(p_dev, &p_dev->io);
+		return pcmcia_request_io(p_dev);
 	}
 	pcmcia_disable_device(p_dev);
 	return -ENODEV;
diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h
index 0cd8c70d8aa..ad71bb5a865 100644
--- a/include/pcmcia/cs.h
+++ b/include/pcmcia/cs.h
@@ -56,23 +56,6 @@ typedef struct config_req_t {
 #define INT_CARDBUS		0x04
 #define INT_ZOOMED_VIDEO	0x08
 
-/* For RequestIO and ReleaseIO */
-typedef struct io_req_t {
-    u_int	BasePort1;
-    u_int	NumPorts1;
-    u_int	Attributes1;
-    u_int	BasePort2;
-    u_int	NumPorts2;
-    u_int	Attributes2;
-    u_int	IOAddrLines;
-} io_req_t;
-
-/* Attributes for RequestIO and ReleaseIO */
-#define IO_DATA_PATH_WIDTH	0x18
-#define IO_DATA_PATH_WIDTH_8	0x00
-#define IO_DATA_PATH_WIDTH_16	0x08
-#define IO_DATA_PATH_WIDTH_AUTO	0x10
-
 /* Bits in IRQInfo1 field */
 #define IRQ_NMI_ID		0x01
 #define IRQ_IOCK_ID		0x02
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index 3dafd7db34d..0748bec0a87 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -80,7 +80,6 @@ struct pcmcia_device {
 	struct list_head	socket_device_list;
 
 	/* deprecated, will be cleaned up soon */
-	io_req_t		io;
 	config_req_t		conf;
 	window_handle_t		win;
 
@@ -88,6 +87,8 @@ struct pcmcia_device {
 	unsigned int		irq;
 	struct resource		*resource[MAX_IO_WIN];
 
+	unsigned int		io_lines; /* number of I/O lines */
+
 	/* Is the device suspended? */
 	u16			suspended:1;
 
@@ -179,7 +180,7 @@ int pcmcia_read_config_byte(struct pcmcia_device *p_dev, off_t where, u8 *val);
 int pcmcia_write_config_byte(struct pcmcia_device *p_dev, off_t where, u8 val);
 
 /* device configuration */
-int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req);
+int pcmcia_request_io(struct pcmcia_device *p_dev);
 
 int __must_check
 __pcmcia_request_exclusive_irq(struct pcmcia_device *p_dev,
@@ -206,6 +207,22 @@ int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win,
 int pcmcia_modify_configuration(struct pcmcia_device *p_dev, modconf_t *mod);
 void pcmcia_disable_device(struct pcmcia_device *p_dev);
 
+/* IO ports */
+#define IO_DATA_PATH_WIDTH	0x18
+#define IO_DATA_PATH_WIDTH_8	0x00
+#define IO_DATA_PATH_WIDTH_16	0x08
+#define IO_DATA_PATH_WIDTH_AUTO	0x10
+
+/* convert flag found in cfgtable to data path width parameter */
+static inline int pcmcia_io_cfg_data_width(unsigned int flags)
+{
+	if (!(flags & CISTPL_IO_8BIT))
+		return IO_DATA_PATH_WIDTH_16;
+	if (!(flags & CISTPL_IO_16BIT))
+		return IO_DATA_PATH_WIDTH_8;
+	return IO_DATA_PATH_WIDTH_AUTO;
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_DS_H */
diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf.c b/sound/pcmcia/pdaudiocf/pdaudiocf.c
index 9f897bca061..7ab9174a8a8 100644
--- a/sound/pcmcia/pdaudiocf/pdaudiocf.c
+++ b/sound/pcmcia/pdaudiocf/pdaudiocf.c
@@ -139,8 +139,8 @@ static int snd_pdacf_probe(struct pcmcia_device *link)
 	pdacf->p_dev = link;
 	link->priv = pdacf;
 
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-	link->io.NumPorts1 = 16;
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
+	link->resource[0]->end = 16;
 
 	link->conf.Attributes = CONF_ENABLE_IRQ | CONF_ENABLE_PULSE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
@@ -219,7 +219,7 @@ static int pdacf_config(struct pcmcia_device *link)
 	snd_printdd(KERN_DEBUG "pdacf_config called\n");
 	link->conf.ConfigIndex = 0x5;
 
-	ret = pcmcia_request_io(link, &link->io);
+	ret = pcmcia_request_io(link);
 	if (ret)
 		goto failed;
 
diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c
index f23c235013a..a6edfc3be29 100644
--- a/sound/pcmcia/vx/vxpocket.c
+++ b/sound/pcmcia/vx/vxpocket.c
@@ -159,8 +159,8 @@ static int snd_vxpocket_new(struct snd_card *card, int ibl,
 	vxp->p_dev = link;
 	link->priv = chip;
 
-	link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
-	link->io.NumPorts1 = 16;
+	link->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
+	link->resource[0]->end = 16;
 
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
@@ -226,7 +226,7 @@ static int vxpocket_config(struct pcmcia_device *link)
 		strcpy(chip->card->driver, vxp440_hw.name);
 	}
 
-	ret = pcmcia_request_io(link, &link->io);
+	ret = pcmcia_request_io(link);
 	if (ret)
 		goto failed;
 
-- 
cgit v1.2.3-70-g09d2


From a3d0d4d8dd45779b6e174a8567ffb9b485e472af Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sat, 24 Jul 2010 17:43:10 +0200
Subject: pcmcia: move local definitions out of include/pcmcia/cs.h

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/pcmcia/cistpl.c      |  3 +++
 drivers/pcmcia/cs_internal.h |  3 +++
 include/pcmcia/cs.h          | 19 -------------------
 3 files changed, 6 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c
index 1733fab469a..91414a0ddc4 100644
--- a/drivers/pcmcia/cistpl.c
+++ b/drivers/pcmcia/cistpl.c
@@ -53,6 +53,9 @@ static const u_int exponent[] = {
 /* Upper limit on reasonable # of tuples */
 #define MAX_TUPLES		200
 
+/* Bits in IRQInfo1 field */
+#define IRQ_INFO2_VALID		0x10
+
 /* 16-bit CIS? */
 static int cis_width;
 module_param(cis_width, int, 0444);
diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h
index a85558fc71f..511ac753b9d 100644
--- a/drivers/pcmcia/cs_internal.h
+++ b/drivers/pcmcia/cs_internal.h
@@ -26,6 +26,9 @@
 /* Flags in client state */
 #define CLIENT_WIN_REQ(i)	(0x1<<(i))
 
+/* Flag to access all functions */
+#define BIND_FN_ALL	0xff
+
 /* Each card function gets one of these guys */
 typedef struct config_t {
 	struct kref	ref;
diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h
index ad71bb5a865..583a4e33039 100644
--- a/include/pcmcia/cs.h
+++ b/include/pcmcia/cs.h
@@ -56,16 +56,6 @@ typedef struct config_req_t {
 #define INT_CARDBUS		0x04
 #define INT_ZOOMED_VIDEO	0x08
 
-/* Bits in IRQInfo1 field */
-#define IRQ_NMI_ID		0x01
-#define IRQ_IOCK_ID		0x02
-#define IRQ_BERR_ID		0x04
-#define IRQ_VEND_ID		0x08
-#define IRQ_INFO2_VALID		0x10
-#define IRQ_LEVEL_ID		0x20
-#define IRQ_PULSE_ID		0x40
-#define IRQ_SHARE_ID		0x80
-
 /* Configuration registers present */
 #define PRESENT_OPTION		0x001
 #define PRESENT_STATUS		0x002
@@ -84,12 +74,6 @@ typedef struct memreq_t {
     u_short	Page;
 } memreq_t;
 
-/* For ModifyWindow */
-typedef struct modwin_t {
-    u_int	Attributes;
-    u_int	AccessSpeed;
-} modwin_t;
-
 /* For RequestWindow */
 typedef struct win_req_t {
     u_int	Attributes;
@@ -121,7 +105,4 @@ typedef struct win_req_t {
 #define WIN_BAR_MASK		0xe000
 #define WIN_BAR_SHIFT		13
 
-/* Flag to bind to all functions */
-#define BIND_FN_ALL	0xff
-
 #endif /* _LINUX_CS_H */
-- 
cgit v1.2.3-70-g09d2


From b5cb259e7fac5536c4ddf350af6a3d6cc950e47e Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sat, 24 Jul 2010 18:46:42 +0200
Subject: pcmcia: remove memreq_t

The Page field already had to be set to 0, and CardOffset can easily be
passed as a parameter to pcmcia_map_mem_page().

CC: netdev@vger.kernel.org
CC: linux-wireless@vger.kernel.org
CC: linux-ide@vger.kernel.org
CC: linux-usb@vger.kernel.org
CC: laforge@gnumonks.org
CC: linux-mtd@lists.infradead.org
CC: linux-bluetooth@vger.kernel.org
CC: alsa-devel@alsa-project.org
CC: linux-serial@vger.kernel.org
CC: Michael Buesch <mb@bu3sch.de>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/char/pcmcia/ipwireless/main.c         | 14 ++------------
 drivers/isdn/hisax/sedlbauer_cs.c             |  7 +++----
 drivers/mtd/maps/pcmciamtd.c                  | 13 ++++++-------
 drivers/net/pcmcia/fmvj18x_cs.c               | 10 ++--------
 drivers/net/pcmcia/ibmtr_cs.c                 | 11 +++--------
 drivers/net/pcmcia/pcnet_cs.c                 | 14 ++++----------
 drivers/net/pcmcia/smc91c92_cs.c              |  9 +++------
 drivers/net/pcmcia/xirc2ps_cs.c               |  5 +----
 drivers/net/wireless/airo_cs.c                |  6 ++----
 drivers/net/wireless/b43/pcmcia.c             |  5 +----
 drivers/net/wireless/ray_cs.c                 | 13 +++----------
 drivers/pcmcia/pcmcia_resource.c              |  9 +++------
 drivers/scsi/pcmcia/nsp_cs.c                  |  5 ++---
 drivers/staging/comedi/drivers/ni_daq_700.c   |  6 ++----
 drivers/staging/comedi/drivers/ni_daq_dio24.c |  6 ++----
 drivers/staging/comedi/drivers/ni_labpc_cs.c  |  6 ++----
 include/pcmcia/cs.h                           |  6 ------
 include/pcmcia/ds.h                           |  2 +-
 18 files changed, 42 insertions(+), 105 deletions(-)

(limited to 'include')

diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c
index 5f87b9f7b6d..6c4aa4b0be9 100644
--- a/drivers/char/pcmcia/ipwireless/main.c
+++ b/drivers/char/pcmcia/ipwireless/main.c
@@ -84,8 +84,6 @@ static int ipwireless_probe(struct pcmcia_device *p_dev,
 {
 	struct ipw_dev *ipw = priv_data;
 	struct resource *io_resource;
-	memreq_t memreq_attr_memory;
-	memreq_t memreq_common_memory;
 	int ret;
 
 	p_dev->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
@@ -121,11 +119,8 @@ static int ipwireless_probe(struct pcmcia_device *p_dev,
 	if (ret != 0)
 		goto exit1;
 
-	memreq_common_memory.CardOffset = cfg->mem.win[0].card_addr;
-	memreq_common_memory.Page = 0;
-
 	ret = pcmcia_map_mem_page(p_dev, ipw->handle_common_memory,
-				&memreq_common_memory);
+				cfg->mem.win[0].card_addr);
 
 	if (ret != 0)
 		goto exit2;
@@ -150,12 +145,7 @@ static int ipwireless_probe(struct pcmcia_device *p_dev,
 	if (ret != 0)
 		goto exit2;
 
-	memreq_attr_memory.CardOffset = 0;
-	memreq_attr_memory.Page = 0;
-
-	ret = pcmcia_map_mem_page(p_dev, ipw->handle_attr_memory,
-				&memreq_attr_memory);
-
+	ret = pcmcia_map_mem_page(p_dev, ipw->handle_attr_memory, 0);
 	if (ret != 0)
 		goto exit3;
 
diff --git a/drivers/isdn/hisax/sedlbauer_cs.c b/drivers/isdn/hisax/sedlbauer_cs.c
index 4755eb440f7..0b06dbb2d52 100644
--- a/drivers/isdn/hisax/sedlbauer_cs.c
+++ b/drivers/isdn/hisax/sedlbauer_cs.c
@@ -232,7 +232,6 @@ static int sedlbauer_config_check(struct pcmcia_device *p_dev,
 	*/
 	if ((cfg->mem.nwin > 0) || (dflt->mem.nwin > 0)) {
 		cistpl_mem_t *mem = (cfg->mem.nwin) ? &cfg->mem : &dflt->mem;
-		memreq_t map;
 		req->Attributes = WIN_DATA_WIDTH_16|WIN_MEMORY_TYPE_CM;
 		req->Attributes |= WIN_ENABLE;
 		req->Base = mem->win[0].host_addr;
@@ -240,9 +239,9 @@ static int sedlbauer_config_check(struct pcmcia_device *p_dev,
 		req->AccessSpeed = 0;
 		if (pcmcia_request_window(p_dev, req, &p_dev->win) != 0)
 			return -ENODEV;
-		map.Page = 0;
-		map.CardOffset = mem->win[0].card_addr;
-		if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0)
+
+		if (pcmcia_map_mem_page(p_dev, p_dev->win,
+						mem->win[0].card_addr) != 0)
 			return -ENODEV;
 	}
 	return 0;
diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c
index 79488164e43..f97463ecfc5 100644
--- a/drivers/mtd/maps/pcmciamtd.c
+++ b/drivers/mtd/maps/pcmciamtd.c
@@ -102,7 +102,7 @@ static caddr_t remap_window(struct map_info *map, unsigned long to)
 {
 	struct pcmciamtd_dev *dev = (struct pcmciamtd_dev *)map->map_priv_1;
 	window_handle_t win = (window_handle_t)map->map_priv_2;
-	memreq_t mrq;
+	unsigned int offset;
 	int ret;
 
 	if (!pcmcia_dev_present(dev->p_dev)) {
@@ -110,15 +110,14 @@ static caddr_t remap_window(struct map_info *map, unsigned long to)
 		return 0;
 	}
 
-	mrq.CardOffset = to & ~(dev->win_size-1);
-	if(mrq.CardOffset != dev->offset) {
+	offset = to & ~(dev->win_size-1);
+	if (offset != dev->offset) {
 		DEBUG(2, "Remapping window from 0x%8.8x to 0x%8.8x",
-		      dev->offset, mrq.CardOffset);
-		mrq.Page = 0;
-		ret = pcmcia_map_mem_page(dev->p_dev, win, &mrq);
+		      dev->offset, offset);
+		ret = pcmcia_map_mem_page(dev->p_dev, win, offset);
 		if (ret != 0)
 			return NULL;
-		dev->offset = mrq.CardOffset;
+		dev->offset = offset;
 	}
 	return dev->win_base + (to & (dev->win_size-1));
 }
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index 699304480ae..98fffb03ecd 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -545,7 +545,6 @@ failed:
 static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id)
 {
     win_req_t req;
-    memreq_t mem;
     u_char __iomem *base;
     int i, j;
 
@@ -558,9 +557,7 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id)
 	return -1;
 
     base = ioremap(req.Base, req.Size);
-    mem.Page = 0;
-    mem.CardOffset = 0;
-    pcmcia_map_mem_page(link, link->win, &mem);
+    pcmcia_map_mem_page(link, link->win, 0);
 
     /*
      *  MBH10304 CISTPL_FUNCE_LAN_NODE_ID format
@@ -594,7 +591,6 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id)
 static int fmvj18x_setup_mfc(struct pcmcia_device *link)
 {
     win_req_t req;
-    memreq_t mem;
     int i;
     struct net_device *dev = link->priv;
     unsigned int ioaddr;
@@ -614,9 +610,7 @@ static int fmvj18x_setup_mfc(struct pcmcia_device *link)
 	return -1;
     }
 
-    mem.Page = 0;
-    mem.CardOffset = 0;
-    i = pcmcia_map_mem_page(link, link->win, &mem);
+    i = pcmcia_map_mem_page(link, link->win, 0);
     if (i != 0) {
 	iounmap(lp->base);
 	lp->base = NULL;
diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c
index 3fd859570db..c0b3cdd49c6 100644
--- a/drivers/net/pcmcia/ibmtr_cs.c
+++ b/drivers/net/pcmcia/ibmtr_cs.c
@@ -211,7 +211,6 @@ static int __devinit ibmtr_config(struct pcmcia_device *link)
     struct net_device *dev = info->dev;
     struct tok_info *ti = netdev_priv(dev);
     win_req_t req;
-    memreq_t mem;
     int i, ret;
 
     dev_dbg(&link->dev, "ibmtr_config\n");
@@ -250,9 +249,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link)
     if (ret)
 	    goto failed;
 
-    mem.CardOffset = mmiobase;
-    mem.Page = 0;
-    ret = pcmcia_map_mem_page(link, link->win, &mem);
+    ret = pcmcia_map_mem_page(link, link->win, mmiobase);
     if (ret)
 	    goto failed;
     ti->mmio = ioremap(req.Base, req.Size);
@@ -267,13 +264,11 @@ static int __devinit ibmtr_config(struct pcmcia_device *link)
     if (ret)
 	    goto failed;
 
-    mem.CardOffset = srambase;
-    mem.Page = 0;
-    ret = pcmcia_map_mem_page(link, info->sram_win_handle, &mem);
+    ret = pcmcia_map_mem_page(link, info->sram_win_handle, srambase);
     if (ret)
 	    goto failed;
 
-    ti->sram_base = mem.CardOffset >> 12;
+    ti->sram_base = srambase >> 12;
     ti->sram_virt = ioremap(req.Base, req.Size);
     ti->sram_phys = req.Base;
 
diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c
index 9c5fc9dfc55..c3edfe4c265 100644
--- a/drivers/net/pcmcia/pcnet_cs.c
+++ b/drivers/net/pcmcia/pcnet_cs.c
@@ -301,7 +301,6 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
     win_req_t req;
-    memreq_t mem;
     u_char __iomem *base, *virt;
     int i, j;
 
@@ -314,10 +313,8 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link)
 	return NULL;
 
     virt = ioremap(req.Base, req.Size);
-    mem.Page = 0;
     for (i = 0; i < NR_INFO; i++) {
-	mem.CardOffset = hw_info[i].offset & ~(req.Size-1);
-	pcmcia_map_mem_page(link, link->win, &mem);
+	pcmcia_map_mem_page(link, link->win, hw_info[i].offset & ~(req.Size-1));
 	base = &virt[hw_info[i].offset & (req.Size-1)];
 	if ((readb(base+0) == hw_info[i].a0) &&
 	    (readb(base+2) == hw_info[i].a1) &&
@@ -1463,7 +1460,6 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg,
     struct net_device *dev = link->priv;
     pcnet_dev_t *info = PRIV(dev);
     win_req_t req;
-    memreq_t mem;
     int i, window_size, offset, ret;
 
     window_size = (stop_pg - start_pg) << 8;
@@ -1482,11 +1478,9 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg,
     if (ret)
 	    goto failed;
 
-    mem.CardOffset = (start_pg << 8) + cm_offset;
-    offset = mem.CardOffset % window_size;
-    mem.CardOffset -= offset;
-    mem.Page = 0;
-    ret = pcmcia_map_mem_page(link, link->win, &mem);
+    offset = (start_pg << 8) + cm_offset;
+    offset -= offset % window_size;
+    ret = pcmcia_map_mem_page(link, link->win, offset);
     if (ret)
 	    goto failed;
 
diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c
index a5e47796f6a..377367d03b4 100644
--- a/drivers/net/pcmcia/smc91c92_cs.c
+++ b/drivers/net/pcmcia/smc91c92_cs.c
@@ -443,7 +443,7 @@ static int mhz_mfc_config(struct pcmcia_device *link)
     struct net_device *dev = link->priv;
     struct smc_private *smc = netdev_priv(dev);
     win_req_t req;
-    memreq_t mem;
+    unsigned int offset;
     int i;
 
     link->conf.Attributes |= CONF_ENABLE_SPKR;
@@ -467,11 +467,8 @@ static int mhz_mfc_config(struct pcmcia_device *link)
 	    return -ENODEV;
 
     smc->base = ioremap(req.Base, req.Size);
-    mem.CardOffset = mem.Page = 0;
-    if (smc->manfid == MANFID_MOTOROLA)
-	mem.CardOffset = link->conf.ConfigBase;
-    i = pcmcia_map_mem_page(link, link->win, &mem);
-
+    offset = (smc->manfid == MANFID_MOTOROLA) ? link->conf.ConfigBase : 0;
+    i = pcmcia_map_mem_page(link, link->win, offset);
     if ((i == 0) &&
 	(smc->manfid == MANFID_MEGAHERTZ) &&
 	(smc->cardid == PRODID_MEGAHERTZ_EM3288))
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index 8fb0eb1dc34..4eb6f986703 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -870,7 +870,6 @@ xirc2ps_config(struct pcmcia_device * link)
 
     if (local->dingo) {
 	win_req_t req;
-	memreq_t mem;
 
 	/* Reset the modem's BAR to the correct value
 	 * This is necessary because in the RequestConfiguration call,
@@ -898,9 +897,7 @@ xirc2ps_config(struct pcmcia_device * link)
 	    goto config_error;
 
 	local->dingo_ccr = ioremap(req.Base,0x1000) + 0x0800;
-	mem.CardOffset = 0x0;
-	mem.Page = 0;
-	if ((err = pcmcia_map_mem_page(link, link->win, &mem)))
+	if ((err = pcmcia_map_mem_page(link, link->win, 0)))
 	    goto config_error;
 
 	/* Setup the CCRs; there are no infos in the CIS about the Ethernet
diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c
index d241b4aed71..d47672cb419 100644
--- a/drivers/net/wireless/airo_cs.c
+++ b/drivers/net/wireless/airo_cs.c
@@ -207,16 +207,14 @@ static int airo_cs_config_check(struct pcmcia_device *p_dev,
 	*/
 	if ((cfg->mem.nwin > 0) || (dflt->mem.nwin > 0)) {
 		cistpl_mem_t *mem = (cfg->mem.nwin) ? &cfg->mem : &dflt->mem;
-		memreq_t map;
 		req->Attributes = WIN_DATA_WIDTH_16|WIN_MEMORY_TYPE_CM;
 		req->Base = mem->win[0].host_addr;
 		req->Size = mem->win[0].len;
 		req->AccessSpeed = 0;
 		if (pcmcia_request_window(p_dev, req, &p_dev->win) != 0)
 			return -ENODEV;
-		map.Page = 0;
-		map.CardOffset = mem->win[0].card_addr;
-		if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0)
+		if (pcmcia_map_mem_page(p_dev, p_dev->win,
+						mem->win[0].card_addr) != 0)
 			return -ENODEV;
 	}
 	/* If we got this far, we're cool! */
diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c
index 7c9af82fcf7..ffe1f89d5f7 100644
--- a/drivers/net/wireless/b43/pcmcia.c
+++ b/drivers/net/wireless/b43/pcmcia.c
@@ -64,7 +64,6 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev)
 {
 	struct ssb_bus *ssb;
 	win_req_t win;
-	memreq_t mem;
 	int err = -ENOMEM;
 	int res = 0;
 
@@ -87,9 +86,7 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev)
 	if (res != 0)
 		goto err_kfree_ssb;
 
-	mem.CardOffset = 0;
-	mem.Page = 0;
-	res = pcmcia_map_mem_page(dev, dev->win, &mem);
+	res = pcmcia_map_mem_page(dev, dev->win, 0);
 	if (res != 0)
 		goto err_disable;
 
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index b83d5ef1dff..7eb339af351 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -393,7 +393,6 @@ static int ray_config(struct pcmcia_device *link)
 	int ret = 0;
 	int i;
 	win_req_t req;
-	memreq_t mem;
 	struct net_device *dev = (struct net_device *)link->priv;
 	ray_dev_t *local = netdev_priv(dev);
 
@@ -430,9 +429,7 @@ static int ray_config(struct pcmcia_device *link)
 	ret = pcmcia_request_window(link, &req, &link->win);
 	if (ret)
 		goto failed;
-	mem.CardOffset = 0x0000;
-	mem.Page = 0;
-	ret = pcmcia_map_mem_page(link, link->win, &mem);
+	ret = pcmcia_map_mem_page(link, link->win, 0);
 	if (ret)
 		goto failed;
 	local->sram = ioremap(req.Base, req.Size);
@@ -446,9 +443,7 @@ static int ray_config(struct pcmcia_device *link)
 	ret = pcmcia_request_window(link, &req, &local->rmem_handle);
 	if (ret)
 		goto failed;
-	mem.CardOffset = 0x8000;
-	mem.Page = 0;
-	ret = pcmcia_map_mem_page(link, local->rmem_handle, &mem);
+	ret = pcmcia_map_mem_page(link, local->rmem_handle, 0x8000);
 	if (ret)
 		goto failed;
 	local->rmem = ioremap(req.Base, req.Size);
@@ -462,9 +457,7 @@ static int ray_config(struct pcmcia_device *link)
 	ret = pcmcia_request_window(link, &req, &local->amem_handle);
 	if (ret)
 		goto failed;
-	mem.CardOffset = 0x0000;
-	mem.Page = 0;
-	ret = pcmcia_map_mem_page(link, local->amem_handle, &mem);
+	ret = pcmcia_map_mem_page(link, local->amem_handle, 0);
 	if (ret)
 		goto failed;
 	local->amem = ioremap(req.Base, req.Size);
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index a48d4a91d44..975baaa8168 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -193,7 +193,7 @@ EXPORT_SYMBOL(pcmcia_write_config_byte);
 
 
 int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t wh,
-			memreq_t *req)
+			unsigned int offset)
 {
 	struct pcmcia_socket *s = p_dev->socket;
 	int ret;
@@ -201,12 +201,9 @@ int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t wh,
 	wh--;
 	if (wh >= MAX_WIN)
 		return -EINVAL;
-	if (req->Page != 0) {
-		dev_dbg(&s->dev, "failure: requested page is zero\n");
-		return -EINVAL;
-	}
+
 	mutex_lock(&s->ops_mutex);
-	s->win[wh].card_start = req->CardOffset;
+	s->win[wh].card_start = offset;
 	ret = s->ops->set_mem_map(s, &s->win[wh]);
 	if (ret)
 		dev_warn(&s->dev, "failed to set_mem_map\n");
diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
index 8bb598bb440..dd9b40306f3 100644
--- a/drivers/scsi/pcmcia/nsp_cs.c
+++ b/drivers/scsi/pcmcia/nsp_cs.c
@@ -1661,7 +1661,6 @@ static int nsp_cs_config_check(struct pcmcia_device *p_dev,
 		}
 
 		if ((cfg->mem.nwin > 0) || (dflt->mem.nwin > 0)) {
-			memreq_t	map;
 			cistpl_mem_t	*mem =
 				(cfg->mem.nwin) ? &cfg->mem : &dflt->mem;
 			cfg_mem->req.Attributes = WIN_DATA_WIDTH_16|WIN_MEMORY_TYPE_CM;
@@ -1673,8 +1672,8 @@ static int nsp_cs_config_check(struct pcmcia_device *p_dev,
 			cfg_mem->req.AccessSpeed = 0;
 			if (pcmcia_request_window(p_dev, &cfg_mem->req, &p_dev->win) != 0)
 				goto next_entry;
-			map.Page = 0; map.CardOffset = mem->win[0].card_addr;
-			if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0)
+			if (pcmcia_map_mem_page(p_dev, p_dev->win,
+					mem->win[0].card_addr) != 0)
 				goto next_entry;
 
 			cfg_mem->data->MmioAddress = (unsigned long) ioremap_nocache(cfg_mem->req.Base, cfg_mem->req.Size);
diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c
index 7e41ad93703..abaa40b8be7 100644
--- a/drivers/staging/comedi/drivers/ni_daq_700.c
+++ b/drivers/staging/comedi/drivers/ni_daq_700.c
@@ -556,7 +556,6 @@ static int dio700_pcmcia_config_loop(struct pcmcia_device *p_dev,
 				void *priv_data)
 {
 	win_req_t *req = priv_data;
-	memreq_t map;
 
 	if (cfg->index == 0)
 		return -ENODEV;
@@ -602,9 +601,8 @@ static int dio700_pcmcia_config_loop(struct pcmcia_device *p_dev,
 		req->AccessSpeed = 0;
 		if (pcmcia_request_window(p_dev, req, &p_dev->win))
 			return -ENODEV;
-		map.Page = 0;
-		map.CardOffset = mem->win[0].card_addr;
-		if (pcmcia_map_mem_page(p_dev, p_dev->win, &map))
+		if (pcmcia_map_mem_page(p_dev, p_dev->win,
+				mem->win[0].card_addr))
 			return -ENODEV;
 	}
 	/* If we got this far, we're cool! */
diff --git a/drivers/staging/comedi/drivers/ni_daq_dio24.c b/drivers/staging/comedi/drivers/ni_daq_dio24.c
index b2483f86c24..caccece2085 100644
--- a/drivers/staging/comedi/drivers/ni_daq_dio24.c
+++ b/drivers/staging/comedi/drivers/ni_daq_dio24.c
@@ -308,7 +308,6 @@ static int dio24_pcmcia_config_loop(struct pcmcia_device *p_dev,
 				void *priv_data)
 {
 	win_req_t *req = priv_data;
-	memreq_t map;
 
 	if (cfg->index == 0)
 		return -ENODEV;
@@ -354,9 +353,8 @@ static int dio24_pcmcia_config_loop(struct pcmcia_device *p_dev,
 		req->AccessSpeed = 0;
 		if (pcmcia_request_window(p_dev, req, &p_dev->win))
 			return -ENODEV;
-		map.Page = 0;
-		map.CardOffset = mem->win[0].card_addr;
-		if (pcmcia_map_mem_page(p_dev, p_dev->win, &map))
+		if (pcmcia_map_mem_page(p_dev, p_dev->win,
+						mem->win[0].card_addr))
 			return -ENODEV;
 	}
 	/* If we got this far, we're cool! */
diff --git a/drivers/staging/comedi/drivers/ni_labpc_cs.c b/drivers/staging/comedi/drivers/ni_labpc_cs.c
index c1444b4a5b4..94d9f7fe6f2 100644
--- a/drivers/staging/comedi/drivers/ni_labpc_cs.c
+++ b/drivers/staging/comedi/drivers/ni_labpc_cs.c
@@ -286,7 +286,6 @@ static int labpc_pcmcia_config_loop(struct pcmcia_device *p_dev,
 				void *priv_data)
 {
 	win_req_t *req = priv_data;
-	memreq_t map;
 
 	if (cfg->index == 0)
 		return -ENODEV;
@@ -332,9 +331,8 @@ static int labpc_pcmcia_config_loop(struct pcmcia_device *p_dev,
 		req->AccessSpeed = 0;
 		if (pcmcia_request_window(p_dev, req, &p_dev->win))
 			return -ENODEV;
-		map.Page = 0;
-		map.CardOffset = mem->win[0].card_addr;
-		if (pcmcia_map_mem_page(p_dev, p_dev->win, &map))
+		if (pcmcia_map_mem_page(p_dev, p_dev->win,
+						mem->win[0].card_addr))
 			return -ENODEV;
 	}
 	/* If we got this far, we're cool! */
diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h
index 583a4e33039..e4faf4420f2 100644
--- a/include/pcmcia/cs.h
+++ b/include/pcmcia/cs.h
@@ -68,12 +68,6 @@ typedef struct config_req_t {
 #define PRESENT_IOBASE_3	0x100
 #define PRESENT_IOSIZE		0x200
 
-/* For GetMemPage, MapMemPage */
-typedef struct memreq_t {
-    u_int	CardOffset;
-    u_short	Page;
-} memreq_t;
-
 /* For RequestWindow */
 typedef struct win_req_t {
     u_int	Attributes;
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index 0748bec0a87..a2bf3a702c0 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -202,7 +202,7 @@ int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req,
 			  window_handle_t *wh);
 int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win);
 int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win,
-			memreq_t *req);
+			unsigned int offset);
 
 int pcmcia_modify_configuration(struct pcmcia_device *p_dev, modconf_t *mod);
 void pcmcia_disable_device(struct pcmcia_device *p_dev);
-- 
cgit v1.2.3-70-g09d2


From 0ca724d37af370dbf2d55dc4d6359ead558e5756 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sat, 24 Jul 2010 19:03:02 +0200
Subject: pcmcia: use struct resource for PCMCIA devices, part 2

Use struct resource * also for iomem resources.

CC: linux-mtd@lists.infradead.org
CC: netdev@vger.kernel.org
CC: linux-wireless@vger.kernel.org
CC: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/char/pcmcia/ipwireless/main.c | 13 +------
 drivers/mtd/maps/pcmciamtd.c          |  1 -
 drivers/net/pcmcia/ibmtr_cs.c         |  1 -
 drivers/net/wireless/b43/pcmcia.c     |  3 +-
 drivers/net/wireless/ray_cs.c         |  8 ----
 drivers/pcmcia/cs_internal.h          |  1 +
 drivers/pcmcia/ds.c                   |  6 +++
 drivers/pcmcia/pcmcia_resource.c      | 70 +++++++++++++++++++----------------
 include/pcmcia/cs.h                   | 35 +++++++-----------
 include/pcmcia/ds.h                   | 15 +++++++-
 10 files changed, 75 insertions(+), 78 deletions(-)

(limited to 'include')

diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c
index 6c4aa4b0be9..67bdb05798b 100644
--- a/drivers/char/pcmcia/ipwireless/main.c
+++ b/drivers/char/pcmcia/ipwireless/main.c
@@ -157,15 +157,12 @@ static int ipwireless_probe(struct pcmcia_device *p_dev,
 	return 0;
 
 exit3:
-	pcmcia_release_window(p_dev, ipw->handle_attr_memory);
 exit2:
 	if (ipw->common_memory) {
 		release_mem_region(ipw->request_common_memory.Base,
 				ipw->request_common_memory.Size);
 		iounmap(ipw->common_memory);
-		pcmcia_release_window(p_dev, ipw->handle_common_memory);
-	} else
-		pcmcia_release_window(p_dev, ipw->handle_common_memory);
+	}
 exit1:
 	release_resource(io_resource);
 	pcmcia_disable_device(p_dev);
@@ -238,13 +235,12 @@ exit:
 		release_mem_region(ipw->request_attr_memory.Base,
 				ipw->request_attr_memory.Size);
 		iounmap(ipw->attr_memory);
-		pcmcia_release_window(link, ipw->handle_attr_memory);
+
 	}
 	if (ipw->common_memory) {
 		release_mem_region(ipw->request_common_memory.Base,
 				ipw->request_common_memory.Size);
 		iounmap(ipw->common_memory);
-		pcmcia_release_window(link, ipw->handle_common_memory);
 	}
 	pcmcia_disable_device(link);
 	return -1;
@@ -262,11 +258,6 @@ static void release_ipwireless(struct ipw_dev *ipw)
 				ipw->request_attr_memory.Size);
 		iounmap(ipw->attr_memory);
 	}
-	if (ipw->common_memory)
-		pcmcia_release_window(ipw->link, ipw->handle_common_memory);
-	if (ipw->attr_memory)
-		pcmcia_release_window(ipw->link, ipw->handle_attr_memory);
-
 	pcmcia_disable_device(ipw->link);
 }
 
diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c
index f97463ecfc5..e9ca5ba7d9d 100644
--- a/drivers/mtd/maps/pcmciamtd.c
+++ b/drivers/mtd/maps/pcmciamtd.c
@@ -344,7 +344,6 @@ static void pcmciamtd_release(struct pcmcia_device *link)
 			iounmap(dev->win_base);
 			dev->win_base = NULL;
 		}
-		pcmcia_release_window(link, link->win);
 	}
 	pcmcia_disable_device(link);
 }
diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c
index c0b3cdd49c6..b0d06a3d962 100644
--- a/drivers/net/pcmcia/ibmtr_cs.c
+++ b/drivers/net/pcmcia/ibmtr_cs.c
@@ -319,7 +319,6 @@ static void ibmtr_release(struct pcmcia_device *link)
 	if (link->win) {
 		struct tok_info *ti = netdev_priv(dev);
 		iounmap(ti->mmio);
-		pcmcia_release_window(link, info->sram_win_handle);
 	}
 	pcmcia_disable_device(link);
 }
diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c
index ffe1f89d5f7..dfbc41d431f 100644
--- a/drivers/net/wireless/b43/pcmcia.c
+++ b/drivers/net/wireless/b43/pcmcia.c
@@ -76,8 +76,7 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev)
 	dev->conf.Attributes = CONF_ENABLE_IRQ;
 	dev->conf.IntType = INT_MEMORY_AND_IO;
 
-	win.Attributes = WIN_ADDR_SPACE_MEM | WIN_MEMORY_TYPE_CM |
-			 WIN_ENABLE | WIN_DATA_WIDTH_16 |
+	win.Attributes =  WIN_ENABLE | WIN_DATA_WIDTH_16 |
 			 WIN_USE_WAIT;
 	win.Base = 0;
 	win.Size = SSB_CORE_SIZE;
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 7eb339af351..a860bce6849 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -785,7 +785,6 @@ static void ray_release(struct pcmcia_device *link)
 {
 	struct net_device *dev = link->priv;
 	ray_dev_t *local = netdev_priv(dev);
-	int i;
 
 	dev_dbg(&link->dev, "ray_release\n");
 
@@ -794,13 +793,6 @@ static void ray_release(struct pcmcia_device *link)
 	iounmap(local->sram);
 	iounmap(local->rmem);
 	iounmap(local->amem);
-	/* Do bother checking to see if these succeed or not */
-	i = pcmcia_release_window(link, local->amem_handle);
-	if (i != 0)
-		dev_dbg(&link->dev, "ReleaseWindow(local->amem) ret = %x\n", i);
-	i = pcmcia_release_window(link, local->rmem_handle);
-	if (i != 0)
-		dev_dbg(&link->dev, "ReleaseWindow(local->rmem) ret = %x\n", i);
 	pcmcia_disable_device(link);
 
 	dev_dbg(&link->dev, "ray_release ending\n");
diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h
index 511ac753b9d..37d38b5a197 100644
--- a/drivers/pcmcia/cs_internal.h
+++ b/drivers/pcmcia/cs_internal.h
@@ -40,6 +40,7 @@ typedef struct config_t {
 	unsigned int	CardValues;
 
 	struct resource io[MAX_IO_WIN]; /* io ports */
+	struct resource mem[MAX_WIN];   /* mem areas */
 
 	struct {
 		u_int	Attributes;
diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index 7ddd19a4033..0bb780c3f26 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -556,9 +556,15 @@ static struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s,
 			c->io[i].name = dev_name(&p_dev->dev);
 			c->io[i].flags = IORESOURCE_IO;
 		}
+		for (i = 0; i< MAX_WIN; i++) {
+			c->mem[i].name = dev_name(&p_dev->dev);
+			c->mem[i].flags = IORESOURCE_MEM;
+		}
 	}
 	for (i = 0; i < MAX_IO_WIN; i++)
 		p_dev->resource[i] = &p_dev->function_config->io[i];
+	for (; i < (MAX_IO_WIN + MAX_WIN); i++)
+		p_dev->resource[i] = &p_dev->function_config->mem[i-MAX_IO_WIN];
 
 	mutex_unlock(&s->ops_mutex);
 
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index 975baaa8168..01f8e56c8d2 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -196,15 +196,17 @@ int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t wh,
 			unsigned int offset)
 {
 	struct pcmcia_socket *s = p_dev->socket;
+	struct resource *res = wh;
+	unsigned int w;
 	int ret;
 
-	wh--;
-	if (wh >= MAX_WIN)
+	w = ((res->flags & IORESOURCE_BITS & WIN_FLAGS_REQ) >> 2) - 1;
+	if (w >= MAX_WIN)
 		return -EINVAL;
 
 	mutex_lock(&s->ops_mutex);
-	s->win[wh].card_start = offset;
-	ret = s->ops->set_mem_map(s, &s->win[wh]);
+	s->win[w].card_start = offset;
+	ret = s->ops->set_mem_map(s, &s->win[w]);
 	if (ret)
 		dev_warn(&s->dev, "failed to set_mem_map\n");
 	mutex_unlock(&s->ops_mutex);
@@ -371,19 +373,22 @@ out:
 } /* pcmcia_release_io */
 
 
-int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh)
+int pcmcia_release_window(struct pcmcia_device *p_dev, struct resource *res)
 {
 	struct pcmcia_socket *s = p_dev->socket;
 	pccard_mem_map *win;
+	unsigned int w;
 
-	wh--;
-	if (wh >= MAX_WIN)
+	dev_dbg(&p_dev->dev, "releasing window %pR\n", res);
+
+	w = ((res->flags & IORESOURCE_BITS & WIN_FLAGS_REQ) >> 2) - 1;
+	if (w >= MAX_WIN)
 		return -EINVAL;
 
 	mutex_lock(&s->ops_mutex);
-	win = &s->win[wh];
+	win = &s->win[w];
 
-	if (!(p_dev->_win & CLIENT_WIN_REQ(wh))) {
+	if (!(p_dev->_win & CLIENT_WIN_REQ(w))) {
 		dev_dbg(&s->dev, "not releasing unknown window\n");
 		mutex_unlock(&s->ops_mutex);
 		return -EINVAL;
@@ -392,7 +397,7 @@ int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh)
 	/* Shut down memory window */
 	win->flags &= ~MAP_ACTIVE;
 	s->ops->set_mem_map(s, win);
-	s->state &= ~SOCKET_WIN_REQ(wh);
+	s->state &= ~SOCKET_WIN_REQ(w);
 
 	/* Release system memory */
 	if (win->res) {
@@ -400,7 +405,7 @@ int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh)
 		kfree(win->res);
 		win->res = NULL;
 	}
-	p_dev->_win &= ~CLIENT_WIN_REQ(wh);
+	p_dev->_win &= ~CLIENT_WIN_REQ(w);
 	mutex_unlock(&s->ops_mutex);
 
 	return 0;
@@ -775,23 +780,18 @@ int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_ha
 	struct pcmcia_socket *s = p_dev->socket;
 	pccard_mem_map *win;
 	u_long align;
+	struct resource *res;
 	int w;
 
 	if (!(s->state & SOCKET_PRESENT)) {
 		dev_dbg(&s->dev, "No card present\n");
 		return -ENODEV;
 	}
-	if (req->Attributes & (WIN_PAGED | WIN_SHARED)) {
-		dev_dbg(&s->dev, "bad attribute setting for iomem region\n");
-		return -EINVAL;
-	}
 
 	/* Window size defaults to smallest available */
 	if (req->Size == 0)
 		req->Size = s->map_size;
-	align = (((s->features & SS_CAP_MEM_ALIGN) ||
-		  (req->Attributes & WIN_STRICT_ALIGN)) ?
-		 req->Size : s->map_size);
+	align = (s->features & SS_CAP_MEM_ALIGN) ? req->Size : s->map_size;
 	if (req->Size & (s->map_size-1)) {
 		dev_dbg(&s->dev, "invalid map size\n");
 		return -EINVAL;
@@ -805,20 +805,21 @@ int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_ha
 		align = 0;
 
 	/* Allocate system memory window */
+	mutex_lock(&s->ops_mutex);
 	for (w = 0; w < MAX_WIN; w++)
 		if (!(s->state & SOCKET_WIN_REQ(w)))
 			break;
 	if (w == MAX_WIN) {
 		dev_dbg(&s->dev, "all windows are used already\n");
+		mutex_unlock(&s->ops_mutex);
 		return -EINVAL;
 	}
 
-	mutex_lock(&s->ops_mutex);
 	win = &s->win[w];
 
 	if (!(s->features & SS_CAP_STATIC_MAP)) {
 		win->res = pcmcia_find_mem_region(req->Base, req->Size, align,
-						      (req->Attributes & WIN_MAP_BELOW_1MB), s);
+						0, s);
 		if (!win->res) {
 			dev_dbg(&s->dev, "allocating mem region failed\n");
 			mutex_unlock(&s->ops_mutex);
@@ -829,16 +830,8 @@ int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_ha
 
 	/* Configure the socket controller */
 	win->map = w+1;
-	win->flags = 0;
+	win->flags = req->Attributes;
 	win->speed = req->AccessSpeed;
-	if (req->Attributes & WIN_MEMORY_TYPE)
-		win->flags |= MAP_ATTRIB;
-	if (req->Attributes & WIN_ENABLE)
-		win->flags |= MAP_ACTIVE;
-	if (req->Attributes & WIN_DATA_WIDTH_16)
-		win->flags |= MAP_16BIT;
-	if (req->Attributes & WIN_USE_WAIT)
-		win->flags |= MAP_USE_WAIT;
 	win->card_start = 0;
 
 	if (s->ops->set_mem_map(s, win) != 0) {
@@ -854,8 +847,16 @@ int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_ha
 	else
 		req->Base = win->res->start;
 
+	/* convert to new-style resources */
+	res = p_dev->resource[w + MAX_IO_WIN];
+	res->start = req->Base;
+	res->end = req->Base + req->Size - 1;
+	res->flags &= ~IORESOURCE_BITS;
+	res->flags |= (req->Attributes & WIN_FLAGS_MAP) | (win->map << 2);
+	dev_dbg(&s->dev, "request_window results in %pR\n", res);
+
 	mutex_unlock(&s->ops_mutex);
-	*wh = w + 1;
+	*wh = res;
 
 	return 0;
 } /* pcmcia_request_window */
@@ -863,13 +864,18 @@ EXPORT_SYMBOL(pcmcia_request_window);
 
 void pcmcia_disable_device(struct pcmcia_device *p_dev)
 {
+	int i;
+	for (i = 0; i < MAX_WIN; i++) {
+		struct resource *res = p_dev->resource[MAX_IO_WIN + i];
+		if (res->flags & WIN_FLAGS_REQ)
+			pcmcia_release_window(p_dev, res);
+	}
+
 	pcmcia_release_configuration(p_dev);
 	pcmcia_release_io(p_dev);
 	if (p_dev->_irq) {
 		free_irq(p_dev->irq, p_dev->priv);
 		p_dev->_irq = 0;
 	}
-	if (p_dev->win)
-		pcmcia_release_window(p_dev, p_dev->win);
 }
 EXPORT_SYMBOL(pcmcia_disable_device);
diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h
index e4faf4420f2..68d8bde7e8d 100644
--- a/include/pcmcia/cs.h
+++ b/include/pcmcia/cs.h
@@ -77,26 +77,19 @@ typedef struct win_req_t {
 } win_req_t;
 
 /* Attributes for RequestWindow */
-#define WIN_ADDR_SPACE		0x0001
-#define WIN_ADDR_SPACE_MEM	0x0000
-#define WIN_ADDR_SPACE_IO	0x0001
-#define WIN_MEMORY_TYPE		0x0002
-#define WIN_MEMORY_TYPE_CM	0x0000
-#define WIN_MEMORY_TYPE_AM	0x0002
-#define WIN_ENABLE		0x0004
-#define WIN_DATA_WIDTH		0x0018
-#define WIN_DATA_WIDTH_8	0x0000
-#define WIN_DATA_WIDTH_16	0x0008
-#define WIN_DATA_WIDTH_32	0x0010
-#define WIN_PAGED		0x0020
-#define WIN_SHARED		0x0040
-#define WIN_FIRST_SHARED	0x0080
-#define WIN_USE_WAIT		0x0100
-#define WIN_STRICT_ALIGN	0x0200
-#define WIN_MAP_BELOW_1MB	0x0400
-#define WIN_PREFETCH		0x0800
-#define WIN_CACHEABLE		0x1000
-#define WIN_BAR_MASK		0xe000
-#define WIN_BAR_SHIFT		13
+#define WIN_MEMORY_TYPE_CM	0x00 /* default */
+#define WIN_MEMORY_TYPE_AM	0x20 /* MAP_ATTRIB */
+#define WIN_DATA_WIDTH_8	0x00 /* default */
+#define WIN_DATA_WIDTH_16	0x02 /* MAP_16BIT */
+#define WIN_ENABLE		0x01 /* MAP_ACTIVE */
+#define WIN_USE_WAIT		0x40 /* MAP_USE_WAIT */
+
+#define WIN_FLAGS_MAP		0x63 /* MAP_ATTRIB | MAP_16BIT | MAP_ACTIVE |
+					MAP_USE_WAIT */
+#define WIN_FLAGS_REQ		0x1c /* mapping to socket->win[i]:
+					0x04 -> 0
+					0x08 -> 1
+					0x0c -> 2
+					0x10 -> 3 */
 
 #endif /* _LINUX_CS_H */
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index a2bf3a702c0..70c58ed2278 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -36,7 +36,7 @@ struct pcmcia_device;
 struct config_t;
 struct net_device;
 
-typedef unsigned long window_handle_t;
+typedef struct resource *window_handle_t;
 
 /* dynamic device IDs for PCMCIA device drivers. See
  * Documentation/pcmcia/driver.txt for details.
@@ -63,6 +63,17 @@ struct pcmcia_driver {
 int pcmcia_register_driver(struct pcmcia_driver *driver);
 void pcmcia_unregister_driver(struct pcmcia_driver *driver);
 
+/* for struct resource * array embedded in struct pcmcia_device */
+enum {
+	PCMCIA_IOPORT_0,
+	PCMCIA_IOPORT_1,
+	PCMCIA_IOMEM_0,
+	PCMCIA_IOMEM_1,
+	PCMCIA_IOMEM_2,
+	PCMCIA_IOMEM_3,
+	PCMCIA_NUM_RESOURCES,
+};
+
 struct pcmcia_device {
 	/* the socket and the device_no [for multifunction devices]
 	   uniquely define a pcmcia_device */
@@ -85,7 +96,7 @@ struct pcmcia_device {
 
 	/* device setup */
 	unsigned int		irq;
-	struct resource		*resource[MAX_IO_WIN];
+	struct resource		*resource[PCMCIA_NUM_RESOURCES];
 
 	unsigned int		io_lines; /* number of I/O lines */
 
-- 
cgit v1.2.3-70-g09d2


From cff0d6e6edac7672b3f915bb4fb59f279243b7f9 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Tue, 3 Aug 2010 00:31:48 -0700
Subject: can-raw: Fix skb_orphan_try handling

Commit fc6055a5ba31e2c14e36e8939f9bf2b6d586a7f5 (net: Introduce
skb_orphan_try()) allows an early orphan of the skb and takes care of
tx timestamping, which needs the sk-reference in the skb on driver level.
So does the can-raw socket, which has not been taken into account here.

The patch below adds a 'prevent_sk_orphan' bit in the skb tx shared info,
which fixes the problem discovered by Matthias Fuchs here:

      http://marc.info/?t=128030411900003&r=1&w=2

Even if it's not a primary tx timestamp topic it fits well into some skb
shared tx context. Or should we find a different place for the information to
protect the sk reference until it reaches the driver level?

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 +++-
 net/can/raw.c          | 4 ++++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d89876b806a..d20d9e7a9bb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -169,6 +169,7 @@ struct skb_shared_hwtstamps {
  * @software:		generate software time stamp
  * @in_progress:	device driver is going to provide
  *			hardware time stamp
+ * @prevent_sk_orphan:	make sk reference available on driver level
  * @flags:		all shared_tx flags
  *
  * These flags are attached to packets as part of the
@@ -178,7 +179,8 @@ union skb_shared_tx {
 	struct {
 		__u8	hardware:1,
 			software:1,
-			in_progress:1;
+			in_progress:1,
+			prevent_sk_orphan:1;
 	};
 	__u8 flags;
 };
diff --git a/net/can/raw.c b/net/can/raw.c
index ccfe633eec8..a10e3338f08 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -650,6 +650,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
 	err = sock_tx_timestamp(msg, sk, skb_tx(skb));
 	if (err < 0)
 		goto free_skb;
+
+	/* to be able to check the received tx sock reference in raw_rcv() */
+	skb_tx(skb)->prevent_sk_orphan = 1;
+
 	skb->dev = dev;
 	skb->sk  = sk;
 
-- 
cgit v1.2.3-70-g09d2


From 4565956dc0847985c0403c9ebbf274b6a122e1e2 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Tue, 3 Aug 2010 00:42:17 -0700
Subject: l2tp: fix export of header file for userspace

The header file l2tp.h should be exported to the installed include/linux/
tree for userspace programs.

This patch fixes compilation errors in L2TP userspace apps which want to
use the new L2TP support introduced in 2.6.35.

Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 2fc8e14cc24..9aa9bcadf86 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -276,6 +276,7 @@ ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm_para.h \
 		  $(srctree)/include/asm-$(SRCARCH)/kvm_para.h),)
 unifdef-y += kvm_para.h
 endif
+unifdef-y += l2tp.h
 unifdef-y += llc.h
 unifdef-y += loop.h
 unifdef-y += lp.h
-- 
cgit v1.2.3-70-g09d2


From 078ff546a806b2c2ab74c25c8edd4c6d4680656a Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@nokia.com>
Date: Wed, 17 Mar 2010 20:36:51 +0200
Subject: OMAP: DSS2: OMAPFB: Add support for switching memory regions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Separate the memory region from the framebuffer device a little bit.
It's now possible to select the memory region used by the framebuffer
device using the new mem_idx parameter of omapfb_plane_info. If the
mem_idx is specified it will be interpreted as an index into the
memory regions array, if it's not specified the framebuffer's index is
used instead. So by default each framebuffer keeps using its own
memory region which preserves backwards compatibility.

This allows cloning the same memory region to several overlays and yet
each overlay can be controlled independently since they can be
associated with separate framebuffer devices.

Signed-off-by: Ville Syrjälä <ville.syrjala@nokia.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@nokia.com>
---
 drivers/video/omap2/omapfb/omapfb-ioctl.c | 125 ++++++++++++++++++++++++------
 drivers/video/omap2/omapfb/omapfb-main.c  |  71 ++++++++++-------
 drivers/video/omap2/omapfb/omapfb-sysfs.c |  37 ++++++---
 drivers/video/omap2/omapfb/omapfb.h       |   9 ++-
 include/linux/omapfb.h                    |   5 +-
 5 files changed, 183 insertions(+), 64 deletions(-)

(limited to 'include')

diff --git a/drivers/video/omap2/omapfb/omapfb-ioctl.c b/drivers/video/omap2/omapfb/omapfb-ioctl.c
index 9c7361871d7..6635bd75aff 100644
--- a/drivers/video/omap2/omapfb/omapfb-ioctl.c
+++ b/drivers/video/omap2/omapfb/omapfb-ioctl.c
@@ -34,12 +34,37 @@
 
 #include "omapfb.h"
 
+static u8 get_mem_idx(struct omapfb_info *ofbi)
+{
+	if (ofbi->id == ofbi->region->id)
+		return 0;
+
+	return OMAPFB_MEM_IDX_ENABLED | ofbi->region->id;
+}
+
+static struct omapfb2_mem_region *get_mem_region(struct omapfb_info *ofbi,
+						 u8 mem_idx)
+{
+	struct omapfb2_device *fbdev = ofbi->fbdev;
+
+	if (mem_idx & OMAPFB_MEM_IDX_ENABLED)
+		mem_idx &= OMAPFB_MEM_IDX_MASK;
+	else
+		mem_idx = ofbi->id;
+
+	if (mem_idx >= fbdev->num_fbs)
+		return NULL;
+
+	return &fbdev->regions[mem_idx];
+}
+
 static int omapfb_setup_plane(struct fb_info *fbi, struct omapfb_plane_info *pi)
 {
 	struct omapfb_info *ofbi = FB2OFB(fbi);
 	struct omapfb2_device *fbdev = ofbi->fbdev;
 	struct omap_overlay *ovl;
-	struct omap_overlay_info info;
+	struct omap_overlay_info old_info;
+	struct omapfb2_mem_region *old_rg, *new_rg;
 	int r = 0;
 
 	DBG("omapfb_setup_plane\n");
@@ -52,7 +77,14 @@ static int omapfb_setup_plane(struct fb_info *fbi, struct omapfb_plane_info *pi)
 	/* XXX uses only the first overlay */
 	ovl = ofbi->overlays[0];
 
-	if (pi->enabled && !ofbi->region.size) {
+	old_rg = ofbi->region;
+	new_rg = get_mem_region(ofbi, pi->mem_idx);
+	if (!new_rg) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	if (pi->enabled && !new_rg->size) {
 		/*
 		 * This plane's memory was freed, can't enable it
 		 * until it's reallocated.
@@ -61,27 +93,60 @@ static int omapfb_setup_plane(struct fb_info *fbi, struct omapfb_plane_info *pi)
 		goto out;
 	}
 
-	ovl->get_overlay_info(ovl, &info);
+	ovl->get_overlay_info(ovl, &old_info);
 
-	info.pos_x = pi->pos_x;
-	info.pos_y = pi->pos_y;
-	info.out_width = pi->out_width;
-	info.out_height = pi->out_height;
-	info.enabled = pi->enabled;
+	if (old_rg != new_rg) {
+		ofbi->region = new_rg;
+		set_fb_fix(fbi);
+	}
 
-	r = ovl->set_overlay_info(ovl, &info);
-	if (r)
-		goto out;
+	if (pi->enabled) {
+		struct omap_overlay_info info;
+
+		r = omapfb_setup_overlay(fbi, ovl, pi->pos_x, pi->pos_y,
+			pi->out_width, pi->out_height);
+		if (r)
+			goto undo;
 
-	if (ovl->manager) {
-		r = ovl->manager->apply(ovl->manager);
+		ovl->get_overlay_info(ovl, &info);
+
+		if (!info.enabled) {
+			info.enabled = pi->enabled;
+			r = ovl->set_overlay_info(ovl, &info);
+			if (r)
+				goto undo;
+		}
+	} else {
+		struct omap_overlay_info info;
+
+		ovl->get_overlay_info(ovl, &info);
+
+		info.enabled = pi->enabled;
+		info.pos_x = pi->pos_x;
+		info.pos_y = pi->pos_y;
+		info.out_width = pi->out_width;
+		info.out_height = pi->out_height;
+
+		r = ovl->set_overlay_info(ovl, &info);
 		if (r)
-			goto out;
+			goto undo;
 	}
 
-out:
-	if (r)
-		dev_err(fbdev->dev, "setup_plane failed\n");
+	if (ovl->manager)
+		ovl->manager->apply(ovl->manager);
+
+	return 0;
+
+ undo:
+	if (old_rg != new_rg) {
+		ofbi->region = old_rg;
+		set_fb_fix(fbi);
+	}
+
+	ovl->set_overlay_info(ovl, &old_info);
+ out:
+	dev_err(fbdev->dev, "setup_plane failed\n");
+
 	return r;
 }
 
@@ -92,8 +157,8 @@ static int omapfb_query_plane(struct fb_info *fbi, struct omapfb_plane_info *pi)
 	if (ofbi->num_overlays != 1) {
 		memset(pi, 0, sizeof(*pi));
 	} else {
-		struct omap_overlay_info *ovli;
 		struct omap_overlay *ovl;
+		struct omap_overlay_info *ovli;
 
 		ovl = ofbi->overlays[0];
 		ovli = &ovl->info;
@@ -103,6 +168,7 @@ static int omapfb_query_plane(struct fb_info *fbi, struct omapfb_plane_info *pi)
 		pi->enabled = ovli->enabled;
 		pi->channel_out = 0; /* xxx */
 		pi->mirror = 0;
+		pi->mem_idx = get_mem_idx(ofbi);
 		pi->out_width = ovli->out_width;
 		pi->out_height = ovli->out_height;
 	}
@@ -123,11 +189,24 @@ static int omapfb_setup_mem(struct fb_info *fbi, struct omapfb_mem_info *mi)
 
 	size = PAGE_ALIGN(mi->size);
 
-	rg = &ofbi->region;
+	rg = ofbi->region;
 
-	for (i = 0; i < ofbi->num_overlays; i++) {
-		if (ofbi->overlays[i]->info.enabled)
-			return -EBUSY;
+	if (atomic_read(&rg->map_count))
+		return -EBUSY;
+
+	for (i = 0; i < fbdev->num_fbs; i++) {
+		struct omapfb_info *ofbi2 = FB2OFB(fbdev->fbs[i]);
+		int j;
+
+		if (ofbi2->region != rg)
+			continue;
+
+		for (j = 0; j < ofbi2->num_overlays; j++) {
+			if (ofbi2->overlays[j]->info.enabled) {
+				r = -EBUSY;
+				return r;
+			}
+		}
 	}
 
 	if (rg->size != size || rg->type != mi->type) {
@@ -146,7 +225,7 @@ static int omapfb_query_mem(struct fb_info *fbi, struct omapfb_mem_info *mi)
 	struct omapfb_info *ofbi = FB2OFB(fbi);
 	struct omapfb2_mem_region *rg;
 
-	rg = &ofbi->region;
+	rg = ofbi->region;
 	memset(mi, 0, sizeof(*mi));
 
 	mi->size = rg->size;
diff --git a/drivers/video/omap2/omapfb/omapfb-main.c b/drivers/video/omap2/omapfb/omapfb-main.c
index ccccf3d71a3..4a0588022b3 100644
--- a/drivers/video/omap2/omapfb/omapfb-main.c
+++ b/drivers/video/omap2/omapfb/omapfb-main.c
@@ -157,7 +157,7 @@ static void fill_fb(struct fb_info *fbi)
 
 static unsigned omapfb_get_vrfb_offset(const struct omapfb_info *ofbi, int rot)
 {
-	const struct vrfb *vrfb = &ofbi->region.vrfb;
+	const struct vrfb *vrfb = &ofbi->region->vrfb;
 	unsigned offset;
 
 	switch (rot) {
@@ -185,27 +185,27 @@ static unsigned omapfb_get_vrfb_offset(const struct omapfb_info *ofbi, int rot)
 static u32 omapfb_get_region_rot_paddr(const struct omapfb_info *ofbi, int rot)
 {
 	if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB) {
-		return ofbi->region.vrfb.paddr[rot]
+		return ofbi->region->vrfb.paddr[rot]
 			+ omapfb_get_vrfb_offset(ofbi, rot);
 	} else {
-		return ofbi->region.paddr;
+		return ofbi->region->paddr;
 	}
 }
 
 static u32 omapfb_get_region_paddr(const struct omapfb_info *ofbi)
 {
 	if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB)
-		return ofbi->region.vrfb.paddr[0];
+		return ofbi->region->vrfb.paddr[0];
 	else
-		return ofbi->region.paddr;
+		return ofbi->region->paddr;
 }
 
 static void __iomem *omapfb_get_region_vaddr(const struct omapfb_info *ofbi)
 {
 	if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB)
-		return ofbi->region.vrfb.vaddr[0];
+		return ofbi->region->vrfb.vaddr[0];
 	else
-		return ofbi->region.vaddr;
+		return ofbi->region->vaddr;
 }
 
 static struct omapfb_colormode omapfb_colormodes[] = {
@@ -450,7 +450,7 @@ static int check_vrfb_fb_size(unsigned long region_size,
 static int check_fb_size(const struct omapfb_info *ofbi,
 		struct fb_var_screeninfo *var)
 {
-	unsigned long max_frame_size = ofbi->region.size;
+	unsigned long max_frame_size = ofbi->region->size;
 	int bytespp = var->bits_per_pixel >> 3;
 	unsigned long line_size = var->xres_virtual * bytespp;
 
@@ -497,7 +497,7 @@ static int check_fb_size(const struct omapfb_info *ofbi,
 static int setup_vrfb_rotation(struct fb_info *fbi)
 {
 	struct omapfb_info *ofbi = FB2OFB(fbi);
-	struct omapfb2_mem_region *rg = &ofbi->region;
+	struct omapfb2_mem_region *rg = ofbi->region;
 	struct vrfb *vrfb = &rg->vrfb;
 	struct fb_var_screeninfo *var = &fbi->var;
 	struct fb_fix_screeninfo *fix = &fbi->fix;
@@ -558,9 +558,9 @@ static int setup_vrfb_rotation(struct fb_info *fbi)
 		return r;
 
 	/* used by open/write in fbmem.c */
-	fbi->screen_base = ofbi->region.vrfb.vaddr[0];
+	fbi->screen_base = ofbi->region->vrfb.vaddr[0];
 
-	fix->smem_start = ofbi->region.vrfb.paddr[0];
+	fix->smem_start = ofbi->region->vrfb.paddr[0];
 
 	switch (var->nonstd) {
 	case OMAPFB_COLOR_YUV422:
@@ -599,7 +599,7 @@ void set_fb_fix(struct fb_info *fbi)
 	struct fb_fix_screeninfo *fix = &fbi->fix;
 	struct fb_var_screeninfo *var = &fbi->var;
 	struct omapfb_info *ofbi = FB2OFB(fbi);
-	struct omapfb2_mem_region *rg = &ofbi->region;
+	struct omapfb2_mem_region *rg = ofbi->region;
 
 	DBG("set_fb_fix\n");
 
@@ -688,7 +688,7 @@ int check_fb_var(struct fb_info *fbi, struct fb_var_screeninfo *var)
 		return -EINVAL;
 
 	/* When no memory is allocated ignore the size check */
-	if (ofbi->region.size != 0 && check_fb_size(ofbi, var))
+	if (ofbi->region->size != 0 && check_fb_size(ofbi, var))
 		return -EINVAL;
 
 	if (var->xres + var->xoffset > var->xres_virtual)
@@ -856,7 +856,7 @@ static void omapfb_calc_addr(const struct omapfb_info *ofbi,
 }
 
 /* setup overlay according to the fb */
-static int omapfb_setup_overlay(struct fb_info *fbi, struct omap_overlay *ovl,
+int omapfb_setup_overlay(struct fb_info *fbi, struct omap_overlay *ovl,
 		u16 posx, u16 posy, u16 outw, u16 outh)
 {
 	int r = 0;
@@ -892,7 +892,7 @@ static int omapfb_setup_overlay(struct fb_info *fbi, struct omap_overlay *ovl,
 		yres = var->yres;
 	}
 
-	if (ofbi->region.size)
+	if (ofbi->region->size)
 		omapfb_calc_addr(ofbi, var, fix, rotation,
 				 &data_start_p, &data_start_v);
 
@@ -971,7 +971,7 @@ int omapfb_apply_changes(struct fb_info *fbi, int init)
 
 		DBG("apply_changes, fb %d, ovl %d\n", ofbi->id, ovl->id);
 
-		if (ofbi->region.size == 0) {
+		if (ofbi->region->size == 0) {
 			/* the fb is not available. disable the overlay */
 			omapfb_overlay_enable(ovl, 0);
 			if (!init && ovl->manager)
@@ -1071,16 +1071,16 @@ static int omapfb_pan_display(struct fb_var_screeninfo *var,
 
 static void mmap_user_open(struct vm_area_struct *vma)
 {
-	struct omapfb_info *ofbi = (struct omapfb_info *)vma->vm_private_data;
+	struct omapfb2_mem_region *rg = vma->vm_private_data;
 
-	atomic_inc(&ofbi->map_count);
+	atomic_inc(&rg->map_count);
 }
 
 static void mmap_user_close(struct vm_area_struct *vma)
 {
-	struct omapfb_info *ofbi = (struct omapfb_info *)vma->vm_private_data;
+	struct omapfb2_mem_region *rg = vma->vm_private_data;
 
-	atomic_dec(&ofbi->map_count);
+	atomic_dec(&rg->map_count);
 }
 
 static struct vm_operations_struct mmap_user_ops = {
@@ -1092,6 +1092,7 @@ static int omapfb_mmap(struct fb_info *fbi, struct vm_area_struct *vma)
 {
 	struct omapfb_info *ofbi = FB2OFB(fbi);
 	struct fb_fix_screeninfo *fix = &fbi->fix;
+	struct omapfb2_mem_region *rg;
 	unsigned long off;
 	unsigned long start;
 	u32 len;
@@ -1102,6 +1103,8 @@ static int omapfb_mmap(struct fb_info *fbi, struct vm_area_struct *vma)
 		return -EINVAL;
 	off = vma->vm_pgoff << PAGE_SHIFT;
 
+	rg = ofbi->region;
+
 	start = omapfb_get_region_paddr(ofbi);
 	len = fix->smem_len;
 	if (off >= len)
@@ -1117,12 +1120,12 @@ static int omapfb_mmap(struct fb_info *fbi, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_IO | VM_RESERVED;
 	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 	vma->vm_ops = &mmap_user_ops;
-	vma->vm_private_data = ofbi;
+	vma->vm_private_data = rg;
 	if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT,
 			     vma->vm_end - vma->vm_start, vma->vm_page_prot))
 		return -EAGAIN;
 	/* vm_ops.open won't be called for mmap itself. */
-	atomic_inc(&ofbi->map_count);
+	atomic_inc(&rg->map_count);
 	return 0;
 }
 
@@ -1312,7 +1315,9 @@ static void omapfb_free_fbmem(struct fb_info *fbi)
 	struct omapfb2_device *fbdev = ofbi->fbdev;
 	struct omapfb2_mem_region *rg;
 
-	rg = &ofbi->region;
+	rg = ofbi->region;
+
+	WARN_ON(atomic_read(&rg->map_count));
 
 	if (rg->paddr)
 		if (omap_vram_free(rg->paddr, rg->size))
@@ -1367,8 +1372,15 @@ static int omapfb_alloc_fbmem(struct fb_info *fbi, unsigned long size,
 	void __iomem *vaddr;
 	int r;
 
-	rg = &ofbi->region;
-	memset(rg, 0, sizeof(*rg));
+	rg = ofbi->region;
+
+	rg->paddr = 0;
+	rg->vaddr = NULL;
+	memset(&rg->vrfb, 0, sizeof rg->vrfb);
+	rg->size = 0;
+	rg->type = 0;
+	rg->alloc = false;
+	rg->map = false;
 
 	size = PAGE_ALIGN(size);
 
@@ -1621,7 +1633,7 @@ static int omapfb_allocate_all_fbs(struct omapfb2_device *fbdev)
 	for (i = 0; i < fbdev->num_fbs; i++) {
 		struct omapfb_info *ofbi = FB2OFB(fbdev->fbs[i]);
 		struct omapfb2_mem_region *rg;
-		rg = &ofbi->region;
+		rg = ofbi->region;
 
 		DBG("region%d phys %08x virt %p size=%lu\n",
 				i,
@@ -1638,7 +1650,7 @@ int omapfb_realloc_fbmem(struct fb_info *fbi, unsigned long size, int type)
 	struct omapfb_info *ofbi = FB2OFB(fbi);
 	struct omapfb2_device *fbdev = ofbi->fbdev;
 	struct omap_dss_device *display = fb2display(fbi);
-	struct omapfb2_mem_region *rg = &ofbi->region;
+	struct omapfb2_mem_region *rg = ofbi->region;
 	unsigned long old_size = rg->size;
 	unsigned long old_paddr = rg->paddr;
 	int old_type = rg->type;
@@ -1721,7 +1733,7 @@ static int omapfb_fb_init(struct omapfb2_device *fbdev, struct fb_info *fbi)
 	fbi->flags = FBINFO_FLAG_DEFAULT;
 	fbi->pseudo_palette = fbdev->pseudo_palette;
 
-	if (ofbi->region.size == 0) {
+	if (ofbi->region->size == 0) {
 		clear_fb_info(fbi);
 		return 0;
 	}
@@ -1883,6 +1895,9 @@ static int omapfb_create_framebuffers(struct omapfb2_device *fbdev)
 		ofbi->fbdev = fbdev;
 		ofbi->id = i;
 
+		ofbi->region = &fbdev->regions[i];
+		ofbi->region->id = i;
+
 		/* assign these early, so that fb alloc can use them */
 		ofbi->rotation_type = def_vrfb ? OMAP_DSS_ROT_VRFB :
 			OMAP_DSS_ROT_DMA;
diff --git a/drivers/video/omap2/omapfb/omapfb-sysfs.c b/drivers/video/omap2/omapfb/omapfb-sysfs.c
index 5179219128b..dea1aa46a7d 100644
--- a/drivers/video/omap2/omapfb/omapfb-sysfs.c
+++ b/drivers/video/omap2/omapfb/omapfb-sysfs.c
@@ -64,7 +64,7 @@ static ssize_t store_rotate_type(struct device *dev,
 	if (rot_type == ofbi->rotation_type)
 		goto out;
 
-	if (ofbi->region.size) {
+	if (ofbi->region->size) {
 		r = -EBUSY;
 		goto out;
 	}
@@ -408,7 +408,7 @@ static ssize_t show_size(struct device *dev,
 	struct fb_info *fbi = dev_get_drvdata(dev);
 	struct omapfb_info *ofbi = FB2OFB(fbi);
 
-	return snprintf(buf, PAGE_SIZE, "%lu\n", ofbi->region.size);
+	return snprintf(buf, PAGE_SIZE, "%lu\n", ofbi->region->size);
 }
 
 static ssize_t store_size(struct device *dev, struct device_attribute *attr,
@@ -416,6 +416,8 @@ static ssize_t store_size(struct device *dev, struct device_attribute *attr,
 {
 	struct fb_info *fbi = dev_get_drvdata(dev);
 	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_device *fbdev = ofbi->fbdev;
+	struct omapfb2_mem_region *rg;
 	unsigned long size;
 	int r;
 	int i;
@@ -425,15 +427,30 @@ static ssize_t store_size(struct device *dev, struct device_attribute *attr,
 	if (!lock_fb_info(fbi))
 		return -ENODEV;
 
-	for (i = 0; i < ofbi->num_overlays; i++) {
-		if (ofbi->overlays[i]->info.enabled) {
-			r = -EBUSY;
-			goto out;
+	rg = ofbi->region;
+
+	if (atomic_read(&rg->map_count)) {
+		r = -EBUSY;
+		goto out;
+	}
+
+	for (i = 0; i < fbdev->num_fbs; i++) {
+		struct omapfb_info *ofbi2 = FB2OFB(fbdev->fbs[i]);
+		int j;
+
+		if (ofbi2->region != rg)
+			continue;
+
+		for (j = 0; j < ofbi2->num_overlays; j++) {
+			if (ofbi2->overlays[j]->info.enabled) {
+				r = -EBUSY;
+				goto out;
+			}
 		}
 	}
 
-	if (size != ofbi->region.size) {
-		r = omapfb_realloc_fbmem(fbi, size, ofbi->region.type);
+	if (size != ofbi->region->size) {
+		r = omapfb_realloc_fbmem(fbi, size, ofbi->region->type);
 		if (r) {
 			dev_err(dev, "realloc fbmem failed\n");
 			goto out;
@@ -453,7 +470,7 @@ static ssize_t show_phys(struct device *dev,
 	struct fb_info *fbi = dev_get_drvdata(dev);
 	struct omapfb_info *ofbi = FB2OFB(fbi);
 
-	return snprintf(buf, PAGE_SIZE, "%0x\n", ofbi->region.paddr);
+	return snprintf(buf, PAGE_SIZE, "%0x\n", ofbi->region->paddr);
 }
 
 static ssize_t show_virt(struct device *dev,
@@ -462,7 +479,7 @@ static ssize_t show_virt(struct device *dev,
 	struct fb_info *fbi = dev_get_drvdata(dev);
 	struct omapfb_info *ofbi = FB2OFB(fbi);
 
-	return snprintf(buf, PAGE_SIZE, "%p\n", ofbi->region.vaddr);
+	return snprintf(buf, PAGE_SIZE, "%p\n", ofbi->region->vaddr);
 }
 
 static struct device_attribute omapfb_attrs[] = {
diff --git a/drivers/video/omap2/omapfb/omapfb.h b/drivers/video/omap2/omapfb/omapfb.h
index c9866be0460..02f1ba9b228 100644
--- a/drivers/video/omap2/omapfb/omapfb.h
+++ b/drivers/video/omap2/omapfb/omapfb.h
@@ -44,6 +44,7 @@ extern unsigned int omapfb_debug;
 #define OMAPFB_MAX_OVL_PER_FB 3
 
 struct omapfb2_mem_region {
+	int             id;
 	u32		paddr;
 	void __iomem	*vaddr;
 	struct vrfb	vrfb;
@@ -51,13 +52,13 @@ struct omapfb2_mem_region {
 	u8		type;		/* OMAPFB_PLANE_MEM_* */
 	bool		alloc;		/* allocated by the driver */
 	bool		map;		/* kernel mapped by the driver */
+	atomic_t	map_count;
 };
 
 /* appended to fb_info */
 struct omapfb_info {
 	int id;
-	struct omapfb2_mem_region region;
-	atomic_t map_count;
+	struct omapfb2_mem_region *region;
 	int num_overlays;
 	struct omap_overlay *overlays[OMAPFB_MAX_OVL_PER_FB];
 	struct omapfb2_device *fbdev;
@@ -76,6 +77,7 @@ struct omapfb2_device {
 
 	unsigned num_fbs;
 	struct fb_info *fbs[10];
+	struct omapfb2_mem_region regions[10];
 
 	unsigned num_displays;
 	struct omap_dss_device *displays[10];
@@ -117,6 +119,9 @@ int omapfb_update_window(struct fb_info *fbi,
 int dss_mode_to_fb_mode(enum omap_color_mode dssmode,
 			struct fb_var_screeninfo *var);
 
+int omapfb_setup_overlay(struct fb_info *fbi, struct omap_overlay *ovl,
+		u16 posx, u16 posy, u16 outw, u16 outh);
+
 /* find the display connected to this fb, if any */
 static inline struct omap_dss_device *fb2display(struct fb_info *fbi)
 {
diff --git a/include/linux/omapfb.h b/include/linux/omapfb.h
index 9bdd91486b4..0ecf7311c1a 100644
--- a/include/linux/omapfb.h
+++ b/include/linux/omapfb.h
@@ -85,6 +85,9 @@
 #define OMAPFB_MEMTYPE_SRAM		1
 #define OMAPFB_MEMTYPE_MAX		1
 
+#define OMAPFB_MEM_IDX_ENABLED	0x80
+#define OMAPFB_MEM_IDX_MASK	0x7f
+
 enum omapfb_color_format {
 	OMAPFB_COLOR_RGB565 = 0,
 	OMAPFB_COLOR_YUV422,
@@ -136,7 +139,7 @@ struct omapfb_plane_info {
 	__u8  enabled;
 	__u8  channel_out;
 	__u8  mirror;
-	__u8  reserved1;
+	__u8  mem_idx;
 	__u32 out_width;
 	__u32 out_height;
 	__u32 reserved2[12];
-- 
cgit v1.2.3-70-g09d2


From 8cadd2831bf3abc94f4530e7fdbab7bb39b6b27d Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Mon, 10 May 2010 14:26:20 -0700
Subject: timer: add on-stack deferrable timer interfaces

In some cases (for instance with kernel threads) it may be desirable to
use on-stack deferrable timers to get their power saving benefits.  Add
interfaces to support this for the IPS driver.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
---
 include/linux/timer.h | 15 +++++++++++++++
 kernel/timer.c        | 13 +++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index ea965b857a5..38cf093ef62 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -100,6 +100,13 @@ void init_timer_deferrable_key(struct timer_list *timer,
 		setup_timer_on_stack_key((timer), #timer, &__key,	\
 					 (fn), (data));			\
 	} while (0)
+#define setup_deferrable_timer_on_stack(timer, fn, data)		\
+	do {								\
+		static struct lock_class_key __key;			\
+		setup_deferrable_timer_on_stack_key((timer), #timer,	\
+						    &__key, (fn),	\
+						    (data));		\
+	} while (0)
 #else
 #define init_timer(timer)\
 	init_timer_key((timer), NULL, NULL)
@@ -111,6 +118,8 @@ void init_timer_deferrable_key(struct timer_list *timer,
 	setup_timer_key((timer), NULL, NULL, (fn), (data))
 #define setup_timer_on_stack(timer, fn, data)\
 	setup_timer_on_stack_key((timer), NULL, NULL, (fn), (data))
+#define setup_deferrable_timer_on_stack(timer, fn, data)\
+	setup_deferrable_timer_on_stack_key((timer), NULL, NULL, (fn), (data))
 #endif
 
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
@@ -150,6 +159,12 @@ static inline void setup_timer_on_stack_key(struct timer_list *timer,
 	init_timer_on_stack_key(timer, name, key);
 }
 
+extern void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
+						const char *name,
+						struct lock_class_key *key,
+						void (*function)(unsigned long),
+						unsigned long data);
+
 /**
  * timer_pending - is a timer pending?
  * @timer: the timer in question
diff --git a/kernel/timer.c b/kernel/timer.c
index ee305c8d4e1..efde11e197c 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -577,6 +577,19 @@ static void __init_timer(struct timer_list *timer,
 	lockdep_init_map(&timer->lockdep_map, name, key, 0);
 }
 
+void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
+					 const char *name,
+					 struct lock_class_key *key,
+					 void (*function)(unsigned long),
+					 unsigned long data)
+{
+	timer->function = function;
+	timer->data = data;
+	init_timer_on_stack_key(timer, name, key);
+	timer_set_deferrable(timer);
+}
+EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);
+
 /**
  * init_timer_key - initialize a timer
  * @timer: the timer to be initialized
-- 
cgit v1.2.3-70-g09d2


From aa7ffc01d254c91a36bf854d57a14049c6134c72 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Fri, 14 May 2010 15:41:14 -0700
Subject: x86 platform driver: intelligent power sharing driver

Intel Core i3/5 platforms with integrated graphics support both CPU and
GPU turbo mode.  CPU turbo mode is opportunistic: the CPU will use any
available power to increase core frequencies if thermal headroom is
available.  The GPU side is more manual however; the graphics driver
must monitor GPU power and temperature and coordinate with a core
thermal driver to take advantage of available thermal and power headroom
in the package.

The intelligent power sharing (IPS) driver is intended to coordinate
this activity by monitoring MCP (multi-chip package) temperature and
power, allowing the CPU and/or GPU to increase their power consumption,
and thus performance, when possible.  The goal is to maximize
performance within a given platform's TDP (thermal design point).

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
---
 drivers/platform/x86/Kconfig     |   10 +
 drivers/platform/x86/Makefile    |    1 +
 drivers/platform/x86/intel_ips.c | 1655 ++++++++++++++++++++++++++++++++++++++
 include/drm/i915_drm.h           |    9 +
 4 files changed, 1675 insertions(+)
 create mode 100644 drivers/platform/x86/intel_ips.c

(limited to 'include')

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index fd060016b7e..724b2ed1a3c 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -539,4 +539,14 @@ config INTEL_SCU_IPC
 	  some embedded Intel x86 platforms. This is not needed for PC-type
 	  machines.
 
+config INTEL_IPS
+	tristate "Intel Intelligent Power Sharing"
+	depends on ACPI
+	---help---
+	  Intel Calpella platforms support dynamic power sharing between the
+	  CPU and GPU, maximizing performance in a given TDP.  This driver,
+	  along with the CPU frequency and i915 drivers, provides that
+	  functionality.  If in doubt, say Y here; it will only load on
+	  supported platforms.
+
 endif # X86_PLATFORM_DEVICES
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 8770bfe7143..7318fc2c162 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -26,3 +26,4 @@ obj-$(CONFIG_TOPSTAR_LAPTOP)	+= topstar-laptop.o
 obj-$(CONFIG_ACPI_TOSHIBA)	+= toshiba_acpi.o
 obj-$(CONFIG_TOSHIBA_BT_RFKILL)	+= toshiba_bluetooth.o
 obj-$(CONFIG_INTEL_SCU_IPC)	+= intel_scu_ipc.o
+obj-$(CONFIG_INTEL_IPS)		+= intel_ips.o
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
new file mode 100644
index 00000000000..f1dce3b8372
--- /dev/null
+++ b/drivers/platform/x86/intel_ips.c
@@ -0,0 +1,1655 @@
+/*
+ * Copyright (c) 2009-2010 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Authors:
+ *	Jesse Barnes <jbarnes@virtuousgeek.org>
+ */
+
+/*
+ * Some Intel Ibex Peak based platforms support so-called "intelligent
+ * power sharing", which allows the CPU and GPU to cooperate to maximize
+ * performance within a given TDP (thermal design point).  This driver
+ * performs the coordination between the CPU and GPU, monitors thermal and
+ * power statistics in the platform, and initializes power monitoring
+ * hardware.  It also provides a few tunables to control behavior.  Its
+ * primary purpose is to safely allow CPU and GPU turbo modes to be enabled
+ * by tracking power and thermal budget; secondarily it can boost turbo
+ * performance by allocating more power or thermal budget to the CPU or GPU
+ * based on available headroom and activity.
+ *
+ * The basic algorithm is driven by a 5s moving average of temperature.  If
+ * thermal headroom is available, the CPU and/or GPU power clamps may be
+ * adjusted upwards.  If we hit the thermal ceiling or a thermal trigger,
+ * we scale back the clamp.  Aside from trigger events (when we're critically
+ * close or over our TDP) we don't adjust the clamps more than once every
+ * five seconds.
+ *
+ * The thermal device (device 31, function 6) has a set of registers that
+ * are updated by the ME firmware.  The ME should also take the clamp values
+ * written to those registers and write them to the CPU, but we currently
+ * bypass that functionality and write the CPU MSR directly.
+ *
+ * UNSUPPORTED:
+ *   - dual MCP configs
+ *
+ * TODO:
+ *   - handle CPU hotplug
+ *   - provide turbo enable/disable api
+ *   - make sure we can write turbo enable/disable reg based on MISC_EN
+ *
+ * Related documents:
+ *   - CDI 403777, 403778 - Auburndale EDS vol 1 & 2
+ *   - CDI 401376 - Ibex Peak EDS
+ *   - ref 26037, 26641 - IPS BIOS spec
+ *   - ref 26489 - Nehalem BIOS writer's guide
+ *   - ref 26921 - Ibex Peak BIOS Specification
+ */
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/tick.h>
+#include <linux/timer.h>
+#include <drm/i915_drm.h>
+#include <asm/msr.h>
+#include <asm/processor.h>
+
+#define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32
+
+/*
+ * Package level MSRs for monitor/control
+ */
+#define PLATFORM_INFO	0xce
+#define   PLATFORM_TDP		(1<<29)
+#define   PLATFORM_RATIO	(1<<28)
+
+#define IA32_MISC_ENABLE	0x1a0
+#define   IA32_MISC_TURBO_EN	(1ULL<<38)
+
+#define TURBO_POWER_CURRENT_LIMIT	0x1ac
+#define   TURBO_TDC_OVR_EN	(1UL<<31)
+#define   TURBO_TDC_MASK	(0x000000007fff0000UL)
+#define   TURBO_TDC_SHIFT	(16)
+#define   TURBO_TDP_OVR_EN	(1UL<<15)
+#define   TURBO_TDP_MASK	(0x0000000000003fffUL)
+
+/*
+ * Core/thread MSRs for monitoring
+ */
+#define IA32_PERF_CTL		0x199
+#define   IA32_PERF_TURBO_DIS	(1ULL<<32)
+
+/*
+ * Thermal PCI device regs
+ */
+#define THM_CFG_TBAR	0x10
+#define THM_CFG_TBAR_HI	0x14
+
+#define THM_TSIU	0x00
+#define THM_TSE		0x01
+#define   TSE_EN	0xb8
+#define THM_TSS		0x02
+#define THM_TSTR	0x03
+#define THM_TSTTP	0x04
+#define THM_TSCO	0x08
+#define THM_TSES	0x0c
+#define THM_TSGPEN	0x0d
+#define   TSGPEN_HOT_LOHI	(1<<1)
+#define   TSGPEN_CRIT_LOHI	(1<<2)
+#define THM_TSPC	0x0e
+#define THM_PPEC	0x10
+#define THM_CTA		0x12
+#define THM_PTA		0x14
+#define   PTA_SLOPE_MASK	(0xff00)
+#define   PTA_SLOPE_SHIFT	8
+#define   PTA_OFFSET_MASK	(0x00ff)
+#define THM_MGTA	0x16
+#define   MGTA_SLOPE_MASK	(0xff00)
+#define   MGTA_SLOPE_SHIFT	8
+#define   MGTA_OFFSET_MASK	(0x00ff)
+#define THM_TRC		0x1a
+#define   TRC_CORE2_EN	(1<<15)
+#define   TRC_THM_EN	(1<<12)
+#define   TRC_C6_WAR	(1<<8)
+#define   TRC_CORE1_EN	(1<<7)
+#define   TRC_CORE_PWR	(1<<6)
+#define   TRC_PCH_EN	(1<<5)
+#define   TRC_MCH_EN	(1<<4)
+#define   TRC_DIMM4	(1<<3)
+#define   TRC_DIMM3	(1<<2)
+#define   TRC_DIMM2	(1<<1)
+#define   TRC_DIMM1	(1<<0)
+#define THM_TES		0x20
+#define THM_TEN		0x21
+#define   TEN_UPDATE_EN	1
+#define THM_PSC		0x24
+#define   PSC_NTG	(1<<0) /* No GFX turbo support */
+#define   PSC_NTPC	(1<<1) /* No CPU turbo support */
+#define   PSC_PP_DEF	(0<<2) /* Perf policy up to driver */
+#define   PSP_PP_PC	(1<<2) /* BIOS prefers CPU perf */
+#define   PSP_PP_BAL	(2<<2) /* BIOS wants balanced perf */
+#define   PSP_PP_GFX	(3<<2) /* BIOS prefers GFX perf */
+#define   PSP_PBRT	(1<<4) /* BIOS run time support */
+#define THM_CTV1	0x30
+#define   CTV_TEMP_ERROR (1<<15)
+#define   CTV_TEMP_MASK	0x3f
+#define   CTV_
+#define THM_CTV2	0x32
+#define THM_CEC		0x34 /* undocumented power accumulator in joules */
+#define THM_AE		0x3f
+#define THM_HTS		0x50 /* 32 bits */
+#define   HTS_PCPL_MASK	(0x7fe00000)
+#define   HTS_PCPL_SHIFT 21
+#define   HTS_GPL_MASK  (0x001ff000)
+#define   HTS_GPL_SHIFT 12
+#define   HTS_PP_MASK	(0x00000c00)
+#define   HTS_PP_SHIFT  10
+#define   HTS_PP_DEF	0
+#define   HTS_PP_PROC	1
+#define   HTS_PP_BAL	2
+#define   HTS_PP_GFX	3
+#define   HTS_PCTD_DIS	(1<<9)
+#define   HTS_GTD_DIS	(1<<8)
+#define   HTS_PTL_MASK  (0x000000fe)
+#define   HTS_PTL_SHIFT 1
+#define   HTS_NVV	(1<<0)
+#define THM_HTSHI	0x54 /* 16 bits */
+#define   HTS2_PPL_MASK		(0x03ff)
+#define   HTS2_PRST_MASK	(0x3c00)
+#define   HTS2_PRST_SHIFT	10
+#define   HTS2_PRST_UNLOADED	0
+#define   HTS2_PRST_RUNNING	1
+#define   HTS2_PRST_TDISOP	2 /* turbo disabled due to power */
+#define   HTS2_PRST_TDISHT	3 /* turbo disabled due to high temp */
+#define   HTS2_PRST_TDISUSR	4 /* user disabled turbo */
+#define   HTS2_PRST_TDISPLAT	5 /* platform disabled turbo */
+#define   HTS2_PRST_TDISPM	6 /* power management disabled turbo */
+#define   HTS2_PRST_TDISERR	7 /* some kind of error disabled turbo */
+#define THM_PTL		0x56
+#define THM_MGTV	0x58
+#define   TV_MASK	0x000000000000ff00
+#define   TV_SHIFT	8
+#define THM_PTV		0x60
+#define   PTV_MASK	0x00ff
+#define THM_MMGPC	0x64
+#define THM_MPPC	0x66
+#define THM_MPCPC	0x68
+#define THM_TSPIEN	0x82
+#define   TSPIEN_AUX_LOHI	(1<<0)
+#define   TSPIEN_HOT_LOHI	(1<<1)
+#define   TSPIEN_CRIT_LOHI	(1<<2)
+#define   TSPIEN_AUX2_LOHI	(1<<3)
+#define THM_TSLOCK	0x83
+#define THM_ATR		0x84
+#define THM_TOF		0x87
+#define THM_STS		0x98
+#define   STS_PCPL_MASK		(0x7fe00000)
+#define   STS_PCPL_SHIFT	21
+#define   STS_GPL_MASK		(0x001ff000)
+#define   STS_GPL_SHIFT		12
+#define   STS_PP_MASK		(0x00000c00)
+#define   STS_PP_SHIFT		10
+#define   STS_PP_DEF		0
+#define   STS_PP_PROC		1
+#define   STS_PP_BAL		2
+#define   STS_PP_GFX		3
+#define   STS_PCTD_DIS		(1<<9)
+#define   STS_GTD_DIS		(1<<8)
+#define   STS_PTL_MASK		(0x000000fe)
+#define   STS_PTL_SHIFT		1
+#define   STS_NVV		(1<<0)
+#define THM_SEC		0x9c
+#define   SEC_ACK	(1<<0)
+#define THM_TC3		0xa4
+#define THM_TC1		0xa8
+#define   STS_PPL_MASK		(0x0003ff00)
+#define   STS_PPL_SHIFT		16
+#define THM_TC2		0xac
+#define THM_DTV		0xb0
+#define THM_ITV		0xd8
+#define   ITV_ME_SEQNO_MASK 0x000f0000 /* ME should update every ~200ms */
+#define   ITV_ME_SEQNO_SHIFT (16)
+#define   ITV_MCH_TEMP_MASK 0x0000ff00
+#define   ITV_MCH_TEMP_SHIFT (8)
+#define   ITV_PCH_TEMP_MASK 0x000000ff
+
+#define thm_readb(off) readb(ips->regmap + (off))
+#define thm_readw(off) readw(ips->regmap + (off))
+#define thm_readl(off) readl(ips->regmap + (off))
+#define thm_readq(off) readq(ips->regmap + (off))
+
+#define thm_writeb(off, val) writeb((val), ips->regmap + (off))
+#define thm_writew(off, val) writew((val), ips->regmap + (off))
+#define thm_writel(off, val) writel((val), ips->regmap + (off))
+
+static const int IPS_ADJUST_PERIOD = 5000; /* ms */
+
+/* For initial average collection */
+static const int IPS_SAMPLE_PERIOD = 200; /* ms */
+static const int IPS_SAMPLE_WINDOW = 5000; /* 5s moving window of samples */
+#define IPS_SAMPLE_COUNT (IPS_SAMPLE_WINDOW / IPS_SAMPLE_PERIOD)
+
+/* Per-SKU limits */
+struct ips_mcp_limits {
+	int cpu_family;
+	int cpu_model; /* includes extended model... */
+	int mcp_power_limit; /* mW units */
+	int core_power_limit;
+	int mch_power_limit;
+	int core_temp_limit; /* degrees C */
+	int mch_temp_limit;
+};
+
+/* Max temps are -10 degrees C to avoid PROCHOT# */
+
+struct ips_mcp_limits ips_sv_limits = {
+	.mcp_power_limit = 35000,
+	.core_power_limit = 29000,
+	.mch_power_limit = 20000,
+	.core_temp_limit = 95,
+	.mch_temp_limit = 90
+};
+
+struct ips_mcp_limits ips_lv_limits = {
+	.mcp_power_limit = 25000,
+	.core_power_limit = 21000,
+	.mch_power_limit = 13000,
+	.core_temp_limit = 95,
+	.mch_temp_limit = 90
+};
+
+struct ips_mcp_limits ips_ulv_limits = {
+	.mcp_power_limit = 18000,
+	.core_power_limit = 14000,
+	.mch_power_limit = 11000,
+	.core_temp_limit = 95,
+	.mch_temp_limit = 90
+};
+
+struct ips_driver {
+	struct pci_dev *dev;
+	void *regmap;
+	struct task_struct *monitor;
+	struct task_struct *adjust;
+	struct dentry *debug_root;
+
+	/* Average CPU core temps (all averages in .01 degrees C for precision) */
+	u16 ctv1_avg_temp;
+	u16 ctv2_avg_temp;
+	/* GMCH average */
+	u16 mch_avg_temp;
+	/* Average for the CPU (both cores?) */
+	u16 mcp_avg_temp;
+	/* Average power consumption (in mW) */
+	u32 cpu_avg_power;
+	u32 mch_avg_power;
+
+	/* Offset values */
+	u16 cta_val;
+	u16 pta_val;
+	u16 mgta_val;
+
+	/* Maximums & prefs, protected by turbo status lock */
+	spinlock_t turbo_status_lock;
+	u16 mcp_temp_limit;
+	u16 mcp_power_limit;
+	u16 core_power_limit;
+	u16 mch_power_limit;
+	bool cpu_turbo_enabled;
+	bool __cpu_turbo_on;
+	bool gpu_turbo_enabled;
+	bool __gpu_turbo_on;
+	bool gpu_preferred;
+	bool poll_turbo_status;
+	bool second_cpu;
+	struct ips_mcp_limits *limits;
+
+	/* Optional MCH interfaces for if i915 is in use */
+	unsigned long (*read_mch_val)(void);
+	bool (*gpu_raise)(void);
+	bool (*gpu_lower)(void);
+	bool (*gpu_busy)(void);
+	bool (*gpu_turbo_disable)(void);
+
+	/* For restoration at unload */
+	u64 orig_turbo_limit;
+	u64 orig_turbo_ratios;
+};
+
+/**
+ * ips_cpu_busy - is CPU busy?
+ * @ips: IPS driver struct
+ *
+ * Check CPU for load to see whether we should increase its thermal budget.
+ *
+ * RETURNS:
+ * True if the CPU could use more power, false otherwise.
+ */
+static bool ips_cpu_busy(struct ips_driver *ips)
+{
+	if ((avenrun[0] >> FSHIFT) > 1)
+		return true;
+
+	return false;
+}
+
+/**
+ * ips_cpu_raise - raise CPU power clamp
+ * @ips: IPS driver struct
+ *
+ * Raise the CPU power clamp by %IPS_CPU_STEP, in accordance with TDP for
+ * this platform.
+ *
+ * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR upwards (as
+ * long as we haven't hit the TDP limit for the SKU).
+ */
+static void ips_cpu_raise(struct ips_driver *ips)
+{
+	u64 turbo_override;
+	u16 cur_tdp_limit, new_tdp_limit;
+
+	if (!ips->cpu_turbo_enabled)
+		return;
+
+	rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+
+	cur_tdp_limit = turbo_override & TURBO_TDP_MASK;
+	new_tdp_limit = cur_tdp_limit + 8; /* 1W increase */
+
+	/* Clamp to SKU TDP limit; keep the current value if we'd exceed it */
+	if (((new_tdp_limit * 10) / 8) > ips->core_power_limit)
+		new_tdp_limit = cur_tdp_limit;
+
+	thm_writew(THM_MPCPC, (new_tdp_limit * 10) / 8);
+	/* Enable both the TDC and the TDP override before changing the limit */
+	turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
+	wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+
+	turbo_override &= ~TURBO_TDP_MASK;
+	turbo_override |= new_tdp_limit;
+
+	wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+}
+
+/**
+ * ips_cpu_lower - lower CPU power clamp
+ * @ips: IPS driver struct
+ *
+ * Lower CPU power clamp by %IPS_CPU_STEP if possible.
+ *
+ * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR down, going
+ * as low as the platform limits will allow (though we could go lower there
+ * wouldn't be much point).
+ */
+static void ips_cpu_lower(struct ips_driver *ips)
+{
+	u64 turbo_override;
+	u16 cur_limit, new_limit;
+
+	rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+
+	cur_limit = turbo_override & TURBO_TDP_MASK;
+	new_limit = cur_limit - 8; /* 1W decrease */
+
+	/* Don't go below the original limit; both sides are raw MSR fields */
+	if (new_limit < (ips->orig_turbo_limit & TURBO_TDP_MASK))
+		new_limit = ips->orig_turbo_limit & TURBO_TDP_MASK;
+
+	thm_writew(THM_MPCPC, (new_limit * 10) / 8);
+	/* Enable both the TDC and the TDP override before changing the limit */
+	turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
+	wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+
+	turbo_override &= ~TURBO_TDP_MASK;
+	turbo_override |= new_limit;
+
+	wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+}
+
+/**
+ * do_enable_cpu_turbo - internal turbo enable function
+ * @data: unused
+ *
+ * Internal function for actually updating MSRs.  When we enable/disable
+ * turbo, we need to do it on each CPU; this function is the one called
+ * by on_each_cpu() when needed.
+ */
+static void do_enable_cpu_turbo(void *data)
+{
+	u64 perf_ctl;
+
+	rdmsrl(IA32_PERF_CTL, perf_ctl);
+	if (perf_ctl & IA32_PERF_TURBO_DIS) {
+		perf_ctl &= ~IA32_PERF_TURBO_DIS;
+		wrmsrl(IA32_PERF_CTL, perf_ctl);
+	}
+}
+
+/**
+ * ips_enable_cpu_turbo - enable turbo mode on all CPUs
+ * @ips: IPS driver struct
+ *
+ * Enable turbo mode by clearing the disable bit in IA32_PERF_CTL on
+ * all logical threads.
+ */
+static void ips_enable_cpu_turbo(struct ips_driver *ips)
+{
+	/* Already on, no need to mess with MSRs */
+	if (ips->__cpu_turbo_on)
+		return;
+
+	on_each_cpu(do_enable_cpu_turbo, ips, 1);
+
+	ips->__cpu_turbo_on = true;
+}
+
+/**
+ * do_disable_cpu_turbo - internal turbo disable function
+ * @data: unused
+ *
+ * Internal function for actually updating MSRs.  When we enable/disable
+ * turbo, we need to do it on each CPU; this function is the one called
+ * by on_each_cpu() when needed.
+ */
+static void do_disable_cpu_turbo(void *data)
+{
+	u64 perf_ctl;
+
+	rdmsrl(IA32_PERF_CTL, perf_ctl);
+	if (!(perf_ctl & IA32_PERF_TURBO_DIS)) {
+		perf_ctl |= IA32_PERF_TURBO_DIS;
+		wrmsrl(IA32_PERF_CTL, perf_ctl);
+	}
+}
+
+/**
+ * ips_disable_cpu_turbo - disable turbo mode on all CPUs
+ * @ips: IPS driver struct
+ *
+ * Disable turbo mode by setting the disable bit in IA32_PERF_CTL on
+ * all logical threads.
+ */
+static void ips_disable_cpu_turbo(struct ips_driver *ips)
+{
+	/* Already off, leave it */
+	if (!ips->__cpu_turbo_on)
+		return;
+
+	on_each_cpu(do_disable_cpu_turbo, ips, 1);
+
+	ips->__cpu_turbo_on = false;
+}
+
+/**
+ * ips_gpu_busy - is GPU busy?
+ * @ips: IPS driver struct
+ *
+ * Check GPU for load to see whether we should increase its thermal budget.
+ * We need to call into the i915 driver in this case.
+ *
+ * RETURNS:
+ * True if the GPU could use more power, false otherwise.
+ */
+static bool ips_gpu_busy(struct ips_driver *ips)
+{
+	return false;
+}
+
+/**
+ * ips_gpu_raise - raise GPU power clamp
+ * @ips: IPS driver struct
+ *
+ * Raise the GPU frequency/power if possible.  We need to call into the
+ * i915 driver in this case.
+ */
+static void ips_gpu_raise(struct ips_driver *ips)
+{
+	if (!ips->gpu_turbo_enabled)
+		return;
+
+	if (!ips->gpu_raise())
+		ips->gpu_turbo_enabled = false;
+
+	return;
+}
+
+/**
+ * ips_gpu_lower - lower GPU power clamp
+ * @ips: IPS driver struct
+ *
+ * Lower GPU frequency/power if possible.  Need to call i915.
+ */
+static void ips_gpu_lower(struct ips_driver *ips)
+{
+	if (!ips->gpu_turbo_enabled)
+		return;
+
+	if (!ips->gpu_lower())
+		ips->gpu_turbo_enabled = false;
+
+	return;
+}
+
+/**
+ * ips_enable_gpu_turbo - notify the gfx driver turbo is available
+ * @ips: IPS driver struct
+ *
+ * Call into the graphics driver indicating that it can safely use
+ * turbo mode.
+ */
+static void ips_enable_gpu_turbo(struct ips_driver *ips)
+{
+	if (ips->__gpu_turbo_on)
+		return;
+	ips->__gpu_turbo_on = true;
+}
+
+/**
+ * ips_disable_gpu_turbo - notify the gfx driver to disable turbo mode
+ * @ips: IPS driver struct
+ *
+ * Request that the graphics driver disable turbo mode.
+ */
+static void ips_disable_gpu_turbo(struct ips_driver *ips)
+{
+	/* Avoid calling i915 if turbo is already disabled */
+	if (!ips->__gpu_turbo_on)
+		return;
+
+	if (!ips->gpu_turbo_disable())
+		dev_err(&ips->dev->dev, "failed to disable graphis turbo\n");
+	else
+		ips->__gpu_turbo_on = false;
+}
+
+/**
+ * mcp_exceeded - check whether we're outside our thermal & power limits
+ * @ips: IPS driver struct
+ *
+ * Check whether the MCP is over its thermal or power budget.
+ */
+static bool mcp_exceeded(struct ips_driver *ips)
+{
+	unsigned long flags;
+	bool ret = false;
+
+	spin_lock_irqsave(&ips->turbo_status_lock, flags);
+	if (ips->mcp_avg_temp > (ips->mcp_temp_limit * 100))
+		ret = true;
+	if (ips->cpu_avg_power + ips->mch_avg_power > ips->mcp_power_limit)
+		ret = true;
+	spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
+
+	if (ret)
+		dev_warn(&ips->dev->dev,
+			 "MCP power or thermal limit exceeded\n");
+
+	return ret;
+}
+
+/**
+ * cpu_exceeded - check whether a CPU core is outside its limits
+ * @ips: IPS driver struct
+ * @cpu: CPU number to check
+ *
+ * Check a given CPU's average temp or power is over its limit.
+ */
+static bool cpu_exceeded(struct ips_driver *ips, int cpu)
+{
+	unsigned long flags;
+	int avg;
+	bool ret = false;
+
+	spin_lock_irqsave(&ips->turbo_status_lock, flags);
+	avg = cpu ? ips->ctv2_avg_temp : ips->ctv1_avg_temp;
+	if (avg > (ips->limits->core_temp_limit * 100))
+		ret = true;
+	if (ips->cpu_avg_power > ips->core_power_limit)
+		ret = true;
+	spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
+
+	if (ret)
+		dev_warn(&ips->dev->dev,
+			 "CPU power or thermal limit exceeded\n");
+
+	return ret;
+}
+
+/**
+ * mch_exceeded - check whether the GPU is over budget
+ * @ips: IPS driver struct
+ *
+ * Check the MCH temp & power against their maximums.
+ */
+static bool mch_exceeded(struct ips_driver *ips)
+{
+	unsigned long flags;
+	bool ret = false;
+
+	spin_lock_irqsave(&ips->turbo_status_lock, flags);
+	if (ips->mch_avg_temp > (ips->limits->mch_temp_limit * 100))
+		ret = true;
+	spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
+
+	return ret;
+}
+
+/**
+ * update_turbo_limits - get various limits & settings from regs
+ * @ips: IPS driver struct
+ *
+ * Update the IPS power & temp limits, along with turbo enable flags,
+ * based on latest register contents.
+ *
+ * Used at init time and for runtime BIOS support, which requires polling
+ * the regs for updates (as a result of AC->DC transition for example).
+ *
+ * LOCKING:
+ * Caller must hold turbo_status_lock (outside of init)
+ */
+static void update_turbo_limits(struct ips_driver *ips)
+{
+	u32 hts = thm_readl(THM_HTS);
+
+	ips->cpu_turbo_enabled = !(hts & HTS_PCTD_DIS);
+	ips->gpu_turbo_enabled = !(hts & HTS_GTD_DIS);
+	ips->core_power_limit = thm_readw(THM_MPCPC);
+	ips->mch_power_limit = thm_readw(THM_MMGPC);
+	ips->mcp_temp_limit = thm_readw(THM_PTL);
+	ips->mcp_power_limit = thm_readw(THM_MPPC);
+
+	/* Ignore BIOS CPU vs GPU pref */
+}
+
+/**
+ * ips_adjust - adjust power clamp based on thermal state
+ * @data: ips driver structure
+ *
+ * Wake up every 5s or so and check whether we should adjust the power clamp.
+ * Check CPU and GPU load to determine which needs adjustment.  There are
+ * several things to consider here:
+ *   - do we need to adjust up or down?
+ *   - is CPU busy?
+ *   - is GPU busy?
+ *   - is CPU in turbo?
+ *   - is GPU in turbo?
+ *   - is CPU or GPU preferred? (CPU is default)
+ *
+ * So, given the above, we do the following:
+ *   - up (TDP available)
+ *     - CPU not busy, GPU not busy - nothing
+ *     - CPU busy, GPU not busy - adjust CPU up
+ *     - CPU not busy, GPU busy - adjust GPU up
+ *     - CPU busy, GPU busy - adjust preferred unit up, taking headroom from
+ *       non-preferred unit if necessary
+ *   - down (at TDP limit)
+ *     - adjust both CPU and GPU down if possible
+ *
+		cpu+ gpu+	cpu+gpu-	cpu-gpu+	cpu-gpu-
+cpu < gpu <	cpu+gpu+	cpu+		gpu+		nothing
+cpu < gpu >=	cpu+gpu-(mcp<)	cpu+gpu-(mcp<)	gpu-		gpu-
+cpu >= gpu <	cpu-gpu+(mcp<)	cpu-		cpu-gpu+(mcp<)	cpu-
+cpu >= gpu >=	cpu-gpu-	cpu-gpu-	cpu-gpu-	cpu-gpu-
+ *
+ */
+static int ips_adjust(void *data)
+{
+	struct ips_driver *ips = data;
+	unsigned long flags;
+
+	dev_dbg(&ips->dev->dev, "starting ips-adjust thread\n");
+
+	/*
+	 * Adjust CPU and GPU clamps every 5s if needed.  Doing it more
+	 * often isn't recommended due to ME interaction.
+	 */
+	do {
+		bool cpu_busy = ips_cpu_busy(ips);
+		bool gpu_busy = ips_gpu_busy(ips);
+
+		spin_lock_irqsave(&ips->turbo_status_lock, flags);
+		if (ips->poll_turbo_status)
+			update_turbo_limits(ips);
+		spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
+
+		/* Update turbo status if necessary */
+		if (ips->cpu_turbo_enabled)
+			ips_enable_cpu_turbo(ips);
+		else
+			ips_disable_cpu_turbo(ips);
+
+		if (ips->gpu_turbo_enabled)
+			ips_enable_gpu_turbo(ips);
+		else
+			ips_disable_gpu_turbo(ips);
+
+		/* MCP is over its thermal/power budget, crank both clamps down */
+		if (mcp_exceeded(ips)) {
+			ips_cpu_lower(ips);
+			ips_gpu_lower(ips);
+			goto sleep;
+		}
+		/* Within MCP budget: raise a busy unit under its limit, else lower */
+		if (!cpu_exceeded(ips, 0) && cpu_busy)
+			ips_cpu_raise(ips);
+		else
+			ips_cpu_lower(ips);
+
+		if (!mch_exceeded(ips) && gpu_busy)
+			ips_gpu_raise(ips);
+		else
+			ips_gpu_lower(ips);
+
+sleep:
+		schedule_timeout_interruptible(msecs_to_jiffies(IPS_ADJUST_PERIOD));
+	} while (!kthread_should_stop());
+
+	dev_dbg(&ips->dev->dev, "ips-adjust thread stopped\n");
+
+	return 0;
+}
+
+/*
+ * Helpers for reading out temp/power values and calculating their
+ * averages for the decision making and monitoring functions.
+ */
+
+static u16 calc_avg_temp(struct ips_driver *ips, u16 *array)
+{
+	u64 total = 0;
+	int i;
+	u16 avg;
+
+	for (i = 0; i < IPS_SAMPLE_COUNT; i++)
+		total += (u64)(array[i] * 100);
+
+	do_div(total, IPS_SAMPLE_COUNT);
+
+	avg = (u16)total;
+
+	return avg;
+}
+
+static u16 read_mgtv(struct ips_driver *ips)
+{
+	u16 ret;
+	u64 slope, offset;
+	u64 val;
+
+	val = thm_readq(THM_MGTV);
+	val = (val & TV_MASK) >> TV_SHIFT;
+
+	slope = offset = thm_readw(THM_MGTA);
+	slope = (slope & MGTA_SLOPE_MASK) >> MGTA_SLOPE_SHIFT;
+	offset = offset & MGTA_OFFSET_MASK;
+
+	ret = ((val * slope + 0x40) >> 7) + offset;
+
+
+	return ret;
+}
+
+static u16 read_ptv(struct ips_driver *ips)
+{
+	u16 val, slope, offset;
+
+	slope = (ips->pta_val & PTA_SLOPE_MASK) >> PTA_SLOPE_SHIFT;
+	offset = ips->pta_val & PTA_OFFSET_MASK;
+
+	val = thm_readw(THM_PTV) & PTV_MASK;
+
+	return val;
+}
+
+static u16 read_ctv(struct ips_driver *ips, int cpu)
+{
+	int reg = cpu ? THM_CTV2 : THM_CTV1;
+	u16 val;
+
+	val = thm_readw(reg);
+	if (!(val & CTV_TEMP_ERROR))
+		val = (val) >> 6; /* discard fractional component */
+	else
+		val = 0;
+
+	return val;
+}
+
+static u32 get_cpu_power(struct ips_driver *ips, u32 *last, int period)
+{
+	u32 val;
+	u32 ret;
+
+	/*
+	 * CEC is in joules/65535.  Take difference over time to
+	 * get watts.
+	 */
+	val = thm_readl(THM_CEC);
+
+	/* period is in ms and we want mW */
+	ret = (((val - *last) * 1000) / period);
+	ret = (ret * 1000) / 65535;
+	*last = val;
+
+	return ret;
+}
+
+static const u16 temp_decay_factor = 2;
+static u16 update_average_temp(u16 avg, u16 val)
+{
+	u16 ret;
+
+	/* Multiply by 100 for extra precision */
+	ret = (val * 100 / temp_decay_factor) +
+		(((temp_decay_factor - 1) * avg) / temp_decay_factor);
+	return ret;
+}
+
+static const u16 power_decay_factor = 2;
+static u32 update_average_power(u32 avg, u32 val)
+{
+	u32 ret;
+	/* Decaying average; return u32 so values above 65535 aren't truncated */
+	ret = (val / power_decay_factor) +
+		(((power_decay_factor - 1) * avg) / power_decay_factor);
+
+	return ret;
+}
+
+static u32 calc_avg_power(struct ips_driver *ips, u32 *array)
+{
+	u64 total = 0;
+	u32 avg;
+	int i;
+
+	for (i = 0; i < IPS_SAMPLE_COUNT; i++)
+		total += array[i];
+
+	do_div(total, IPS_SAMPLE_COUNT);
+	avg = (u32)total;
+
+	return avg;
+}
+
+static void monitor_timeout(unsigned long arg)
+{
+	wake_up_process((struct task_struct *)arg);
+}
+
+/**
+ * ips_monitor - temp/power monitoring thread
+ * @data: ips driver structure
+ *
+ * This is the main function for the IPS driver.  It monitors power and
+ * temperature in the MCP and adjusts CPU and GPU power clamps accordingly.
+ *
+ * We keep a 5s moving average of power consumption and temperature.  Using
+ * that data, along with CPU vs GPU preference, we adjust the power clamps
+ * up or down.
+ */
+static int ips_monitor(void *data)
+{
+	struct ips_driver *ips = data;
+	struct timer_list timer;
+	unsigned long seqno_timestamp, expire, last_msecs, last_sample_period;
+	int i;
+	u32 *cpu_samples = NULL, *mchp_samples = NULL, old_cpu_power;
+	u16 *mcp_samples = NULL, *ctv1_samples = NULL, *ctv2_samples = NULL,
+		*mch_samples = NULL;
+	u8 cur_seqno, last_seqno;
+
+	/* Scratch buffers, only needed while building the initial average */
+	mcp_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
+	ctv1_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
+	ctv2_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
+	mch_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
+	cpu_samples = kzalloc(sizeof(u32) * IPS_SAMPLE_COUNT, GFP_KERNEL);
+	mchp_samples = kzalloc(sizeof(u32) * IPS_SAMPLE_COUNT, GFP_KERNEL);
+	if (!mcp_samples || !ctv1_samples || !ctv2_samples || !mch_samples ||
+	    !cpu_samples || !mchp_samples) {
+		dev_err(&ips->dev->dev,
+			"failed to allocate sample array, ips disabled\n");
+		kfree(mcp_samples);
+		kfree(ctv1_samples);
+		kfree(ctv2_samples);
+		kfree(mch_samples);
+		kfree(cpu_samples);
+		kfree(mchp_samples);
+		/*
+		 * Leave ips->adjust alone: ips_remove() stops it, and
+		 * stopping it here as well would stop the same task twice.
+		 */
+		return -ENOMEM;
+	}
+
+	last_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
+		ITV_ME_SEQNO_SHIFT;
+	seqno_timestamp = get_jiffies_64();
+
+	/*
+	 * NOTE(review): seeded with CEC / 65535 here but with the raw CEC
+	 * value before the main loop below -- confirm which is intended.
+	 */
+	old_cpu_power = thm_readl(THM_CEC) / 65535;
+	schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
+
+	/* Collect an initial average */
+	for (i = 0; i < IPS_SAMPLE_COUNT; i++) {
+		mcp_samples[i] = read_ptv(ips);
+		ctv1_samples[i] = read_ctv(ips, 0);
+		ctv2_samples[i] = read_ctv(ips, 1);
+		mch_samples[i] = read_mgtv(ips);
+		cpu_samples[i] = get_cpu_power(ips, &old_cpu_power,
+					       IPS_SAMPLE_PERIOD);
+		/* MCH power is only sampled when the i915 hook was found */
+		if (ips->read_mch_val)
+			mchp_samples[i] = ips->read_mch_val();
+
+		schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
+		if (kthread_should_stop())
+			break;
+	}
+
+	/* Reduce the samples to the initial averages, then free the buffers */
+	ips->mcp_avg_temp = calc_avg_temp(ips, mcp_samples);
+	ips->ctv1_avg_temp = calc_avg_temp(ips, ctv1_samples);
+	ips->ctv2_avg_temp = calc_avg_temp(ips, ctv2_samples);
+	ips->mch_avg_temp = calc_avg_temp(ips, mch_samples);
+	ips->cpu_avg_power = calc_avg_power(ips, cpu_samples);
+	ips->mch_avg_power = calc_avg_power(ips, mchp_samples);
+	kfree(mcp_samples);
+	kfree(ctv1_samples);
+	kfree(ctv2_samples);
+	kfree(mch_samples);
+	kfree(cpu_samples);
+	kfree(mchp_samples);
+
+	/* Start the adjustment thread now that we have data */
+	wake_up_process(ips->adjust);
+
+	/*
+	 * Ok, now we have an initial avg.  From here on out, we track the
+	 * running avg using a decaying average calculation.  This allows
+	 * us to reduce the sample frequency if the CPU and GPU are idle.
+	 */
+	old_cpu_power = thm_readl(THM_CEC);
+	schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
+	last_sample_period = IPS_SAMPLE_PERIOD;
+
+	setup_deferrable_timer_on_stack(&timer, monitor_timeout,
+					(unsigned long)current);
+	do {
+		u32 cpu_val, mch_val;
+		u16 val;
+
+		/* MCP itself */
+		val = read_ptv(ips);
+		ips->mcp_avg_temp = update_average_temp(ips->mcp_avg_temp, val);
+
+		/* Processor 0 */
+		val = read_ctv(ips, 0);
+		ips->ctv1_avg_temp =
+			update_average_temp(ips->ctv1_avg_temp, val);
+		/* Power */
+		cpu_val = get_cpu_power(ips, &old_cpu_power,
+					last_sample_period);
+		ips->cpu_avg_power =
+			update_average_power(ips->cpu_avg_power, cpu_val);
+
+		if (ips->second_cpu) {
+			/* Processor 1 */
+			val = read_ctv(ips, 1);
+			ips->ctv2_avg_temp =
+				update_average_temp(ips->ctv2_avg_temp, val);
+		}
+
+		/* MCH */
+		val = read_mgtv(ips);
+		ips->mch_avg_temp = update_average_temp(ips->mch_avg_temp, val);
+		/* Power */
+		if (ips->read_mch_val) {
+			mch_val = ips->read_mch_val();
+			ips->mch_avg_power =
+				update_average_power(ips->mch_avg_power,
+						     mch_val);
+		}
+
+		/*
+		 * Make sure ME is updating thermal regs.
+		 * Note:
+		 * If it's been more than a second since the last update,
+		 * the ME is probably hung.
+		 */
+		cur_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
+			ITV_ME_SEQNO_SHIFT;
+		if (cur_seqno == last_seqno &&
+		    time_after(jiffies, seqno_timestamp + HZ)) {
+			dev_warn(&ips->dev->dev, "ME failed to update for more than 1s, likely hung\n");
+		} else {
+			seqno_timestamp = get_jiffies_64();
+			last_seqno = cur_seqno;
+		}
+
+		last_msecs = jiffies_to_msecs(jiffies);
+		expire = jiffies + msecs_to_jiffies(IPS_SAMPLE_PERIOD);
+
+		/* Arm the timer for one sample period ahead and go to sleep */
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+		mod_timer(&timer, expire);
+		schedule();
+
+		/* Calculate actual sample period for power averaging */
+		last_sample_period = jiffies_to_msecs(jiffies) - last_msecs;
+		if (!last_sample_period)
+			last_sample_period = 1;
+	} while (!kthread_should_stop());
+
+	/* Asked to stop: tear down the on-stack timer before returning */
+	del_timer_sync(&timer);
+	destroy_timer_on_stack(&timer);
+
+	dev_dbg(&ips->dev->dev, "ips-monitor thread stopped\n");
+
+	return 0;
+}
+
+#if 0	/* debug-only register dump helpers, compiled out */
+#define THM_DUMPW(reg) \
+	do { \
+	u16 val = thm_readw(reg); \
+	dev_dbg(&ips->dev->dev, #reg ": 0x%04x\n", val); \
+	} while (0)
+#define THM_DUMPL(reg) \
+	do { \
+	u32 val = thm_readl(reg); \
+	dev_dbg(&ips->dev->dev, #reg ": 0x%08x\n", val); \
+	} while (0)
+#define THM_DUMPQ(reg) \
+	do { \
+	unsigned long long val = thm_readq(reg); /* matches %llx below */ \
+	dev_dbg(&ips->dev->dev, #reg ": 0x%016llx\n", val); \
+	} while (0)
+
+static void dump_thermal_info(struct ips_driver *ips)
+{
+	u16 ptl;
+
+	ptl = thm_readw(THM_PTL);
+	dev_dbg(&ips->dev->dev, "Processor temp limit: %d\n", ptl);
+
+	THM_DUMPW(THM_CTA);
+	THM_DUMPW(THM_TRC);
+	THM_DUMPW(THM_CTV1);
+	THM_DUMPL(THM_STS);
+	THM_DUMPW(THM_PTV);
+	THM_DUMPQ(THM_MGTV);
+}
+#endif
+
+/**
+ * ips_irq_handler - handle temperature triggers and other IPS events
+ * @irq: irq number
+ * @arg: ips driver structure (the dev_id given to request_irq())
+ *
+ * Returns IRQ_NONE if neither TSES nor TES is set.  Otherwise logs both,
+ * copies new limits from THM_STS/THM_TC1 into @arg when STS_NVV is set, and
+ * writes the event bits back.  Thermal trips are only logged here.
+ */
+static irqreturn_t ips_irq_handler(int irq, void *arg)
+{
+	struct ips_driver *ips = arg;
+	u8 tses = thm_readb(THM_TSES);
+	u8 tes = thm_readb(THM_TES);
+
+	if (!tses && !tes)
+		return IRQ_NONE; /* nothing pending in either register */
+
+	dev_info(&ips->dev->dev, "TSES: 0x%02x\n", tses);
+	dev_info(&ips->dev->dev, "TES: 0x%02x\n", tes);
+
+	/* STS update from EC? */
+	if (tes & 1) {
+		u32 sts, tc1;
+
+		sts = thm_readl(THM_STS);
+		tc1 = thm_readl(THM_TC1);
+
+		if (sts & STS_NVV) {
+			spin_lock(&ips->turbo_status_lock);
+			ips->core_power_limit = (sts & STS_PCPL_MASK) >>
+				STS_PCPL_SHIFT;
+			ips->mch_power_limit = (sts & STS_GPL_MASK) >>
+				STS_GPL_SHIFT;
+			/* ignore EC CPU vs GPU pref */
+			ips->cpu_turbo_enabled = !(sts & STS_PCTD_DIS);
+			ips->gpu_turbo_enabled = !(sts & STS_GTD_DIS);
+			ips->mcp_temp_limit = (sts & STS_PTL_MASK) >>
+				STS_PTL_SHIFT;
+			ips->mcp_power_limit = (tc1 & STS_PPL_MASK) >>
+				STS_PPL_SHIFT;
+			spin_unlock(&ips->turbo_status_lock);
+
+			thm_writeb(THM_SEC, SEC_ACK);
+		}
+		thm_writeb(THM_TES, tes); /* presumably clears them */
+	}
+
+	/* Thermal trip */
+	if (tses) {
+		dev_warn(&ips->dev->dev,
+			 "thermal trip occurred, tses: 0x%04x\n", tses);
+		thm_writeb(THM_TSES, tses); /* presumably clears them */
+	}
+
+	return IRQ_HANDLED;
+}
+
+#ifndef CONFIG_DEBUG_FS
+static void ips_debugfs_init(struct ips_driver *ips) { } /* no debugfs */
+static void ips_debugfs_cleanup(struct ips_driver *ips) { } /* no debugfs */
+#else
+
+/* Expose current state and limits in debugfs if possible */
+
+struct ips_debugfs_node {
+	struct ips_driver *ips;	/* filled in by ips_debugfs_init() */
+	char *name;		/* debugfs file name */
+	int (*show)(struct seq_file *m, void *data); /* for single_open() */
+};
+
+static int show_cpu_temp(struct seq_file *m, void *data)
+{
+	struct ips_driver *ips = m->private; /* ips from ips_debugfs_open() */
+
+	seq_printf(m, "%d.%02d\n", ips->ctv1_avg_temp / 100,
+		   ips->ctv1_avg_temp % 100); /* <avg / 100>.<avg % 100> */
+
+	return 0;
+}
+
+static int show_cpu_power(struct seq_file *m, void *data)
+{
+	struct ips_driver *ips = m->private; /* ips from ips_debugfs_open() */
+
+	seq_printf(m, "%dmW\n", ips->cpu_avg_power); /* running CPU average */
+
+	return 0;
+}
+
+static int show_cpu_clamp(struct seq_file *m, void *data)
+{
+	u64 turbo_override;
+	int tdp, tdc;
+
+	rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+
+	tdp = (int)(turbo_override & TURBO_TDP_MASK);
+	tdc = (int)((turbo_override & TURBO_TDC_MASK) >> TURBO_TDC_SHIFT);
+
+	/* Convert to .1W/A units (raw fields count eighths, per the / 8) */
+	tdp = tdp * 10 / 8;
+	tdc = tdc * 10 / 8;
+
+	/* Watts Amperes, each printed with one decimal place */
+	seq_printf(m, "%d.%dW %d.%dA\n", tdp / 10, tdp % 10,
+		   tdc / 10, tdc % 10);
+
+	return 0;
+}
+
+static int show_mch_temp(struct seq_file *m, void *data)
+{
+	struct ips_driver *ips = m->private; /* ips from ips_debugfs_open() */
+
+	seq_printf(m, "%d.%02d\n", ips->mch_avg_temp / 100,
+		   ips->mch_avg_temp % 100); /* <avg / 100>.<avg % 100> */
+
+	return 0;
+}
+
+static int show_mch_power(struct seq_file *m, void *data)
+{
+	struct ips_driver *ips = m->private; /* ips from ips_debugfs_open() */
+
+	seq_printf(m, "%dmW\n", ips->mch_avg_power); /* running MCH average */
+
+	return 0;
+}
+
+static struct ips_debugfs_node ips_debug_files[] = {
+	{ NULL, "cpu_temp", show_cpu_temp },	/* .ips set at init time */
+	{ NULL, "cpu_power", show_cpu_power },
+	{ NULL, "cpu_clamp", show_cpu_clamp },
+	{ NULL, "mch_temp", show_mch_temp },
+	{ NULL, "mch_power", show_mch_power },
+};
+
+static int ips_debugfs_open(struct inode *inode, struct file *file)
+{
+	struct ips_debugfs_node *node = inode->i_private; /* table entry */
+
+	return single_open(file, node->show, node->ips); /* ips is show data */
+}
+
+static const struct file_operations ips_debugfs_ops = {
+	.owner = THIS_MODULE,
+	.open = ips_debugfs_open,	/* single_open() on node->show */
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,	/* pairs with single_open() */
+};
+
+static void ips_debugfs_cleanup(struct ips_driver *ips)
+{
+	if (ips->debug_root)
+		debugfs_remove_recursive(ips->debug_root);
+	ips->debug_root = NULL;	/* makes a second cleanup call a no-op */
+}
+
+static void ips_debugfs_init(struct ips_driver *ips)
+{
+	int i;
+
+	ips->debug_root = debugfs_create_dir("ips", NULL);
+	if (!ips->debug_root) {
+		/* failure is detected as NULL, so there is no errno to print */
+		dev_err(&ips->dev->dev,
+			"failed to create debugfs entries\n");
+		return;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(ips_debug_files); i++) {
+		struct dentry *ent;
+		struct ips_debugfs_node *node = &ips_debug_files[i];
+
+		node->ips = ips;
+		ent = debugfs_create_file(node->name, S_IFREG | S_IRUGO,
+					  ips->debug_root, node,
+					  &ips_debugfs_ops);
+		if (!ent) {
+			dev_err(&ips->dev->dev,
+				"failed to create debug file: %s\n",
+				node->name);
+			goto err_cleanup;
+		}
+	}
+
+	return;
+
+err_cleanup:
+	ips_debugfs_cleanup(ips);
+	ips->debug_root = NULL;	/* ips_remove() calls cleanup again */
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * ips_detect_cpu - detect whether CPU supports IPS
+ * @ips: IPS driver struct, used here only for log messages
+ *
+ * Returns the limits table matching the boot CPU, or NULL if unsupported.
+ */
+static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips)
+{
+	u64 turbo_power, misc_en;
+	struct ips_mcp_limits *limits = NULL;
+	u16 tdp;
+
+	if (!(boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 37)) {
+		dev_info(&ips->dev->dev, "Non-IPS CPU detected.\n");
+		return NULL;
+	}
+
+	rdmsrl(IA32_MISC_ENABLE, misc_en);
+	/*
+	 * If the turbo enable bit isn't set, we shouldn't try to enable/disable
+	 * turbo manually or we'll get an illegal MSR access, even though
+	 * turbo will still be available.
+	 */
+	if (!(misc_en & IA32_MISC_TURBO_EN))
+		; /* add turbo MSR write allowed flag if necessary */
+
+	if (strstr(boot_cpu_data.x86_model_id, "CPU       M")) {
+		limits = &ips_sv_limits;
+	} else if (strstr(boot_cpu_data.x86_model_id, "CPU       L")) {
+		limits = &ips_lv_limits;
+	} else if (strstr(boot_cpu_data.x86_model_id, "CPU       U")) {
+		limits = &ips_ulv_limits;
+	} else {
+		dev_info(&ips->dev->dev, "No CPUID match found.\n");
+		return NULL;
+	}
+
+	rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_power);
+	tdp = turbo_power & TURBO_TDP_MASK;
+
+	/* Sanity check TDP against CPU */
+	if (limits->mcp_power_limit != (tdp / 8) * 1000)
+		dev_warn(&ips->dev->dev, "Warning: CPU TDP doesn't match expected value (found %d, expected %d)\n",
+			 tdp / 8, limits->mcp_power_limit / 1000);
+
+	return limits;
+}
+
+/**
+ * ips_get_i915_syms - try to get GPU control methods from i915 driver
+ * @ips: IPS driver
+ *
+ * Take references on the five i915 hooks via symbol_get().  Returns true if
+ * all were found; otherwise drops the references already taken and returns
+ * false.  NOTE(review): on failure the pointers fetched so far stay set in
+ * @ips even though they were put -- ips_remove() tests them; confirm.
+ */
+static bool ips_get_i915_syms(struct ips_driver *ips)
+{
+	ips->read_mch_val = symbol_get(i915_read_mch_val);
+	if (!ips->read_mch_val)
+		goto out_err;
+	ips->gpu_raise = symbol_get(i915_gpu_raise);
+	if (!ips->gpu_raise)
+		goto out_put_mch;
+	ips->gpu_lower = symbol_get(i915_gpu_lower);
+	if (!ips->gpu_lower)
+		goto out_put_raise;
+	ips->gpu_busy = symbol_get(i915_gpu_busy);
+	if (!ips->gpu_busy)
+		goto out_put_lower;
+	ips->gpu_turbo_disable = symbol_get(i915_gpu_turbo_disable);
+	if (!ips->gpu_turbo_disable)
+		goto out_put_busy;
+
+	return true;
+
+out_put_busy:
+	symbol_put(i915_gpu_busy);
+out_put_lower:
+	symbol_put(i915_gpu_lower);
+out_put_raise:
+	symbol_put(i915_gpu_raise);
+out_put_mch:
+	symbol_put(i915_read_mch_val);
+out_err:
+	return false;
+}
+
+static DEFINE_PCI_DEVICE_TABLE(ips_id_table) = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL,
+		     PCI_DEVICE_ID_INTEL_THERMAL_SENSOR), },
+	{ 0, }	/* terminating entry */
+};
+
+MODULE_DEVICE_TABLE(pci, ips_id_table);
+
+/* PCI probe: map the thermal registers, hook the IRQ, start the threads */
+static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	u64 platform_info;
+	struct ips_driver *ips;
+	u32 hts;
+	int ret = 0;
+	u16 htshi, trc, trc_required_mask;
+	u8 tse;
+
+	ips = kzalloc(sizeof(struct ips_driver), GFP_KERNEL);
+	if (!ips)
+		return -ENOMEM;
+
+	pci_set_drvdata(dev, ips);
+	ips->dev = dev;
+
+	ips->limits = ips_detect_cpu(ips);
+	if (!ips->limits) {
+		dev_info(&dev->dev, "IPS not supported on this CPU\n");
+		ret = -ENXIO;
+		goto error_free;
+	}
+
+	spin_lock_init(&ips->turbo_status_lock);
+
+	if (!pci_resource_start(dev, 0)) {
+		dev_err(&dev->dev, "TBAR not assigned, aborting\n");
+		ret = -ENXIO;
+		goto error_free;
+	}
+
+	ret = pci_request_regions(dev, "ips thermal sensor");
+	if (ret) {
+		dev_err(&dev->dev, "thermal resource busy, aborting\n");
+		goto error_free;
+	}
+
+	ret = pci_enable_device(dev);
+	if (ret) {
+		dev_err(&dev->dev, "can't enable PCI device, aborting\n");
+		/* the regions were requested above, so release them too */
+		goto error_release;
+	}
+
+	ips->regmap = ioremap(pci_resource_start(dev, 0),
+			      pci_resource_len(dev, 0));
+	if (!ips->regmap) {
+		dev_err(&dev->dev, "failed to map thermal regs, aborting\n");
+		ret = -EBUSY;
+		goto error_release;
+	}
+
+	tse = thm_readb(THM_TSE);
+	if (tse != TSE_EN) {
+		dev_err(&dev->dev, "thermal device not enabled (0x%02x), aborting\n", tse);
+		ret = -ENXIO;
+		goto error_unmap;
+	}
+
+	trc = thm_readw(THM_TRC);
+	trc_required_mask = TRC_CORE1_EN | TRC_CORE_PWR | TRC_MCH_EN;
+	if ((trc & trc_required_mask) != trc_required_mask) {
+		dev_err(&dev->dev, "thermal reporting for required devices not enabled, aborting\n");
+		ret = -ENXIO;
+		goto error_unmap;
+	}
+
+	if (trc & TRC_CORE2_EN)
+		ips->second_cpu = true;
+
+	if (!ips_get_i915_syms(ips)) {
+		dev_err(&dev->dev, "failed to get i915 symbols, graphics turbo disabled\n");
+		ips->gpu_turbo_enabled = false;
+	} else {
+		dev_dbg(&dev->dev, "graphics turbo enabled\n");
+		ips->gpu_turbo_enabled = true;
+	}
+
+	update_turbo_limits(ips);
+	dev_dbg(&dev->dev, "max cpu power clamp: %dW\n",
+		ips->mcp_power_limit / 10);
+	dev_dbg(&dev->dev, "max core power clamp: %dW\n",
+		ips->core_power_limit / 10);
+	/* BIOS may update limits at runtime */
+	if (thm_readl(THM_PSC) & PSP_PBRT)
+		ips->poll_turbo_status = true;
+
+	/*
+	 * Check PLATFORM_INFO MSR to make sure this chip is
+	 * turbo capable.
+	 */
+	rdmsrl(PLATFORM_INFO, platform_info);
+	if (!(platform_info & PLATFORM_TDP)) {
+		dev_err(&dev->dev, "platform indicates TDP override unavailable, aborting\n");
+		ret = -ENODEV;
+		goto error_unmap;
+	}
+
+	/*
+	 * IRQ handler for ME interaction
+	 * Note: don't use MSI here as the PCH has bugs.
+	 */
+	pci_disable_msi(dev);
+	ret = request_irq(dev->irq, ips_irq_handler, IRQF_SHARED, "ips", ips);
+	if (ret) {
+		dev_err(&dev->dev, "request irq failed, aborting\n");
+		goto error_unmap;
+	}
+
+	/* Enable aux, hot & critical interrupts */
+	thm_writeb(THM_TSPIEN, TSPIEN_AUX2_LOHI | TSPIEN_CRIT_LOHI |
+		   TSPIEN_HOT_LOHI | TSPIEN_AUX_LOHI);
+	thm_writeb(THM_TEN, TEN_UPDATE_EN);
+
+	/* Collect adjustment values */
+	ips->cta_val = thm_readw(THM_CTA);
+	ips->pta_val = thm_readw(THM_PTA);
+	ips->mgta_val = thm_readw(THM_MGTA);
+
+	/* Save turbo limits & ratios */
+	rdmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
+
+	ips_enable_cpu_turbo(ips);
+	ips->cpu_turbo_enabled = true;
+
+	/* Create ips-adjust (asleep) before ips-monitor, which wakes it up */
+	ips->adjust = kthread_create(ips_adjust, ips, "ips-adjust");
+	if (IS_ERR(ips->adjust)) {
+		dev_err(&dev->dev, "failed to create thermal adjust thread, aborting\n");
+		ret = -ENOMEM;
+		goto error_free_irq;
+	}
+
+	ips->monitor = kthread_run(ips_monitor, ips, "ips-monitor");
+	if (IS_ERR(ips->monitor)) {
+		dev_err(&dev->dev, "failed to create thermal monitor thread, aborting\n");
+		ret = -ENOMEM;
+		goto error_thread_cleanup;
+	}
+
+	hts = (ips->core_power_limit << HTS_PCPL_SHIFT) |
+		(ips->mcp_temp_limit << HTS_PTL_SHIFT) | HTS_NVV;
+	htshi = HTS2_PRST_RUNNING << HTS2_PRST_SHIFT;
+
+	thm_writew(THM_HTSHI, htshi);
+	thm_writel(THM_HTS, hts);
+
+	ips_debugfs_init(ips);
+
+	dev_info(&dev->dev, "IPS driver initialized, MCP temp limit %d\n",
+		 ips->mcp_temp_limit);
+	return ret;
+
+	/* NOTE(review): the i915 symbols taken above are never put here */
+error_thread_cleanup:
+	kthread_stop(ips->adjust);
+error_free_irq:
+	free_irq(ips->dev->irq, ips);
+error_unmap:
+	iounmap(ips->regmap);
+error_release:
+	pci_release_regions(dev);
+error_free:
+	kfree(ips);
+	return ret;
+}
+
+static void ips_remove(struct pci_dev *dev)
+{
+	struct ips_driver *ips = pci_get_drvdata(dev);
+	u64 turbo_override;
+
+	if (!ips)
+		return;
+
+	ips_debugfs_cleanup(ips);
+
+	if (ips->adjust)
+		kthread_stop(ips->adjust);
+	if (ips->monitor)
+		kthread_stop(ips->monitor);
+
+	/* Release i915 driver; ips_monitor, which calls it, is stopped now */
+	if (ips->read_mch_val)
+		symbol_put(i915_read_mch_val);
+	if (ips->gpu_raise)
+		symbol_put(i915_gpu_raise);
+	if (ips->gpu_lower)
+		symbol_put(i915_gpu_lower);
+	if (ips->gpu_busy)
+		symbol_put(i915_gpu_busy);
+	if (ips->gpu_turbo_disable)
+		symbol_put(i915_gpu_turbo_disable);
+
+	rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+	turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN);
+	wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
+	wrmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
+	free_irq(ips->dev->irq, ips);
+	iounmap(ips->regmap);
+	pci_release_regions(dev);
+	kfree(ips);
+	dev_dbg(&dev->dev, "IPS driver removed\n");
+}
+
+#ifdef CONFIG_PM
+static int ips_suspend(struct pci_dev *dev, pm_message_t state)
+{
+	return 0; /* placeholder: no state is saved */
+}
+
+static int ips_resume(struct pci_dev *dev)
+{
+	return 0; /* placeholder: no state is restored */
+}
+#else
+#define ips_suspend NULL
+#define ips_resume NULL
+#endif /* CONFIG_PM */
+
+static void ips_shutdown(struct pci_dev *dev)
+{
+} /* no shutdown work is done */
+
+static struct pci_driver ips_pci_driver = {
+	.name = "intel ips",
+	.id_table = ips_id_table,
+	.probe = ips_probe,
+	.remove = ips_remove,
+	.suspend = ips_suspend,	/* NULL without CONFIG_PM */
+	.resume = ips_resume,	/* NULL without CONFIG_PM */
+	.shutdown = ips_shutdown,
+};
+
+static int __init ips_init(void)
+{
+	return pci_register_driver(&ips_pci_driver); /* module init status */
+}
+module_init(ips_init);
+
+/* Module exit: unregister ips_pci_driver from the PCI core */
+static void __exit ips_exit(void)
+{
+	pci_unregister_driver(&ips_pci_driver);
+}
+module_exit(ips_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jesse Barnes <jbarnes@virtuousgeek.org>");
+MODULE_DESCRIPTION("Intelligent Power Sharing Driver");
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 7f0028e1010..8f8b072c4c7 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -33,6 +33,15 @@
  * subject to backwards-compatibility constraints.
  */
 
+#ifdef __KERNEL__
+/* For use by IPS driver */
+extern unsigned long i915_read_mch_val(void);
+extern bool i915_gpu_raise(void);
+extern bool i915_gpu_lower(void);
+extern bool i915_gpu_busy(void);
+extern bool i915_gpu_turbo_disable(void);
+#endif
+
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
 #define I915_NR_TEX_REGIONS 255	/* table size 2k - maximum due to use
-- 
cgit v1.2.3-70-g09d2


From c715a38bb7fc22fb8018b916c8a9f7ff017a8ad7 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Fri, 18 Jun 2010 14:05:52 +0100
Subject: rar: Move the RAR driver into the right place as its now clean

We exit staging rar! rar! rar!...

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/platform/x86/Kconfig                |  22 +
 drivers/platform/x86/Makefile               |   1 +
 drivers/platform/x86/intel_rar_register.c   | 671 +++++++++++++++++++++++++++
 drivers/staging/Kconfig                     |   2 -
 drivers/staging/Makefile                    |   1 -
 drivers/staging/memrar/memrar_handler.c     |   3 +-
 drivers/staging/rar_register/Kconfig        |  30 --
 drivers/staging/rar_register/Makefile       |   2 -
 drivers/staging/rar_register/rar_register.c | 675 ----------------------------
 drivers/staging/rar_register/rar_register.h |  44 --
 include/linux/rar_register.h                |  44 ++
 11 files changed, 739 insertions(+), 756 deletions(-)
 create mode 100644 drivers/platform/x86/intel_rar_register.c
 delete mode 100644 drivers/staging/rar_register/Kconfig
 delete mode 100644 drivers/staging/rar_register/Makefile
 delete mode 100644 drivers/staging/rar_register/rar_register.c
 delete mode 100644 drivers/staging/rar_register/rar_register.h
 create mode 100644 include/linux/rar_register.h

(limited to 'include')

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 724b2ed1a3c..2f173bc0ff0 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -539,6 +539,28 @@ config INTEL_SCU_IPC
 	  some embedded Intel x86 platforms. This is not needed for PC-type
 	  machines.
 
+config RAR_REGISTER
+	bool "Restricted Access Region Register Driver"
+	depends on PCI && X86_MRST
+	default n
+	---help---
+	  This driver allows other kernel drivers access to the
+	  contents of the restricted access region control registers.
+
+	  The restricted access region control registers
+	  (rar_registers) are used to pass address and
+	  locking information on restricted access regions
+	  to other drivers that use restricted access regions.
+
+	  The restricted access regions are regions of memory
+	  on the Intel MID Platform that are not accessible to
+	  the x86 processor, but are accessible to dedicated
+	  processors on board peripheral devices.
+
+	  The purpose of the restricted access regions is to
+	  protect sensitive data from compromise by unauthorized
+	  programs running on the x86 processor.
+
 config INTEL_IPS
 	tristate "Intel Intelligent Power Sharing"
 	depends on ACPI
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 7318fc2c162..ed50eca1b55 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -26,4 +26,5 @@ obj-$(CONFIG_TOPSTAR_LAPTOP)	+= topstar-laptop.o
 obj-$(CONFIG_ACPI_TOSHIBA)	+= toshiba_acpi.o
 obj-$(CONFIG_TOSHIBA_BT_RFKILL)	+= toshiba_bluetooth.o
 obj-$(CONFIG_INTEL_SCU_IPC)	+= intel_scu_ipc.o
+obj-$(CONFIG_RAR_REGISTER)	+= intel_rar_register.o
 obj-$(CONFIG_INTEL_IPS)		+= intel_ips.o
diff --git a/drivers/platform/x86/intel_rar_register.c b/drivers/platform/x86/intel_rar_register.c
new file mode 100644
index 00000000000..73f8e6d7266
--- /dev/null
+++ b/drivers/platform/x86/intel_rar_register.c
@@ -0,0 +1,671 @@
+/*
+ *  intel_rar_register.c - An Intel Restricted Access Region register driver
+ *
+ *  Copyright(c) 2009 Intel Corporation. All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation; either version 2 of the
+ *  License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ *  02111-1307, USA.
+ *
+ * -------------------------------------------------------------------
+ *  20091204 Mark Allyn <mark.a.allyn@intel.com>
+ *	     Ossama Othman <ossama.othman@intel.com>
+ *	Cleanup per feedback from Alan Cox and Arjan Van De Ven
+ *
+ *  20090806 Ossama Othman <ossama.othman@intel.com>
+ *      Return zero high address if upper 22 bits is zero.
+ *      Cleaned up checkpatch errors.
+ *      Clarified that driver is dealing with bus addresses.
+ *
+ *  20090702 Ossama Othman <ossama.othman@intel.com>
+ *      Removed unnecessary include directives
+ *      Cleaned up spinlocks.
+ *      Cleaned up logging.
+ *      Improved invalid parameter checks.
+ *      Fixed and simplified RAR address retrieval and RAR locking
+ *      code.
+ *
+ *  20090626 Mark Allyn <mark.a.allyn@intel.com>
+ *      Initial publish
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/rar_register.h>
+
+/* === Lincroft Message Bus Interface === */
+#define LNC_MCR_OFFSET		0xD0	/* Message Control Register */
+#define LNC_MDR_OFFSET		0xD4	/* Message Data Register */
+
+/* Message Opcodes */
+#define LNC_MESSAGE_READ_OPCODE	0xD0
+#define LNC_MESSAGE_WRITE_OPCODE 0xE0
+
+/* Message Write Byte Enables */
+#define LNC_MESSAGE_BYTE_WRITE_ENABLES	0xF
+
+/* B-unit Port */
+#define LNC_BUNIT_PORT	0x3
+
+/* === Lincroft B-Unit Registers - Programmed by IA32 firmware === */
+#define LNC_BRAR0L	0x10
+#define LNC_BRAR0H	0x11
+#define LNC_BRAR1L	0x12
+#define LNC_BRAR1H	0x13
+/* Reserved for SeP */
+#define LNC_BRAR2L	0x14
+#define LNC_BRAR2H	0x15
+
+/* Moorestown supports three restricted access regions. */
+#define MRST_NUM_RAR 3
+
+/* RAR Bus Address Range, as normalised by init_rar_params() */
+struct rar_addr {
+	dma_addr_t low;		/* start address, bits 9:0 clear */
+	dma_addr_t high;	/* end address, bits 9:0 set (or all 0) */
+};
+
+/*
+ *	We create one of these for each RAR (see rar_to_client())
+ */
+struct client {
+	int (*callback)(unsigned long data);
+	unsigned long driver_priv;	/* presumably callback's data -- confirm */
+	bool busy;
+};
+
+static DEFINE_MUTEX(rar_mutex);
+static DEFINE_MUTEX(lnc_reg_mutex);
+
+/*
+ *	One per RAR device (currently only one device)
+ */
+struct rar_device {
+	struct rar_addr rar_addr[MRST_NUM_RAR];	/* set by init_rar_params() */
+	struct pci_dev *rar_dev;	/* put by free_rar_device() */
+	bool registered;		/* checked by rar_to_device() */
+	bool allocated;			/* owned by alloc/free_rar_device() */
+	struct client client[MRST_NUM_RAR];	/* one slot per RAR */
+};
+
+/* Current platforms have only one rar_device for 3 rar regions */
+static struct rar_device my_rar_device;
+
+/*
+ *	Abstract out multiple device support. Current platforms only
+ *	have a single RAR device.
+ */
+
+/**
+ *	alloc_rar_device	-	return a new RAR structure
+ *
+ *	Return the single static RAR device, or NULL if already allocated
+ */
+static struct rar_device *alloc_rar_device(void)
+{
+	if (my_rar_device.allocated)
+		return NULL;	/* the only instance is already handed out */
+	my_rar_device.allocated = 1;
+	return &my_rar_device;
+}
+
+/**
+ *	free_rar_device		-	free a RAR object
+ *	@rar: the RAR device being freed
+ *
+ *	Drop the PCI device reference and mark the object unallocated
+ */
+static void free_rar_device(struct rar_device *rar)
+{
+	pci_dev_put(rar->rar_dev);
+	rar->allocated = 0;	/* alloc_rar_device() may hand it out again */
+}
+
+/**
+ *	_rar_to_device		-	return the device handling this RAR
+ *	@rar: RAR number (0 to MRST_NUM_RAR - 1)
+ *	@off: returned offset, written only when a device is returned
+ *
+ *	Internal helper for looking up RAR devices. This and alloc are the
+ *	two functions that need touching to go to multiple RAR devices.
+ */
+static struct rar_device *_rar_to_device(int rar, int *off)
+{
+	if (rar >= 0 && rar < MRST_NUM_RAR) {
+		*off = rar;
+		return &my_rar_device;
+	}
+	return NULL;
+}
+
+/**
+ *	rar_to_device		-	return the device handling this RAR
+ *	@rar: RAR number
+ *	@off: returned offset
+ *
+ *	Look up the device for @rar and report the RAR's offset within that
+ *	device in @off.  Returns NULL if @rar has no device or the device
+ *	has not been registered.
+ */
+static struct rar_device *rar_to_device(int rar, int *off)
+{
+	struct rar_device *dev = _rar_to_device(rar, off);
+
+	/* An unregistered device is reported the same as a missing one */
+	return (dev && dev->registered) ? dev : NULL;
+}
+
+/**
+ *	rar_to_client		-	return the client handling this RAR
+ *	@rar: RAR number
+ *
+ *	Returns the client slot for @rar, or NULL if no device handles it.
+ *	Unlike rar_to_device(), the device need not be registered.
+ */
+static struct client *rar_to_client(int rar)
+{
+	int slot;
+	struct rar_device *dev = _rar_to_device(rar, &slot);
+
+	/* slot is only written when a device was found */
+	return dev ? &dev->client[slot] : NULL;
+}
+
+/**
+ *	rar_read_addr		-	retrieve a RAR mapping
+ *	@pdev: PCI device for the RAR
+ *	@offset: offset for message
+ *	@addr: returned address
+ *
+ *	Reads the address of a given RAR register. Returns 0 on success
+ *	or an error code on failure.
+ */
+static int rar_read_addr(struct pci_dev *pdev, int offset, dma_addr_t *addr)
+{
+	/*
+	 * ======== The Lincroft Message Bus Interface ========
+	 * Lincroft registers may be obtained via PCI from
+	 * the host bridge using the Lincroft Message Bus
+	 * Interface.  That message bus interface is generally
+	 * comprised of two registers: a control register (MCR, 0xD0)
+	 * and a data register (MDR, 0xD4).
+	 *
+	 * The MCR (message control register) format is the following:
+	 *   1.  [31:24]: Opcode
+	 *   2.  [23:16]: Port
+	 *   3.  [15:8]: Register Offset
+	 *   4.  [7:4]: Byte Enables (use 0xF to set all of these bits
+	 *              to 1)
+	 *   5.  [3:0]: reserved
+	 *
+	 *  Read (0xD0) and write (0xE0) opcodes are written to the
+	 *  control register when reading and writing to Lincroft
+	 *  registers, respectively.
+	 *
+	 *  We're interested in registers found in the Lincroft
+	 *  B-unit.  The B-unit port is 0x3.
+	 *
+	 *  The six B-unit RAR register offsets we use are listed
+	 *  earlier in this file.
+	 *
+	 *  Lastly writing to the MCR register requires the "Byte
+	 *  enables" bits to be set to 1.  This may be achieved by
+	 *  writing 0xF at bit 4.
+	 *
+	 * The MDR (message data register) format is the following:
+	 *   1. [31:0]: Read/Write Data
+	 *
+	 *  Data being read from this register is only available after
+	 *  writing the appropriate control message to the MCR
+	 *  register.
+	 *
+	 *  Data being written to this register must be written before
+	 *  writing the appropriate control message to the MCR
+	 *  register.
+	*/
+
+	int result;
+	u32 addr32;
+
+	/* Construct control message */
+	u32 const message =
+		 (LNC_MESSAGE_READ_OPCODE << 24)
+		 | (LNC_BUNIT_PORT << 16)
+		 | (offset << 8)
+		 | (LNC_MESSAGE_BYTE_WRITE_ENABLES << 4);
+
+	dev_dbg(&pdev->dev, "Offset for 'get' LNC MSG is %x\n", offset);
+
+	/*
+	* We synchronize access to the Lincroft MCR and MDR registers
+	* until BOTH the command is issued through the MCR register
+	* and the corresponding data is read from the MDR register.
+	* Otherwise a race condition would exist between accesses to
+	* both registers.
+	*/
+
+	mutex_lock(&lnc_reg_mutex);
+
+	/* Send the control message */
+	result = pci_write_config_dword(pdev, LNC_MCR_OFFSET, message);
+	if (!result) {
+		/* Read back the address as a 32bit value */
+		result = pci_read_config_dword(pdev, LNC_MDR_OFFSET, &addr32);
+		*addr = (dma_addr_t)addr32; /* stored even if the read failed */
+	}
+	mutex_unlock(&lnc_reg_mutex);
+	return result;
+}
+
+/**
+ *	rar_set_addr		-	Set a RAR mapping
+ *	@pdev: PCI device for the RAR
+ *	@offset: offset for message
+ *	@addr: address to set
+ *
+ *	Sets the address of a given RAR register. Returns 0 on success
+ *	or an error code on failure.
+ */
+static int rar_set_addr(struct pci_dev *pdev,
+	int offset,
+	dma_addr_t addr)
+{
+	/*
+	* Data being written to this register must be written before
+	* writing the appropriate control message to the MCR
+	* register.
+	* See rar_read_addr() for a description of the
+	* message bus interface being used here.
+	*/
+
+	int result;
+
+	/* Construct control message */
+	u32 const message = (LNC_MESSAGE_WRITE_OPCODE << 24)
+		| (LNC_BUNIT_PORT << 16)
+		| (offset << 8)
+		| (LNC_MESSAGE_BYTE_WRITE_ENABLES << 4);
+
+	/*
+	* We synchronize access to the Lincroft MCR and MDR registers
+	* until BOTH the data is written to the MDR register and the
+	* command is issued through the MCR register.
+	* Otherwise a race condition would exist between accesses to
+	* both registers.
+	*/
+
+	mutex_lock(&lnc_reg_mutex);
+
+	/* Write the data (the address) to the MDR first */
+	result = pci_write_config_dword(pdev, LNC_MDR_OFFSET, addr);
+	if (!result)
+		/* Then send the control message through the MCR */
+		result = pci_write_config_dword(pdev, LNC_MCR_OFFSET, message);
+
+	mutex_unlock(&lnc_reg_mutex);
+	return result;
+}
+
+/**
+ *	init_rar_params		-	Initialize RAR parameters
+ *	@rar: RAR device to initialise
+ *
+ *	Read the low/high bus address register pair of each of the
+ *	MRST_NUM_RAR regions into @rar->rar_addr[], starting at LNC_BRAR0L,
+ *	and normalise the low 10 bits that the registers do not store.
+ *
+ *	Returns 0 on success, or the error code of the first failing
+ *	register read.
+ */
+static int init_rar_params(struct rar_device *rar)
+{
+	struct pci_dev *pdev = rar->rar_dev;
+	unsigned int i;
+	int result;
+	int offset = LNC_BRAR0L;	/* RAR 0..2, low/high register pairs */
+
+	/*
+	 * Retrieve RAR start and end bus addresses.
+	 * Access the RAR registers through the Lincroft Message Bus
+	 * Interface on PCI device: 00:00.0 Host bridge.
+	 */
+
+	for (i = 0; i < MRST_NUM_RAR; ++i) {
+		struct rar_addr *addr = &rar->rar_addr[i];
+
+		result = rar_read_addr(pdev, offset++, &addr->low);
+		if (result != 0)
+			return result;
+
+		result = rar_read_addr(pdev, offset++, &addr->high);
+		if (result != 0)
+			return result;
+
+		/*
+		 * Only the upper 22 bits of the RAR addresses are
+		 * stored in their corresponding RAR registers so we
+		 * must set the lower 10 bits accordingly.
+		 *
+		 * The low address has its lower 10 bits cleared, and
+		 * the high address has all its lower 10 bits set,
+		 * e.g.:
+		 * low = 0x2ffffc00
+		 */
+		addr->low &= (dma_addr_t)0xfffffc00u;
+
+		/*
+		 * Set bits 9:0 on upper address if bits 31:10 are non
+		 * zero; otherwise clear all bits
+		 */
+		if ((addr->high & 0xfffffc00u) == 0)
+			addr->high = 0;
+		else
+			addr->high |= 0x3ffu;
+	}
+	/* Done accessing the device. */
+
+	/*
+	 * All reads succeeded if we get here.  "BRAR" refers to the RAR
+	 * registers in the Lincroft B-unit.
+	 */
+	for (i = 0; i != MRST_NUM_RAR; ++i) {
+		dev_info(&pdev->dev, "BRAR[%u] bus address range = "
+		  "[%lx, %lx]\n", i,
+		  (unsigned long)rar->rar_addr[i].low,
+		  (unsigned long)rar->rar_addr[i].high);
+	}
+
+	return 0;
+}
+
+/**
+ *	rar_get_address		-	get the bus address in a RAR
+ *	@rar_index: RAR whose address range is requested
+ *	@start: return value of start address of block
+ *	@end: return value of end address of block
+ *
+ *	The rar_get_address function is used by other device drivers
+ *	to obtain the bus address range recorded for a RAR.
+ *
+ *	The function returns 0 upon success, or warns and returns -ENODEV
+ *	if rar_to_device() finds no device for @rar_index.
+ */
+int rar_get_address(int rar_index, dma_addr_t *start, dma_addr_t *end)
+{
+	int idx;
+	struct rar_device *rar = rar_to_device(rar_index, &idx);
+
+	if (rar == NULL) {
+		WARN_ON(1);
+		return -ENODEV;
+	}
+
+	*start = rar->rar_addr[idx].low;
+	*end = rar->rar_addr[idx].high;
+	return 0;
+}
+EXPORT_SYMBOL(rar_get_address);
+
+/**
+ *	rar_lock	-	lock a RAR register
+ *	@rar_index: RAR to lock (0-2)
+ *
+ *	The rar_lock function is used by other device drivers to lock a RAR.
+ *	Once a RAR is locked, it stays locked until the next system reboot.
+ *
+ *	Returns 0 upon success, -EINVAL (with a warning) if rar_to_device()
+ *	finds no device for @rar_index, or the register write's error code.
+ */
+int rar_lock(int rar_index)
+{
+	struct rar_device *rar;
+	int result;
+	int idx;
+	dma_addr_t low, high;
+
+	rar = rar_to_device(rar_index, &idx);
+
+	if (rar == NULL) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	low = rar->rar_addr[idx].low & 0xfffffc00u;
+	high = rar->rar_addr[idx].high & 0xfffffc00u;
+
+	/*
+	* Video RAR: only allow I/O from the graphics and Langwell;
+	* not from the x86 processor
+	*/
+
+	if (rar_index == RAR_TYPE_VIDEO) {
+		low |= 0x00000009;
+		high |= 0x00000015;
+	} else if (rar_index == RAR_TYPE_AUDIO) {
+		/* Only allow I/O from Langwell; nothing from x86 */
+		low |= 0x00000008;
+		high |= 0x00000018;
+	} else
+		/* Read-only from all agents */
+		high |= 0x00000018;
+
+	/*
+	* Program low then high at offsets 2 * idx and 2 * idx + 1.
+	* NOTE(review): init_rar_params() reads from 0x10 upward - confirm.
+	*/
+	result = rar_set_addr(rar->rar_dev,
+				2 * idx, low);
+
+	if (result == 0)
+		result = rar_set_addr(rar->rar_dev,
+				2 * idx + 1, high);
+
+	return result;
+}
+EXPORT_SYMBOL(rar_lock);
+
+/**
+ *	register_rar		-	register a RAR handler
+ *	@num: RAR we wish to register for
+ *	@callback: function to call when RAR support is available
+ *	@data: data to pass to this function
+ *
+ *	The register_rar function is to be used by other device drivers
+ *	to ensure that this driver is ready. As we cannot be sure of
+ *	the compile/execute order of drivers in the kernel, it is
+ *	best to give this driver a callback function to call when
+ *	it is ready to give out addresses. The callback function
+ *	would have those steps that continue the initialization of
+ *	a driver that requires a valid RAR address. One of those
+ *	steps would be to call rar_get_address()
+ *
+ *	Returns 0 on success, -ERANGE for an unknown RAR, -EBUSY if claimed.
+ */
+int register_rar(int num, int (*callback)(unsigned long data),
+							unsigned long data)
+{
+	/* For now we hardcode a single RAR device */
+	struct rar_device *rar;
+	struct client *c;
+	int idx;
+	int retval = 0;
+
+	mutex_lock(&rar_mutex);
+
+	/* Do we have a client mapping for this RAR number ? */
+	c = rar_to_client(num);
+	if (c == NULL) {
+		retval = -ERANGE;
+		goto done;
+	}
+	/* Is it claimed ? */
+	if (c->busy) {
+		retval = -EBUSY;
+		goto done;
+	}
+	c->busy = 1;
+
+	/* See if we have a device for this RAR yet, if we do then fire it */
+	rar = rar_to_device(num, &idx);
+
+	if (rar) {
+		/*
+		* if the RAR device is already registered, then we can
+		* simply call the callback right now (rar_mutex is held)
+		*/
+		(*callback)(data);
+		goto done;
+	}
+
+	/* Arrange to be called back when the hardware is found */
+	c->callback = callback;
+	c->driver_priv = data;
+done:
+	mutex_unlock(&rar_mutex);
+	return retval;
+}
+EXPORT_SYMBOL(register_rar);
+
+/**
+ *	unregister_rar	-	release a RAR allocation
+ *	@num: RAR number
+ *
+ *	Releases a RAR allocation, or pending allocation. If a callback is
+ *	pending then it will either complete before unregister_rar()
+ *	returns or not run at all. Warns if @num is unknown or unclaimed.
+ */
+
+void unregister_rar(int num)
+{
+	struct client *c;
+
+	mutex_lock(&rar_mutex);
+	c = rar_to_client(num);
+	if (c == NULL || !c->busy)
+		WARN_ON(1);
+	else
+		c->busy = 0;
+	mutex_unlock(&rar_mutex);
+}
+EXPORT_SYMBOL(unregister_rar);
+
+/**
+ *	rar_callback		-	Process callbacks
+ *	@rar: new RAR device
+ *
+ *	Mark @rar registered and run each claimed client's pending callback.
+ */
+
+static void rar_callback(struct rar_device *rar)
+{
+	struct client *c = &rar->client[0];
+	int i;
+
+	mutex_lock(&rar_mutex);
+
+	rar->registered = 1;	/* Ensure no more callbacks queue */
+
+	for (i = 0; i < MRST_NUM_RAR; i++) {
+		if (c->callback && c->busy) {
+			c->callback(c->driver_priv);
+			c->callback = NULL;
+		}
+		c++;
+	}
+	mutex_unlock(&rar_mutex);
+}
+
+/**
+ *	rar_probe		-	PCI probe callback
+ *	@dev: PCI device
+ *	@id: matching entry in the match table
+ *
+ *	A RAR device has been discovered. Initialise it and, if successful,
+ *	run any pending callbacks. Returns 0 on success or an error code.
+ */
+static int rar_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	int error;
+	struct rar_device *rar;
+
+	dev_dbg(&dev->dev, "PCI probe starting\n");
+
+	rar = alloc_rar_device();
+	if (rar == NULL)
+		return -EBUSY;
+
+	/* Enable the device */
+	error = pci_enable_device(dev);
+	if (error) {
+		dev_err(&dev->dev,
+			"Error enabling RAR register PCI device\n");
+		goto end_function;
+	}
+
+	/* Fill in the rar_device structure */
+	rar->rar_dev = pci_dev_get(dev);
+	pci_set_drvdata(dev, rar);
+
+	/*
+	 * Initialize the RAR parameters, which have to be retrieved
+	 * via the message bus interface.
+	 */
+	error = init_rar_params(rar);
+	if (error) {
+		pci_disable_device(dev);
+		dev_err(&dev->dev, "Error retrieving RAR addresses\n");
+		goto end_function;
+	}
+	/* Now run the callbacks of anyone who has already registered */
+	rar_callback(rar);
+	return 0;
+end_function:
+	free_rar_device(rar);
+	return error;
+}
+
+const struct pci_device_id rar_pci_id_tbl[] = {
+	{ PCI_VDEVICE(INTEL, 0x4110) },
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(pci, rar_pci_id_tbl);
+
+const struct pci_device_id *my_id_table = rar_pci_id_tbl;
+
+/* PCI driver descriptor: binds rar_probe to the IDs in rar_pci_id_tbl */
+static struct pci_driver rar_pci_driver = {
+	.name = "rar_register_driver",
+	.id_table = rar_pci_id_tbl,
+	.probe = rar_probe,
+	/* No .remove callback: the device cannot be unplugged */
+};
+
+static int __init rar_init_handler(void)
+{
+	return pci_register_driver(&rar_pci_driver);
+}
+
+static void __exit rar_exit_handler(void)
+{
+	pci_unregister_driver(&rar_pci_driver);
+}
+
+module_init(rar_init_handler);
+module_exit(rar_exit_handler);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Intel Restricted Access Region Register Driver");
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 984a7544071..9dfef8a5997 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -109,8 +109,6 @@ source "drivers/staging/hv/Kconfig"
 
 source "drivers/staging/vme/Kconfig"
 
-source "drivers/staging/rar_register/Kconfig"
-
 source "drivers/staging/memrar/Kconfig"
 
 source "drivers/staging/sep/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 9fa25133874..3dbf681ca64 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -35,7 +35,6 @@ obj-$(CONFIG_VT6656)		+= vt6656/
 obj-$(CONFIG_FB_UDL)		+= udlfb/
 obj-$(CONFIG_HYPERV)		+= hv/
 obj-$(CONFIG_VME_BUS)		+= vme/
-obj-$(CONFIG_RAR_REGISTER)	+= rar_register/
 obj-$(CONFIG_MRST_RAR_HANDLER)	+= memrar/
 obj-$(CONFIG_DX_SEP)		+= sep/
 obj-$(CONFIG_IIO)		+= iio/
diff --git a/drivers/staging/memrar/memrar_handler.c b/drivers/staging/memrar/memrar_handler.c
index efa7fd62d39..41876f2b0e5 100644
--- a/drivers/staging/memrar/memrar_handler.c
+++ b/drivers/staging/memrar/memrar_handler.c
@@ -47,8 +47,7 @@
 #include <linux/mm.h>
 #include <linux/ioport.h>
 #include <linux/io.h>
-
-#include "../rar_register/rar_register.h"
+#include <linux/rar_register.h>
 
 #include "memrar.h"
 #include "memrar_allocator.h"
diff --git a/drivers/staging/rar_register/Kconfig b/drivers/staging/rar_register/Kconfig
deleted file mode 100644
index e9c27738199..00000000000
--- a/drivers/staging/rar_register/Kconfig
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-# RAR device configuration
-#
-
-menu "RAR Register Driver"
-#
-#	Restricted Access Register Manager
-#
-config RAR_REGISTER
-	tristate "Restricted Access Region Register Driver"
-	depends on PCI
-	default n
-	---help---
-	  This driver allows other kernel drivers access to the
-	  contents of the restricted access region control registers.
-
-	  The restricted access region control registers
-	  (rar_registers) are used to pass address and
-	  locking information on restricted access regions
-	  to other drivers that use restricted access regions.
-
-	  The restricted access regions are regions of memory
-	  on the Intel MID Platform that are not accessible to
-	  the x86 processor, but are accessible to dedicated
-	  processors on board peripheral devices.
-
-	  The purpose of the restricted access regions is to
-	  protect sensitive data from compromise by unauthorized
-	  programs running on the x86 processor.
-endmenu
diff --git a/drivers/staging/rar_register/Makefile b/drivers/staging/rar_register/Makefile
deleted file mode 100644
index d5954ccc16c..00000000000
--- a/drivers/staging/rar_register/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-EXTRA_CFLAGS += -DLITTLE__ENDIAN
-obj-$(CONFIG_RAR_REGISTER) += rar_register.o
diff --git a/drivers/staging/rar_register/rar_register.c b/drivers/staging/rar_register/rar_register.c
deleted file mode 100644
index 618503f422e..00000000000
--- a/drivers/staging/rar_register/rar_register.c
+++ /dev/null
@@ -1,675 +0,0 @@
-/*
- *  rar_register.c - An Intel Restricted Access Region register driver
- *
- *  Copyright(c) 2009 Intel Corporation. All rights reserved.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License as
- *  published by the Free Software Foundation; either version 2 of the
- *  License, or (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- *  02111-1307, USA.
- *
- * -------------------------------------------------------------------
- *  20091204 Mark Allyn <mark.a.allyn@intel.com>
- *	     Ossama Othman <ossama.othman@intel.com>
- *	Cleanup per feedback from Alan Cox and Arjan Van De Ven
- *
- *  20090806 Ossama Othman <ossama.othman@intel.com>
- *      Return zero high address if upper 22 bits is zero.
- *      Cleaned up checkpatch errors.
- *      Clarified that driver is dealing with bus addresses.
- *
- *  20090702 Ossama Othman <ossama.othman@intel.com>
- *      Removed unnecessary include directives
- *      Cleaned up spinlocks.
- *      Cleaned up logging.
- *      Improved invalid parameter checks.
- *      Fixed and simplified RAR address retrieval and RAR locking
- *      code.
- *
- *  20090626 Mark Allyn <mark.a.allyn@intel.com>
- *      Initial publish
- */
-
-#define DEBUG 1
-
-#include "rar_register.h"
-
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/spinlock.h>
-#include <linux/device.h>
-#include <linux/kernel.h>
-
-/* === Lincroft Message Bus Interface === */
-#define LNC_MCR_OFFSET		0xD0	/* Message Control Register */
-#define LNC_MDR_OFFSET		0xD4	/* Message Data Register */
-
-/* Message Opcodes */
-#define LNC_MESSAGE_READ_OPCODE	0xD0
-#define LNC_MESSAGE_WRITE_OPCODE 0xE0
-
-/* Message Write Byte Enables */
-#define LNC_MESSAGE_BYTE_WRITE_ENABLES	0xF
-
-/* B-unit Port */
-#define LNC_BUNIT_PORT	0x3
-
-/* === Lincroft B-Unit Registers - Programmed by IA32 firmware === */
-#define LNC_BRAR0L	0x10
-#define LNC_BRAR0H	0x11
-#define LNC_BRAR1L	0x12
-#define LNC_BRAR1H	0x13
-/* Reserved for SeP */
-#define LNC_BRAR2L	0x14
-#define LNC_BRAR2H	0x15
-
-/* Moorestown supports three restricted access regions. */
-#define MRST_NUM_RAR 3
-
-/* RAR Bus Address Range */
-struct rar_addr {
-	dma_addr_t low;
-	dma_addr_t high;
-};
-
-/*
- *	We create one of these for each RAR
- */
-struct client {
-	int (*callback)(unsigned long data);
-	unsigned long driver_priv;
-	bool busy;
-};
-
-static DEFINE_MUTEX(rar_mutex);
-static DEFINE_MUTEX(lnc_reg_mutex);
-
-/*
- *	One per RAR device (currently only one device)
- */
-struct rar_device {
-	struct rar_addr rar_addr[MRST_NUM_RAR];
-	struct pci_dev *rar_dev;
-	bool registered;
-	bool allocated;
-	struct client client[MRST_NUM_RAR];
-};
-
-/* Current platforms have only one rar_device for 3 rar regions */
-static struct rar_device my_rar_device;
-
-/*
- *	Abstract out multiple device support. Current platforms only
- *	have a single RAR device.
- */
-
-/**
- *	alloc_rar_device	-	return a new RAR structure
- *
- *	Return a new (but not yet ready) RAR device object
- */
-static struct rar_device *alloc_rar_device(void)
-{
-	if (my_rar_device.allocated)
-		return NULL;
-	my_rar_device.allocated = 1;
-	return &my_rar_device;
-}
-
-/**
- *	free_rar_device		-	free a RAR object
- *	@rar: the RAR device being freed
- *
- *	Release a RAR object and any attached resources
- */
-static void free_rar_device(struct rar_device *rar)
-{
-	pci_dev_put(rar->rar_dev);
-	rar->allocated = 0;
-}
-
-/**
- *	_rar_to_device		-	return the device handling this RAR
- *	@rar: RAR number
- *	@off: returned offset
- *
- *	Internal helper for looking up RAR devices. This and alloc are the
- *	two functions that need touching to go to multiple RAR devices.
- */
-static struct rar_device *_rar_to_device(int rar, int *off)
-{
-	if (rar >= 0 && rar <= 3) {
-		*off = rar;
-		return &my_rar_device;
-	}
-	return NULL;
-}
-
-
-/**
- *	rar_to_device		-	return the device handling this RAR
- *	@rar: RAR number
- *	@off: returned offset
- *
- *	Return the device this RAR maps to if one is present, otherwise
- *	returns NULL. Reports the offset relative to the base of this
- *	RAR device in off.
- */
-static struct rar_device *rar_to_device(int rar, int *off)
-{
-	struct rar_device *rar_dev = _rar_to_device(rar, off);
-	if (rar_dev == NULL || !rar_dev->registered)
-		return NULL;
-	return rar_dev;
-}
-
-/**
- *	rar_to_client		-	return the client handling this RAR
- *	@rar: RAR number
- *
- *	Return the client this RAR maps to if a mapping is known, otherwise
- *	returns NULL.
- */
-static struct client *rar_to_client(int rar)
-{
-	int idx;
-	struct rar_device *r = _rar_to_device(rar, &idx);
-	if (r != NULL)
-		return &r->client[idx];
-	return NULL;
-}
-
-/**
- *	rar_read_addr		-	retrieve a RAR mapping
- *	@pdev: PCI device for the RAR
- *	@offset: offset for message
- *	@addr: returned address
- *
- *	Reads the address of a given RAR register. Returns 0 on success
- *	or an error code on failure.
- */
-static int rar_read_addr(struct pci_dev *pdev, int offset, dma_addr_t *addr)
-{
-	/*
-	 * ======== The Lincroft Message Bus Interface ========
-	 * Lincroft registers may be obtained via PCI from
-	 * the host bridge using the Lincroft Message Bus
-	 * Interface.  That message bus interface is generally
-	 * comprised of two registers: a control register (MCR, 0xDO)
-	 * and a data register (MDR, 0xD4).
-	 *
-	 * The MCR (message control register) format is the following:
-	 *   1.  [31:24]: Opcode
-	 *   2.  [23:16]: Port
-	 *   3.  [15:8]: Register Offset
-	 *   4.  [7:4]: Byte Enables (use 0xF to set all of these bits
-	 *              to 1)
-	 *   5.  [3:0]: reserved
-	 *
-	 *  Read (0xD0) and write (0xE0) opcodes are written to the
-	 *  control register when reading and writing to Lincroft
-	 *  registers, respectively.
-	 *
-	 *  We're interested in registers found in the Lincroft
-	 *  B-unit.  The B-unit port is 0x3.
-	 *
-	 *  The six B-unit RAR register offsets we use are listed
-	 *  earlier in this file.
-	 *
-	 *  Lastly writing to the MCR register requires the "Byte
-	 *  enables" bits to be set to 1.  This may be achieved by
-	 *  writing 0xF at bit 4.
-	 *
-	 * The MDR (message data register) format is the following:
-	 *   1. [31:0]: Read/Write Data
-	 *
-	 *  Data being read from this register is only available after
-	 *  writing the appropriate control message to the MCR
-	 *  register.
-	 *
-	 *  Data being written to this register must be written before
-	 *  writing the appropriate control message to the MCR
-	 *  register.
-	*/
-
-	int result;
-	u32 addr32;
-
-	/* Construct control message */
-	u32 const message =
-		 (LNC_MESSAGE_READ_OPCODE << 24)
-		 | (LNC_BUNIT_PORT << 16)
-		 | (offset << 8)
-		 | (LNC_MESSAGE_BYTE_WRITE_ENABLES << 4);
-
-	dev_dbg(&pdev->dev, "Offset for 'get' LNC MSG is %x\n", offset);
-
-	/*
-	* We synchronize access to the Lincroft MCR and MDR registers
-	* until BOTH the command is issued through the MCR register
-	* and the corresponding data is read from the MDR register.
-	* Otherwise a race condition would exist between accesses to
-	* both registers.
-	*/
-
-	mutex_lock(&lnc_reg_mutex);
-
-	/* Send the control message */
-	result = pci_write_config_dword(pdev, LNC_MCR_OFFSET, message);
-	if (!result) {
-		/* Read back the address as a 32bit value */
-		result = pci_read_config_dword(pdev, LNC_MDR_OFFSET, &addr32);
-		*addr = (dma_addr_t)addr32;
-	}
-	mutex_unlock(&lnc_reg_mutex);
-	return result;
-}
-
-/**
- *	rar_set_addr		-	Set a RAR mapping
- *	@pdev: PCI device for the RAR
- *	@offset: offset for message
- *	@addr: address to set
- *
- *	Sets the address of a given RAR register. Returns 0 on success
- *	or an error code on failure.
- */
-static int rar_set_addr(struct pci_dev *pdev,
-	int offset,
-	dma_addr_t addr)
-{
-	/*
-	* Data being written to this register must be written before
-	* writing the appropriate control message to the MCR
-	* register.
-	* See rar_get_addrs() for a description of the
-	* message bus interface being used here.
-	*/
-
-	int result;
-
-	/* Construct control message */
-	u32 const message = (LNC_MESSAGE_WRITE_OPCODE << 24)
-		| (LNC_BUNIT_PORT << 16)
-		| (offset << 8)
-		| (LNC_MESSAGE_BYTE_WRITE_ENABLES << 4);
-
-	/*
-	* We synchronize access to the Lincroft MCR and MDR registers
-	* until BOTH the command is issued through the MCR register
-	* and the corresponding data is read from the MDR register.
-	* Otherwise a race condition would exist between accesses to
-	* both registers.
-	*/
-
-	mutex_lock(&lnc_reg_mutex);
-
-	/* Send the control message */
-	result = pci_write_config_dword(pdev, LNC_MDR_OFFSET, addr);
-	if (!result)
-		/* And address */
-		result = pci_write_config_dword(pdev, LNC_MCR_OFFSET, message);
-
-	mutex_unlock(&lnc_reg_mutex);
-	return result;
-}
-
-/*
- *	rar_init_params		-	Initialize RAR parameters
- *	@rar: RAR device to initialise
- *
- *	Initialize RAR parameters, such as bus addresses, etc. Returns 0
- *	on success, or an error code on failure.
- */
-static int init_rar_params(struct rar_device *rar)
-{
-	struct pci_dev *pdev = rar->rar_dev;
-	unsigned int i;
-	int result = 0;
-	int offset = 0x10;	/* RAR 0 to 2 in order low/high/low/high/... */
-
-	/* Retrieve RAR start and end bus addresses.
-	* Access the RAR registers through the Lincroft Message Bus
-	* Interface on PCI device: 00:00.0 Host bridge.
-	*/
-
-	for (i = 0; i < MRST_NUM_RAR; ++i) {
-		struct rar_addr *addr = &rar->rar_addr[i];
-
-		result = rar_read_addr(pdev, offset++, &addr->low);
-		if (result != 0)
-			return result;
-
-		result = rar_read_addr(pdev, offset++, &addr->high);
-		if (result != 0)
-			return result;
-
-
-		/*
-		* Only the upper 22 bits of the RAR addresses are
-		* stored in their corresponding RAR registers so we
-		* must set the lower 10 bits accordingly.
-
-		* The low address has its lower 10 bits cleared, and
-		* the high address has all its lower 10 bits set,
-		* e.g.:
-		* low = 0x2ffffc00
-		*/
-
-		addr->low &= (dma_addr_t)0xfffffc00u;
-
-		/*
-		* Set bits 9:0 on uppser address if bits 31:10 are non
-		* zero; otherwize clear all bits
-		*/
-
-		if ((addr->high & 0xfffffc00u) == 0)
-			addr->high = 0;
-		else
-			addr->high |= 0x3ffu;
-	}
-	/* Done accessing the device. */
-
-	if (result == 0) {
-		for (i = 0; i != MRST_NUM_RAR; ++i) {
-			/*
-			* "BRAR" refers to the RAR registers in the
-			* Lincroft B-unit.
-			*/
-			dev_info(&pdev->dev, "BRAR[%u] bus address range = "
-			  "[%lx, %lx]\n", i,
-			  (unsigned long)rar->rar_addr[i].low,
-			  (unsigned long)rar->rar_addr[i].high);
-		}
-	}
-	return result;
-}
-
-/**
- *	rar_get_address		-	get the bus address in a RAR
- *	@start: return value of start address of block
- *	@end: return value of end address of block
- *
- *	The rar_get_address function is used by other device drivers
- *	to obtain RAR address information on a RAR. It takes three
- *	parameters:
- *
- *	The function returns a 0 upon success or an error if there is no RAR
- *	facility on this system.
- */
-int rar_get_address(int rar_index, dma_addr_t *start, dma_addr_t *end)
-{
-	int idx;
-	struct rar_device *rar = rar_to_device(rar_index, &idx);
-
-	if (rar == NULL) {
-		WARN_ON(1);
-		return -ENODEV;
-	}
-
-	*start = rar->rar_addr[idx].low;
-	*end = rar->rar_addr[idx].high;
-	return 0;
-}
-EXPORT_SYMBOL(rar_get_address);
-
-/**
- *	rar_lock	-	lock a RAR register
- *	@rar_index: RAR to lock (0-2)
- *
- *	The rar_lock function is ued by other device drivers to lock an RAR.
- *	once a RAR is locked, it stays locked until the next system reboot.
- *
- *	The function returns a 0 upon success or an error if there is no RAR
- *	facility on this system, or the locking fails
- */
-int rar_lock(int rar_index)
-{
-	struct rar_device *rar;
-	int result;
-	int idx;
-	dma_addr_t low, high;
-
-	rar = rar_to_device(rar_index, &idx);
-
-	if (rar == NULL) {
-		WARN_ON(1);
-		return -EINVAL;
-	}
-
-	low = rar->rar_addr[idx].low & 0xfffffc00u;
-	high = rar->rar_addr[idx].high & 0xfffffc00u;
-
-	/*
-	* Only allow I/O from the graphics and Langwell;
-	* not from the x86 processor
-	*/
-
-	if (rar_index == RAR_TYPE_VIDEO) {
-		low |= 0x00000009;
-		high |= 0x00000015;
-	} else if (rar_index == RAR_TYPE_AUDIO) {
-		/* Only allow I/O from Langwell; nothing from x86 */
-		low |= 0x00000008;
-		high |= 0x00000018;
-	} else
-		/* Read-only from all agents */
-		high |= 0x00000018;
-
-	/*
-	* Now program the register using the Lincroft message
-	* bus interface.
-	*/
-	result = rar_set_addr(rar->rar_dev,
-				2 * idx, low);
-
-	if (result == 0)
-		result = rar_set_addr(rar->rar_dev,
-				2 * idx + 1, high);
-
-	return result;
-}
-EXPORT_SYMBOL(rar_lock);
-
-/**
- *	register_rar		-	register a RAR handler
- *	@num: RAR we wish to register for
- *	@callback: function to call when RAR support is available
- *	@data: data to pass to this function
- *
- *	The register_rar function is to used by other device drivers
- *	to ensure that this driver is ready. As we cannot be sure of
- *	the compile/execute order of drivers in ther kernel, it is
- *	best to give this driver a callback function to call when
- *	it is ready to give out addresses. The callback function
- *	would have those steps that continue the initialization of
- *	a driver that do require a valid RAR address. One of those
- *	steps would be to call rar_get_address()
- *
- *	This function return 0 on success an error code on failure.
- */
-int register_rar(int num, int (*callback)(unsigned long data),
-							unsigned long data)
-{
-	/* For now we hardcode a single RAR device */
-	struct rar_device *rar;
-	struct client *c;
-	int idx;
-	int retval = 0;
-
-	mutex_lock(&rar_mutex);
-
-	/* Do we have a client mapping for this RAR number ? */
-	c = rar_to_client(num);
-	if (c == NULL) {
-		retval = -ERANGE;
-		goto done;
-	}
-	/* Is it claimed ? */
-	if (c->busy) {
-		retval = -EBUSY;
-		goto done;
-	}
-	c->busy = 1;
-
-	/* See if we have a handler for this RAR yet, if we do then fire it */
-	rar = rar_to_device(num, &idx);
-
-	if (rar) {
-		/*
-		* if the driver already registered, then we can simply
-		* call the callback right now
-		*/
-		(*callback)(data);
-		goto done;
-	}
-
-	/* Arrange to be called back when the hardware is found */
-	c->callback = callback;
-	c->driver_priv = data;
-done:
-	mutex_unlock(&rar_mutex);
-	return retval;
-}
-EXPORT_SYMBOL(register_rar);
-
-/**
- *	unregister_rar	-	release a RAR allocation
- *	@num: RAR number
- *
- *	Releases a RAR allocation, or pending allocation. If a callback is
- *	pending then this function will either complete before the unregister
- *	returns or not at all.
- */
-
-void unregister_rar(int num)
-{
-	struct client *c;
-
-	mutex_lock(&rar_mutex);
-	c = rar_to_client(num);
-	if (c == NULL || !c->busy)
-		WARN_ON(1);
-	else
-		c->busy = 0;
-	mutex_unlock(&rar_mutex);
-}
-EXPORT_SYMBOL(unregister_rar);
-
-/**
- *	rar_callback		-	Process callbacks
- *	@rar: new RAR device
- *
- *	Process the callbacks for a newly found RAR device.
- */
-
-static void rar_callback(struct rar_device *rar)
-{
-	struct client *c = &rar->client[0];
-	int i;
-
-	mutex_lock(&rar_mutex);
-
-	rar->registered = 1;	/* Ensure no more callbacks queue */
-
-	for (i = 0; i < MRST_NUM_RAR; i++) {
-		if (c->callback && c->busy) {
-			c->callback(c->driver_priv);
-			c->callback = NULL;
-		}
-		c++;
-	}
-	mutex_unlock(&rar_mutex);
-}
-
-/**
- *	rar_probe		-	PCI probe callback
- *	@dev: PCI device
- *	@id: matching entry in the match table
- *
- *	A RAR device has been discovered. Initialise it and if successful
- *	process any pending callbacks that can now be completed.
- */
-static int rar_probe(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	int error;
-	struct rar_device *rar;
-
-	dev_dbg(&dev->dev, "PCI probe starting\n");
-
-	rar = alloc_rar_device();
-	if (rar == NULL)
-		return -EBUSY;
-
-	/* Enable the device */
-	error = pci_enable_device(dev);
-	if (error) {
-		dev_err(&dev->dev,
-			"Error enabling RAR register PCI device\n");
-		goto end_function;
-	}
-
-	/* Fill in the rar_device structure */
-	rar->rar_dev = pci_dev_get(dev);
-	pci_set_drvdata(dev, rar);
-
-	/*
-	 * Initialize the RAR parameters, which have to be retrieved
-	 * via the message bus interface.
-	 */
-	error = init_rar_params(rar);
-	if (error) {
-		pci_disable_device(dev);
-		dev_err(&dev->dev, "Error retrieving RAR addresses\n");
-		goto end_function;
-	}
-	/* now call anyone who has registered (using callbacks) */
-	rar_callback(rar);
-	return 0;
-end_function:
-	free_rar_device(rar);
-	return error;
-}
-
-const struct pci_device_id rar_pci_id_tbl[] = {
-	{ PCI_VDEVICE(INTEL, 0x4110) },
-	{ 0 }
-};
-
-MODULE_DEVICE_TABLE(pci, rar_pci_id_tbl);
-
-const struct pci_device_id *my_id_table = rar_pci_id_tbl;
-
-/* field for registering driver to PCI device */
-static struct pci_driver rar_pci_driver = {
-	.name = "rar_register_driver",
-	.id_table = rar_pci_id_tbl,
-	.probe = rar_probe,
-	/* Cannot be unplugged - no remove */
-};
-
-static int __init rar_init_handler(void)
-{
-	return pci_register_driver(&rar_pci_driver);
-}
-
-static void __exit rar_exit_handler(void)
-{
-	pci_unregister_driver(&rar_pci_driver);
-}
-
-module_init(rar_init_handler);
-module_exit(rar_exit_handler);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Intel Restricted Access Region Register Driver");
diff --git a/drivers/staging/rar_register/rar_register.h b/drivers/staging/rar_register/rar_register.h
deleted file mode 100644
index ffa805780f8..00000000000
--- a/drivers/staging/rar_register/rar_register.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2010 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General
- * Public License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be
- * useful, but WITHOUT ANY WARRANTY; without even the implied
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE.  See the GNU General Public License for more details.
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the Free
- * Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA  02111-1307, USA.
- * The full GNU General Public License is included in this
- * distribution in the file called COPYING.
- */
-
-
-#ifndef _RAR_REGISTER_H
-#define _RAR_REGISTER_H
-
-#include <linux/types.h>
-
-/* following are used both in drivers as well as user space apps */
-
-#define	RAR_TYPE_VIDEO	0
-#define	RAR_TYPE_AUDIO	1
-#define	RAR_TYPE_IMAGE	2
-#define	RAR_TYPE_DATA	3
-
-#ifdef __KERNEL__
-
-struct rar_device;
-
-int register_rar(int num,
-		int (*callback)(unsigned long data), unsigned long data);
-void unregister_rar(int num);
-int rar_get_address(int rar_index, dma_addr_t *start, dma_addr_t *end);
-int rar_lock(int rar_index);
-
-#endif  /* __KERNEL__ */
-#endif  /* _RAR_REGISTER_H */
diff --git a/include/linux/rar_register.h b/include/linux/rar_register.h
new file mode 100644
index 00000000000..ffa805780f8
--- /dev/null
+++ b/include/linux/rar_register.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2010 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General
+ * Public License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE.  See the GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA  02111-1307, USA.
+ * The full GNU General Public License is included in this
+ * distribution in the file called COPYING.
+ */
+
+
+#ifndef _RAR_REGISTER_H
+#define _RAR_REGISTER_H
+
+#include <linux/types.h>
+
+/* following are used both in drivers as well as user space apps */
+
+#define	RAR_TYPE_VIDEO	0
+#define	RAR_TYPE_AUDIO	1
+#define	RAR_TYPE_IMAGE	2
+#define	RAR_TYPE_DATA	3
+
+#ifdef __KERNEL__
+
+struct rar_device;
+
+int register_rar(int num,
+		int (*callback)(unsigned long data), unsigned long data);
+void unregister_rar(int num);
+int rar_get_address(int rar_index, dma_addr_t *start, dma_addr_t *end);
+int rar_lock(int rar_index);
+
+#endif  /* __KERNEL__ */
+#endif  /* _RAR_REGISTER_H */
-- 
cgit v1.2.3-70-g09d2


From 8950778704cf8483cc5cc0140f557adf0d3f45a5 Mon Sep 17 00:00:00 2001
From: Alek Du <alek.du@intel.com>
Date: Tue, 13 Jul 2010 10:56:25 +0100
Subject: gpio: Add PMIC GPIO block support

Moorestown has a PMIC chip which contains GPIO blocks. The PMIC chip is
connected to Langwell by an SPI interface, so this GPIO driver will be regarded
as an SPI GPIO expander though the actual GPIO access is through IPC and SRAM.
The SPI master controller will probe this device driver by parsing the SPIB table.

Cleaned up for new IPC, GPE removed and some printk and other tidying by
Alan Cox. Fixes for points noted by Matthew Garrett

Signed-off-by: Alek Du <alek.du@intel.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
---
 drivers/platform/x86/Kconfig           |   7 +
 drivers/platform/x86/Makefile          |   2 +
 drivers/platform/x86/intel_pmic_gpio.c | 340 +++++++++++++++++++++++++++++++++
 include/linux/intel_pmic_gpio.h        |  15 ++
 4 files changed, 364 insertions(+)
 create mode 100644 drivers/platform/x86/intel_pmic_gpio.c
 create mode 100644 include/linux/intel_pmic_gpio.h

(limited to 'include')

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 2189565d9e0..ca30e561859 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -540,6 +540,13 @@ config INTEL_SCU_IPC
 	  some embedded Intel x86 platforms. This is not needed for PC-type
 	  machines.
 
+config GPIO_INTEL_PMIC
+	bool "Intel PMIC GPIO support"
+	depends on INTEL_SCU_IPC && GPIOLIB
+	---help---
+	  Say Y here to support GPIO via the SCU IPC interface
+	  on Intel MID platforms.
+
 config RAR_REGISTER
 	bool "Restricted Access Region Register Driver"
 	depends on PCI && X86_MRST
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index ed50eca1b55..4744c7744ff 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -28,3 +28,5 @@ obj-$(CONFIG_TOSHIBA_BT_RFKILL)	+= toshiba_bluetooth.o
 obj-$(CONFIG_INTEL_SCU_IPC)	+= intel_scu_ipc.o
 obj-$(CONFIG_RAR_REGISTER)	+= intel_rar_register.o
 obj-$(CONFIG_INTEL_IPS)		+= intel_ips.o
+obj-$(CONFIG_GPIO_INTEL_PMIC)	+= intel_pmic_gpio.o
+
diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c
new file mode 100644
index 00000000000..5cdcff65391
--- /dev/null
+++ b/drivers/platform/x86/intel_pmic_gpio.c
@@ -0,0 +1,340 @@
+/* Moorestown PMIC GPIO (access through IPC) driver
+ * Copyright (c) 2008 - 2009, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Moorestown platform PMIC chip
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <asm/intel_scu_ipc.h>
+#include <linux/device.h>
+#include <linux/intel_pmic_gpio.h>
+#include <linux/platform_device.h>
+
+#define DRIVER_NAME "pmic_gpio"
+
+/* register offset that IPC driver should use
+ * 8 GPIO + 8 GPOSW (6 controllable) + 8GPO
+ */
+enum pmic_gpio_register {
+	GPIO0		= 0xE0,	/* pins 0-7: GPIO0 + pin */
+	GPIO7		= 0xE7,
+	GPIOINT		= 0xE8,
+	GPOSWCTL0	= 0xEC,	/* pins 8-15: GPOSWCTL0 + pin - 8 */
+	GPOSWCTL5	= 0xF1,
+	GPO		= 0xF4,	/* pins 16-23: bit (pin - 16) */
+};
+
+/* bits definition for GPIO & GPOSW */
+#define GPIO_DRV 0x01
+#define GPIO_DIR 0x02	/* set to select input (pmic_gpio_direction_input) */
+#define GPIO_DIN 0x04	/* bit returned by pmic_gpio_get() */
+#define GPIO_DOU 0x08	/* output value bit */
+#define GPIO_INTCTL 0x30 /* presumably the 0x20/0x10 edge bits - confirm */
+#define GPIO_DBC 0xc0
+
+#define GPOSW_DRV 0x01
+#define GPOSW_DOU 0x08	/* output value bit */
+#define GPOSW_RDRV 0x30
+
+
+#define NUM_GPIO 24	/* 8 GPIO + 8 GPOSW + 8 GPO */
+
+struct pmic_gpio_irq {
+	spinlock_t lock;		/* protects trigger[] and dirty */
+	u32 trigger[NUM_GPIO];		/* last requested IRQ type per pin */
+	u32 dirty;			/* bit n set: pin n awaits programming */
+	struct work_struct work;	/* runs pmic_irqtype_work() */
+};
+
+/* Per-device driver state, allocated in platform_pmic_gpio_probe(). */
+struct pmic_gpio {
+	struct gpio_chip	chip;
+	struct pmic_gpio_irq	irqtypes;
+	void			*gpiointr;	/* ioremapped pdata->gpiointr */
+	int			irq;		/* parent (chained) IRQ */
+	unsigned		irq_base;	/* IRQ number of pin 0 */
+};
+
+/*
+ * Program rising (0x20) and falling (0x10) edge detection for one pin.
+ * Each bit is set if its IRQ_TYPE_EDGE_* flag is in @type, cleared otherwise.
+ */
+static void pmic_program_irqtype(int gpio, int type)
+{
+	int rising = (type & IRQ_TYPE_EDGE_RISING) ? 0x20 : 0x00;
+	int falling = (type & IRQ_TYPE_EDGE_FALLING) ? 0x10 : 0x00;
+
+	intel_scu_ipc_update_register(GPIO0 + gpio, rising, 0x20);
+	intel_scu_ipc_update_register(GPIO0 + gpio, falling, 0x10);
+}
+
+static void pmic_irqtype_work(struct work_struct *work)
+{
+	struct pmic_gpio_irq *t =
+		container_of(work, struct pmic_gpio_irq, work);
+	unsigned long flags;
+	int i;
+	u16 type;	/* NOTE: trigger[] is u32; the value is narrowed here */
+
+	spin_lock_irqsave(&t->lock, flags);
+	/* The lock is dropped around each IPC call, so pmic_irq_type() may
+	   set new dirty bits meanwhile: rescan until none are left */
+	while (t->dirty) {
+		/*
+		 *	For each pin that has the dirty bit set, send an IPC
+		 *	message to configure the hardware via the PMIC
+		 */
+		for (i = 0; i < NUM_GPIO; i++) {
+			if (!(t->dirty & (1 << i)))
+				continue;
+			t->dirty &= ~(1 << i);
+			/* Snapshot the type while locked: neither the array
+			   entry nor dirty can be trusted once it is dropped */
+			type = t->trigger[i];
+			spin_unlock_irqrestore(&t->lock, flags);
+			pmic_program_irqtype(i, type);
+			spin_lock_irqsave(&t->lock, flags);
+		}
+	}
+	spin_unlock_irqrestore(&t->lock, flags);
+}
+
+/* Switch pin @offset to input; only GPIO pins 0-7 can be used as inputs. */
+static int pmic_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+	if (offset >= 8) {	/* GPIO0 + 8 would already be GPIOINT */
+		printk(KERN_ERR "%s: only pin 0-7 support input\n", __func__);
+		return -1;
+	}
+	return intel_scu_ipc_update_register(GPIO0 + offset,
+							GPIO_DIR, GPIO_DIR);
+}
+
+static int pmic_gpio_direction_output(struct gpio_chip *chip,
+			unsigned offset, int value)
+{
+	int rc = -EINVAL;	/* returned when no pin class below matches */
+
+	/* intel_scu_ipc_update_register() takes (addr, bits, mask) */
+	if (offset < 8)/* it is GPIO: clear GPIO_DIR, set DRV, program DOU */
+		rc = intel_scu_ipc_update_register(GPIO0 + offset,
+				GPIO_DRV | (value ? GPIO_DOU : 0),
+				GPIO_DRV | GPIO_DOU | GPIO_DIR);
+	else if (offset < 16)/* it is GPOSW */
+		rc = intel_scu_ipc_update_register(GPOSWCTL0 + offset - 8,
+				GPOSW_DRV | (value ? GPOSW_DOU : 0),
+				GPOSW_DRV | GPOSW_DOU | GPOSW_RDRV);
+	else if (offset < 24)/* it is GPO: one bit per pin */
+		rc = intel_scu_ipc_update_register(GPO,
+				value ? 1 << (offset - 16) : 0,
+				1 << (offset - 16));
+	else {
+		printk(KERN_ERR
+			"%s: invalid PMIC GPIO pin %u!\n", __func__, offset);
+		WARN_ON(1);
+	}
+	return rc;
+}
+
+static int pmic_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	u8 r;
+	int ret;
+
+	/* only pins 0-7 have an input; GPIO0 + 8 would already be GPIOINT */
+	if (offset >= 8)
+		return -EOPNOTSUPP;
+	ret = intel_scu_ipc_ioread8(GPIO0 + offset, &r);
+	if (ret < 0)
+		return ret;
+	return r & GPIO_DIN;	/* non-zero (GPIO_DIN) when the input bit is set */
+}
+
+static void pmic_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	/* intel_scu_ipc_update_register() takes (addr, bits, mask) */
+	if (offset < 8)/* it is GPIO */
+		intel_scu_ipc_update_register(GPIO0 + offset,
+			GPIO_DRV | (value ? GPIO_DOU : 0),
+			GPIO_DRV | GPIO_DOU);
+	else if (offset < 16)/* it is GPOSW */
+		intel_scu_ipc_update_register(GPOSWCTL0 + offset - 8,
+			GPOSW_DRV | (value ? GPOSW_DOU : 0),
+			GPOSW_DRV | GPOSW_DOU | GPOSW_RDRV);
+	else if (offset < 24) /* it is GPO: one bit per pin */
+		intel_scu_ipc_update_register(GPO,
+			value ? 1 << (offset - 16) : 0, 1 << (offset - 16));
+}
+
+static int pmic_irq_type(unsigned irq, unsigned type)
+{
+	struct pmic_gpio *pg = get_irq_chip_data(irq);
+	u32 gpio = irq - pg->irq_base;
+	unsigned long flags;
+
+	if (gpio >= pg->chip.ngpio)	/* trigger[] has exactly ngpio slots */
+		return -EINVAL;
+	/* Record the request; pmic_irqtype_work() programs the hardware. */
+	spin_lock_irqsave(&pg->irqtypes.lock, flags);
+	pg->irqtypes.trigger[gpio] = type;
+	pg->irqtypes.dirty |=  (1 << gpio);
+	spin_unlock_irqrestore(&pg->irqtypes.lock, flags);
+	schedule_work(&pg->irqtypes.work);
+	return 0;
+}
+
+
+/* Translate a pin offset into its IRQ number, irq_base + offset. */
+static int pmic_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
+{
+	unsigned base = container_of(chip, struct pmic_gpio, chip)->irq_base;
+
+	return base + offset;
+}
+
+/* The gpiointr register is read-clear, so mask and unmask both do nothing. */
+static void pmic_irq_unmask(unsigned irq)
+{
+}
+
+static void pmic_irq_mask(unsigned irq)
+{
+}
+
+static struct irq_chip pmic_irqchip = {
+	.name		= "PMIC-GPIO",
+	.mask		= pmic_irq_mask,	/* no-op */
+	.unmask		= pmic_irq_unmask,	/* no-op */
+	.set_type	= pmic_irq_type,	/* queues deferred IPC update */
+};
+
+static void pmic_irq_handler(unsigned irq, struct irq_desc *desc)
+{
+	struct pmic_gpio *pg = (struct pmic_gpio *)get_irq_data(irq);
+	u8 pending = *((u8 *)pg->gpiointr + 4);	/* status byte, offset 4 */
+	int pin;
+	/* Fan the chained interrupt out to each pin whose bit is set. */
+	for (pin = 0; pin < 8; pin++) {
+		if (!(pending & (1 << pin)))
+			continue;
+		pr_debug("pmic pin %d triggered\n", pin);
+		generic_handle_irq(pg->irq_base + pin);
+	}
+	desc->chip->eoi(irq);
+}
+
+/* Probe: map GPIOINT, register the gpio_chip and the 8 demuxed pin IRQs. */
+static int __devinit platform_pmic_gpio_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	int irq = platform_get_irq(pdev, 0);
+	struct intel_pmic_gpio_platform_data *pdata = dev->platform_data;
+
+	struct pmic_gpio *pg;
+	int retval;
+	int i;
+
+	if (irq < 0) {
+		dev_dbg(dev, "no IRQ line\n");
+		return -EINVAL;
+	}
+
+	if (!pdata || !pdata->gpio_base || !pdata->irq_base) {
+		dev_dbg(dev, "incorrect or missing platform data\n");
+		return -EINVAL;
+	}
+
+	pg = kzalloc(sizeof(*pg), GFP_KERNEL);
+	if (!pg)
+		return -ENOMEM;
+
+	pg->irq = irq;
+	/* set up the SRAM mapping for the GPIOINT register (8 bytes) */
+	pg->gpiointr = ioremap_nocache(pdata->gpiointr, 8);
+	if (!pg->gpiointr) {
+		printk(KERN_ERR "%s: Can not map GPIOINT.\n", __func__);
+		retval = -EINVAL;
+		goto err2;
+	}
+	pg->irq_base = pdata->irq_base;
+	pg->chip.label = "intel_pmic";
+	pg->chip.direction_input = pmic_gpio_direction_input;
+	pg->chip.direction_output = pmic_gpio_direction_output;
+	pg->chip.get = pmic_gpio_get;
+	pg->chip.set = pmic_gpio_set;
+	pg->chip.to_irq = pmic_gpio_to_irq;
+	pg->chip.base = pdata->gpio_base;
+	pg->chip.ngpio = NUM_GPIO;
+	pg->chip.can_sleep = 1;
+	pg->chip.dev = dev;
+
+	INIT_WORK(&pg->irqtypes.work, pmic_irqtype_work);
+	spin_lock_init(&pg->irqtypes.lock);
+
+	retval = gpiochip_add(&pg->chip);
+	if (retval) {
+		printk(KERN_ERR "%s: Can not add pmic gpio chip.\n", __func__);
+		goto err;
+	}
+	set_irq_data(pg->irq, pg);
+	set_irq_chained_handler(pg->irq, pmic_irq_handler);
+	for (i = 0; i < 8; i++) {
+		set_irq_chip_and_handler_name(i + pg->irq_base, &pmic_irqchip,
+					handle_simple_irq, "demux");
+		set_irq_chip_data(i + pg->irq_base, pg);
+	}
+	/* publish driver data only once everything above has succeeded */
+	dev_set_drvdata(dev, pg);
+	return 0;
+err:
+	iounmap(pg->gpiointr);
+err2:
+	kfree(pg);
+	return retval;
+}
+
+/* Platform driver named DRIVER_NAME; only a probe callback is provided.
+ * This supports the sfi 0.81 fw */
+static struct platform_driver platform_pmic_gpio_driver = {
+	.driver = {
+		.name		= DRIVER_NAME,
+		.owner		= THIS_MODULE,
+	},
+	.probe		= platform_pmic_gpio_probe,
+};
+
+static int __init platform_pmic_gpio_init(void)
+{
+	return platform_driver_register(&platform_pmic_gpio_driver);
+}
+
+subsys_initcall(platform_pmic_gpio_init);	/* no matching exit/unregister */
+
+MODULE_AUTHOR("Alek Du <alek.du@intel.com>");
+MODULE_DESCRIPTION("Intel Moorestown PMIC GPIO driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/intel_pmic_gpio.h b/include/linux/intel_pmic_gpio.h
new file mode 100644
index 00000000000..920109a2919
--- /dev/null
+++ b/include/linux/intel_pmic_gpio.h
@@ -0,0 +1,15 @@
+#ifndef LINUX_INTEL_PMIC_H
+#define LINUX_INTEL_PMIC_H
+
+struct intel_pmic_gpio_platform_data {
+	/* the first IRQ of the chip; intel_pmic_gpio.c's probe rejects 0 */
+	unsigned	irq_base;
+	/* number assigned to the first GPIO; the probe rejects 0 here too */
+	unsigned	gpio_base;
+	/* SRAM address of the GPIOINTR register: the Langwell chip maps the
+	 * PMIC SPI GPIO expander's GPIOINTR register into SRAM.
+	 */
+	unsigned	gpiointr;
+};
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From 226528c6100e4191842e61997110c8ace40605f7 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Thu, 4 Mar 2010 03:23:36 -0500
Subject: [CPUFREQ] unexport (un)lock_policy_rwsem* functions

The lock_policy_rwsem_* and unlock_policy_rwsem_* functions were scheduled
to be unexported in 2.6.33. Now that they have no callers outside of
cpufreq.c, unexport them and make them static.

Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 Documentation/feature-removal-schedule.txt | 10 ----------
 drivers/cpufreq/cpufreq.c                  | 10 +++-------
 include/linux/cpufreq.h                    |  5 -----
 3 files changed, 3 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 1571c0c83db..182bbe49429 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -377,16 +377,6 @@ Who:	Eric Paris <eparis@redhat.com>
 
 ----------------------------
 
-What:	lock_policy_rwsem_* and unlock_policy_rwsem_* will not be
-	exported interface anymore.
-When:	2.6.33
-Why:	cpu_policy_rwsem has a new cleaner definition making it local to
-	cpufreq core and contained inside cpufreq.c. Other dependent
-	drivers should not use it in order to safely avoid lockdep issues.
-Who:	Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
-
-----------------------------
-
 What:	sound-slot/service-* module aliases and related clutters in
 	sound/sound_core.c
 When:	August 2010
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 938b74ea9ff..40877d21908 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -68,7 +68,7 @@ static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
 static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
 
 #define lock_policy_rwsem(mode, cpu)					\
-int lock_policy_rwsem_##mode						\
+static int lock_policy_rwsem_##mode					\
 (int cpu)								\
 {									\
 	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);		\
@@ -83,26 +83,22 @@ int lock_policy_rwsem_##mode						\
 }
 
 lock_policy_rwsem(read, cpu);
-EXPORT_SYMBOL_GPL(lock_policy_rwsem_read);
 
 lock_policy_rwsem(write, cpu);
-EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);
 
-void unlock_policy_rwsem_read(int cpu)
+static void unlock_policy_rwsem_read(int cpu)
 {
 	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
 	BUG_ON(policy_cpu == -1);
 	up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
 }
-EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);
 
-void unlock_policy_rwsem_write(int cpu)
+static void unlock_policy_rwsem_write(int cpu)
 {
 	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
 	BUG_ON(policy_cpu == -1);
 	up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
 }
-EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);
 
 
 /* internal prototypes */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 9f15150ce8d..c3e9de8321c 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -196,11 +196,6 @@ extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy,
 int cpufreq_register_governor(struct cpufreq_governor *governor);
 void cpufreq_unregister_governor(struct cpufreq_governor *governor);
 
-int lock_policy_rwsem_read(int cpu);
-int lock_policy_rwsem_write(int cpu);
-void unlock_policy_rwsem_read(int cpu);
-void unlock_policy_rwsem_write(int cpu);
-
 
 /*********************************************************************
  *                      CPUFREQ DRIVER INTERFACE                     *
-- 
cgit v1.2.3-70-g09d2


From 6f4f2723d08534fd4e407e1ef8500b0f4d12c30c Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Tue, 20 Apr 2010 13:17:36 +0200
Subject: [CPUFREQ] x86 cpufreq: Make trace_power_frequency cpufreq driver
 independent

and fix the broken case if a core's frequency depends on others.

trace_power_frequency was implemented in a rather non-generic way,
only in the acpi-cpufreq driver's target() function.
-> Move the call to trace_power_frequency to
   cpufreq.c:cpufreq_notify_transition() where CPUFREQ_POSTCHANGE
   notifier is triggered.
   This will support power frequency tracing by all cpufreq drivers

trace_power_frequency did not trace frequency changes correctly when
the userspace governor was used or when CPU cores' frequency depend
on each other.
-> Moving this into the CPUFREQ_POSTCHANGE notifier and pass the cpu
   which gets switched automatically fixes this.

Robert Schoene provided some important fixes on top of my initial
quick shot version which are integrated in this patch:
- Forgot some changes in power_end trace (TP_printk/variable names)
- Variable dummy in power_end must now be cpu_id
- Use static 64 bit variable instead of unsigned int for cpu_id

Signed-off-by: Thomas Renninger <trenn@suse.de>
CC: davej@redhat.com
CC: arjan@infradead.org
CC: linux-kernel@vger.kernel.org
CC: robert.schoene@tu-dresden.de
Tested-by: robert.schoene@tu-dresden.de
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c |  3 ---
 arch/x86/kernel/process.c                  |  8 ++++----
 drivers/cpufreq/cpufreq.c                  |  5 +++++
 drivers/cpuidle/cpuidle.c                  |  2 +-
 include/trace/events/power.h               | 27 +++++++++++++++------------
 tools/perf/builtin-timechart.c             | 11 ++++++-----
 6 files changed, 31 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index cee7aa949c3..246cd3afbb5 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -34,7 +34,6 @@
 #include <linux/compiler.h>
 #include <linux/dmi.h>
 #include <linux/slab.h>
-#include <trace/events/power.h>
 
 #include <linux/acpi.h>
 #include <linux/io.h>
@@ -324,8 +323,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 		}
 	}
 
-	trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency);
-
 	switch (data->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
 		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e7e35219b32..787572d43d9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -371,7 +371,7 @@ static inline int hlt_use_halt(void)
 void default_idle(void)
 {
 	if (hlt_use_halt()) {
-		trace_power_start(POWER_CSTATE, 1);
+		trace_power_start(POWER_CSTATE, 1, smp_processor_id());
 		current_thread_info()->status &= ~TS_POLLING;
 		/*
 		 * TS_POLLING-cleared state must be visible before we
@@ -441,7 +441,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
  */
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
-	trace_power_start(POWER_CSTATE, (ax>>4)+1);
+	trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
 	if (!need_resched()) {
 		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
@@ -457,7 +457,7 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 static void mwait_idle(void)
 {
 	if (!need_resched()) {
-		trace_power_start(POWER_CSTATE, 1);
+		trace_power_start(POWER_CSTATE, 1, smp_processor_id());
 		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
 
@@ -478,7 +478,7 @@ static void mwait_idle(void)
  */
 static void poll_idle(void)
 {
-	trace_power_start(POWER_CSTATE, 0);
+	trace_power_start(POWER_CSTATE, 0, smp_processor_id());
 	local_irq_enable();
 	while (!need_resched())
 		cpu_relax();
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 40877d21908..6ce1bb73563 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -29,6 +29,8 @@
 #include <linux/completion.h>
 #include <linux/mutex.h>
 
+#include <trace/events/power.h>
+
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
 						"cpufreq-core", msg)
 
@@ -350,6 +352,9 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
 
 	case CPUFREQ_POSTCHANGE:
 		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
+		dprintk("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
+			(unsigned long)freqs->cpu);
+		trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
 				CPUFREQ_POSTCHANGE, freqs);
 		if (likely(policy) && likely(policy->cpu == freqs->cpu))
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 199488576a0..dbefe15bd58 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -95,7 +95,7 @@ static void cpuidle_idle_call(void)
 	/* give the governor an opportunity to reflect on the outcome */
 	if (cpuidle_curr_governor->reflect)
 		cpuidle_curr_governor->reflect(dev);
-	trace_power_end(0);
+	trace_power_end(smp_processor_id());
 }
 
 /**
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index c4efe9b8280..35a2a6e7bf1 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -18,52 +18,55 @@ enum {
 
 DECLARE_EVENT_CLASS(power,
 
-	TP_PROTO(unsigned int type, unsigned int state),
+	TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
 
-	TP_ARGS(type, state),
+	TP_ARGS(type, state, cpu_id),
 
 	TP_STRUCT__entry(
 		__field(	u64,		type		)
 		__field(	u64,		state		)
+		__field(	u64,		cpu_id		)
 	),
 
 	TP_fast_assign(
 		__entry->type = type;
 		__entry->state = state;
+		__entry->cpu_id = cpu_id;
 	),
 
-	TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state)
+	TP_printk("type=%lu state=%lu cpu_id=%lu", (unsigned long)__entry->type,
+		(unsigned long)__entry->state, (unsigned long)__entry->cpu_id)
 );
 
 DEFINE_EVENT(power, power_start,
 
-	TP_PROTO(unsigned int type, unsigned int state),
+	TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
 
-	TP_ARGS(type, state)
+	TP_ARGS(type, state, cpu_id)
 );
 
 DEFINE_EVENT(power, power_frequency,
 
-	TP_PROTO(unsigned int type, unsigned int state),
+	TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
 
-	TP_ARGS(type, state)
+	TP_ARGS(type, state, cpu_id)
 );
 
 TRACE_EVENT(power_end,
 
-	TP_PROTO(int dummy),
+	TP_PROTO(unsigned int cpu_id),
 
-	TP_ARGS(dummy),
+	TP_ARGS(cpu_id),
 
 	TP_STRUCT__entry(
-		__field(	u64,		dummy		)
+		__field(	u64,		cpu_id		)
 	),
 
 	TP_fast_assign(
-		__entry->dummy = 0xffff;
+		__entry->cpu_id = cpu_id;
 	),
 
-	TP_printk("dummy=%lu", (unsigned long)__entry->dummy)
+	TP_printk("cpu_id=%lu", (unsigned long)__entry->cpu_id)
 
 );
 
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 5a52ed9fc10..5161619d471 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -300,8 +300,9 @@ struct trace_entry {
 
 struct power_entry {
 	struct trace_entry te;
-	s64	type;
-	s64	value;
+	u64	type;
+	u64	value;
+	u64	cpu_id;
 };
 
 #define TASK_COMM_LEN 16
@@ -498,13 +499,13 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 			return 0;
 
 		if (strcmp(event_str, "power:power_start") == 0)
-			c_state_start(data.cpu, data.time, pe->value);
+			c_state_start(pe->cpu_id, data.time, pe->value);
 
 		if (strcmp(event_str, "power:power_end") == 0)
-			c_state_end(data.cpu, data.time);
+			c_state_end(pe->cpu_id, data.time);
 
 		if (strcmp(event_str, "power:power_frequency") == 0)
-			p_state_change(data.cpu, data.time, pe->value);
+			p_state_change(pe->cpu_id, data.time, pe->value);
 
 		if (strcmp(event_str, "sched:sched_wakeup") == 0)
 			sched_wakeup(data.cpu, data.time, data.pid, te);
-- 
cgit v1.2.3-70-g09d2


From ba4420c224c2808f2661cf8428f43ceef7a73a4a Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 9 Mar 2010 10:56:52 +1000
Subject: drm: move ttm global code to core drm

I wrote this for the prime sharing work, but I also noticed other external
non-upstream drivers from a large company carrying a similar patch, so I
may as well ship it in master.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/Makefile                 |   2 +-
 drivers/gpu/drm/drm_drv.c                |   1 +
 drivers/gpu/drm/drm_global.c             | 112 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/nouveau/nouveau_drv.h    |   2 +-
 drivers/gpu/drm/nouveau/nouveau_ttm.c    |  20 +++---
 drivers/gpu/drm/radeon/radeon.h          |   2 +-
 drivers/gpu/drm/radeon/radeon_ttm.c      |  20 +++---
 drivers/gpu/drm/ttm/Makefile             |   2 +-
 drivers/gpu/drm/ttm/ttm_bo.c             |   4 +-
 drivers/gpu/drm/ttm/ttm_global.c         | 112 -------------------------------
 drivers/gpu/drm/ttm/ttm_module.c         |   4 --
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h      |   2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c |  20 +++---
 include/drm/drm.h                        |   2 +
 include/drm/drmP.h                       |   2 +
 include/drm/drm_global.h                 |  53 +++++++++++++++
 include/drm/ttm/ttm_bo_driver.h          |   7 +-
 include/drm/ttm/ttm_module.h             |  20 ------
 18 files changed, 211 insertions(+), 176 deletions(-)
 create mode 100644 drivers/gpu/drm/drm_global.c
 delete mode 100644 drivers/gpu/drm/ttm/ttm_global.c
 create mode 100644 include/drm/drm_global.h

(limited to 'include')

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index df8f9232286..f3a23a329f4 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -12,7 +12,7 @@ drm-y       :=	drm_auth.o drm_buffer.o drm_bufs.o drm_cache.o \
 		drm_platform.o drm_sysfs.o drm_hashtab.o drm_sman.o drm_mm.o \
 		drm_crtc.o drm_modes.o drm_edid.o \
 		drm_info.o drm_debugfs.o drm_encoder_slave.o \
-		drm_trace_points.o
+		drm_trace_points.o drm_global.o
 
 drm-$(CONFIG_COMPAT) += drm_ioc32.o
 
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index b5a51686f49..d5b349d279f 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -288,6 +288,7 @@ static int __init drm_core_init(void)
 {
 	int ret = -ENOMEM;
 
+	drm_global_init();
 	idr_init(&drm_minors_idr);
 
 	if (register_chrdev(DRM_MAJOR, "drm", &drm_stub_fops))
diff --git a/drivers/gpu/drm/drm_global.c b/drivers/gpu/drm/drm_global.c
new file mode 100644
index 00000000000..c87dc96444d
--- /dev/null
+++ b/drivers/gpu/drm/drm_global.c
@@ -0,0 +1,112 @@
+/**************************************************************************
+ *
+ * Copyright 2008-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
+ */
+
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include "drm_global.h"
+
+struct drm_global_item {
+	struct mutex mutex;	/* taken around object and refcount updates */
+	void *object;		/* shared object; NULL while unreferenced */
+	int refcount;		/* number of drm_global_item_ref() holders */
+};
+
+static struct drm_global_item glob[DRM_GLOBAL_NUM];	/* by global_type */
+
+/* Reset every global slot: fresh mutex, no object, zero references. */
+void drm_global_init(void)
+{
+	struct drm_global_item *item;
+
+	for (item = glob; item < glob + DRM_GLOBAL_NUM; ++item) {
+		mutex_init(&item->mutex);
+		item->object = NULL;
+		item->refcount = 0;
+	}
+}
+
+/* Teardown sanity check: BUG if any slot still holds an object or refs. */
+void drm_global_release(void)
+{
+	struct drm_global_item *item;
+	for (item = glob; item < glob + DRM_GLOBAL_NUM; ++item) {
+		BUG_ON(item->object != NULL);
+		BUG_ON(item->refcount != 0);
+	}
+}
+
+/* Take a reference on the item selected by ref->global_type; the first
+ * reference allocates ref->size zeroed bytes and runs ref->init(ref). */
+int drm_global_item_ref(struct drm_global_reference *ref)
+{
+	int ret;
+	struct drm_global_item *item = &glob[ref->global_type];
+
+	mutex_lock(&item->mutex);
+	if (item->refcount == 0) {
+		item->object = kzalloc(ref->size, GFP_KERNEL);
+		if (unlikely(item->object == NULL)) {
+			ret = -ENOMEM;
+			goto out_err;
+		}
+
+		ref->object = item->object;
+		ret = ref->init(ref);
+		if (unlikely(ret != 0))
+			goto out_err;
+	}
+	++item->refcount;
+	ref->object = item->object;
+	mutex_unlock(&item->mutex);
+	return 0;
+out_err:
+	/* NOTE(review): the object is not freed here; confirm init() owns it */
+	item->object = NULL;	/* reset while still holding the mutex */
+	mutex_unlock(&item->mutex);
+	return ret;
+}
+EXPORT_SYMBOL(drm_global_item_ref);
+
+void drm_global_item_unref(struct drm_global_reference *ref)
+{
+	struct drm_global_item *item = &glob[ref->global_type];
+	/* NOTE(review): object is not kfree'd here; presumably release() does */
+	mutex_lock(&item->mutex);
+	BUG_ON(item->refcount == 0);	/* unref without a matching ref */
+	BUG_ON(ref->object != item->object);
+	if (--item->refcount == 0) {
+		ref->release(ref);	/* last reference dropped */
+		item->object = NULL;
+	}
+	mutex_unlock(&item->mutex);
+}
+EXPORT_SYMBOL(drm_global_item_unref);
+
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index d0a35d9ba52..e15db15dca7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -533,7 +533,7 @@ struct drm_nouveau_private {
 	struct list_head vbl_waiting;
 
 	struct {
-		struct ttm_global_reference mem_global_ref;
+		struct drm_global_reference mem_global_ref;
 		struct ttm_bo_global_ref bo_global_ref;
 		struct ttm_bo_device bdev;
 		spinlock_t bo_list_lock;
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index c385d50f041..bd35f930568 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -42,13 +42,13 @@ nouveau_ttm_mmap(struct file *filp, struct vm_area_struct *vma)
 }
 
 static int
-nouveau_ttm_mem_global_init(struct ttm_global_reference *ref)
+nouveau_ttm_mem_global_init(struct drm_global_reference *ref)
 {
 	return ttm_mem_global_init(ref->object);
 }
 
 static void
-nouveau_ttm_mem_global_release(struct ttm_global_reference *ref)
+nouveau_ttm_mem_global_release(struct drm_global_reference *ref)
 {
 	ttm_mem_global_release(ref->object);
 }
@@ -56,16 +56,16 @@ nouveau_ttm_mem_global_release(struct ttm_global_reference *ref)
 int
 nouveau_ttm_global_init(struct drm_nouveau_private *dev_priv)
 {
-	struct ttm_global_reference *global_ref;
+	struct drm_global_reference *global_ref;
 	int ret;
 
 	global_ref = &dev_priv->ttm.mem_global_ref;
-	global_ref->global_type = TTM_GLOBAL_TTM_MEM;
+	global_ref->global_type = DRM_GLOBAL_TTM_MEM;
 	global_ref->size = sizeof(struct ttm_mem_global);
 	global_ref->init = &nouveau_ttm_mem_global_init;
 	global_ref->release = &nouveau_ttm_mem_global_release;
 
-	ret = ttm_global_item_ref(global_ref);
+	ret = drm_global_item_ref(global_ref);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Failed setting up TTM memory accounting\n");
 		dev_priv->ttm.mem_global_ref.release = NULL;
@@ -74,15 +74,15 @@ nouveau_ttm_global_init(struct drm_nouveau_private *dev_priv)
 
 	dev_priv->ttm.bo_global_ref.mem_glob = global_ref->object;
 	global_ref = &dev_priv->ttm.bo_global_ref.ref;
-	global_ref->global_type = TTM_GLOBAL_TTM_BO;
+	global_ref->global_type = DRM_GLOBAL_TTM_BO;
 	global_ref->size = sizeof(struct ttm_bo_global);
 	global_ref->init = &ttm_bo_global_init;
 	global_ref->release = &ttm_bo_global_release;
 
-	ret = ttm_global_item_ref(global_ref);
+	ret = drm_global_item_ref(global_ref);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Failed setting up TTM BO subsystem\n");
-		ttm_global_item_unref(&dev_priv->ttm.mem_global_ref);
+		drm_global_item_unref(&dev_priv->ttm.mem_global_ref);
 		dev_priv->ttm.mem_global_ref.release = NULL;
 		return ret;
 	}
@@ -96,8 +96,8 @@ nouveau_ttm_global_release(struct drm_nouveau_private *dev_priv)
 	if (dev_priv->ttm.mem_global_ref.release == NULL)
 		return;
 
-	ttm_global_item_unref(&dev_priv->ttm.bo_global_ref.ref);
-	ttm_global_item_unref(&dev_priv->ttm.mem_global_ref);
+	drm_global_item_unref(&dev_priv->ttm.bo_global_ref.ref);
+	drm_global_item_unref(&dev_priv->ttm.mem_global_ref);
 	dev_priv->ttm.mem_global_ref.release = NULL;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 368fecf0c2b..3cd1c470b77 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -235,7 +235,7 @@ struct radeon_surface_reg {
  */
 struct radeon_mman {
 	struct ttm_bo_global_ref        bo_global_ref;
-	struct ttm_global_reference	mem_global_ref;
+	struct drm_global_reference	mem_global_ref;
 	struct ttm_bo_device		bdev;
 	bool				mem_global_referenced;
 	bool				initialized;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index e9918d88f5b..84c53e41a88 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -59,28 +59,28 @@ static struct radeon_device *radeon_get_rdev(struct ttm_bo_device *bdev)
 /*
  * Global memory.
  */
-static int radeon_ttm_mem_global_init(struct ttm_global_reference *ref)
+static int radeon_ttm_mem_global_init(struct drm_global_reference *ref)
 {
 	return ttm_mem_global_init(ref->object);
 }
 
-static void radeon_ttm_mem_global_release(struct ttm_global_reference *ref)
+static void radeon_ttm_mem_global_release(struct drm_global_reference *ref)
 {
 	ttm_mem_global_release(ref->object);
 }
 
 static int radeon_ttm_global_init(struct radeon_device *rdev)
 {
-	struct ttm_global_reference *global_ref;
+	struct drm_global_reference *global_ref;
 	int r;
 
 	rdev->mman.mem_global_referenced = false;
 	global_ref = &rdev->mman.mem_global_ref;
-	global_ref->global_type = TTM_GLOBAL_TTM_MEM;
+	global_ref->global_type = DRM_GLOBAL_TTM_MEM;
 	global_ref->size = sizeof(struct ttm_mem_global);
 	global_ref->init = &radeon_ttm_mem_global_init;
 	global_ref->release = &radeon_ttm_mem_global_release;
-	r = ttm_global_item_ref(global_ref);
+	r = drm_global_item_ref(global_ref);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up TTM memory accounting "
 			  "subsystem.\n");
@@ -90,14 +90,14 @@ static int radeon_ttm_global_init(struct radeon_device *rdev)
 	rdev->mman.bo_global_ref.mem_glob =
 		rdev->mman.mem_global_ref.object;
 	global_ref = &rdev->mman.bo_global_ref.ref;
-	global_ref->global_type = TTM_GLOBAL_TTM_BO;
+	global_ref->global_type = DRM_GLOBAL_TTM_BO;
 	global_ref->size = sizeof(struct ttm_bo_global);
 	global_ref->init = &ttm_bo_global_init;
 	global_ref->release = &ttm_bo_global_release;
-	r = ttm_global_item_ref(global_ref);
+	r = drm_global_item_ref(global_ref);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up TTM BO subsystem.\n");
-		ttm_global_item_unref(&rdev->mman.mem_global_ref);
+		drm_global_item_unref(&rdev->mman.mem_global_ref);
 		return r;
 	}
 
@@ -108,8 +108,8 @@ static int radeon_ttm_global_init(struct radeon_device *rdev)
 static void radeon_ttm_global_fini(struct radeon_device *rdev)
 {
 	if (rdev->mman.mem_global_referenced) {
-		ttm_global_item_unref(&rdev->mman.bo_global_ref.ref);
-		ttm_global_item_unref(&rdev->mman.mem_global_ref);
+		drm_global_item_unref(&rdev->mman.bo_global_ref.ref);
+		drm_global_item_unref(&rdev->mman.mem_global_ref);
 		rdev->mman.mem_global_referenced = false;
 	}
 }
diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
index 4256e200647..b256d4adfaf 100644
--- a/drivers/gpu/drm/ttm/Makefile
+++ b/drivers/gpu/drm/ttm/Makefile
@@ -3,7 +3,7 @@
 
 ccflags-y := -Iinclude/drm
 ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
-	ttm_bo_util.o ttm_bo_vm.o ttm_module.o ttm_global.o \
+	ttm_bo_util.o ttm_bo_vm.o ttm_module.o \
 	ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o
 
 obj-$(CONFIG_DRM_TTM) += ttm.o
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9763288c6b2..cb4cf7ef4d1 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1395,7 +1395,7 @@ static void ttm_bo_global_kobj_release(struct kobject *kobj)
 	kfree(glob);
 }
 
-void ttm_bo_global_release(struct ttm_global_reference *ref)
+void ttm_bo_global_release(struct drm_global_reference *ref)
 {
 	struct ttm_bo_global *glob = ref->object;
 
@@ -1404,7 +1404,7 @@ void ttm_bo_global_release(struct ttm_global_reference *ref)
 }
 EXPORT_SYMBOL(ttm_bo_global_release);
 
-int ttm_bo_global_init(struct ttm_global_reference *ref)
+int ttm_bo_global_init(struct drm_global_reference *ref)
 {
 	struct ttm_bo_global_ref *bo_ref =
 		container_of(ref, struct ttm_bo_global_ref, ref);
diff --git a/drivers/gpu/drm/ttm/ttm_global.c b/drivers/gpu/drm/ttm/ttm_global.c
deleted file mode 100644
index b17007178a3..00000000000
--- a/drivers/gpu/drm/ttm/ttm_global.c
+++ /dev/null
@@ -1,112 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008-2009 VMware, Inc., Palo Alto, CA., USA
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-/*
- * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
- */
-
-#include "ttm/ttm_module.h"
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-
-struct ttm_global_item {
-	struct mutex mutex;
-	void *object;
-	int refcount;
-};
-
-static struct ttm_global_item glob[TTM_GLOBAL_NUM];
-
-void ttm_global_init(void)
-{
-	int i;
-
-	for (i = 0; i < TTM_GLOBAL_NUM; ++i) {
-		struct ttm_global_item *item = &glob[i];
-		mutex_init(&item->mutex);
-		item->object = NULL;
-		item->refcount = 0;
-	}
-}
-
-void ttm_global_release(void)
-{
-	int i;
-	for (i = 0; i < TTM_GLOBAL_NUM; ++i) {
-		struct ttm_global_item *item = &glob[i];
-		BUG_ON(item->object != NULL);
-		BUG_ON(item->refcount != 0);
-	}
-}
-
-int ttm_global_item_ref(struct ttm_global_reference *ref)
-{
-	int ret;
-	struct ttm_global_item *item = &glob[ref->global_type];
-	void *object;
-
-	mutex_lock(&item->mutex);
-	if (item->refcount == 0) {
-		item->object = kzalloc(ref->size, GFP_KERNEL);
-		if (unlikely(item->object == NULL)) {
-			ret = -ENOMEM;
-			goto out_err;
-		}
-
-		ref->object = item->object;
-		ret = ref->init(ref);
-		if (unlikely(ret != 0))
-			goto out_err;
-
-	}
-	++item->refcount;
-	ref->object = item->object;
-	object = item->object;
-	mutex_unlock(&item->mutex);
-	return 0;
-out_err:
-	mutex_unlock(&item->mutex);
-	item->object = NULL;
-	return ret;
-}
-EXPORT_SYMBOL(ttm_global_item_ref);
-
-void ttm_global_item_unref(struct ttm_global_reference *ref)
-{
-	struct ttm_global_item *item = &glob[ref->global_type];
-
-	mutex_lock(&item->mutex);
-	BUG_ON(item->refcount == 0);
-	BUG_ON(ref->object != item->object);
-	if (--item->refcount == 0) {
-		ref->release(ref);
-		item->object = NULL;
-	}
-	mutex_unlock(&item->mutex);
-}
-EXPORT_SYMBOL(ttm_global_item_unref);
-
diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c
index 9a6edbfeaa9..902d7cf9fb4 100644
--- a/drivers/gpu/drm/ttm/ttm_module.c
+++ b/drivers/gpu/drm/ttm/ttm_module.c
@@ -70,8 +70,6 @@ static int __init ttm_init(void)
 	if (unlikely(ret != 0))
 		return ret;
 
-	ttm_global_init();
-
 	atomic_set(&device_released, 0);
 	ret = drm_class_device_register(&ttm_drm_class_device);
 	if (unlikely(ret != 0))
@@ -81,7 +79,6 @@ static int __init ttm_init(void)
 out_no_dev_reg:
 	atomic_set(&device_released, 1);
 	wake_up_all(&exit_q);
-	ttm_global_release();
 	return ret;
 }
 
@@ -95,7 +92,6 @@ static void __exit ttm_exit(void)
 	 */
 
 	wait_event(exit_q, atomic_read(&device_released) == 1);
-	ttm_global_release();
 }
 
 module_init(ttm_init);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index eaad5209533..429f917b60b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -164,7 +164,7 @@ struct vmw_vga_topology_state {
 struct vmw_private {
 	struct ttm_bo_device bdev;
 	struct ttm_bo_global_ref bo_global_ref;
-	struct ttm_global_reference mem_global_ref;
+	struct drm_global_reference mem_global_ref;
 
 	struct vmw_fifo_state fifo;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
index e3df4adfb4d..83123287c60 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
@@ -44,29 +44,29 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
 	return ttm_bo_mmap(filp, vma, &dev_priv->bdev);
 }
 
-static int vmw_ttm_mem_global_init(struct ttm_global_reference *ref)
+static int vmw_ttm_mem_global_init(struct drm_global_reference *ref)
 {
 	DRM_INFO("global init.\n");
 	return ttm_mem_global_init(ref->object);
 }
 
-static void vmw_ttm_mem_global_release(struct ttm_global_reference *ref)
+static void vmw_ttm_mem_global_release(struct drm_global_reference *ref)
 {
 	ttm_mem_global_release(ref->object);
 }
 
 int vmw_ttm_global_init(struct vmw_private *dev_priv)
 {
-	struct ttm_global_reference *global_ref;
+	struct drm_global_reference *global_ref;
 	int ret;
 
 	global_ref = &dev_priv->mem_global_ref;
-	global_ref->global_type = TTM_GLOBAL_TTM_MEM;
+	global_ref->global_type = DRM_GLOBAL_TTM_MEM;
 	global_ref->size = sizeof(struct ttm_mem_global);
 	global_ref->init = &vmw_ttm_mem_global_init;
 	global_ref->release = &vmw_ttm_mem_global_release;
 
-	ret = ttm_global_item_ref(global_ref);
+	ret = drm_global_item_ref(global_ref);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Failed setting up TTM memory accounting.\n");
 		return ret;
@@ -75,11 +75,11 @@ int vmw_ttm_global_init(struct vmw_private *dev_priv)
 	dev_priv->bo_global_ref.mem_glob =
 		dev_priv->mem_global_ref.object;
 	global_ref = &dev_priv->bo_global_ref.ref;
-	global_ref->global_type = TTM_GLOBAL_TTM_BO;
+	global_ref->global_type = DRM_GLOBAL_TTM_BO;
 	global_ref->size = sizeof(struct ttm_bo_global);
 	global_ref->init = &ttm_bo_global_init;
 	global_ref->release = &ttm_bo_global_release;
-		ret = ttm_global_item_ref(global_ref);
+	ret = drm_global_item_ref(global_ref);
 
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Failed setting up TTM buffer objects.\n");
@@ -88,12 +88,12 @@ int vmw_ttm_global_init(struct vmw_private *dev_priv)
 
 	return 0;
 out_no_bo:
-	ttm_global_item_unref(&dev_priv->mem_global_ref);
+	drm_global_item_unref(&dev_priv->mem_global_ref);
 	return ret;
 }
 
 void vmw_ttm_global_release(struct vmw_private *dev_priv)
 {
-	ttm_global_item_unref(&dev_priv->bo_global_ref.ref);
-	ttm_global_item_unref(&dev_priv->mem_global_ref);
+	drm_global_item_unref(&dev_priv->bo_global_ref.ref);
+	drm_global_item_unref(&dev_priv->mem_global_ref);
 }
diff --git a/include/drm/drm.h b/include/drm/drm.h
index e3f46e0cb7d..e5f70617dec 100644
--- a/include/drm/drm.h
+++ b/include/drm/drm.h
@@ -663,6 +663,8 @@ struct drm_gem_open {
 #define DRM_IOCTL_UNLOCK		DRM_IOW( 0x2b, struct drm_lock)
 #define DRM_IOCTL_FINISH		DRM_IOW( 0x2c, struct drm_lock)
 
+#define DRM_IOCTL_GEM_PRIME_OPEN        DRM_IOWR(0x2e, struct drm_gem_open)
+
 #define DRM_IOCTL_AGP_ACQUIRE		DRM_IO(  0x30)
 #define DRM_IOCTL_AGP_RELEASE		DRM_IO(  0x31)
 #define DRM_IOCTL_AGP_ENABLE		DRM_IOW( 0x32, struct drm_agp_mode)
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 04b564bfc4a..53017ba0ab7 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1453,6 +1453,8 @@ void drm_gem_vm_open(struct vm_area_struct *vma);
 void drm_gem_vm_close(struct vm_area_struct *vma);
 int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 
+#include "drm_global.h"
+
 static inline void
 drm_gem_object_reference(struct drm_gem_object *obj)
 {
diff --git a/include/drm/drm_global.h b/include/drm/drm_global.h
new file mode 100644
index 00000000000..a06805eaf64
--- /dev/null
+++ b/include/drm/drm_global.h
@@ -0,0 +1,53 @@
+/**************************************************************************
+ *
+ * Copyright 2008-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
+ */
+
+#ifndef _DRM_GLOBAL_H_
+#define _DRM_GLOBAL_H_
+enum drm_global_types {
+	DRM_GLOBAL_TTM_MEM = 0,
+	DRM_GLOBAL_TTM_BO,
+	DRM_GLOBAL_TTM_OBJECT,
+	DRM_GLOBAL_NUM
+};
+
+struct drm_global_reference {
+	enum drm_global_types global_type;
+	size_t size;
+	void *object;
+	int (*init) (struct drm_global_reference *);
+	void (*release) (struct drm_global_reference *);
+};
+
+extern void drm_global_init(void);
+extern void drm_global_release(void);
+extern int drm_global_item_ref(struct drm_global_reference *ref);
+extern void drm_global_item_unref(struct drm_global_reference *ref);
+
+#endif
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 0ea602da43e..b87504235f1 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -34,6 +34,7 @@
 #include "ttm/ttm_memory.h"
 #include "ttm/ttm_module.h"
 #include "drm_mm.h"
+#include "drm_global.h"
 #include "linux/workqueue.h"
 #include "linux/fs.h"
 #include "linux/spinlock.h"
@@ -362,7 +363,7 @@ struct ttm_bo_driver {
  */
 
 struct ttm_bo_global_ref {
-	struct ttm_global_reference ref;
+	struct drm_global_reference ref;
 	struct ttm_mem_global *mem_glob;
 };
 
@@ -687,8 +688,8 @@ extern int ttm_mem_io_reserve(struct ttm_bo_device *bdev,
 extern void ttm_mem_io_free(struct ttm_bo_device *bdev,
 				struct ttm_mem_reg *mem);
 
-extern void ttm_bo_global_release(struct ttm_global_reference *ref);
-extern int ttm_bo_global_init(struct ttm_global_reference *ref);
+extern void ttm_bo_global_release(struct drm_global_reference *ref);
+extern int ttm_bo_global_init(struct drm_global_reference *ref);
 
 extern int ttm_bo_device_release(struct ttm_bo_device *bdev);
 
diff --git a/include/drm/ttm/ttm_module.h b/include/drm/ttm/ttm_module.h
index cf416aee19a..45fa318c158 100644
--- a/include/drm/ttm/ttm_module.h
+++ b/include/drm/ttm/ttm_module.h
@@ -35,26 +35,6 @@
 struct kobject;
 
 #define TTM_PFX "[TTM] "
-
-enum ttm_global_types {
-	TTM_GLOBAL_TTM_MEM = 0,
-	TTM_GLOBAL_TTM_BO,
-	TTM_GLOBAL_TTM_OBJECT,
-	TTM_GLOBAL_NUM
-};
-
-struct ttm_global_reference {
-	enum ttm_global_types global_type;
-	size_t size;
-	void *object;
-	int (*init) (struct ttm_global_reference *);
-	void (*release) (struct ttm_global_reference *);
-};
-
-extern void ttm_global_init(void);
-extern void ttm_global_release(void);
-extern int ttm_global_item_ref(struct ttm_global_reference *ref);
-extern void ttm_global_item_unref(struct ttm_global_reference *ref);
 extern struct kobject *ttm_get_kobj(void);
 
 #endif /* _TTM_MODULE_H_ */
-- 
cgit v1.2.3-70-g09d2


From a931da6ac9331a6c80dd91c199105806f2336188 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 3 Aug 2010 21:35:12 -0400
Subject: jbd2: Change j_state_lock to be a rwlock_t

Lockstat reports have shown that j_state_lock is a major source of
lock contention, especially on systems with more than 4 CPU cores.  So
change it to be a read/write spinlock.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c       |  4 +--
 fs/ext4/super.c       |  4 +--
 fs/jbd2/checkpoint.c  | 16 ++++-----
 fs/jbd2/commit.c      | 26 +++++++-------
 fs/jbd2/journal.c     | 94 +++++++++++++++++++++++++--------------------------
 fs/jbd2/transaction.c | 74 +++++++++++++++++++++-------------------
 fs/ocfs2/journal.c    |  4 +--
 include/linux/jbd2.h  |  2 +-
 8 files changed, 114 insertions(+), 110 deletions(-)

(limited to 'include')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 533b607f9cb..ab2247d642c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5066,7 +5066,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		transaction_t *transaction;
 		tid_t tid;
 
-		spin_lock(&journal->j_state_lock);
+		read_lock(&journal->j_state_lock);
 		if (journal->j_running_transaction)
 			transaction = journal->j_running_transaction;
 		else
@@ -5075,7 +5075,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 			tid = transaction->t_tid;
 		else
 			tid = journal->j_commit_sequence;
-		spin_unlock(&journal->j_state_lock);
+		read_unlock(&journal->j_state_lock);
 		ei->i_sync_tid = tid;
 		ei->i_datasync_tid = tid;
 	}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3fd65eb66cc..81cb3fc1218 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3232,7 +3232,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
 	journal->j_min_batch_time = sbi->s_min_batch_time;
 	journal->j_max_batch_time = sbi->s_max_batch_time;
 
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	if (test_opt(sb, BARRIER))
 		journal->j_flags |= JBD2_BARRIER;
 	else
@@ -3241,7 +3241,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
 		journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
 	else
 		journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 }
 
 static journal_t *ext4_get_journal(struct super_block *sb,
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index f8cdc02520f..1c23a0f4e8a 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -118,13 +118,13 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 void __jbd2_log_wait_for_space(journal_t *journal)
 {
 	int nblocks, space_left;
-	assert_spin_locked(&journal->j_state_lock);
+	/* assert_spin_locked(&journal->j_state_lock); */
 
 	nblocks = jbd_space_needed(journal);
 	while (__jbd2_log_space_left(journal) < nblocks) {
 		if (journal->j_flags & JBD2_ABORT)
 			return;
-		spin_unlock(&journal->j_state_lock);
+		write_unlock(&journal->j_state_lock);
 		mutex_lock(&journal->j_checkpoint_mutex);
 
 		/*
@@ -138,7 +138,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
 		 * filesystem, so abort the journal and leave a stack
 		 * trace for forensic evidence.
 		 */
-		spin_lock(&journal->j_state_lock);
+		write_lock(&journal->j_state_lock);
 		spin_lock(&journal->j_list_lock);
 		nblocks = jbd_space_needed(journal);
 		space_left = __jbd2_log_space_left(journal);
@@ -149,7 +149,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
 			if (journal->j_committing_transaction)
 				tid = journal->j_committing_transaction->t_tid;
 			spin_unlock(&journal->j_list_lock);
-			spin_unlock(&journal->j_state_lock);
+			write_unlock(&journal->j_state_lock);
 			if (chkpt) {
 				jbd2_log_do_checkpoint(journal);
 			} else if (jbd2_cleanup_journal_tail(journal) == 0) {
@@ -167,7 +167,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
 				WARN_ON(1);
 				jbd2_journal_abort(journal, 0);
 			}
-			spin_lock(&journal->j_state_lock);
+			write_lock(&journal->j_state_lock);
 		} else {
 			spin_unlock(&journal->j_list_lock);
 		}
@@ -474,7 +474,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
 	 * next transaction ID we will write, and where it will
 	 * start. */
 
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	spin_lock(&journal->j_list_lock);
 	transaction = journal->j_checkpoint_transactions;
 	if (transaction) {
@@ -496,7 +496,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
 	/* If the oldest pinned transaction is at the tail of the log
            already then there's not much we can do right now. */
 	if (journal->j_tail_sequence == first_tid) {
-		spin_unlock(&journal->j_state_lock);
+		write_unlock(&journal->j_state_lock);
 		return 1;
 	}
 
@@ -516,7 +516,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
 	journal->j_free += freed;
 	journal->j_tail_sequence = first_tid;
 	journal->j_tail = blocknr;
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 
 	/*
 	 * If there is an external journal, we need to make sure that
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index fbd2c564e91..67bb0a2f35e 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -152,9 +152,9 @@ static int journal_submit_commit_record(journal_t *journal,
 		printk(KERN_WARNING
 		       "JBD2: Disabling barriers on %s, "
 		       "not supported by device\n", journal->j_devname);
-		spin_lock(&journal->j_state_lock);
+		write_lock(&journal->j_state_lock);
 		journal->j_flags &= ~JBD2_BARRIER;
-		spin_unlock(&journal->j_state_lock);
+		write_unlock(&journal->j_state_lock);
 
 		/* And try again, without the barrier */
 		lock_buffer(bh);
@@ -182,9 +182,9 @@ retry:
 		printk(KERN_WARNING
 		       "JBD2: %s: disabling barries on %s - not supported "
 		       "by device\n", __func__, journal->j_devname);
-		spin_lock(&journal->j_state_lock);
+		write_lock(&journal->j_state_lock);
 		journal->j_flags &= ~JBD2_BARRIER;
-		spin_unlock(&journal->j_state_lock);
+		write_unlock(&journal->j_state_lock);
 
 		lock_buffer(bh);
 		clear_buffer_dirty(bh);
@@ -400,7 +400,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	jbd_debug(1, "JBD: starting commit of transaction %d\n",
 			commit_transaction->t_tid);
 
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	commit_transaction->t_state = T_LOCKED;
 
 	/*
@@ -424,9 +424,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 					TASK_UNINTERRUPTIBLE);
 		if (atomic_read(&commit_transaction->t_updates)) {
 			spin_unlock(&commit_transaction->t_handle_lock);
-			spin_unlock(&journal->j_state_lock);
+			write_unlock(&journal->j_state_lock);
 			schedule();
-			spin_lock(&journal->j_state_lock);
+			write_lock(&journal->j_state_lock);
 			spin_lock(&commit_transaction->t_handle_lock);
 		}
 		finish_wait(&journal->j_wait_updates, &wait);
@@ -497,7 +497,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	start_time = ktime_get();
 	commit_transaction->t_log_start = journal->j_head;
 	wake_up(&journal->j_wait_transaction_locked);
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 
 	jbd_debug (3, "JBD: commit phase 2\n");
 
@@ -519,9 +519,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	 * transaction!  Now comes the tricky part: we need to write out
 	 * metadata.  Loop over the transaction's entire buffer list:
 	 */
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	commit_transaction->t_state = T_COMMIT;
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 
 	trace_jbd2_commit_logging(journal, commit_transaction);
 	stats.run.rs_logging = jiffies;
@@ -978,7 +978,7 @@ restart_loop:
 	 * __jbd2_journal_drop_transaction(). Otherwise we could race with
 	 * other checkpointing code processing the transaction...
 	 */
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	spin_lock(&journal->j_list_lock);
 	/*
 	 * Now recheck if some buffers did not get attached to the transaction
@@ -986,7 +986,7 @@ restart_loop:
 	 */
 	if (commit_transaction->t_forget) {
 		spin_unlock(&journal->j_list_lock);
-		spin_unlock(&journal->j_state_lock);
+		write_unlock(&journal->j_state_lock);
 		goto restart_loop;
 	}
 
@@ -1038,7 +1038,7 @@ restart_loop:
 				journal->j_average_commit_time*3) / 4;
 	else
 		journal->j_average_commit_time = commit_time;
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 
 	if (commit_transaction->t_checkpoint_list == NULL &&
 	    commit_transaction->t_checkpoint_io_list == NULL) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index a79d3345b55..e7bf0fd9cec 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -142,7 +142,7 @@ static int kjournald2(void *arg)
 	/*
 	 * And now, wait forever for commit wakeup events.
 	 */
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 
 loop:
 	if (journal->j_flags & JBD2_UNMOUNT)
@@ -153,10 +153,10 @@ loop:
 
 	if (journal->j_commit_sequence != journal->j_commit_request) {
 		jbd_debug(1, "OK, requests differ\n");
-		spin_unlock(&journal->j_state_lock);
+		write_unlock(&journal->j_state_lock);
 		del_timer_sync(&journal->j_commit_timer);
 		jbd2_journal_commit_transaction(journal);
-		spin_lock(&journal->j_state_lock);
+		write_lock(&journal->j_state_lock);
 		goto loop;
 	}
 
@@ -168,9 +168,9 @@ loop:
 		 * be already stopped.
 		 */
 		jbd_debug(1, "Now suspending kjournald2\n");
-		spin_unlock(&journal->j_state_lock);
+		write_unlock(&journal->j_state_lock);
 		refrigerator();
-		spin_lock(&journal->j_state_lock);
+		write_lock(&journal->j_state_lock);
 	} else {
 		/*
 		 * We assume on resume that commits are already there,
@@ -190,9 +190,9 @@ loop:
 		if (journal->j_flags & JBD2_UNMOUNT)
 			should_sleep = 0;
 		if (should_sleep) {
-			spin_unlock(&journal->j_state_lock);
+			write_unlock(&journal->j_state_lock);
 			schedule();
-			spin_lock(&journal->j_state_lock);
+			write_lock(&journal->j_state_lock);
 		}
 		finish_wait(&journal->j_wait_commit, &wait);
 	}
@@ -210,7 +210,7 @@ loop:
 	goto loop;
 
 end_loop:
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 	del_timer_sync(&journal->j_commit_timer);
 	journal->j_task = NULL;
 	wake_up(&journal->j_wait_done_commit);
@@ -233,16 +233,16 @@ static int jbd2_journal_start_thread(journal_t *journal)
 
 static void journal_kill_thread(journal_t *journal)
 {
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	journal->j_flags |= JBD2_UNMOUNT;
 
 	while (journal->j_task) {
 		wake_up(&journal->j_wait_commit);
-		spin_unlock(&journal->j_state_lock);
+		write_unlock(&journal->j_state_lock);
 		wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
-		spin_lock(&journal->j_state_lock);
+		write_lock(&journal->j_state_lock);
 	}
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 }
 
 /*
@@ -452,7 +452,7 @@ int __jbd2_log_space_left(journal_t *journal)
 {
 	int left = journal->j_free;
 
-	assert_spin_locked(&journal->j_state_lock);
+	/* assert_spin_locked(&journal->j_state_lock); */
 
 	/*
 	 * Be pessimistic here about the number of those free blocks which
@@ -497,9 +497,9 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid)
 {
 	int ret;
 
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	ret = __jbd2_log_start_commit(journal, tid);
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 	return ret;
 }
 
@@ -518,7 +518,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
 	transaction_t *transaction = NULL;
 	tid_t tid;
 
-	spin_lock(&journal->j_state_lock);
+	read_lock(&journal->j_state_lock);
 	if (journal->j_running_transaction && !current->journal_info) {
 		transaction = journal->j_running_transaction;
 		__jbd2_log_start_commit(journal, transaction->t_tid);
@@ -526,12 +526,12 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
 		transaction = journal->j_committing_transaction;
 
 	if (!transaction) {
-		spin_unlock(&journal->j_state_lock);
+		read_unlock(&journal->j_state_lock);
 		return 0;	/* Nothing to retry */
 	}
 
 	tid = transaction->t_tid;
-	spin_unlock(&journal->j_state_lock);
+	read_unlock(&journal->j_state_lock);
 	jbd2_log_wait_commit(journal, tid);
 	return 1;
 }
@@ -545,7 +545,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
 {
 	int ret = 0;
 
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	if (journal->j_running_transaction) {
 		tid_t tid = journal->j_running_transaction->t_tid;
 
@@ -564,7 +564,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
 			*ptid = journal->j_committing_transaction->t_tid;
 		ret = 1;
 	}
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 	return ret;
 }
 
@@ -576,26 +576,24 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
 {
 	int err = 0;
 
+	read_lock(&journal->j_state_lock);
 #ifdef CONFIG_JBD2_DEBUG
-	spin_lock(&journal->j_state_lock);
 	if (!tid_geq(journal->j_commit_request, tid)) {
 		printk(KERN_EMERG
 		       "%s: error: j_commit_request=%d, tid=%d\n",
 		       __func__, journal->j_commit_request, tid);
 	}
-	spin_unlock(&journal->j_state_lock);
 #endif
-	spin_lock(&journal->j_state_lock);
 	while (tid_gt(tid, journal->j_commit_sequence)) {
 		jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
 				  tid, journal->j_commit_sequence);
 		wake_up(&journal->j_wait_commit);
-		spin_unlock(&journal->j_state_lock);
+		read_unlock(&journal->j_state_lock);
 		wait_event(journal->j_wait_done_commit,
 				!tid_gt(tid, journal->j_commit_sequence));
-		spin_lock(&journal->j_state_lock);
+		read_lock(&journal->j_state_lock);
 	}
-	spin_unlock(&journal->j_state_lock);
+	read_unlock(&journal->j_state_lock);
 
 	if (unlikely(is_journal_aborted(journal))) {
 		printk(KERN_EMERG "journal commit I/O error\n");
@@ -612,7 +610,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
 {
 	unsigned long blocknr;
 
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	J_ASSERT(journal->j_free > 1);
 
 	blocknr = journal->j_head;
@@ -620,7 +618,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
 	journal->j_free--;
 	if (journal->j_head == journal->j_last)
 		journal->j_head = journal->j_first;
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 	return jbd2_journal_bmap(journal, blocknr, retp);
 }
 
@@ -840,7 +838,7 @@ static journal_t * journal_init_common (void)
 	mutex_init(&journal->j_checkpoint_mutex);
 	spin_lock_init(&journal->j_revoke_lock);
 	spin_lock_init(&journal->j_list_lock);
-	spin_lock_init(&journal->j_state_lock);
+	rwlock_init(&journal->j_state_lock);
 
 	journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
 	journal->j_min_batch_time = 0;
@@ -1106,14 +1104,14 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
 		set_buffer_uptodate(bh);
 	}
 
-	spin_lock(&journal->j_state_lock);
+	read_lock(&journal->j_state_lock);
 	jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
 		  journal->j_tail, journal->j_tail_sequence, journal->j_errno);
 
 	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
 	sb->s_start    = cpu_to_be32(journal->j_tail);
 	sb->s_errno    = cpu_to_be32(journal->j_errno);
-	spin_unlock(&journal->j_state_lock);
+	read_unlock(&journal->j_state_lock);
 
 	BUFFER_TRACE(bh, "marking dirty");
 	mark_buffer_dirty(bh);
@@ -1134,12 +1132,12 @@ out:
 	 * any future commit will have to be careful to update the
 	 * superblock again to re-record the true start of the log. */
 
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	if (sb->s_start)
 		journal->j_flags &= ~JBD2_FLUSHED;
 	else
 		journal->j_flags |= JBD2_FLUSHED;
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 }
 
 /*
@@ -1551,7 +1549,7 @@ int jbd2_journal_flush(journal_t *journal)
 	transaction_t *transaction = NULL;
 	unsigned long old_tail;
 
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 
 	/* Force everything buffered to the log... */
 	if (journal->j_running_transaction) {
@@ -1564,10 +1562,10 @@ int jbd2_journal_flush(journal_t *journal)
 	if (transaction) {
 		tid_t tid = transaction->t_tid;
 
-		spin_unlock(&journal->j_state_lock);
+		write_unlock(&journal->j_state_lock);
 		jbd2_log_wait_commit(journal, tid);
 	} else {
-		spin_unlock(&journal->j_state_lock);
+		write_unlock(&journal->j_state_lock);
 	}
 
 	/* ...and flush everything in the log out to disk. */
@@ -1591,12 +1589,12 @@ int jbd2_journal_flush(journal_t *journal)
 	 * the magic code for a fully-recovered superblock.  Any future
 	 * commits of data to the journal will restore the current
 	 * s_start value. */
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	old_tail = journal->j_tail;
 	journal->j_tail = 0;
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 	jbd2_journal_update_superblock(journal, 1);
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	journal->j_tail = old_tail;
 
 	J_ASSERT(!journal->j_running_transaction);
@@ -1604,7 +1602,7 @@ int jbd2_journal_flush(journal_t *journal)
 	J_ASSERT(!journal->j_checkpoint_transactions);
 	J_ASSERT(journal->j_head == journal->j_tail);
 	J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 	return 0;
 }
 
@@ -1668,12 +1666,12 @@ void __jbd2_journal_abort_hard(journal_t *journal)
 	printk(KERN_ERR "Aborting journal on device %s.\n",
 	       journal->j_devname);
 
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	journal->j_flags |= JBD2_ABORT;
 	transaction = journal->j_running_transaction;
 	if (transaction)
 		__jbd2_log_start_commit(journal, transaction->t_tid);
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 }
 
 /* Soft abort: record the abort error status in the journal superblock,
@@ -1758,12 +1756,12 @@ int jbd2_journal_errno(journal_t *journal)
 {
 	int err;
 
-	spin_lock(&journal->j_state_lock);
+	read_lock(&journal->j_state_lock);
 	if (journal->j_flags & JBD2_ABORT)
 		err = -EROFS;
 	else
 		err = journal->j_errno;
-	spin_unlock(&journal->j_state_lock);
+	read_unlock(&journal->j_state_lock);
 	return err;
 }
 
@@ -1778,12 +1776,12 @@ int jbd2_journal_clear_err(journal_t *journal)
 {
 	int err = 0;
 
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	if (journal->j_flags & JBD2_ABORT)
 		err = -EROFS;
 	else
 		journal->j_errno = 0;
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 	return err;
 }
 
@@ -1796,10 +1794,10 @@ int jbd2_journal_clear_err(journal_t *journal)
  */
 void jbd2_journal_ack_err(journal_t *journal)
 {
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	if (journal->j_errno)
 		journal->j_flags |= JBD2_ACK_ERR;
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 }
 
 int jbd2_journal_blocks_per_page(struct inode *inode)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 9c64c7ec48d..663065142b4 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -124,36 +124,38 @@ alloc_transaction:
 
 	jbd_debug(3, "New handle %p going live.\n", handle);
 
-repeat:
-
 	/*
 	 * We need to hold j_state_lock until t_updates has been incremented,
 	 * for proper journal barrier handling
 	 */
-	spin_lock(&journal->j_state_lock);
-repeat_locked:
+repeat:
+	read_lock(&journal->j_state_lock);
 	if (is_journal_aborted(journal) ||
 	    (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
-		spin_unlock(&journal->j_state_lock);
+		read_unlock(&journal->j_state_lock);
 		kfree(new_transaction);
 		return -EROFS;
 	}
 
 	/* Wait on the journal's transaction barrier if necessary */
 	if (journal->j_barrier_count) {
-		spin_unlock(&journal->j_state_lock);
+		read_unlock(&journal->j_state_lock);
 		wait_event(journal->j_wait_transaction_locked,
 				journal->j_barrier_count == 0);
 		goto repeat;
 	}
 
 	if (!journal->j_running_transaction) {
-		if (!new_transaction) {
-			spin_unlock(&journal->j_state_lock);
+		read_unlock(&journal->j_state_lock);
+		if (!new_transaction)
 			goto alloc_transaction;
+		write_lock(&journal->j_state_lock);
+		if (!journal->j_running_transaction) {
+			jbd2_get_transaction(journal, new_transaction);
+			new_transaction = NULL;
 		}
-		jbd2_get_transaction(journal, new_transaction);
-		new_transaction = NULL;
+		write_unlock(&journal->j_state_lock);
+		goto repeat;
 	}
 
 	transaction = journal->j_running_transaction;
@@ -167,7 +169,7 @@ repeat_locked:
 
 		prepare_to_wait(&journal->j_wait_transaction_locked,
 					&wait, TASK_UNINTERRUPTIBLE);
-		spin_unlock(&journal->j_state_lock);
+		read_unlock(&journal->j_state_lock);
 		schedule();
 		finish_wait(&journal->j_wait_transaction_locked, &wait);
 		goto repeat;
@@ -194,7 +196,7 @@ repeat_locked:
 		prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
 				TASK_UNINTERRUPTIBLE);
 		__jbd2_log_start_commit(journal, transaction->t_tid);
-		spin_unlock(&journal->j_state_lock);
+		read_unlock(&journal->j_state_lock);
 		schedule();
 		finish_wait(&journal->j_wait_transaction_locked, &wait);
 		goto repeat;
@@ -228,8 +230,12 @@ repeat_locked:
 	if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
 		jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
 		spin_unlock(&transaction->t_handle_lock);
-		__jbd2_log_wait_for_space(journal);
-		goto repeat_locked;
+		read_unlock(&journal->j_state_lock);
+		write_lock(&journal->j_state_lock);
+		if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
+			__jbd2_log_wait_for_space(journal);
+		write_unlock(&journal->j_state_lock);
+		goto repeat;
 	}
 
 	/* OK, account for the buffers that this operation expects to
@@ -250,7 +256,7 @@ repeat_locked:
 		  atomic_read(&transaction->t_outstanding_credits),
 		  __jbd2_log_space_left(journal));
 	spin_unlock(&transaction->t_handle_lock);
-	spin_unlock(&journal->j_state_lock);
+	read_unlock(&journal->j_state_lock);
 
 	lock_map_acquire(&handle->h_lockdep_map);
 	kfree(new_transaction);
@@ -362,7 +368,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
 
 	result = 1;
 
-	spin_lock(&journal->j_state_lock);
+	read_lock(&journal->j_state_lock);
 
 	/* Don't extend a locked-down transaction! */
 	if (handle->h_transaction->t_state != T_RUNNING) {
@@ -394,7 +400,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
 unlock:
 	spin_unlock(&transaction->t_handle_lock);
 error_out:
-	spin_unlock(&journal->j_state_lock);
+	read_unlock(&journal->j_state_lock);
 out:
 	return result;
 }
@@ -432,7 +438,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
 	J_ASSERT(atomic_read(&transaction->t_updates) > 0);
 	J_ASSERT(journal_current_handle() == handle);
 
-	spin_lock(&journal->j_state_lock);
+	read_lock(&journal->j_state_lock);
 	spin_lock(&transaction->t_handle_lock);
 	atomic_sub(handle->h_buffer_credits,
 		   &transaction->t_outstanding_credits);
@@ -442,7 +448,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
 
 	jbd_debug(2, "restarting handle %p\n", handle);
 	__jbd2_log_start_commit(journal, transaction->t_tid);
-	spin_unlock(&journal->j_state_lock);
+	read_unlock(&journal->j_state_lock);
 
 	lock_map_release(&handle->h_lockdep_map);
 	handle->h_buffer_credits = nblocks;
@@ -472,7 +478,7 @@ void jbd2_journal_lock_updates(journal_t *journal)
 {
 	DEFINE_WAIT(wait);
 
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	++journal->j_barrier_count;
 
 	/* Wait until there are no running updates */
@@ -490,12 +496,12 @@ void jbd2_journal_lock_updates(journal_t *journal)
 		prepare_to_wait(&journal->j_wait_updates, &wait,
 				TASK_UNINTERRUPTIBLE);
 		spin_unlock(&transaction->t_handle_lock);
-		spin_unlock(&journal->j_state_lock);
+		write_unlock(&journal->j_state_lock);
 		schedule();
 		finish_wait(&journal->j_wait_updates, &wait);
-		spin_lock(&journal->j_state_lock);
+		write_lock(&journal->j_state_lock);
 	}
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 
 	/*
 	 * We have now established a barrier against other normal updates, but
@@ -519,9 +525,9 @@ void jbd2_journal_unlock_updates (journal_t *journal)
 	J_ASSERT(journal->j_barrier_count != 0);
 
 	mutex_unlock(&journal->j_barrier);
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	--journal->j_barrier_count;
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 	wake_up(&journal->j_wait_transaction_locked);
 }
 
@@ -1314,9 +1320,9 @@ int jbd2_journal_stop(handle_t *handle)
 
 		journal->j_last_sync_writer = pid;
 
-		spin_lock(&journal->j_state_lock);
+		read_lock(&journal->j_state_lock);
 		commit_time = journal->j_average_commit_time;
-		spin_unlock(&journal->j_state_lock);
+		read_unlock(&journal->j_state_lock);
 
 		trans_time = ktime_to_ns(ktime_sub(ktime_get(),
 						   transaction->t_start_time));
@@ -1748,7 +1754,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 		goto zap_buffer_unlocked;
 
 	/* OK, we have data buffer in journaled mode */
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	jbd_lock_bh_state(bh);
 	spin_lock(&journal->j_list_lock);
 
@@ -1801,7 +1807,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 			jbd2_journal_put_journal_head(jh);
 			spin_unlock(&journal->j_list_lock);
 			jbd_unlock_bh_state(bh);
-			spin_unlock(&journal->j_state_lock);
+			write_unlock(&journal->j_state_lock);
 			return ret;
 		} else {
 			/* There is no currently-running transaction. So the
@@ -1815,7 +1821,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 				jbd2_journal_put_journal_head(jh);
 				spin_unlock(&journal->j_list_lock);
 				jbd_unlock_bh_state(bh);
-				spin_unlock(&journal->j_state_lock);
+				write_unlock(&journal->j_state_lock);
 				return ret;
 			} else {
 				/* The orphan record's transaction has
@@ -1839,7 +1845,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 		jbd2_journal_put_journal_head(jh);
 		spin_unlock(&journal->j_list_lock);
 		jbd_unlock_bh_state(bh);
-		spin_unlock(&journal->j_state_lock);
+		write_unlock(&journal->j_state_lock);
 		return 0;
 	} else {
 		/* Good, the buffer belongs to the running transaction.
@@ -1858,7 +1864,7 @@ zap_buffer:
 zap_buffer_no_jh:
 	spin_unlock(&journal->j_list_lock);
 	jbd_unlock_bh_state(bh);
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 zap_buffer_unlocked:
 	clear_buffer_dirty(bh);
 	J_ASSERT_BH(bh, !buffer_jbddirty(bh));
@@ -2165,9 +2171,9 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
 	/* Locks are here just to force reading of recent values, it is
 	 * enough that the transaction was not committing before we started
 	 * a transaction adding the inode to orphan list */
-	spin_lock(&journal->j_state_lock);
+	read_lock(&journal->j_state_lock);
 	commit_trans = journal->j_committing_transaction;
-	spin_unlock(&journal->j_state_lock);
+	read_unlock(&journal->j_state_lock);
 	spin_lock(&journal->j_list_lock);
 	inode_trans = jinode->i_transaction;
 	spin_unlock(&journal->j_list_lock);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 47878cf1641..9c1b92ebeb9 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -760,13 +760,13 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb)
 	if (osb->osb_commit_interval)
 		commit_interval = osb->osb_commit_interval;
 
-	spin_lock(&journal->j_state_lock);
+	write_lock(&journal->j_state_lock);
 	journal->j_commit_interval = commit_interval;
 	if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
 		journal->j_flags |= JBD2_BARRIER;
 	else
 		journal->j_flags &= ~JBD2_BARRIER;
-	spin_unlock(&journal->j_state_lock);
+	write_unlock(&journal->j_state_lock);
 }
 
 int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index a72ce21de0e..15d5743ccfb 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -764,7 +764,7 @@ struct journal_s
 	/*
 	 * Protect the various scalars in the journal
 	 */
-	spinlock_t		j_state_lock;
+	rwlock_t		j_state_lock;
 
 	/*
 	 * Number of processes waiting to create a barrier lock [j_state_lock]
-- 
cgit v1.2.3-70-g09d2


From 8dd420466c7bfc459fa04680bd5690bfc41a4553 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 3 Aug 2010 21:38:29 -0400
Subject: jbd2: Remove t_handle_lock from start_this_handle()

This should remove the last exclusive lock from start_this_handle(),
so that we should now be able to start multiple transactions at the
same time on large SMP systems.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/commit.c      |  3 ++-
 fs/jbd2/transaction.c | 33 ++++++++++++++++++++++-----------
 include/linux/jbd2.h  |  2 +-
 3 files changed, 25 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 67bb0a2f35e..f52e5e8049f 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1004,7 +1004,8 @@ restart_loop:
 	 * File the transaction statistics
 	 */
 	stats.ts_tid = commit_transaction->t_tid;
-	stats.run.rs_handle_count = commit_transaction->t_handle_count;
+	stats.run.rs_handle_count =
+		atomic_read(&commit_transaction->t_handle_count);
 	trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
 			     commit_transaction->t_tid, &stats.run);
 
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 663065142b4..0752bcda535 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -57,6 +57,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
 	spin_lock_init(&transaction->t_handle_lock);
 	atomic_set(&transaction->t_updates, 0);
 	atomic_set(&transaction->t_outstanding_credits, 0);
+	atomic_set(&transaction->t_handle_count, 0);
 	INIT_LIST_HEAD(&transaction->t_inode_list);
 	INIT_LIST_HEAD(&transaction->t_private_list);
 
@@ -180,8 +181,8 @@ repeat:
 	 * buffers requested by this operation, we need to stall pending a log
 	 * checkpoint to free some more log space.
 	 */
-	spin_lock(&transaction->t_handle_lock);
-	needed = atomic_read(&transaction->t_outstanding_credits) + nblocks;
+	needed = atomic_add_return(nblocks,
+				   &transaction->t_outstanding_credits);
 
 	if (needed > journal->j_max_transaction_buffers) {
 		/*
@@ -192,7 +193,7 @@ repeat:
 		DEFINE_WAIT(wait);
 
 		jbd_debug(2, "Handle %p starting new commit...\n", handle);
-		spin_unlock(&transaction->t_handle_lock);
+		atomic_sub(nblocks, &transaction->t_outstanding_credits);
 		prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
 				TASK_UNINTERRUPTIBLE);
 		__jbd2_log_start_commit(journal, transaction->t_tid);
@@ -229,7 +230,7 @@ repeat:
 	 */
 	if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
 		jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
-		spin_unlock(&transaction->t_handle_lock);
+		atomic_sub(nblocks, &transaction->t_outstanding_credits);
 		read_unlock(&journal->j_state_lock);
 		write_lock(&journal->j_state_lock);
 		if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
@@ -239,23 +240,33 @@ repeat:
 	}
 
 	/* OK, account for the buffers that this operation expects to
-	 * use and add the handle to the running transaction. */
-
-	if (time_after(transaction->t_start, ts)) {
+	 * use and add the handle to the running transaction. 
+	 *
+	 * In order for t_max_wait to be reliable, it must be
+	 * protected by a lock.  But doing so will mean that
+	 * start_this_handle() can not be run in parallel on SMP
+	 * systems, which limits our scalability.  So we only enable
+	 * it when debugging is enabled.  We may want to use a
+	 * separate flag, eventually, so we can enable this
+	 * independently of debugging.
+	 */
+#ifdef CONFIG_JBD2_DEBUG
+	if (jbd2_journal_enable_debug &&
+	    time_after(transaction->t_start, ts)) {
 		ts = jbd2_time_diff(ts, transaction->t_start);
+		spin_lock(&transaction->t_handle_lock);
 		if (ts > transaction->t_max_wait)
 			transaction->t_max_wait = ts;
+		spin_unlock(&transaction->t_handle_lock);
 	}
-
+#endif
 	handle->h_transaction = transaction;
-	atomic_add(nblocks, &transaction->t_outstanding_credits);
 	atomic_inc(&transaction->t_updates);
-	transaction->t_handle_count++;
+	atomic_inc(&transaction->t_handle_count);
 	jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
 		  handle, nblocks,
 		  atomic_read(&transaction->t_outstanding_credits),
 		  __jbd2_log_space_left(journal));
-	spin_unlock(&transaction->t_handle_lock);
 	read_unlock(&journal->j_state_lock);
 
 	lock_map_acquire(&handle->h_lockdep_map);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 15d5743ccfb..01743b5446f 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -629,7 +629,7 @@ struct transaction_s
 	/*
 	 * How many handles used this transaction? [t_handle_lock]
 	 */
-	int t_handle_count;
+	atomic_t		t_handle_count;
 
 	/*
 	 * This transaction is being forced and some process is
-- 
cgit v1.2.3-70-g09d2


From f1f88fc7e818c6678c6799a2edb8f1aeccc124aa Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 31 Jul 2010 14:29:07 -0400
Subject: SUNRPC: The function rpc_restart_call() should return success/failure

Both rpc_restart_call_prepare() and rpc_restart_call() test for the
RPC_TASK_KILLED flag, and fail to restart the RPC call if that flag is set.

This patch allows callers to know whether or not the restart was
successful, so that they can perform cleanups etc in case of failure.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h |  4 ++--
 net/sunrpc/clnt.c           | 11 ++++++-----
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 8ed9642a5a7..debe7553219 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -148,8 +148,8 @@ int		rpc_call_sync(struct rpc_clnt *clnt,
 			      const struct rpc_message *msg, int flags);
 struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred,
 			       int flags);
-void		rpc_restart_call_prepare(struct rpc_task *);
-void		rpc_restart_call(struct rpc_task *);
+int		rpc_restart_call_prepare(struct rpc_task *);
+int		rpc_restart_call(struct rpc_task *);
 void		rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
 size_t		rpc_max_payload(struct rpc_clnt *);
 void		rpc_force_rebind(struct rpc_clnt *);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 756fc324db9..234c40c15f6 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -756,12 +756,13 @@ EXPORT_SYMBOL_GPL(rpc_force_rebind);
  * Restart an (async) RPC call from the call_prepare state.
  * Usually called from within the exit handler.
  */
-void
+int
 rpc_restart_call_prepare(struct rpc_task *task)
 {
 	if (RPC_ASSASSINATED(task))
-		return;
+		return 0;
 	task->tk_action = rpc_prepare_task;
+	return 1;
 }
 EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
 
@@ -769,13 +770,13 @@ EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
  * Restart an (async) RPC call. Usually called from within the
  * exit handler.
  */
-void
+int
 rpc_restart_call(struct rpc_task *task)
 {
 	if (RPC_ASSASSINATED(task))
-		return;
-
+		return 0;
 	task->tk_action = call_start;
+	return 1;
 }
 EXPORT_SYMBOL_GPL(rpc_restart_call);
 
-- 
cgit v1.2.3-70-g09d2


From e1b004c3ef9c59db5f013528628b51c8653155ec Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 4 Aug 2010 10:53:00 +0200
Subject: Revert "timer: Added usleep[_range] timer"

This reverts commit 22b8f15c2f7130bb0386f548428df2ffd4e81903 to merge
an advanced version.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/delay.h |  6 ------
 kernel/timer.c        | 22 ----------------------
 2 files changed, 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/delay.h b/include/linux/delay.h
index 0e303d1aacd..fd832c6d419 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -45,12 +45,6 @@ extern unsigned long lpj_fine;
 void calibrate_delay(void);
 void msleep(unsigned int msecs);
 unsigned long msleep_interruptible(unsigned int msecs);
-void usleep_range(unsigned long min, unsigned long max);
-
-static inline void usleep(unsigned long usecs)
-{
-	usleep_range(usecs, usecs);
-}
 
 static inline void ssleep(unsigned int seconds)
 {
diff --git a/kernel/timer.c b/kernel/timer.c
index f110f241ab6..ce98685cd1c 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1755,25 +1755,3 @@ unsigned long msleep_interruptible(unsigned int msecs)
 }
 
 EXPORT_SYMBOL(msleep_interruptible);
-
-static int __sched do_usleep_range(unsigned long min, unsigned long max)
-{
-	ktime_t kmin;
-	unsigned long delta;
-
-	kmin = ktime_set(0, min * NSEC_PER_USEC);
-	delta = max - min;
-	return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
-}
-
-/**
- * usleep_range - Drop in replacement for udelay where wakeup is flexible
- * @min: Minimum time in usecs to sleep
- * @max: Maximum time in usecs to sleep
- */
-void usleep_range(unsigned long min, unsigned long max)
-{
-	__set_current_state(TASK_UNINTERRUPTIBLE);
-	do_usleep_range(min, max);
-}
-EXPORT_SYMBOL(usleep_range);
-- 
cgit v1.2.3-70-g09d2


From 5e7f5a178bba45c5aca3448fddecabd4e28f1f6b Mon Sep 17 00:00:00 2001
From: Patrick Pannuto <ppannuto@codeaurora.org>
Date: Mon, 2 Aug 2010 15:01:04 -0700
Subject: timer: Added usleep_range timer

usleep_range is a finer precision implementation of msleep
and is designed to be a drop-in replacement for udelay where
a precise sleep / busy-wait is unnecessary.

Since an easy interface to hrtimers could lead to an undesired
proliferation of interrupts, we provide only a "range" API,
forcing the caller to think about an acceptable tolerance on
both ends and hopefully avoiding introducing another interrupt.

INTRO

As discussed here ( http://lkml.org/lkml/2007/8/3/250 ), msleep(1) is not
precise enough for many drivers (yes, sleep precision is an unfair notion,
but consistently sleeping for ~an order of magnitude greater than requested
is worth fixing). This patch adds a usleep API so that udelay does not have
to be used. Obviously not every udelay can be replaced (those in atomic
contexts or being used for simple bitbanging come to mind), but there are
many, many examples of

mydriver_write(...)
/* Wait for hardware to latch */
udelay(100)

in various drivers where a busy-wait loop is neither beneficial nor
necessary, but msleep simply does not provide enough precision and people
are using a busy-wait loop instead.

CONCERNS FROM THE RFC

Why is udelay a problem / necessary? Most callers of udelay are in device/
driver initialization code, which is serial...

	As I see it, there is only benefit to sleeping over a delay; the
	notion of "refactoring" areas that use udelay was presented, but
	I see usleep as the refactoring. Consider i2c, if the bus is busy,
	you need to wait a bit (say 100us) before trying again, your
	current options are:

		* udelay(100)
		* msleep(1) <-- As noted above, actually as high as ~20ms
				on some platforms, so not really an option
		* Manually set up an hrtimer to try again in 100us (which
		  is what usleep does anyway...)

	People choose the udelay route because it is EASY; we need to
	provide a better easy route.

	Device / driver / boot code is *currently* serial, but every few
	months someone makes noise about parallelizing boot, and IMHO, a
	little forward-thinking now is one less thing to worry about
	if/when that ever happens.

udelay's could be preempted

	Sure, but if udelay plans on looping 1000 times, and it gets
	preempted on loop 200, whenever it's scheduled again, it is
	going to do the next 800 loops.

Is the interruptible case needed?

	Probably not, but I see usleep as a very logical parallel to msleep,
	so it made sense to include the "full" API. Processors are getting
	faster (albeit not as quickly as they are becoming more parallel),
	so if someone wanted to be interruptible for a few usecs, why not
	let them? If this is a contentious point, I'm happy to remove it.

OTHER THOUGHTS

I believe there is also value in exposing the usleep_range option; it gives
the scheduler a lot more flexibility and allows the programmer to express
his intent much more clearly; it's something I would hope future driver
writers will take advantage of.

To get the results in the NUMBERS section below, I literally s/udelay/usleep
the kernel tree; I had to go in and undo the changes to the USB drivers, but
everything else booted successfully; I find that extremely telling in and
of itself -- many people are using a delay API where a sleep will suit them
just fine.

SOME ATTEMPTS AT NUMBERS

It turns out that calculating quantifiable benefit on this is challenging,
so instead I will simply present the current state of things, and I hope
this to be sufficient:

How many udelay calls are there in 2.6.35-rc5?

	udelay(ARG) >=	| COUNT
	1000		| 319
	500		| 414
	100		| 1146
	20		| 1832

I am working on Android, so that is my focus for this. The following table
is a modified usleep that simply printk's the amount of time requested to
sleep; these tests were run on a kernel with udelay >= 20 --> usleep

"boot" is power-on to lock screen
"power collapse" is when the power button is pushed and the device suspends
"resume" is when the power button is pushed and the lock screen is displayed
         (no touchscreen events or anything, just turning on the display)
"use device" is from the unlock swipe to clicking around a bit; there is no
	sd card in this phone, so fail loading music, video, camera

	ACTION		| TOTAL NUMBER OF USLEEP CALLS	| NET TIME (us)
	boot		| 22				| 1250
	power-collapse	| 9				| 1200
	resume		| 5				| 500
	use device	| 59				| 7700

The most interesting category to me is the "use device" field; 7700us of
busy-wait time that could be put towards better responsiveness, or at the
least less power usage.

Signed-off-by: Patrick Pannuto <ppannuto@codeaurora.org>
Cc: apw@canonical.com
Cc: corbet@lwn.net
Cc: arjan@linux.intel.com
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/delay.h |  1 +
 kernel/timer.c        | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/delay.h b/include/linux/delay.h
index fd832c6d419..a6ecb34cf54 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -45,6 +45,7 @@ extern unsigned long lpj_fine;
 void calibrate_delay(void);
 void msleep(unsigned int msecs);
 unsigned long msleep_interruptible(unsigned int msecs);
+void usleep_range(unsigned long min, unsigned long max);
 
 static inline void ssleep(unsigned int seconds)
 {
diff --git a/kernel/timer.c b/kernel/timer.c
index ce98685cd1c..723a62e86dc 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1755,3 +1755,25 @@ unsigned long msleep_interruptible(unsigned int msecs)
 }
 
 EXPORT_SYMBOL(msleep_interruptible);
+
+static int __sched do_usleep_range(unsigned long min, unsigned long max)
+{
+	ktime_t kmin;
+	unsigned long delta;
+
+	kmin = ktime_set(0, min * NSEC_PER_USEC);
+	delta = (max - min) * NSEC_PER_USEC;
+	return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
+}
+
+/**
+ * usleep_range - Drop in replacement for udelay where wakeup is flexible
+ * @min: Minimum time in usecs to sleep
+ * @max: Maximum time in usecs to sleep
+ */
+void usleep_range(unsigned long min, unsigned long max)
+{
+	__set_current_state(TASK_UNINTERRUPTIBLE);
+	do_usleep_range(min, max);
+}
+EXPORT_SYMBOL(usleep_range);
-- 
cgit v1.2.3-70-g09d2


From 5d8d9a4d9ff74c55901642b4e2ac5124830ddafe Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 31 Jul 2010 14:29:07 -0400
Subject: NFS: Ensure the AUTH_UNIX credcache is allocated dynamically

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h |  7 ++++---
 net/sunrpc/auth.c           | 19 ++++++++++++++++---
 net/sunrpc/auth_generic.c   | 12 +++---------
 net/sunrpc/auth_unix.c      | 15 +++++++--------
 net/sunrpc/sunrpc_syms.c    | 15 ++++++++++-----
 5 files changed, 40 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 87d7ec0bf77..784e78c73ec 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -125,11 +125,12 @@ struct rpc_credops {
 extern const struct rpc_authops	authunix_ops;
 extern const struct rpc_authops	authnull_ops;
 
-void __init		rpc_init_authunix(void);
-void __init		rpc_init_generic_auth(void);
-void __init		rpcauth_init_module(void);
+int __init		rpc_init_authunix(void);
+int __init		rpc_init_generic_auth(void);
+int __init		rpcauth_init_module(void);
 void __exit		rpcauth_remove_module(void);
 void __exit		rpc_destroy_generic_auth(void);
+void 			rpc_destroy_authunix(void);
 
 struct rpc_cred *	rpc_lookup_cred(void);
 struct rpc_cred *	rpc_lookup_machine_cred(void);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 73affb8624f..db135543d21 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -587,14 +587,27 @@ static struct shrinker rpc_cred_shrinker = {
 	.seeks = DEFAULT_SEEKS,
 };
 
-void __init rpcauth_init_module(void)
+int __init rpcauth_init_module(void)
 {
-	rpc_init_authunix();
-	rpc_init_generic_auth();
+	int err;
+
+	err = rpc_init_authunix();
+	if (err < 0)
+		goto out1;
+	err = rpc_init_generic_auth();
+	if (err < 0)
+		goto out2;
 	register_shrinker(&rpc_cred_shrinker);
+	return 0;
+out2:
+	rpc_destroy_authunix();
+out1:
+	return err;
 }
 
 void __exit rpcauth_remove_module(void)
 {
+	rpc_destroy_authunix();
+	rpc_destroy_generic_auth();
 	unregister_shrinker(&rpc_cred_shrinker);
 }
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 8f623b0f03d..8bae33b36cc 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -27,7 +27,6 @@ struct generic_cred {
 };
 
 static struct rpc_auth generic_auth;
-static struct rpc_cred_cache generic_cred_cache;
 static const struct rpc_credops generic_credops;
 
 /*
@@ -159,20 +158,16 @@ out_nomatch:
 	return 0;
 }
 
-void __init rpc_init_generic_auth(void)
+int __init rpc_init_generic_auth(void)
 {
-	spin_lock_init(&generic_cred_cache.lock);
+	return rpcauth_init_credcache(&generic_auth);
 }
 
 void __exit rpc_destroy_generic_auth(void)
 {
-	rpcauth_clear_credcache(&generic_cred_cache);
+	rpcauth_destroy_credcache(&generic_auth);
 }
 
-static struct rpc_cred_cache generic_cred_cache = {
-	{{ NULL, },},
-};
-
 static const struct rpc_authops generic_auth_ops = {
 	.owner = THIS_MODULE,
 	.au_name = "Generic",
@@ -183,7 +178,6 @@ static const struct rpc_authops generic_auth_ops = {
 static struct rpc_auth generic_auth = {
 	.au_ops = &generic_auth_ops,
 	.au_count = ATOMIC_INIT(0),
-	.au_credcache = &generic_cred_cache,
 };
 
 static const struct rpc_credops generic_credops = {
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index aac2f8b4ee2..d5e37dbf207 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -29,7 +29,6 @@ struct unx_cred {
 #endif
 
 static struct rpc_auth		unix_auth;
-static struct rpc_cred_cache	unix_cred_cache;
 static const struct rpc_credops	unix_credops;
 
 static struct rpc_auth *
@@ -203,9 +202,14 @@ unx_validate(struct rpc_task *task, __be32 *p)
 	return p;
 }
 
-void __init rpc_init_authunix(void)
+int __init rpc_init_authunix(void)
 {
-	spin_lock_init(&unix_cred_cache.lock);
+	return rpcauth_init_credcache(&unix_auth);
+}
+
+void rpc_destroy_authunix(void)
+{
+	rpcauth_destroy_credcache(&unix_auth);
 }
 
 const struct rpc_authops authunix_ops = {
@@ -218,10 +222,6 @@ const struct rpc_authops authunix_ops = {
 	.crcreate	= unx_create_cred,
 };
 
-static
-struct rpc_cred_cache	unix_cred_cache = {
-};
-
 static
 struct rpc_auth		unix_auth = {
 	.au_cslack	= UNX_WRITESLACK,
@@ -229,7 +229,6 @@ struct rpc_auth		unix_auth = {
 	.au_ops		= &authunix_ops,
 	.au_flavor	= RPC_AUTH_UNIX,
 	.au_count	= ATOMIC_INIT(0),
-	.au_credcache	= &unix_cred_cache,
 };
 
 static
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index f438347d817..34b58f9e704 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -33,10 +33,11 @@ init_sunrpc(void)
 	if (err)
 		goto out;
 	err = rpc_init_mempool();
-	if (err) {
-		unregister_rpc_pipefs();
-		goto out;
-	}
+	if (err)
+		goto out2;
+	err = rpcauth_init_module();
+	if (err)
+		goto out3;
 #ifdef RPC_DEBUG
 	rpc_register_sysctl();
 #endif
@@ -47,7 +48,11 @@ init_sunrpc(void)
 	cache_register(&unix_gid_cache);
 	svc_init_xprt_sock();	/* svc sock transport */
 	init_socket_xprt();	/* clnt sock transport */
-	rpcauth_init_module();
+	return 0;
+out3:
+	rpc_destroy_mempool();
+out2:
+	unregister_rpc_pipefs();
 out:
 	return err;
 }
-- 
cgit v1.2.3-70-g09d2


From 988664a0f6bbfc356e6ce55f7a87b8594050012f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 31 Jul 2010 14:29:07 -0400
Subject: SUNRPC: Store the hashtable size in struct rpc_cred_cache

Cleanup in preparation for allowing the user to determine the maximum hash
table size.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h |  1 +
 net/sunrpc/auth.c           | 10 +++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 784e78c73ec..84d64b6926a 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -65,6 +65,7 @@ struct rpc_cred {
 #define RPC_CREDCACHE_NR	(1 << RPC_CREDCACHE_HASHBITS)
 struct rpc_cred_cache {
 	struct hlist_head	hashtable[RPC_CREDCACHE_NR];
+	unsigned int		hashbits;
 	spinlock_t		lock;
 };
 
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index db135543d21..eef76a1f1dd 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -145,12 +145,15 @@ int
 rpcauth_init_credcache(struct rpc_auth *auth)
 {
 	struct rpc_cred_cache *new;
+	unsigned int hashsize;
 	int i;
 
 	new = kmalloc(sizeof(*new), GFP_KERNEL);
 	if (!new)
 		return -ENOMEM;
-	for (i = 0; i < RPC_CREDCACHE_NR; i++)
+	new->hashbits = RPC_CREDCACHE_HASHBITS;
+	hashsize = 1U << new->hashbits;
+	for (i = 0; i < hashsize; i++)
 		INIT_HLIST_HEAD(&new->hashtable[i]);
 	spin_lock_init(&new->lock);
 	auth->au_credcache = new;
@@ -183,11 +186,12 @@ rpcauth_clear_credcache(struct rpc_cred_cache *cache)
 	LIST_HEAD(free);
 	struct hlist_head *head;
 	struct rpc_cred	*cred;
+	unsigned int hashsize = 1U << cache->hashbits;
 	int		i;
 
 	spin_lock(&rpc_credcache_lock);
 	spin_lock(&cache->lock);
-	for (i = 0; i < RPC_CREDCACHE_NR; i++) {
+	for (i = 0; i < hashsize; i++) {
 		head = &cache->hashtable[i];
 		while (!hlist_empty(head)) {
 			cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
@@ -297,7 +301,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 			*entry, *new;
 	unsigned int nr;
 
-	nr = hash_long(acred->uid, RPC_CREDCACHE_HASHBITS);
+	nr = hash_long(acred->uid, cache->hashbits);
 
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) {
-- 
cgit v1.2.3-70-g09d2


From 241269bd0b580faae71575443d9ab38df7469126 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 31 Jul 2010 14:29:08 -0400
Subject: SUNRPC: Make the credential cache hashtable size configurable

This patch allows the user to configure the credential cache hashtable size
using a new module parameter: auth_hashtable_size.
When set, this parameter will be rounded up to the nearest power of two,
with a maximum allowed value of 1024 elements.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h |  9 +------
 net/sunrpc/auth.c           | 60 +++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 56 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 84d64b6926a..d2737625a24 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -61,14 +61,7 @@ struct rpc_cred {
 /*
  * Client authentication handle
  */
-#define RPC_CREDCACHE_HASHBITS	4
-#define RPC_CREDCACHE_NR	(1 << RPC_CREDCACHE_HASHBITS)
-struct rpc_cred_cache {
-	struct hlist_head	hashtable[RPC_CREDCACHE_NR];
-	unsigned int		hashbits;
-	spinlock_t		lock;
-};
-
+struct rpc_cred_cache;
 struct rpc_authops;
 struct rpc_auth {
 	unsigned int		au_cslack;	/* call cred size estimate */
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index eef76a1f1dd..d80f01725fc 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -19,6 +19,15 @@
 # define RPCDBG_FACILITY	RPCDBG_AUTH
 #endif
 
+#define RPC_CREDCACHE_DEFAULT_HASHBITS	(4)
+struct rpc_cred_cache {
+	struct hlist_head	*hashtable;
+	unsigned int		hashbits;
+	spinlock_t		lock;
+};
+
+static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS;
+
 static DEFINE_SPINLOCK(rpc_authflavor_lock);
 static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
 	&authnull_ops,		/* AUTH_NULL */
@@ -29,6 +38,42 @@ static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
 static LIST_HEAD(cred_unused);
 static unsigned long number_cred_unused;
 
+#define MAX_HASHTABLE_BITS (10) 
+static int param_set_hashtbl_sz(const char *val, struct kernel_param *kp)
+{
+	unsigned long num;
+	unsigned int nbits;
+	int ret;
+
+	if (!val)
+		goto out_inval;
+	ret = strict_strtoul(val, 0, &num);
+	if (ret == -EINVAL)
+		goto out_inval;
+	nbits = fls(num);
+	if (num > (1U << nbits))
+		nbits++;
+	if (nbits > MAX_HASHTABLE_BITS || nbits < 2)
+		goto out_inval;
+	*(unsigned int *)kp->arg = nbits;
+	return 0;
+out_inval:
+	return -EINVAL;
+}
+
+static int param_get_hashtbl_sz(char *buffer, struct kernel_param *kp)
+{
+	unsigned int nbits;
+
+	nbits = *(unsigned int *)kp->arg;
+	return sprintf(buffer, "%u", 1U << nbits);
+}
+
+#define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int);
+
+module_param_named(auth_hashtable_size, auth_hashbits, hashtbl_sz, 0644);
+MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size");
+
 static u32
 pseudoflavor_to_flavor(u32 flavor) {
 	if (flavor >= RPC_AUTH_MAXFLAVOR)
@@ -146,18 +191,22 @@ rpcauth_init_credcache(struct rpc_auth *auth)
 {
 	struct rpc_cred_cache *new;
 	unsigned int hashsize;
-	int i;
 
 	new = kmalloc(sizeof(*new), GFP_KERNEL);
 	if (!new)
-		return -ENOMEM;
-	new->hashbits = RPC_CREDCACHE_HASHBITS;
+		goto out_nocache;
+	new->hashbits = auth_hashbits;
 	hashsize = 1U << new->hashbits;
-	for (i = 0; i < hashsize; i++)
-		INIT_HLIST_HEAD(&new->hashtable[i]);
+	new->hashtable = kcalloc(hashsize, sizeof(new->hashtable[0]), GFP_KERNEL);
+	if (!new->hashtable)
+		goto out_nohashtbl;
 	spin_lock_init(&new->lock);
 	auth->au_credcache = new;
 	return 0;
+out_nohashtbl:
+	kfree(new);
+out_nocache:
+	return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
 
@@ -220,6 +269,7 @@ rpcauth_destroy_credcache(struct rpc_auth *auth)
 	if (cache) {
 		auth->au_credcache = NULL;
 		rpcauth_clear_credcache(cache);
+		kfree(cache->hashtable);
 		kfree(cache);
 	}
 }
-- 
cgit v1.2.3-70-g09d2


From d9b6cd94601e1d17273f93a326a135fbf487a918 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 31 Jul 2010 14:29:08 -0400
Subject: SUNRPC: Ensure that rpc_exit() always wakes up a sleeping task

Make rpc_exit() non-inline, and ensure that it always wakes up a task that
has been queued.

Kill off the now unused rpc_wake_up_task().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h |  7 +------
 net/sunrpc/sched.c           | 20 +++++++++-----------
 2 files changed, 10 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 7be4f3a6d24..88513fd8e20 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -213,6 +213,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
 				const struct rpc_call_ops *ops);
 void		rpc_put_task(struct rpc_task *);
 void		rpc_exit_task(struct rpc_task *);
+void		rpc_exit(struct rpc_task *, int);
 void		rpc_release_calldata(const struct rpc_call_ops *, void *);
 void		rpc_killall_tasks(struct rpc_clnt *);
 void		rpc_execute(struct rpc_task *);
@@ -241,12 +242,6 @@ void		rpc_destroy_mempool(void);
 extern struct workqueue_struct *rpciod_workqueue;
 void		rpc_prepare_task(struct rpc_task *task);
 
-static inline void rpc_exit(struct rpc_task *task, int status)
-{
-	task->tk_status = status;
-	task->tk_action = rpc_exit_task;
-}
-
 static inline int rpc_wait_for_completion_task(struct rpc_task *task)
 {
 	return __rpc_wait_for_completion_task(task, NULL);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 4a843b883b8..37452762af7 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -405,14 +405,6 @@ void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task
 }
 EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
 
-/*
- * Wake up the specified task
- */
-static void rpc_wake_up_task(struct rpc_task *task)
-{
-	rpc_wake_up_queued_task(task->tk_waitqueue, task);
-}
-
 /*
  * Wake up the next task on a priority queue.
  */
@@ -600,7 +592,15 @@ void rpc_exit_task(struct rpc_task *task)
 		}
 	}
 }
-EXPORT_SYMBOL_GPL(rpc_exit_task);
+
+void rpc_exit(struct rpc_task *task, int status)
+{
+	task->tk_status = status;
+	task->tk_action = rpc_exit_task;
+	if (RPC_IS_QUEUED(task))
+		rpc_wake_up_queued_task(task->tk_waitqueue, task);
+}
+EXPORT_SYMBOL_GPL(rpc_exit);
 
 void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
 {
@@ -690,7 +690,6 @@ static void __rpc_execute(struct rpc_task *task)
 			dprintk("RPC: %5u got signal\n", task->tk_pid);
 			task->tk_flags |= RPC_TASK_KILLED;
 			rpc_exit(task, -ERESTARTSYS);
-			rpc_wake_up_task(task);
 		}
 		rpc_set_running(task);
 		dprintk("RPC: %5u sync task resuming\n", task->tk_pid);
@@ -950,7 +949,6 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
 		if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
 			rovr->tk_flags |= RPC_TASK_KILLED;
 			rpc_exit(rovr, -EIO);
-			rpc_wake_up_task(rovr);
 		}
 	}
 	spin_unlock(&clnt->cl_lock);
-- 
cgit v1.2.3-70-g09d2


From 58f9612c6ea858f532021a0ce42ec53cb0a493b3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 31 Jul 2010 14:29:08 -0400
Subject: SUNRPC: Move remaining RPC client related task initialisation into
 clnt.c

Now that rpc_run_task() is the sole entry point for RPC calls, we can move
the remaining rpc_client-related initialisation of struct rpc_task from
sched.c into clnt.c.

Also move rpc_killall_tasks() into the same file, since that too is
related to the rpc_clnt.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h |  1 +
 net/sunrpc/clnt.c           | 84 +++++++++++++++++++++++++++++++++++++++++++++
 net/sunrpc/sched.c          | 77 +++--------------------------------------
 3 files changed, 89 insertions(+), 73 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index debe7553219..569dc722a60 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -131,6 +131,7 @@ struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 void		rpc_shutdown_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
+void		rpc_task_release_client(struct rpc_task *);
 
 int		rpcb_register(u32, u32, int, unsigned short);
 int		rpcb_v4_register(const u32 program, const u32 version,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 234c40c15f6..3647c81fd68 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -413,6 +413,35 @@ out_no_clnt:
 }
 EXPORT_SYMBOL_GPL(rpc_clone_client);
 
+/*
+ * Kill all tasks for the given client.
+ * XXX: kill their descendants as well?
+ */
+void rpc_killall_tasks(struct rpc_clnt *clnt)
+{
+	struct rpc_task	*rovr;
+
+
+	if (list_empty(&clnt->cl_tasks))
+		return;
+	dprintk("RPC:       killing all tasks for client %p\n", clnt);
+	/*
+	 * Spin lock all_tasks to prevent changes...
+	 */
+	spin_lock(&clnt->cl_lock);
+	list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) {
+		if (!RPC_IS_ACTIVATED(rovr))
+			continue;
+		if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
+			rovr->tk_flags |= RPC_TASK_KILLED;
+			rpc_exit(rovr, -EIO);
+			rpc_wake_up_queued_task(rovr->tk_waitqueue, rovr);
+		}
+	}
+	spin_unlock(&clnt->cl_lock);
+}
+EXPORT_SYMBOL_GPL(rpc_killall_tasks);
+
 /*
  * Properly shut down an RPC client, terminating all outstanding
  * requests.
@@ -538,6 +567,49 @@ out:
 }
 EXPORT_SYMBOL_GPL(rpc_bind_new_program);
 
+void rpc_task_release_client(struct rpc_task *task)
+{
+	struct rpc_clnt *clnt = task->tk_client;
+
+	if (clnt != NULL) {
+		/* Remove from client task list */
+		spin_lock(&clnt->cl_lock);
+		list_del(&task->tk_task);
+		spin_unlock(&clnt->cl_lock);
+		task->tk_client = NULL;
+
+		rpc_release_client(clnt);
+	}
+}
+
+static
+void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
+{
+	if (clnt != NULL) {
+		rpc_task_release_client(task);
+		task->tk_client = clnt;
+		kref_get(&clnt->cl_kref);
+		if (clnt->cl_softrtry)
+			task->tk_flags |= RPC_TASK_SOFT;
+		/* Add to the client's list of all tasks */
+		spin_lock(&clnt->cl_lock);
+		list_add_tail(&task->tk_task, &clnt->cl_tasks);
+		spin_unlock(&clnt->cl_lock);
+	}
+}
+
+static void
+rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
+{
+	if (msg != NULL) {
+		task->tk_msg.rpc_proc = msg->rpc_proc;
+		task->tk_msg.rpc_argp = msg->rpc_argp;
+		task->tk_msg.rpc_resp = msg->rpc_resp;
+		/* Bind the user cred */
+		rpcauth_bindcred(task, msg->rpc_cred, task->tk_flags);
+	}
+}
+
 /*
  * Default callback for async RPC calls
  */
@@ -562,6 +634,18 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
 	if (IS_ERR(task))
 		goto out;
 
+	rpc_task_set_client(task, task_setup_data->rpc_client);
+	rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
+
+	if (task->tk_status != 0) {
+		int ret = task->tk_status;
+		rpc_put_task(task);
+		return ERR_PTR(ret);
+	}
+
+	if (task->tk_action == NULL)
+		rpc_call_start(task);
+
 	atomic_inc(&task->tk_count);
 	rpc_execute(task);
 out:
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 37452762af7..a42296db2ec 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -246,17 +246,8 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task)
 
 static void rpc_set_active(struct rpc_task *task)
 {
-	struct rpc_clnt *clnt;
-	if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0)
-		return;
 	rpc_task_set_debuginfo(task);
-	/* Add to global list of all tasks */
-	clnt = task->tk_client;
-	if (clnt != NULL) {
-		spin_lock(&clnt->cl_lock);
-		list_add_tail(&task->tk_task, &clnt->cl_tasks);
-		spin_unlock(&clnt->cl_lock);
-	}
+	set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
 }
 
 /*
@@ -319,11 +310,6 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 	dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
 			task->tk_pid, rpc_qname(q), jiffies);
 
-	if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) {
-		printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n");
-		return;
-	}
-
 	__rpc_add_wait_queue(q, task);
 
 	BUG_ON(task->tk_callback != NULL);
@@ -334,8 +320,8 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 				rpc_action action)
 {
-	/* Mark the task as being activated if so needed */
-	rpc_set_active(task);
+	/* We shouldn't ever put an inactive task to sleep */
+	BUG_ON(!RPC_IS_ACTIVATED(task));
 
 	/*
 	 * Protect the queue operations.
@@ -807,26 +793,9 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
 	/* Initialize workqueue for async tasks */
 	task->tk_workqueue = task_setup_data->workqueue;
 
-	task->tk_client = task_setup_data->rpc_client;
-	if (task->tk_client != NULL) {
-		kref_get(&task->tk_client->cl_kref);
-		if (task->tk_client->cl_softrtry)
-			task->tk_flags |= RPC_TASK_SOFT;
-	}
-
 	if (task->tk_ops->rpc_call_prepare != NULL)
 		task->tk_action = rpc_prepare_task;
 
-	if (task_setup_data->rpc_message != NULL) {
-		task->tk_msg.rpc_proc = task_setup_data->rpc_message->rpc_proc;
-		task->tk_msg.rpc_argp = task_setup_data->rpc_message->rpc_argp;
-		task->tk_msg.rpc_resp = task_setup_data->rpc_message->rpc_resp;
-		/* Bind the user cred */
-		rpcauth_bindcred(task, task_setup_data->rpc_message->rpc_cred, task_setup_data->flags);
-		if (task->tk_action == NULL)
-			rpc_call_start(task);
-	}
-
 	/* starting timestamp */
 	task->tk_start = ktime_get();
 
@@ -896,10 +865,7 @@ void rpc_put_task(struct rpc_task *task)
 		xprt_release(task);
 	if (task->tk_msg.rpc_cred)
 		rpcauth_unbindcred(task);
-	if (task->tk_client) {
-		rpc_release_client(task->tk_client);
-		task->tk_client = NULL;
-	}
+	rpc_task_release_client(task);
 	if (task->tk_workqueue != NULL) {
 		INIT_WORK(&task->u.tk_work, rpc_async_release);
 		queue_work(task->tk_workqueue, &task->u.tk_work);
@@ -912,13 +878,6 @@ static void rpc_release_task(struct rpc_task *task)
 {
 	dprintk("RPC: %5u release task\n", task->tk_pid);
 
-	if (!list_empty(&task->tk_task)) {
-		struct rpc_clnt *clnt = task->tk_client;
-		/* Remove from client task list */
-		spin_lock(&clnt->cl_lock);
-		list_del(&task->tk_task);
-		spin_unlock(&clnt->cl_lock);
-	}
 	BUG_ON (RPC_IS_QUEUED(task));
 
 	/* Wake up anyone who is waiting for task completion */
@@ -927,34 +886,6 @@ static void rpc_release_task(struct rpc_task *task)
 	rpc_put_task(task);
 }
 
-/*
- * Kill all tasks for the given client.
- * XXX: kill their descendants as well?
- */
-void rpc_killall_tasks(struct rpc_clnt *clnt)
-{
-	struct rpc_task	*rovr;
-
-
-	if (list_empty(&clnt->cl_tasks))
-		return;
-	dprintk("RPC:       killing all tasks for client %p\n", clnt);
-	/*
-	 * Spin lock all_tasks to prevent changes...
-	 */
-	spin_lock(&clnt->cl_lock);
-	list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) {
-		if (! RPC_IS_ACTIVATED(rovr))
-			continue;
-		if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
-			rovr->tk_flags |= RPC_TASK_KILLED;
-			rpc_exit(rovr, -EIO);
-		}
-	}
-	spin_unlock(&clnt->cl_lock);
-}
-EXPORT_SYMBOL_GPL(rpc_killall_tasks);
-
 int rpciod_up(void)
 {
 	return try_module_get(THIS_MODULE) ? 0 : -EINVAL;
-- 
cgit v1.2.3-70-g09d2


From 8572b8e2e3c5f3d990122348c4d2c64dad338611 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 31 Jul 2010 14:29:08 -0400
Subject: SUNRPC: Clean up of rpc_bindcred()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h |  6 +++---
 net/sunrpc/auth.c           | 37 +++++++++++++++++--------------------
 net/sunrpc/auth_generic.c   | 11 +++--------
 net/sunrpc/clnt.c           |  2 +-
 4 files changed, 24 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index d2737625a24..90e4c3827ac 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -106,7 +106,7 @@ struct rpc_credops {
 	void			(*crdestroy)(struct rpc_cred *);
 
 	int			(*crmatch)(struct auth_cred *, struct rpc_cred *, int);
-	void			(*crbind)(struct rpc_task *, struct rpc_cred *, int);
+	struct rpc_cred *	(*crbind)(struct rpc_task *, struct rpc_cred *, int);
 	__be32 *		(*crmarshal)(struct rpc_task *, __be32 *);
 	int			(*crrefresh)(struct rpc_task *);
 	__be32 *		(*crvalidate)(struct rpc_task *, __be32 *);
@@ -135,8 +135,8 @@ void			rpcauth_release(struct rpc_auth *);
 struct rpc_cred *	rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int);
 void			rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
 struct rpc_cred *	rpcauth_lookupcred(struct rpc_auth *, int);
-void			rpcauth_bindcred(struct rpc_task *, struct rpc_cred *, int);
-void			rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int);
+int			rpcauth_bindcred(struct rpc_task *, struct rpc_cred *, int);
+struct rpc_cred *	rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int);
 void			put_rpccred(struct rpc_cred *);
 void			rpcauth_unbindcred(struct rpc_task *);
 __be32 *		rpcauth_marshcred(struct rpc_task *, __be32 *);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index d80f01725fc..d8968faf5cc 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -444,16 +444,16 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
 }
 EXPORT_SYMBOL_GPL(rpcauth_init_cred);
 
-void
+struct rpc_cred *
 rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
 {
-	task->tk_msg.rpc_cred = get_rpccred(cred);
 	dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
 			cred->cr_auth->au_ops->au_name, cred);
+	return get_rpccred(cred);
 }
 EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred);
 
-static void
+static struct rpc_cred *
 rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
 {
 	struct rpc_auth *auth = task->tk_client->cl_auth;
@@ -461,45 +461,42 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
 		.uid = 0,
 		.gid = 0,
 	};
-	struct rpc_cred *ret;
 
 	dprintk("RPC: %5u looking up %s cred\n",
 		task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
-	ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
-	if (!IS_ERR(ret))
-		task->tk_msg.rpc_cred = ret;
-	else
-		task->tk_status = PTR_ERR(ret);
+	return auth->au_ops->lookup_cred(auth, &acred, lookupflags);
 }
 
-static void
+static struct rpc_cred *
 rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
 {
 	struct rpc_auth *auth = task->tk_client->cl_auth;
-	struct rpc_cred *ret;
 
 	dprintk("RPC: %5u looking up %s cred\n",
 		task->tk_pid, auth->au_ops->au_name);
-	ret = rpcauth_lookupcred(auth, lookupflags);
-	if (!IS_ERR(ret))
-		task->tk_msg.rpc_cred = ret;
-	else
-		task->tk_status = PTR_ERR(ret);
+	return rpcauth_lookupcred(auth, lookupflags);
 }
 
-void
+int
 rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
 {
+	struct rpc_cred *new;
 	int lookupflags = 0;
 
 	if (flags & RPC_TASK_ASYNC)
 		lookupflags |= RPCAUTH_LOOKUP_NEW;
 	if (cred != NULL)
-		cred->cr_ops->crbind(task, cred, lookupflags);
+		new = cred->cr_ops->crbind(task, cred, lookupflags);
 	else if (flags & RPC_TASK_ROOTCREDS)
-		rpcauth_bind_root_cred(task, lookupflags);
+		new = rpcauth_bind_root_cred(task, lookupflags);
 	else
-		rpcauth_bind_new_cred(task, lookupflags);
+		new = rpcauth_bind_new_cred(task, lookupflags);
+	if (IS_ERR(new))
+		return PTR_ERR(new);
+	if (task->tk_msg.rpc_cred != NULL)
+		put_rpccred(task->tk_msg.rpc_cred);
+	task->tk_msg.rpc_cred = new;
+	return 0;
 }
 
 void
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 8bae33b36cc..43162bb3b78 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -54,18 +54,13 @@ struct rpc_cred *rpc_lookup_machine_cred(void)
 }
 EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
 
-static void
-generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
+static struct rpc_cred *generic_bind_cred(struct rpc_task *task,
+		struct rpc_cred *cred, int lookupflags)
 {
 	struct rpc_auth *auth = task->tk_client->cl_auth;
 	struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred;
-	struct rpc_cred *ret;
 
-	ret = auth->au_ops->lookup_cred(auth, acred, lookupflags);
-	if (!IS_ERR(ret))
-		task->tk_msg.rpc_cred = ret;
-	else
-		task->tk_status = PTR_ERR(ret);
+	return auth->au_ops->lookup_cred(auth, acred, lookupflags);
 }
 
 /*
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 3647c81fd68..f34b5e3823c 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -606,7 +606,7 @@ rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
 		task->tk_msg.rpc_argp = msg->rpc_argp;
 		task->tk_msg.rpc_resp = msg->rpc_resp;
 		/* Bind the user cred */
-		rpcauth_bindcred(task, msg->rpc_cred, task->tk_flags);
+		task->tk_status = rpcauth_bindcred(task, msg->rpc_cred, task->tk_flags);
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From a17c2153d2e271b0cbacae9bed83b0eaa41db7e1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 31 Jul 2010 14:29:08 -0400
Subject: SUNRPC: Move the bound cred to struct rpc_rqst

This will allow us to save the original generic cred in rpc_message, so
that if we migrate from one server to another, we can generate a new bound
cred without having to punt back to the NFS layer.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs2xdr.c               |  7 ++--
 fs/nfs/nfs3xdr.c               |  8 ++--
 fs/nfs/nfs4xdr.c               |  2 +-
 include/linux/sunrpc/auth.h    |  2 -
 include/linux/sunrpc/xprt.h    |  1 +
 net/sunrpc/auth.c              | 43 ++++++++++----------
 net/sunrpc/auth_gss/auth_gss.c | 22 +++++-----
 net/sunrpc/auth_null.c         |  2 +-
 net/sunrpc/auth_unix.c         |  6 +--
 net/sunrpc/clnt.c              | 91 +++++++++++++++++++++---------------------
 net/sunrpc/sched.c             |  2 +-
 net/sunrpc/xprt.c              |  2 +
 12 files changed, 92 insertions(+), 96 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 81cf1425791..db8846a0e82 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -233,7 +233,7 @@ nfs_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs
 static int
 nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+	struct rpc_auth	*auth = req->rq_cred->cr_auth;
 	unsigned int replen;
 	u32 offset = (u32)args->offset;
 	u32 count = args->count;
@@ -393,8 +393,7 @@ nfs_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_symlinkargs *arg
 static int
 nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
 {
-	struct rpc_task	*task = req->rq_task;
-	struct rpc_auth	*auth = task->tk_msg.rpc_cred->cr_auth;
+	struct rpc_auth	*auth = req->rq_cred->cr_auth;
 	unsigned int replen;
 	u32 count = args->count;
 
@@ -575,7 +574,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
 static int
 nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+	struct rpc_auth	*auth = req->rq_cred->cr_auth;
 	unsigned int replen;
 
 	p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 75dcfc7da36..9769704f8ce 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -330,7 +330,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg
 static int
 nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+	struct rpc_auth	*auth = req->rq_cred->cr_auth;
 	unsigned int replen;
 	u32 count = args->count;
 
@@ -471,7 +471,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
 static int
 nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+	struct rpc_auth	*auth = req->rq_cred->cr_auth;
 	unsigned int replen;
 	u32 count = args->count;
 
@@ -675,7 +675,7 @@ static int
 nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
 		    struct nfs3_getaclargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+	struct rpc_auth	*auth = req->rq_cred->cr_auth;
 	unsigned int replen;
 
 	p = xdr_encode_fhandle(p, args->fh);
@@ -802,7 +802,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
 static int
 nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+	struct rpc_auth	*auth = req->rq_cred->cr_auth;
 	unsigned int replen;
 
 	p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 257c1811feb..08ef9129113 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -758,7 +758,7 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
 				struct compound_hdr *hdr)
 {
 	__be32 *p;
-	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+	struct rpc_auth *auth = req->rq_cred->cr_auth;
 
 	/* initialize running count of expected bytes in reply.
 	 * NOTE: the replied tag SHOULD be the same is the one sent,
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 90e4c3827ac..5bbc447175d 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -135,10 +135,8 @@ void			rpcauth_release(struct rpc_auth *);
 struct rpc_cred *	rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int);
 void			rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
 struct rpc_cred *	rpcauth_lookupcred(struct rpc_auth *, int);
-int			rpcauth_bindcred(struct rpc_task *, struct rpc_cred *, int);
 struct rpc_cred *	rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int);
 void			put_rpccred(struct rpc_cred *);
-void			rpcauth_unbindcred(struct rpc_task *);
 __be32 *		rpcauth_marshcred(struct rpc_task *, __be32 *);
 __be32 *		rpcauth_checkverf(struct rpc_task *, __be32 *);
 int			rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, __be32 *data, void *obj);
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index b5147030239..ff5a77b28c5 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -64,6 +64,7 @@ struct rpc_rqst {
 	 * This is the private part
 	 */
 	struct rpc_task *	rq_task;	/* RPC task data */
+	struct rpc_cred *	rq_cred;	/* Bound cred */
 	__be32			rq_xid;		/* request XID */
 	int			rq_cong;	/* has incremented xprt->cong */
 	u32			rq_seqno;	/* gss seq no. used on req. */
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index d8968faf5cc..95721426296 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -477,9 +477,10 @@ rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
 	return rpcauth_lookupcred(auth, lookupflags);
 }
 
-int
+static int
 rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
 {
+	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_cred *new;
 	int lookupflags = 0;
 
@@ -493,9 +494,9 @@ rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
 		new = rpcauth_bind_new_cred(task, lookupflags);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
-	if (task->tk_msg.rpc_cred != NULL)
-		put_rpccred(task->tk_msg.rpc_cred);
-	task->tk_msg.rpc_cred = new;
+	if (req->rq_cred != NULL)
+		put_rpccred(req->rq_cred);
+	req->rq_cred = new;
 	return 0;
 }
 
@@ -535,22 +536,10 @@ out_nodestroy:
 }
 EXPORT_SYMBOL_GPL(put_rpccred);
 
-void
-rpcauth_unbindcred(struct rpc_task *task)
-{
-	struct rpc_cred	*cred = task->tk_msg.rpc_cred;
-
-	dprintk("RPC: %5u releasing %s cred %p\n",
-		task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
-
-	put_rpccred(cred);
-	task->tk_msg.rpc_cred = NULL;
-}
-
 __be32 *
 rpcauth_marshcred(struct rpc_task *task, __be32 *p)
 {
-	struct rpc_cred	*cred = task->tk_msg.rpc_cred;
+	struct rpc_cred	*cred = task->tk_rqstp->rq_cred;
 
 	dprintk("RPC: %5u marshaling %s cred %p\n",
 		task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
@@ -561,7 +550,7 @@ rpcauth_marshcred(struct rpc_task *task, __be32 *p)
 __be32 *
 rpcauth_checkverf(struct rpc_task *task, __be32 *p)
 {
-	struct rpc_cred	*cred = task->tk_msg.rpc_cred;
+	struct rpc_cred	*cred = task->tk_rqstp->rq_cred;
 
 	dprintk("RPC: %5u validating %s cred %p\n",
 		task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
@@ -573,7 +562,7 @@ int
 rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
 		__be32 *data, void *obj)
 {
-	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 
 	dprintk("RPC: %5u using %s cred %p to wrap rpc data\n",
 			task->tk_pid, cred->cr_ops->cr_name, cred);
@@ -587,7 +576,7 @@ int
 rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
 		__be32 *data, void *obj)
 {
-	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 
 	dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n",
 			task->tk_pid, cred->cr_ops->cr_name, cred);
@@ -601,13 +590,21 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
 int
 rpcauth_refreshcred(struct rpc_task *task)
 {
-	struct rpc_cred	*cred = task->tk_msg.rpc_cred;
+	struct rpc_cred	*cred = task->tk_rqstp->rq_cred;
 	int err;
 
+	cred = task->tk_rqstp->rq_cred;
+	if (cred == NULL) {
+		err = rpcauth_bindcred(task, task->tk_msg.rpc_cred, task->tk_flags);
+		if (err < 0)
+			goto out;
+		cred = task->tk_rqstp->rq_cred;
+	};
 	dprintk("RPC: %5u refreshing %s cred %p\n",
 		task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
 
 	err = cred->cr_ops->crrefresh(task);
+out:
 	if (err < 0)
 		task->tk_status = err;
 	return err;
@@ -616,7 +613,7 @@ rpcauth_refreshcred(struct rpc_task *task)
 void
 rpcauth_invalcred(struct rpc_task *task)
 {
-	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 
 	dprintk("RPC: %5u invalidating %s cred %p\n",
 		task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
@@ -627,7 +624,7 @@ rpcauth_invalcred(struct rpc_task *task)
 int
 rpcauth_uptodatecred(struct rpc_task *task)
 {
-	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 
 	return cred == NULL ||
 		test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 8da2a0e6857..096e1260bc6 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -373,7 +373,7 @@ gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss
 static void
 gss_upcall_callback(struct rpc_task *task)
 {
-	struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred,
+	struct gss_cred *gss_cred = container_of(task->tk_rqstp->rq_cred,
 			struct gss_cred, gc_base);
 	struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall;
 	struct inode *inode = &gss_msg->inode->vfs_inode;
@@ -502,7 +502,7 @@ static void warn_gssd(void)
 static inline int
 gss_refresh_upcall(struct rpc_task *task)
 {
-	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_auth *gss_auth = container_of(cred->cr_auth,
 			struct gss_auth, rpc_auth);
 	struct gss_cred *gss_cred = container_of(cred,
@@ -1064,12 +1064,12 @@ out:
 static __be32 *
 gss_marshal(struct rpc_task *task, __be32 *p)
 {
-	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_cred *cred = req->rq_cred;
 	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
 						 gc_base);
 	struct gss_cl_ctx	*ctx = gss_cred_get_ctx(cred);
 	__be32		*cred_len;
-	struct rpc_rqst *req = task->tk_rqstp;
 	u32             maj_stat = 0;
 	struct xdr_netobj mic;
 	struct kvec	iov;
@@ -1119,7 +1119,7 @@ out_put_ctx:
 
 static int gss_renew_cred(struct rpc_task *task)
 {
-	struct rpc_cred *oldcred = task->tk_msg.rpc_cred;
+	struct rpc_cred *oldcred = task->tk_rqstp->rq_cred;
 	struct gss_cred *gss_cred = container_of(oldcred,
 						 struct gss_cred,
 						 gc_base);
@@ -1133,7 +1133,7 @@ static int gss_renew_cred(struct rpc_task *task)
 	new = gss_lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
-	task->tk_msg.rpc_cred = new;
+	task->tk_rqstp->rq_cred = new;
 	put_rpccred(oldcred);
 	return 0;
 }
@@ -1161,7 +1161,7 @@ static int gss_cred_is_negative_entry(struct rpc_cred *cred)
 static int
 gss_refresh(struct rpc_task *task)
 {
-	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	int ret = 0;
 
 	if (gss_cred_is_negative_entry(cred))
@@ -1172,7 +1172,7 @@ gss_refresh(struct rpc_task *task)
 		ret = gss_renew_cred(task);
 		if (ret < 0)
 			goto out;
-		cred = task->tk_msg.rpc_cred;
+		cred = task->tk_rqstp->rq_cred;
 	}
 
 	if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags))
@@ -1191,7 +1191,7 @@ gss_refresh_null(struct rpc_task *task)
 static __be32 *
 gss_validate(struct rpc_task *task, __be32 *p)
 {
-	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
 	__be32		seq;
 	struct kvec	iov;
@@ -1400,7 +1400,7 @@ static int
 gss_wrap_req(struct rpc_task *task,
 	     kxdrproc_t encode, void *rqstp, __be32 *p, void *obj)
 {
-	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
 			gc_base);
 	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
@@ -1503,7 +1503,7 @@ static int
 gss_unwrap_resp(struct rpc_task *task,
 		kxdrproc_t decode, void *rqstp, __be32 *p, void *obj)
 {
-	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
 			gc_base);
 	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 1db618f56ec..a5c36c01707 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -75,7 +75,7 @@ nul_marshal(struct rpc_task *task, __be32 *p)
 static int
 nul_refresh(struct rpc_task *task)
 {
-	set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags);
+	set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags);
 	return 0;
 }
 
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index d5e37dbf207..4cb70dc6e7a 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -140,7 +140,7 @@ static __be32 *
 unx_marshal(struct rpc_task *task, __be32 *p)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
-	struct unx_cred	*cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base);
+	struct unx_cred	*cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base);
 	__be32		*base, *hold;
 	int		i;
 
@@ -173,7 +173,7 @@ unx_marshal(struct rpc_task *task, __be32 *p)
 static int
 unx_refresh(struct rpc_task *task)
 {
-	set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags);
+	set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags);
 	return 0;
 }
 
@@ -196,7 +196,7 @@ unx_validate(struct rpc_task *task, __be32 *p)
 		printk("RPC: giant verf size: %u\n", size);
 		return NULL;
 	}
-	task->tk_msg.rpc_cred->cr_auth->au_rslack = (size >> 2) + 2;
+	task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
 	p += (size >> 2);
 
 	return p;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index f34b5e3823c..2388d83b68f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -605,8 +605,8 @@ rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
 		task->tk_msg.rpc_proc = msg->rpc_proc;
 		task->tk_msg.rpc_argp = msg->rpc_argp;
 		task->tk_msg.rpc_resp = msg->rpc_resp;
-		/* Bind the user cred */
-		task->tk_status = rpcauth_bindcred(task, msg->rpc_cred, task->tk_flags);
+		if (msg->rpc_cred != NULL)
+			task->tk_msg.rpc_cred = get_rpccred(msg->rpc_cred);
 	}
 }
 
@@ -909,11 +909,6 @@ call_reserve(struct rpc_task *task)
 {
 	dprint_status(task);
 
-	if (!rpcauth_uptodatecred(task)) {
-		task->tk_action = call_refresh;
-		return;
-	}
-
 	task->tk_status  = 0;
 	task->tk_action  = call_reserveresult;
 	xprt_reserve(task);
@@ -977,7 +972,7 @@ call_reserveresult(struct rpc_task *task)
 static void
 call_allocate(struct rpc_task *task)
 {
-	unsigned int slack = task->tk_msg.rpc_cred->cr_auth->au_cslack;
+	unsigned int slack = task->tk_client->cl_auth->au_cslack;
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = task->tk_xprt;
 	struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
@@ -985,7 +980,7 @@ call_allocate(struct rpc_task *task)
 	dprint_status(task);
 
 	task->tk_status = 0;
-	task->tk_action = call_bind;
+	task->tk_action = call_refresh;
 
 	if (req->rq_buffer)
 		return;
@@ -1022,6 +1017,47 @@ call_allocate(struct rpc_task *task)
 	rpc_exit(task, -ERESTARTSYS);
 }
 
+/*
+ * 2a.	Bind and/or refresh the credentials
+ */
+static void
+call_refresh(struct rpc_task *task)
+{
+	dprint_status(task);
+
+	task->tk_action = call_refreshresult;
+	task->tk_status = 0;
+	task->tk_client->cl_stats->rpcauthrefresh++;
+	rpcauth_refreshcred(task);
+}
+
+/*
+ * 2b.	Process the results of a credential refresh
+ */
+static void
+call_refreshresult(struct rpc_task *task)
+{
+	int status = task->tk_status;
+
+	dprint_status(task);
+
+	task->tk_status = 0;
+	task->tk_action = call_bind;
+	if (status >= 0 && rpcauth_uptodatecred(task))
+		return;
+	switch (status) {
+	case -EACCES:
+		rpc_exit(task, -EACCES);
+		return;
+	case -ENOMEM:
+		rpc_exit(task, -ENOMEM);
+		return;
+	case -ETIMEDOUT:
+		rpc_delay(task, 3*HZ);
+	}
+	task->tk_action = call_refresh;
+}
+
 static inline int
 rpc_task_need_encode(struct rpc_task *task)
 {
@@ -1557,43 +1593,6 @@ out_retry:
 	}
 }
 
-/*
- * 8.	Refresh the credentials if rejected by the server
- */
-static void
-call_refresh(struct rpc_task *task)
-{
-	dprint_status(task);
-
-	task->tk_action = call_refreshresult;
-	task->tk_status = 0;
-	task->tk_client->cl_stats->rpcauthrefresh++;
-	rpcauth_refreshcred(task);
-}
-
-/*
- * 8a.	Process the results of a credential refresh
- */
-static void
-call_refreshresult(struct rpc_task *task)
-{
-	int status = task->tk_status;
-
-	dprint_status(task);
-
-	task->tk_status = 0;
-	task->tk_action = call_reserve;
-	if (status >= 0 && rpcauth_uptodatecred(task))
-		return;
-	if (status == -EACCES) {
-		rpc_exit(task, -EACCES);
-		return;
-	}
-	task->tk_action = call_refresh;
-	if (status != -ETIMEDOUT)
-		rpc_delay(task, 3*HZ);
-}
-
 static __be32 *
 rpc_encode_header(struct rpc_task *task)
 {
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index a42296db2ec..f6db6131fb2 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -864,7 +864,7 @@ void rpc_put_task(struct rpc_task *task)
 	if (task->tk_rqstp)
 		xprt_release(task);
 	if (task->tk_msg.rpc_cred)
-		rpcauth_unbindcred(task);
+		put_rpccred(task->tk_msg.rpc_cred);
 	rpc_task_release_client(task);
 	if (task->tk_workqueue != NULL) {
 		INIT_WORK(&task->u.tk_work, rpc_async_release);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index dcd0132396b..70297836a19 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1032,6 +1032,8 @@ void xprt_release(struct rpc_task *task)
 	spin_unlock_bh(&xprt->transport_lock);
 	if (req->rq_buffer)
 		xprt->ops->buf_free(req->rq_buffer);
+	if (req->rq_cred != NULL)
+		put_rpccred(req->rq_cred);
 	task->tk_rqstp = NULL;
 	if (req->rq_release_snd_buf)
 		req->rq_release_snd_buf(req);
-- 
cgit v1.2.3-70-g09d2


From a2ebf07ae53e65bd073f96877e4818f2e89271ae Mon Sep 17 00:00:00 2001
From: Aleksey Senin <alex@senin.name>
Date: Sun, 4 Jul 2010 13:55:57 +0000
Subject: IB: Rename RAW_ETY to RAW_ETHERTYPE

Change abbreviated IB_QPT_RAW_ETY to IB_QPT_RAW_ETHERTYPE to make
the special QP type easier to understand.

cf http://www.mail-archive.com/linux-rdma@vger.kernel.org/msg04530.html

Signed-off-by: Aleksey Senin <alekseys@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/verbs.c         | 4 ++--
 drivers/infiniband/hw/ehca/ehca_qp.c    | 2 +-
 drivers/infiniband/hw/mthca/mthca_cmd.c | 2 +-
 include/rdma/ib_verbs.h                 | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index a7da9be43e6..e0fa2223871 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -310,8 +310,8 @@ EXPORT_SYMBOL(ib_create_qp);
 
 static const struct {
 	int			valid;
-	enum ib_qp_attr_mask	req_param[IB_QPT_RAW_ETY + 1];
-	enum ib_qp_attr_mask	opt_param[IB_QPT_RAW_ETY + 1];
+	enum ib_qp_attr_mask	req_param[IB_QPT_RAW_ETHERTYPE + 1];
+	enum ib_qp_attr_mask	opt_param[IB_QPT_RAW_ETHERTYPE + 1];
 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
 	[IB_QPS_RESET] = {
 		[IB_QPS_RESET] = { .valid = 1 },
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 47d388ec1cd..32fb34201ab 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -251,7 +251,7 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
 		return ST_UD;
 	case IB_QPT_RAW_IPV6:
 		return -EINVAL;
-	case IB_QPT_RAW_ETY:
+	case IB_QPT_RAW_ETHERTYPE:
 		return -EINVAL;
 	default:
 		ehca_gen_err("Invalid ibqptype=%x", ibqptype);
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 3603ae89b60..f4ceecd9684 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1817,7 +1817,7 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
 	case IB_QPT_RAW_IPV6:
 		op_mod = 2;
 		break;
-	case IB_QPT_RAW_ETY:
+	case IB_QPT_RAW_ETHERTYPE:
 		op_mod = 3;
 		break;
 	default:
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index f3e8f3c0772..857b3b9cf12 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -555,7 +555,7 @@ enum ib_qp_type {
 	IB_QPT_UC,
 	IB_QPT_UD,
 	IB_QPT_RAW_IPV6,
-	IB_QPT_RAW_ETY
+	IB_QPT_RAW_ETHERTYPE
 };
 
 enum ib_qp_create_flags {
-- 
cgit v1.2.3-70-g09d2


From e3b5e0d552b34d65e15b20610273b200555eea53 Mon Sep 17 00:00:00 2001
From: Ilya Yanok <yanok@emcraft.com>
Date: Thu, 8 Jul 2010 10:10:38 +0000
Subject: powerpc/fsl_pci: add quirk for mpc8308 pcie bridge

This patch adds the quirk for the PCIE controller found on the Freescale
MPC8308. The quirk is the same as for other MPC83xx processors.

Signed-off-by: Ilya Yanok <yanok@emcraft.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/sysdev/fsl_pci.c | 1 +
 include/linux/pci_ids.h       | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index a14760fe513..7e900ec988f 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -412,6 +412,7 @@ DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4080, quirk_fsl_pcie_header);
 #endif /* CONFIG_FSL_SOC_BOOKE || CONFIG_PPC_86xx */
 
 #if defined(CONFIG_PPC_83xx) || defined(CONFIG_PPC_MPC512x)
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8308, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8314E, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8314, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8315E, quirk_fsl_pcie_header);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 3bedcc149c8..79bb11f35c4 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2264,6 +2264,7 @@
 #define PCI_DEVICE_ID_TDI_EHCI          0x0101
 
 #define PCI_VENDOR_ID_FREESCALE		0x1957
+#define PCI_DEVICE_ID_MPC8308		0xc006
 #define PCI_DEVICE_ID_MPC8315E		0x00b4
 #define PCI_DEVICE_ID_MPC8315		0x00b5
 #define PCI_DEVICE_ID_MPC8314E		0x00b6
-- 
cgit v1.2.3-70-g09d2


From 1297c05a8dfb568c689f057d51a65eebe5ddc86f Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexdeucher@gmail.com>
Date: Wed, 4 Aug 2010 11:40:00 -0400
Subject: drm/radeon: add new pci ids

New evergreen and r7xx ids.

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_pciids.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 2d428b088cc..3a9940ef728 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -146,6 +146,8 @@
 	{0x1002, 0x6888, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6889, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x688A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x688C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x688D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6898, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6899, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x689c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HEMLOCK|RADEON_NEW_MEMMAP}, \
@@ -161,6 +163,7 @@
 	{0x1002, 0x68be, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68c1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68c7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68c8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68c9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_NEW_MEMMAP}, \
@@ -174,6 +177,7 @@
 	{0x1002, 0x68e8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68e9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68f1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68f2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68f8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68f9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68fe, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
@@ -314,6 +318,7 @@
 	{0x1002, 0x9456, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x945A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x945B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x945E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9462, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x946A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
@@ -324,6 +329,7 @@
 	{0x1002, 0x9487, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9488, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9489, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x948A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x948F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9490, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9491, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
@@ -366,6 +372,7 @@
 	{0x1002, 0x9553, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV710|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9555, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV710|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9557, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV710|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x955f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV710|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9580, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV630|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9581, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV630|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9583, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV630|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
-- 
cgit v1.2.3-70-g09d2


From fca3ec01e0b40cab82cac7745e154b01969e6219 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 4 Aug 2010 14:34:24 +0100
Subject: drm,io-mapping: Specify slot to use for atomic mappings

This is required should we ever attempt to use an io-mapping where
KM_USER0 is verboten, such as inside an IRQ context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/i915/i915_gem.c        |  9 +++++----
 drivers/gpu/drm/i915/intel_overlay.c   |  5 +++--
 drivers/gpu/drm/nouveau/nouveau_bios.c |  8 ++++----
 include/linux/io-mapping.h             | 16 ++++++++++------
 4 files changed, 22 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4efd4fd3b34..2a4ed7ca8b4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -496,10 +496,10 @@ fast_user_write(struct io_mapping *mapping,
 	char *vaddr_atomic;
 	unsigned long unwritten;
 
-	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
+	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base, KM_USER0);
 	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
 						      user_data, length);
-	io_mapping_unmap_atomic(vaddr_atomic);
+	io_mapping_unmap_atomic(vaddr_atomic, KM_USER0);
 	if (unwritten)
 		return -EFAULT;
 	return 0;
@@ -3487,7 +3487,8 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 		reloc_offset = obj_priv->gtt_offset + reloc->offset;
 		reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
 						      (reloc_offset &
-						       ~(PAGE_SIZE - 1)));
+						       ~(PAGE_SIZE - 1)),
+						      KM_USER0);
 		reloc_entry = (uint32_t __iomem *)(reloc_page +
 						   (reloc_offset & (PAGE_SIZE - 1)));
 		reloc_val = target_obj_priv->gtt_offset + reloc->delta;
@@ -3498,7 +3499,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 			  readl(reloc_entry), reloc_val);
 #endif
 		writel(reloc_val, reloc_entry);
-		io_mapping_unmap_atomic(reloc_page);
+		io_mapping_unmap_atomic(reloc_page, KM_USER0);
 
 		/* The updated presumed offset for this entry will be
 		 * copied back out to the user.
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index f26ec2f27d3..d39aea24eab 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -185,7 +185,8 @@ static struct overlay_registers *intel_overlay_map_regs_atomic(struct intel_over
 
 	if (OVERLAY_NONPHYSICAL(overlay->dev)) {
 		regs = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
-				overlay->reg_bo->gtt_offset);
+						overlay->reg_bo->gtt_offset,
+						KM_USER0);
 
 		if (!regs) {
 			DRM_ERROR("failed to map overlay regs in GTT\n");
@@ -200,7 +201,7 @@ static struct overlay_registers *intel_overlay_map_regs_atomic(struct intel_over
 static void intel_overlay_unmap_regs_atomic(struct intel_overlay *overlay)
 {
 	if (OVERLAY_NONPHYSICAL(overlay->dev))
-		io_mapping_unmap_atomic(overlay->virt_addr);
+		io_mapping_unmap_atomic(overlay->virt_addr, KM_USER0);
 
 	overlay->virt_addr = NULL;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index b59f348f14f..7369b5e7364 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -2083,11 +2083,11 @@ peek_fb(struct drm_device *dev, struct io_mapping *fb,
 	uint32_t val = 0;
 
 	if (off < pci_resource_len(dev->pdev, 1)) {
-		uint32_t __iomem *p = io_mapping_map_atomic_wc(fb, off);
+		uint32_t __iomem *p = io_mapping_map_atomic_wc(fb, off, KM_USER0);
 
 		val = ioread32(p);
 
-		io_mapping_unmap_atomic(p);
+		io_mapping_unmap_atomic(p, KM_USER0);
 	}
 
 	return val;
@@ -2098,12 +2098,12 @@ poke_fb(struct drm_device *dev, struct io_mapping *fb,
 	uint32_t off, uint32_t val)
 {
 	if (off < pci_resource_len(dev->pdev, 1)) {
-		uint32_t __iomem *p = io_mapping_map_atomic_wc(fb, off);
+		uint32_t __iomem *p = io_mapping_map_atomic_wc(fb, off, KM_USER0);
 
 		iowrite32(val, p);
 		wmb();
 
-		io_mapping_unmap_atomic(p);
+		io_mapping_unmap_atomic(p, KM_USER0);
 	}
 }
 
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index 25085ddd955..e0ea40f6c51 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -79,7 +79,9 @@ io_mapping_free(struct io_mapping *mapping)
 
 /* Atomic map/unmap */
 static inline void *
-io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset)
+io_mapping_map_atomic_wc(struct io_mapping *mapping,
+			 unsigned long offset,
+			 int slot)
 {
 	resource_size_t phys_addr;
 	unsigned long pfn;
@@ -87,13 +89,13 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset)
 	BUG_ON(offset >= mapping->size);
 	phys_addr = mapping->base + offset;
 	pfn = (unsigned long) (phys_addr >> PAGE_SHIFT);
-	return iomap_atomic_prot_pfn(pfn, KM_USER0, mapping->prot);
+	return iomap_atomic_prot_pfn(pfn, slot, mapping->prot);
 }
 
 static inline void
-io_mapping_unmap_atomic(void *vaddr)
+io_mapping_unmap_atomic(void *vaddr, int slot)
 {
-	iounmap_atomic(vaddr, KM_USER0);
+	iounmap_atomic(vaddr, slot);
 }
 
 static inline void *
@@ -133,13 +135,15 @@ io_mapping_free(struct io_mapping *mapping)
 
 /* Atomic map/unmap */
 static inline void *
-io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset)
+io_mapping_map_atomic_wc(struct io_mapping *mapping,
+			 unsigned long offset,
+			 int slot)
 {
 	return ((char *) mapping) + offset;
 }
 
 static inline void
-io_mapping_unmap_atomic(void *vaddr)
+io_mapping_unmap_atomic(void *vaddr, int slot)
 {
 }
 
-- 
cgit v1.2.3-70-g09d2


From 58374713c9dfb4d231f8c56cac089f6fbdedc2ec Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 10 Jul 2010 23:51:39 +0200
Subject: drm: kill BKL from common code

This restricts the use of the big kernel lock to the i830 and i810
device drivers. The three remaining users in common code (open, ioctl
and release) get converted to a new mutex, the drm_global_mutex,
making the locking stricter than the big kernel lock.

This may have a performance impact, but only in those cases that
currently don't use the DRM_UNLOCKED flag in the ioctl list and would
benefit from that anyway.

The reason why i810 and i830 cannot use drm_global_mutex in their
mmap functions is a lock-order inversion problem between the current
use of the BKL and mmap_sem in these drivers. Since the BKL has
release-on-sleep semantics, it's harmless but it would cause trouble
if we replace the BKL with a mutex.

Instead, these drivers get their own ioctl wrappers that take the
BKL around every ioctl call and then set their own handlers as
DRM_UNLOCKED.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: David Airlie <airlied@linux.ie>
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_drv.c       |  4 ++--
 drivers/gpu/drm/drm_fops.c      | 23 +++++++++++----------
 drivers/gpu/drm/i810/i810_dma.c | 44 +++++++++++++++++++++++++++--------------
 drivers/gpu/drm/i810/i810_drv.c |  2 +-
 drivers/gpu/drm/i810/i810_drv.h |  1 +
 drivers/gpu/drm/i830/i830_dma.c | 42 ++++++++++++++++++++++++++-------------
 drivers/gpu/drm/i830/i830_drv.c |  2 +-
 drivers/gpu/drm/i830/i830_drv.h |  1 +
 include/drm/drmP.h              |  2 +-
 9 files changed, 75 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index d5b349d279f..90288ec7c28 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -481,9 +481,9 @@ long drm_ioctl(struct file *filp,
 		if (ioctl->flags & DRM_UNLOCKED)
 			retcode = func(dev, kdata, file_priv);
 		else {
-			lock_kernel();
+			mutex_lock(&drm_global_mutex);
 			retcode = func(dev, kdata, file_priv);
-			unlock_kernel();
+			mutex_unlock(&drm_global_mutex);
 		}
 
 		if (cmd & IOC_OUT) {
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index e7aace20981..2ca8df8b610 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -39,6 +39,9 @@
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
 
+/* from BKL pushdown: note that nothing else serializes idr_find() */
+DEFINE_MUTEX(drm_global_mutex);
+
 static int drm_open_helper(struct inode *inode, struct file *filp,
 			   struct drm_device * dev);
 
@@ -175,8 +178,7 @@ int drm_stub_open(struct inode *inode, struct file *filp)
 
 	DRM_DEBUG("\n");
 
-	/* BKL pushdown: note that nothing else serializes idr_find() */
-	lock_kernel();
+	mutex_lock(&drm_global_mutex);
 	minor = idr_find(&drm_minors_idr, minor_id);
 	if (!minor)
 		goto out;
@@ -197,7 +199,7 @@ int drm_stub_open(struct inode *inode, struct file *filp)
 	fops_put(old_fops);
 
 out:
-	unlock_kernel();
+	mutex_unlock(&drm_global_mutex);
 	return err;
 }
 
@@ -472,7 +474,7 @@ int drm_release(struct inode *inode, struct file *filp)
 	struct drm_device *dev = file_priv->minor->dev;
 	int retcode = 0;
 
-	lock_kernel();
+	mutex_lock(&drm_global_mutex);
 
 	DRM_DEBUG("open_count = %d\n", dev->open_count);
 
@@ -573,17 +575,14 @@ int drm_release(struct inode *inode, struct file *filp)
 		if (atomic_read(&dev->ioctl_count)) {
 			DRM_ERROR("Device busy: %d\n",
 				  atomic_read(&dev->ioctl_count));
-			spin_unlock(&dev->count_lock);
-			unlock_kernel();
-			return -EBUSY;
+			retcode = -EBUSY;
+			goto out;
 		}
-		spin_unlock(&dev->count_lock);
-		unlock_kernel();
-		return drm_lastclose(dev);
+		retcode = drm_lastclose(dev);
 	}
+out:
 	spin_unlock(&dev->count_lock);
-
-	unlock_kernel();
+	mutex_unlock(&drm_global_mutex);
 
 	return retcode;
 }
diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c
index 09c86ed8992..0e6c131313d 100644
--- a/drivers/gpu/drm/i810/i810_dma.c
+++ b/drivers/gpu/drm/i810/i810_dma.c
@@ -37,6 +37,7 @@
 #include <linux/interrupt.h>	/* For task queue support */
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <linux/pagemap.h>
 
 #define I810_BUF_FREE		2
@@ -1240,22 +1241,35 @@ int i810_driver_dma_quiescent(struct drm_device *dev)
 	return 0;
 }
 
+/*
+ * call the drm_ioctl under the big kernel lock
+ * to lock against the i810_mmap_buffers function.
+ */
+long i810_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long ret;	/* drm_ioctl() returns long; don't narrow it to int */
+	lock_kernel();
+	ret = drm_ioctl(file, cmd, arg);
+	unlock_kernel();
+	return ret;
+}
+
 struct drm_ioctl_desc i810_ioctls[] = {
-	DRM_IOCTL_DEF(DRM_I810_INIT, i810_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_I810_VERTEX, i810_dma_vertex, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I810_CLEAR, i810_clear_bufs, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I810_FLUSH, i810_flush_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I810_GETAGE, i810_getage, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I810_GETBUF, i810_getbuf, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I810_SWAP, i810_swap_bufs, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I810_COPY, i810_copybuf, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I810_DOCOPY, i810_docopy, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I810_OV0INFO, i810_ov0_info, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I810_FSTATUS, i810_fstatus, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I810_OV0FLIP, i810_ov0_flip, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I810_MC, i810_dma_mc, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_I810_RSTATUS, i810_rstatus, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I810_FLIP, i810_flip_bufs, DRM_AUTH)
+	DRM_IOCTL_DEF(DRM_I810_INIT, i810_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I810_VERTEX, i810_dma_vertex, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I810_CLEAR, i810_clear_bufs, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I810_FLUSH, i810_flush_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I810_GETAGE, i810_getage, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I810_GETBUF, i810_getbuf, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I810_SWAP, i810_swap_bufs, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I810_COPY, i810_copybuf, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I810_DOCOPY, i810_docopy, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I810_OV0INFO, i810_ov0_info, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I810_FSTATUS, i810_fstatus, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I810_OV0FLIP, i810_ov0_flip, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I810_MC, i810_dma_mc, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I810_RSTATUS, i810_rstatus, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I810_FLIP, i810_flip_bufs, DRM_AUTH|DRM_UNLOCKED),
 };
 
 int i810_max_ioctl = DRM_ARRAY_SIZE(i810_ioctls);
diff --git a/drivers/gpu/drm/i810/i810_drv.c b/drivers/gpu/drm/i810/i810_drv.c
index c1e02752e02..b4250b2cac1 100644
--- a/drivers/gpu/drm/i810/i810_drv.c
+++ b/drivers/gpu/drm/i810/i810_drv.c
@@ -59,7 +59,7 @@ static struct drm_driver driver = {
 		 .owner = THIS_MODULE,
 		 .open = drm_open,
 		 .release = drm_release,
-		 .unlocked_ioctl = drm_ioctl,
+		 .unlocked_ioctl = i810_ioctl,
 		 .mmap = drm_mmap,
 		 .poll = drm_poll,
 		 .fasync = drm_fasync,
diff --git a/drivers/gpu/drm/i810/i810_drv.h b/drivers/gpu/drm/i810/i810_drv.h
index 0743fe90f1e..c9339f48179 100644
--- a/drivers/gpu/drm/i810/i810_drv.h
+++ b/drivers/gpu/drm/i810/i810_drv.h
@@ -126,6 +126,7 @@ extern void i810_driver_reclaim_buffers_locked(struct drm_device *dev,
 					       struct drm_file *file_priv);
 extern int i810_driver_device_is_agp(struct drm_device *dev);
 
+extern long i810_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 extern struct drm_ioctl_desc i810_ioctls[];
 extern int i810_max_ioctl;
 
diff --git a/drivers/gpu/drm/i830/i830_dma.c b/drivers/gpu/drm/i830/i830_dma.c
index 7ee85ea507c..5168862c922 100644
--- a/drivers/gpu/drm/i830/i830_dma.c
+++ b/drivers/gpu/drm/i830/i830_dma.c
@@ -36,6 +36,7 @@
 #include "i830_drm.h"
 #include "i830_drv.h"
 #include <linux/interrupt.h>	/* For task queue support */
+#include <linux/smp_lock.h>
 #include <linux/pagemap.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
@@ -1509,21 +1510,34 @@ int i830_driver_dma_quiescent(struct drm_device *dev)
 	return 0;
 }
 
+/*
+ * call the drm_ioctl under the big kernel lock
+ * to lock against the i830_mmap_buffers function.
+ */
+long i830_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long ret;	/* drm_ioctl() returns long; don't narrow it to int */
+	lock_kernel();
+	ret = drm_ioctl(file, cmd, arg);
+	unlock_kernel();
+	return ret;
+}
+
 struct drm_ioctl_desc i830_ioctls[] = {
-	DRM_IOCTL_DEF(DRM_I830_INIT, i830_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_I830_VERTEX, i830_dma_vertex, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I830_CLEAR, i830_clear_bufs, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I830_FLUSH, i830_flush_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I830_GETAGE, i830_getage, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I830_GETBUF, i830_getbuf, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I830_SWAP, i830_swap_bufs, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I830_COPY, i830_copybuf, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I830_DOCOPY, i830_docopy, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I830_FLIP, i830_flip_bufs, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I830_IRQ_EMIT, i830_irq_emit, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I830_IRQ_WAIT, i830_irq_wait, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I830_GETPARAM, i830_getparam, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I830_SETPARAM, i830_setparam, DRM_AUTH)
+	DRM_IOCTL_DEF(DRM_I830_INIT, i830_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I830_VERTEX, i830_dma_vertex, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I830_CLEAR, i830_clear_bufs, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I830_FLUSH, i830_flush_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I830_GETAGE, i830_getage, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I830_GETBUF, i830_getbuf, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I830_SWAP, i830_swap_bufs, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I830_COPY, i830_copybuf, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I830_DOCOPY, i830_docopy, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I830_FLIP, i830_flip_bufs, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I830_IRQ_EMIT, i830_irq_emit, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I830_IRQ_WAIT, i830_irq_wait, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I830_GETPARAM, i830_getparam, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_I830_SETPARAM, i830_setparam, DRM_AUTH|DRM_UNLOCKED),
 };
 
 int i830_max_ioctl = DRM_ARRAY_SIZE(i830_ioctls);
diff --git a/drivers/gpu/drm/i830/i830_drv.c b/drivers/gpu/drm/i830/i830_drv.c
index 44f990bed8f..a5c66aa82f0 100644
--- a/drivers/gpu/drm/i830/i830_drv.c
+++ b/drivers/gpu/drm/i830/i830_drv.c
@@ -70,7 +70,7 @@ static struct drm_driver driver = {
 		 .owner = THIS_MODULE,
 		 .open = drm_open,
 		 .release = drm_release,
-		 .unlocked_ioctl = drm_ioctl,
+		 .unlocked_ioctl = i830_ioctl,
 		 .mmap = drm_mmap,
 		 .poll = drm_poll,
 		 .fasync = drm_fasync,
diff --git a/drivers/gpu/drm/i830/i830_drv.h b/drivers/gpu/drm/i830/i830_drv.h
index ecfd25a35da..0df1c720560 100644
--- a/drivers/gpu/drm/i830/i830_drv.h
+++ b/drivers/gpu/drm/i830/i830_drv.h
@@ -122,6 +122,7 @@ typedef struct drm_i830_private {
 
 } drm_i830_private_t;
 
+long i830_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 extern struct drm_ioctl_desc i830_ioctls[];
 extern int i830_max_ioctl;
 
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 53017ba0ab7..e2a4da7d7fa 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -52,7 +52,6 @@
 #include <linux/platform_device.h>
 #include <linux/pci.h>
 #include <linux/jiffies.h>
-#include <linux/smp_lock.h>	/* For (un)lock_kernel */
 #include <linux/dma-mapping.h>
 #include <linux/mm.h>
 #include <linux/cdev.h>
@@ -1152,6 +1151,7 @@ extern long drm_compat_ioctl(struct file *filp,
 extern int drm_lastclose(struct drm_device *dev);
 
 				/* Device support (drm_fops.h) */
+extern struct mutex drm_global_mutex;
 extern int drm_open(struct inode *inode, struct file *filp);
 extern int drm_stub_open(struct inode *inode, struct file *filp);
 extern int drm_fasync(int fd, struct file *filp, int on);
-- 
cgit v1.2.3-70-g09d2


From f6a21388bd255773cc80d4423afb4c69d4daa173 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Sat, 19 Jun 2010 04:08:29 +0000
Subject: POWER: Add JZ4740 battery driver.

Add support for the battery voltage measurement part of the JZ4740 ADC unit.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Anton Vorontsov <cbouatmailru@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/1416/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 drivers/power/Kconfig                |  11 +
 drivers/power/Makefile               |   1 +
 drivers/power/jz4740-battery.c       | 445 +++++++++++++++++++++++++++++++++++
 include/linux/power/jz4740-battery.h |  24 ++
 4 files changed, 481 insertions(+)
 create mode 100644 drivers/power/jz4740-battery.c
 create mode 100644 include/linux/power/jz4740-battery.h

(limited to 'include')

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 8e9ba177d81..1e5506be39b 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -142,4 +142,15 @@ config CHARGER_PCF50633
 	help
 	 Say Y to include support for NXP PCF50633 Main Battery Charger.
 
+config BATTERY_JZ4740
+	tristate "Ingenic JZ4740 battery"
+	depends on MACH_JZ4740
+	depends on MFD_JZ4740_ADC
+	help
+	  Say Y to enable support for the battery on Ingenic JZ4740 based
+	  boards.
+
+	  This driver can be built as a module. If so, the module will be
+	  called jz4740-battery.
+
 endif # POWER_SUPPLY
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index 00050809a6c..cf95009d9bc 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -34,3 +34,4 @@ obj-$(CONFIG_BATTERY_DA9030)	+= da9030_battery.o
 obj-$(CONFIG_BATTERY_MAX17040)	+= max17040_battery.o
 obj-$(CONFIG_BATTERY_Z2)	+= z2_battery.o
 obj-$(CONFIG_CHARGER_PCF50633)	+= pcf50633-charger.o
+obj-$(CONFIG_BATTERY_JZ4740)	+= jz4740-battery.o
diff --git a/drivers/power/jz4740-battery.c b/drivers/power/jz4740-battery.c
new file mode 100644
index 00000000000..20c4b952e9b
--- /dev/null
+++ b/drivers/power/jz4740-battery.c
@@ -0,0 +1,445 @@
+/*
+ * Battery measurement code for Ingenic JZ SOC.
+ *
+ * Copyright (C) 2009 Jiejing Zhang <kzjeef@gmail.com>
+ * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * based on tosa_battery.c
+ *
+ * Copyright (C) 2008 Marek Vasut <marek.vasut@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/mfd/core.h>
+#include <linux/power_supply.h>
+
+#include <linux/power/jz4740-battery.h>
+#include <linux/jz4740-adc.h>
+
+struct jz_battery {
+	struct jz_battery_platform_data *pdata;
+	struct platform_device *pdev;
+
+	struct resource *mem;
+	void __iomem *base;
+
+	int irq;
+	int charge_irq;
+
+	struct mfd_cell *cell;
+
+	int status;
+	long voltage;
+
+	struct completion read_completion;
+
+	struct power_supply battery;
+	struct delayed_work work;
+};
+
+static inline struct jz_battery *psy_to_jz_battery(struct power_supply *psy)
+{
+	return container_of(psy, struct jz_battery, battery);
+}
+
+static irqreturn_t jz_battery_irq_handler(int irq, void *devid)
+{
+	struct jz_battery *battery = devid;
+
+	complete(&battery->read_completion);
+	return IRQ_HANDLED;
+}
+
+static long jz_battery_read_voltage(struct jz_battery *battery)
+{
+	long t;	/* signed: the wait returns a negative errno when interrupted */
+	unsigned long val;
+	long voltage;
+
+	INIT_COMPLETION(battery->read_completion);
+
+	enable_irq(battery->irq);
+	battery->cell->enable(battery->pdev);
+
+	t = wait_for_completion_interruptible_timeout(&battery->read_completion,
+		HZ);
+
+	if (t > 0) {
+		val = readw(battery->base) & 0xfff;
+
+		if (battery->pdata->info.voltage_max_design <= 2500000)
+			val = (val * 78125UL) >> 7UL;
+		else
+			val = ((val * 924375UL) >> 9UL) + 33000;
+		voltage = (long)val;
+	} else {
+		voltage = t ? t : -ETIMEDOUT;
+	}
+
+	battery->cell->disable(battery->pdev);
+	disable_irq(battery->irq);
+
+	return voltage;
+}
+
+static int jz_battery_get_capacity(struct power_supply *psy)
+{
+	struct jz_battery *jz_battery = psy_to_jz_battery(psy);
+	struct power_supply_info *info = &jz_battery->pdata->info;
+	long voltage;
+	int ret;
+	int voltage_span;
+
+	voltage = jz_battery_read_voltage(jz_battery);
+
+	if (voltage < 0)
+		return voltage;
+
+	voltage_span = info->voltage_max_design - info->voltage_min_design;
+	ret = ((voltage - info->voltage_min_design) * 100) / voltage_span;
+
+	if (ret > 100)
+		ret = 100;
+	else if (ret < 0)
+		ret = 0;
+
+	return ret;
+}
+
+static int jz_battery_get_property(struct power_supply *psy,
+	enum power_supply_property psp, union power_supply_propval *val)
+{
+	struct jz_battery *jz_battery = psy_to_jz_battery(psy);
+	struct power_supply_info *info = &jz_battery->pdata->info;
+	long voltage;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		val->intval = jz_battery->status;
+		break;
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		val->intval = jz_battery->pdata->info.technology;
+		break;
+	case POWER_SUPPLY_PROP_HEALTH:
+		voltage = jz_battery_read_voltage(jz_battery);
+		if (voltage < info->voltage_min_design)
+			val->intval = POWER_SUPPLY_HEALTH_DEAD;
+		else
+			val->intval = POWER_SUPPLY_HEALTH_GOOD;
+		break;
+	case POWER_SUPPLY_PROP_CAPACITY:
+		val->intval = jz_battery_get_capacity(psy);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = jz_battery_read_voltage(jz_battery);
+		if (val->intval < 0)
+			return val->intval;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+		val->intval = info->voltage_max_design;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		val->intval = info->voltage_min_design;
+		break;
+	case POWER_SUPPLY_PROP_PRESENT:
+		val->intval = 1;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void jz_battery_external_power_changed(struct power_supply *psy)
+{
+	struct jz_battery *jz_battery = psy_to_jz_battery(psy);
+
+	cancel_delayed_work(&jz_battery->work);
+	schedule_delayed_work(&jz_battery->work, 0);
+}
+
+static irqreturn_t jz_battery_charge_irq(int irq, void *data)
+{
+	struct jz_battery *jz_battery = data;
+
+	cancel_delayed_work(&jz_battery->work);
+	schedule_delayed_work(&jz_battery->work, 0);
+
+	return IRQ_HANDLED;
+}
+
+static void jz_battery_update(struct jz_battery *jz_battery)
+{
+	int status;
+	long voltage;
+	bool has_changed = false;
+	int is_charging;
+
+	if (gpio_is_valid(jz_battery->pdata->gpio_charge)) {
+		is_charging = gpio_get_value(jz_battery->pdata->gpio_charge);
+		is_charging ^= jz_battery->pdata->gpio_charge_active_low;
+		if (is_charging)
+			status = POWER_SUPPLY_STATUS_CHARGING;
+		else
+			status = POWER_SUPPLY_STATUS_NOT_CHARGING;
+
+		if (status != jz_battery->status) {
+			jz_battery->status = status;
+			has_changed = true;
+		}
+	}
+	/* A negative return is an errno from a failed read, not a voltage. */
+	voltage = jz_battery_read_voltage(jz_battery);
+	if (voltage >= 0 && abs(voltage - jz_battery->voltage) < 50000) {
+		jz_battery->voltage = voltage;
+		has_changed = true;
+	}
+
+	if (has_changed)
+		power_supply_changed(&jz_battery->battery);
+}
+
+static enum power_supply_property jz_battery_properties[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_PRESENT,
+};
+
+static void jz_battery_work(struct work_struct *work)
+{
+	/* Too small interval will increase system workload */
+	const int interval = HZ * 30;
+	struct jz_battery *jz_battery = container_of(work, struct jz_battery,
+					    work.work);
+
+	jz_battery_update(jz_battery);
+	schedule_delayed_work(&jz_battery->work, interval);
+}
+
+static int __devinit jz_battery_probe(struct platform_device *pdev)
+{
+	int ret = 0;
+	struct jz_battery_platform_data *pdata = pdev->dev.parent->platform_data;
+	struct jz_battery *jz_battery;
+	struct power_supply *battery;
+
+	jz_battery = kzalloc(sizeof(*jz_battery), GFP_KERNEL);
+	if (!jz_battery) {
+		dev_err(&pdev->dev, "Failed to allocate driver structure\n");
+		return -ENOMEM;
+	}
+
+	jz_battery->cell = pdev->dev.platform_data;
+
+	jz_battery->irq = platform_get_irq(pdev, 0);
+	if (jz_battery->irq < 0) {
+		ret = jz_battery->irq;
+		dev_err(&pdev->dev, "Failed to get platform irq: %d\n", ret);
+		goto err_free;
+	}
+
+	jz_battery->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!jz_battery->mem) {
+		ret = -ENOENT;
+		dev_err(&pdev->dev, "Failed to get platform mmio resource\n");
+		goto err_free;
+	}
+
+	jz_battery->mem = request_mem_region(jz_battery->mem->start,
+				resource_size(jz_battery->mem),	pdev->name);
+	if (!jz_battery->mem) {
+		ret = -EBUSY;
+		dev_err(&pdev->dev, "Failed to request mmio memory region\n");
+		goto err_free;
+	}
+
+	jz_battery->base = ioremap_nocache(jz_battery->mem->start,
+				resource_size(jz_battery->mem));
+	if (!jz_battery->base) {
+		ret = -EBUSY;
+		dev_err(&pdev->dev, "Failed to ioremap mmio memory\n");
+		goto err_release_mem_region;
+	}
+
+	battery = &jz_battery->battery;
+	battery->name = pdata->info.name;
+	battery->type = POWER_SUPPLY_TYPE_BATTERY;
+	battery->properties	= jz_battery_properties;
+	battery->num_properties	= ARRAY_SIZE(jz_battery_properties);
+	battery->get_property = jz_battery_get_property;
+	battery->external_power_changed = jz_battery_external_power_changed;
+	battery->use_for_apm = 1;
+
+	jz_battery->pdata = pdata;
+	jz_battery->pdev = pdev;
+
+	init_completion(&jz_battery->read_completion);
+
+	INIT_DELAYED_WORK(&jz_battery->work, jz_battery_work);
+
+	ret = request_irq(jz_battery->irq, jz_battery_irq_handler, 0, pdev->name,
+			jz_battery);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to request irq %d\n", ret);
+		goto err_iounmap;
+	}
+	disable_irq(jz_battery->irq);
+
+	if (gpio_is_valid(pdata->gpio_charge)) {
+		ret = gpio_request(pdata->gpio_charge, dev_name(&pdev->dev));
+		if (ret) {
+			dev_err(&pdev->dev, "charger state gpio request failed.\n");
+			goto err_free_irq;
+		}
+		ret = gpio_direction_input(pdata->gpio_charge);
+		if (ret) {
+			dev_err(&pdev->dev, "charger state gpio set direction failed.\n");
+			goto err_free_gpio;
+		}
+
+		jz_battery->charge_irq = gpio_to_irq(pdata->gpio_charge);
+
+		if (jz_battery->charge_irq >= 0) {
+			ret = request_irq(jz_battery->charge_irq,
+				    jz_battery_charge_irq,
+				    IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+				    dev_name(&pdev->dev), jz_battery);
+			if (ret) {
+				dev_err(&pdev->dev, "Failed to request charge irq: %d\n", ret);
+				goto err_free_gpio;
+			}
+		}
+	} else {
+		jz_battery->charge_irq = -1;
+	}
+
+	if (jz_battery->pdata->info.voltage_max_design <= 2500000)
+		jz4740_adc_set_config(pdev->dev.parent, JZ_ADC_CONFIG_BAT_MB,
+			JZ_ADC_CONFIG_BAT_MB);
+	else
+		jz4740_adc_set_config(pdev->dev.parent, JZ_ADC_CONFIG_BAT_MB, 0);
+
+	ret = power_supply_register(&pdev->dev, &jz_battery->battery);
+	if (ret) {
+		dev_err(&pdev->dev, "power supply battery register failed.\n");
+		goto err_free_charge_irq;
+	}
+
+	platform_set_drvdata(pdev, jz_battery);
+	schedule_delayed_work(&jz_battery->work, 0);
+
+	return 0;
+
+err_free_charge_irq:
+	if (jz_battery->charge_irq >= 0)
+		free_irq(jz_battery->charge_irq, jz_battery);
+err_free_gpio:
+	if (gpio_is_valid(pdata->gpio_charge))
+		gpio_free(jz_battery->pdata->gpio_charge);
+err_free_irq:
+	free_irq(jz_battery->irq, jz_battery);
+err_iounmap:
+	platform_set_drvdata(pdev, NULL);
+	iounmap(jz_battery->base);
+err_release_mem_region:
+	release_mem_region(jz_battery->mem->start, resource_size(jz_battery->mem));
+err_free:
+	kfree(jz_battery);
+	return ret;
+}
+
+static int __devexit jz_battery_remove(struct platform_device *pdev)
+{
+	struct jz_battery *jz_battery = platform_get_drvdata(pdev);
+
+	cancel_delayed_work_sync(&jz_battery->work);
+
+	if (gpio_is_valid(jz_battery->pdata->gpio_charge)) {
+		if (jz_battery->charge_irq >= 0)
+			free_irq(jz_battery->charge_irq, jz_battery);
+		gpio_free(jz_battery->pdata->gpio_charge);
+	}
+
+	power_supply_unregister(&jz_battery->battery);
+
+	free_irq(jz_battery->irq, jz_battery);
+
+	iounmap(jz_battery->base);
+	release_mem_region(jz_battery->mem->start, resource_size(jz_battery->mem));
+	kfree(jz_battery);	/* allocated with kzalloc() in jz_battery_probe() */
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int jz_battery_suspend(struct device *dev)
+{
+	struct jz_battery *jz_battery = dev_get_drvdata(dev);
+
+	cancel_delayed_work_sync(&jz_battery->work);
+	jz_battery->status = POWER_SUPPLY_STATUS_UNKNOWN;
+
+	return 0;
+}
+
+static int jz_battery_resume(struct device *dev)
+{
+	struct jz_battery *jz_battery = dev_get_drvdata(dev);
+
+	schedule_delayed_work(&jz_battery->work, 0);
+
+	return 0;
+}
+
+static const struct dev_pm_ops jz_battery_pm_ops = {
+	.suspend	= jz_battery_suspend,
+	.resume		= jz_battery_resume,
+};
+
+#define JZ_BATTERY_PM_OPS (&jz_battery_pm_ops)
+#else
+#define JZ_BATTERY_PM_OPS NULL
+#endif
+
+static struct platform_driver jz_battery_driver = {
+	.probe		= jz_battery_probe,
+	.remove		= __devexit_p(jz_battery_remove),
+	.driver = {
+		.name = "jz4740-battery",
+		.owner = THIS_MODULE,
+		.pm = JZ_BATTERY_PM_OPS,
+	},
+};
+
+static int __init jz_battery_init(void)
+{
+	return platform_driver_register(&jz_battery_driver);
+}
+module_init(jz_battery_init);
+
+static void __exit jz_battery_exit(void)
+{
+	platform_driver_unregister(&jz_battery_driver);
+}
+module_exit(jz_battery_exit);
+
+MODULE_ALIAS("platform:jz4740-battery");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_DESCRIPTION("JZ4740 SoC battery driver");
diff --git a/include/linux/power/jz4740-battery.h b/include/linux/power/jz4740-battery.h
new file mode 100644
index 00000000000..19c9610c720
--- /dev/null
+++ b/include/linux/power/jz4740-battery.h
@@ -0,0 +1,24 @@
+/*
+ *  Copyright (C) 2009, Jiejing Zhang <kzjeef@gmail.com>
+ *
+ *  This program is free software; you can redistribute	 it and/or modify it
+ *  under  the terms of	 the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the	License, or (at your
+ *  option) any later version.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __JZ4740_BATTERY_H
+#define __JZ4740_BATTERY_H
+
+struct jz_battery_platform_data {
+	struct power_supply_info info;
+	int gpio_charge;	/* GPIO port of Charger state */
+	int gpio_charge_active_low;
+};
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From 534af1082329392bc29f6badf815e69ae2ae0f4c Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 5 Aug 2010 09:22:20 -0500
Subject: kgdb,kdb: individual register set and get API

The kdb shell specification includes the ability to get and set
architecture specific registers by name.

For the time being individual register get and set will be implemented
on a per architecture basis.  If an architecture defines
DBG_MAX_REG_NUM > 0 then kdb and the gdbstub will use the capability
for individually getting and setting architecture specific registers.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 include/linux/kgdb.h        |  13 +++++
 kernel/debug/gdbstub.c      |  26 +++++++++
 kernel/debug/kdb/kdb_main.c | 132 ++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 159 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 9340f34d1bb..d5eb882e01f 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -90,6 +90,19 @@ struct kgdb_bkpt {
 	enum kgdb_bpstate	state;
 };
 
+struct dbg_reg_def_t {
+	char *name;
+	int size;
+	int offset;
+};
+
+#ifndef DBG_MAX_REG_NUM
+#define DBG_MAX_REG_NUM 0
+#else
+extern struct dbg_reg_def_t dbg_reg_def[];
+extern char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs);
+extern int dbg_set_reg(int regno, void *mem, struct pt_regs *regs);
+#endif
 #ifndef KGDB_MAX_BREAKPOINTS
 # define KGDB_MAX_BREAKPOINTS	1000
 #endif
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index e117cfd7588..006bad8905d 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -328,6 +328,32 @@ static int kgdb_ebin2mem(char *buf, char *mem, int count)
 	return probe_kernel_write(mem, c, size);
 }
 
+#if DBG_MAX_REG_NUM > 0
+void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+	int i;
+	int idx = 0;
+	char *ptr = (char *)gdb_regs;
+
+	for (i = 0; i < DBG_MAX_REG_NUM; i++) {
+		dbg_get_reg(i, ptr + idx, regs);
+		idx += dbg_reg_def[i].size;
+	}
+}
+
+void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+	int i;
+	int idx = 0;
+	char *ptr = (char *)gdb_regs;
+
+	for (i = 0; i < DBG_MAX_REG_NUM; i++) {
+		dbg_set_reg(i, ptr + idx, regs);
+		idx += dbg_reg_def[i].size;
+	}
+}
+#endif /* DBG_MAX_REG_NUM > 0 */
+
 /* Write memory due to an 'M' or 'X' packet. */
 static int write_mem_msg(int binary)
 {
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index ebe4a287419..8577e45a9a5 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -312,7 +312,7 @@ int kdbgetularg(const char *arg, unsigned long *value)
 
 	if (endp == arg) {
 		/*
-		 * Try base 16, for us folks too lazy to type the
+		 * Also try base 16, for us folks too lazy to type the
 		 * leading 0x...
 		 */
 		val = simple_strtoul(arg, &endp, 16);
@@ -325,6 +325,25 @@ int kdbgetularg(const char *arg, unsigned long *value)
 	return 0;
 }
 
+int kdbgetu64arg(const char *arg, u64 *value)
+{
+	char *endp;
+	u64 val;
+
+	val = simple_strtoull(arg, &endp, 0);
+
+	if (endp == arg) {
+
+		val = simple_strtoull(arg, &endp, 16);
+		if (endp == arg)
+			return KDB_BADINT;
+	}
+
+	*value = val;
+
+	return 0;
+}
+
 /*
  * kdb_set - This function implements the 'set' command.  Alter an
  *	existing environment variable or create a new one.
@@ -1770,11 +1789,65 @@ static int kdb_go(int argc, const char **argv)
  */
 static int kdb_rd(int argc, const char **argv)
 {
-	int diag = kdb_check_regs();
-	if (diag)
-		return diag;
+	int len = kdb_check_regs();
+#if DBG_MAX_REG_NUM > 0
+	int i;
+	char *rname;
+	int rsize;
+	u64 reg64;
+	u32 reg32;
+	u16 reg16;
+	u8 reg8;
+
+	if (len)
+		return len;
+
+	for (i = 0; i < DBG_MAX_REG_NUM; i++) {
+		rsize = dbg_reg_def[i].size * 2;
+		if (rsize > 16)
+			rsize = 2;
+		if (len + strlen(dbg_reg_def[i].name) + 4 + rsize > 80) {
+			len = 0;
+			kdb_printf("\n");
+		}
+		if (len)
+			len += kdb_printf("  ");
+		switch(dbg_reg_def[i].size * 8) {
+		case 8:
+			rname = dbg_get_reg(i, &reg8, kdb_current_regs);
+			if (!rname)
+				break;
+			len += kdb_printf("%s: %02x", rname, reg8);
+			break;
+		case 16:
+			rname = dbg_get_reg(i, &reg16, kdb_current_regs);
+			if (!rname)
+				break;
+			len += kdb_printf("%s: %04x", rname, reg16);
+			break;
+		case 32:
+			rname = dbg_get_reg(i, &reg32, kdb_current_regs);
+			if (!rname)
+				break;
+			len += kdb_printf("%s: %08x", rname, reg32);
+			break;
+		case 64:
+			rname = dbg_get_reg(i, &reg64, kdb_current_regs);
+			if (!rname)
+				break;
+			len += kdb_printf("%s: %016llx", rname, reg64);
+			break;
+		default:
+			len += kdb_printf("%s: ??", dbg_reg_def[i].name);
+		}
+	}
+	kdb_printf("\n");
+#else
+	if (len)
+		return len;
 
 	kdb_dumpregs(kdb_current_regs);
+#endif
 	return 0;
 }
 
@@ -1782,32 +1855,67 @@ static int kdb_rd(int argc, const char **argv)
  * kdb_rm - This function implements the 'rm' (register modify)  command.
  *	rm register-name new-contents
  * Remarks:
- *	Currently doesn't allow modification of control or
- *	debug registers.
+ *	Allows register modification with the same restrictions as gdb
  */
 static int kdb_rm(int argc, const char **argv)
 {
+#if DBG_MAX_REG_NUM > 0
 	int diag;
-	int ind = 0;
-	unsigned long contents;
+	const char *rname;
+	int i;
+	u64 reg64;
+	u32 reg32;
+	u16 reg16;
+	u8 reg8;
 
 	if (argc != 2)
 		return KDB_ARGCOUNT;
 	/*
 	 * Allow presence or absence of leading '%' symbol.
 	 */
-	if (argv[1][0] == '%')
-		ind = 1;
+	rname = argv[1];
+	if (*rname == '%')
+		rname++;
 
-	diag = kdbgetularg(argv[2], &contents);
+	diag = kdbgetu64arg(argv[2], &reg64);
 	if (diag)
 		return diag;
 
 	diag = kdb_check_regs();
 	if (diag)
 		return diag;
+
+	diag = KDB_BADREG;
+	for (i = 0; i < DBG_MAX_REG_NUM; i++) {
+		if (strcmp(rname, dbg_reg_def[i].name) == 0) {
+			diag = 0;
+			break;
+		}
+	}
+	if (!diag) {
+		switch(dbg_reg_def[i].size * 8) {
+		case 8:
+			reg8 = reg64;
+			dbg_set_reg(i, &reg8, kdb_current_regs);
+			break;
+		case 16:
+			reg16 = reg64;
+			dbg_set_reg(i, &reg16, kdb_current_regs);
+			break;
+		case 32:
+			reg32 = reg64;
+			dbg_set_reg(i, &reg32, kdb_current_regs);
+			break;
+		case 64:
+			dbg_set_reg(i, &reg64, kdb_current_regs);
+			break;
+		}
+	}
+	return diag;
+#else
 	kdb_printf("ERROR: Register set currently not implemented\n");
-	return 0;
+    return 0;
+#endif
 }
 
 #if defined(CONFIG_MAGIC_SYSRQ)
-- 
cgit v1.2.3-70-g09d2


From 55751145dc1e08e16df418cdd101661f5c6ac991 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 5 Aug 2010 09:22:21 -0500
Subject: gdbstub: Implement gdbserial 'p' and 'P' packets

The gdbserial 'p' and 'P' packets allow gdb to individually get and
set registers instead of querying for all the available registers.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 include/linux/kgdb.h   |  2 +-
 kernel/debug/gdbstub.c | 97 +++++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 78 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index d5eb882e01f..cc96f0f23e0 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -294,7 +294,7 @@ extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops);
 extern struct kgdb_io *dbg_io_ops;
 
 extern int kgdb_hex2long(char **ptr, unsigned long *long_val);
-extern int kgdb_mem2hex(char *mem, char *buf, int count);
+extern char *kgdb_mem2hex(char *mem, char *buf, int count);
 extern int kgdb_hex2mem(char *buf, char *mem, int count);
 
 extern int kgdb_isremovedbreak(unsigned long addr);
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index 006bad8905d..4ef9dddf458 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -225,7 +225,7 @@ void gdbstub_msg_write(const char *s, int len)
  * buf.  Return a pointer to the last char put in buf (null). May
  * return an error.
  */
-int kgdb_mem2hex(char *mem, char *buf, int count)
+char *kgdb_mem2hex(char *mem, char *buf, int count)
 {
 	char *tmp;
 	int err;
@@ -237,17 +237,16 @@ int kgdb_mem2hex(char *mem, char *buf, int count)
 	tmp = buf + count;
 
 	err = probe_kernel_read(tmp, mem, count);
-	if (!err) {
-		while (count > 0) {
-			buf = pack_hex_byte(buf, *tmp);
-			tmp++;
-			count--;
-		}
-
-		*buf = 0;
+	if (err)
+		return NULL;
+	while (count > 0) {
+		buf = pack_hex_byte(buf, *tmp);
+		tmp++;
+		count--;
 	}
+	*buf = 0;
 
-	return err;
+	return buf;
 }
 
 /*
@@ -481,8 +480,7 @@ static void gdb_cmd_status(struct kgdb_state *ks)
 	pack_hex_byte(&remcom_out_buffer[1], ks->signo);
 }
 
-/* Handle the 'g' get registers request */
-static void gdb_cmd_getregs(struct kgdb_state *ks)
+static void gdb_get_regs_helper(struct kgdb_state *ks)
 {
 	struct task_struct *thread;
 	void *local_debuggerinfo;
@@ -523,6 +521,12 @@ static void gdb_cmd_getregs(struct kgdb_state *ks)
 		 */
 		sleeping_thread_to_gdb_regs(gdb_regs, thread);
 	}
+}
+
+/* Handle the 'g' get registers request */
+static void gdb_cmd_getregs(struct kgdb_state *ks)
+{
+	gdb_get_regs_helper(ks);
 	kgdb_mem2hex((char *)gdb_regs, remcom_out_buffer, NUMREGBYTES);
 }
 
@@ -545,13 +549,13 @@ static void gdb_cmd_memread(struct kgdb_state *ks)
 	char *ptr = &remcom_in_buffer[1];
 	unsigned long length;
 	unsigned long addr;
-	int err;
+	char *err;
 
 	if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' &&
 					kgdb_hex2long(&ptr, &length) > 0) {
 		err = kgdb_mem2hex((char *)addr, remcom_out_buffer, length);
-		if (err)
-			error_packet(remcom_out_buffer, err);
+		if (!err)
+			error_packet(remcom_out_buffer, -EINVAL);
 	} else {
 		error_packet(remcom_out_buffer, -EINVAL);
 	}
@@ -568,6 +572,52 @@ static void gdb_cmd_memwrite(struct kgdb_state *ks)
 		strcpy(remcom_out_buffer, "OK");
 }
 
+#if DBG_MAX_REG_NUM > 0
+static char *gdb_hex_reg_helper(int regnum, char *out)
+{
+	int i;
+	int offset = 0;
+
+	for (i = 0; i < regnum; i++)
+		offset += dbg_reg_def[i].size;
+	return kgdb_mem2hex((char *)gdb_regs + offset, out,
+			    dbg_reg_def[i].size);
+}
+
+/* Handle the 'p' individual register get */
+static void gdb_cmd_reg_get(struct kgdb_state *ks)
+{
+	unsigned long regnum;
+	char *ptr = &remcom_in_buffer[1];
+
+	kgdb_hex2long(&ptr, &regnum);
+	if (regnum >= DBG_MAX_REG_NUM) {
+		error_packet(remcom_out_buffer, -EINVAL);
+		return;
+	}
+	gdb_get_regs_helper(ks);
+	gdb_hex_reg_helper(regnum, remcom_out_buffer);
+}
+
+/* Handle the 'P' individual register set */
+static void gdb_cmd_reg_set(struct kgdb_state *ks)
+{
+	unsigned long regnum;
+	char *ptr = &remcom_in_buffer[1];
+
+	kgdb_hex2long(&ptr, &regnum);
+	if (*ptr++ != '=' ||
+	    !(!kgdb_usethread || kgdb_usethread == current) ||
+	    !dbg_get_reg(regnum, gdb_regs, ks->linux_regs)) {
+		error_packet(remcom_out_buffer, -EINVAL);
+		return;
+	}
+	kgdb_hex2mem(ptr, (char *)gdb_regs, dbg_reg_def[regnum].size);
+	dbg_set_reg(regnum, gdb_regs, ks->linux_regs);
+	strcpy(remcom_out_buffer, "OK");
+}
+#endif /* DBG_MAX_REG_NUM > 0 */
+
 /* Handle the 'X' memory binary write bytes */
 static void gdb_cmd_binwrite(struct kgdb_state *ks)
 {
@@ -874,8 +924,11 @@ int gdb_serial_stub(struct kgdb_state *ks)
 	int error = 0;
 	int tmp;
 
-	/* Clear the out buffer. */
+	/* Initialize comm buffer and globals. */
 	memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
+	kgdb_usethread = kgdb_info[ks->cpu].task;
+	ks->kgdb_usethreadid = shadow_pid(kgdb_info[ks->cpu].task->pid);
+	ks->pass_exception = 0;
 
 	if (kgdb_connected) {
 		unsigned char thref[BUF_THREAD_ID_SIZE];
@@ -892,10 +945,6 @@ int gdb_serial_stub(struct kgdb_state *ks)
 		put_packet(remcom_out_buffer);
 	}
 
-	kgdb_usethread = kgdb_info[ks->cpu].task;
-	ks->kgdb_usethreadid = shadow_pid(kgdb_info[ks->cpu].task->pid);
-	ks->pass_exception = 0;
-
 	while (1) {
 		error = 0;
 
@@ -920,6 +969,14 @@ int gdb_serial_stub(struct kgdb_state *ks)
 		case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA..AA */
 			gdb_cmd_memwrite(ks);
 			break;
+#if DBG_MAX_REG_NUM > 0
+		case 'p': /* pXX Return gdb register XX (in hex) */
+			gdb_cmd_reg_get(ks);
+			break;
+		case 'P': /* PXX=aaaa Set gdb register XX to aaaa (in hex) */
+			gdb_cmd_reg_set(ks);
+			break;
+#endif /* DBG_MAX_REG_NUM > 0 */
 		case 'X': /* XAA..AA,LLLL: Write LLLL bytes at address AA..AA */
 			gdb_cmd_binwrite(ks);
 			break;
-- 
cgit v1.2.3-70-g09d2


From b45cfba4e9005d64d419718e7ff7f7cab44c1994 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Thu, 5 Aug 2010 09:22:30 -0500
Subject: vt,console,kdb: implement atomic console enter/leave functions

These functions allow the kernel debugger to save and restore the
state of the system console.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
CC: David Airlie <airlied@linux.ie>
CC: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/char/vt.c       | 61 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/console.h | 13 +++++++++++
 2 files changed, 74 insertions(+)

(limited to 'include')

diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 7cdb6ee569c..117ce99115d 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -187,10 +187,15 @@ static DECLARE_WORK(console_work, console_callback);
  * fg_console is the current virtual console,
  * last_console is the last used one,
  * want_console is the console we want to switch to,
+ * saved_* variants are for save/restore around kernel debugger enter/leave
  */
 int fg_console;
 int last_console;
 int want_console = -1;
+int saved_fg_console;
+int saved_last_console;
+int saved_want_console;
+int saved_vc_mode;
 
 /*
  * For each existing display, we have a pointer to console currently visible
@@ -3413,6 +3418,62 @@ int con_is_bound(const struct consw *csw)
 }
 EXPORT_SYMBOL(con_is_bound);
 
+/**
+ * con_debug_enter - prepare the console for the kernel debugger
+ * @vc: virtual console to prepare for the debugger
+ *
+ * Called when the console is taken over by the kernel debugger, this
+ * function needs to save the current console state, then put the console
+ * into a state suitable for the kernel debugger.
+ *
+ * RETURNS:
+ * Zero on success, nonzero if a failure occurred when trying to prepare
+ * the console for the debugger.
+ */
+int con_debug_enter(struct vc_data *vc)
+{
+	int ret = 0;
+
+	saved_fg_console = fg_console;
+	saved_last_console = last_console;
+	saved_want_console = want_console;
+	saved_vc_mode = vc->vc_mode;
+	vc->vc_mode = KD_TEXT;
+	console_blanked = 0;
+	if (vc->vc_sw->con_debug_enter)
+		ret = vc->vc_sw->con_debug_enter(vc);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(con_debug_enter);
+
+/**
+ * con_debug_leave - restore console state
+ * @sw: console driver
+ *
+ * Restore the console state to what it was before the kernel debugger
+ * was invoked.
+ *
+ * RETURNS:
+ * Zero on success, nonzero if a failure occurred when trying to restore
+ * the console.
+ */
+int con_debug_leave(void)
+{
+	struct vc_data *vc;
+	int ret = 0;
+
+	fg_console = saved_fg_console;
+	last_console = saved_last_console;
+	want_console = saved_want_console;
+	vc_cons[fg_console].d->vc_mode = saved_vc_mode;
+
+	vc = vc_cons[fg_console].d;
+	if (vc->vc_sw->con_debug_leave)
+		ret = vc->vc_sw->con_debug_leave(vc);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(con_debug_leave);
+
 /**
  * register_con_driver - register console driver to console layer
  * @csw: console driver
diff --git a/include/linux/console.h b/include/linux/console.h
index dcca5339ceb..f76fc297322 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -55,6 +55,16 @@ struct consw {
 	void	(*con_invert_region)(struct vc_data *, u16 *, int);
 	u16    *(*con_screen_pos)(struct vc_data *, int);
 	unsigned long (*con_getxy)(struct vc_data *, unsigned long, int *, int *);
+	/*
+	 * Prepare the console for the debugger.  This includes, but is not
+	 * limited to, unblanking the console, loading an appropriate
+	 * palette, and allowing debugger generated output.
+	 */
+	int	(*con_debug_enter)(struct vc_data *);
+	/*
+	 * Restore the console to its pre-debug state as closely as possible.
+	 */
+	int	(*con_debug_leave)(struct vc_data *);
 };
 
 extern const struct consw *conswitchp;
@@ -69,6 +79,9 @@ int register_con_driver(const struct consw *csw, int first, int last);
 int unregister_con_driver(const struct consw *csw);
 int take_over_console(const struct consw *sw, int first, int last, int deflt);
 void give_up_console(const struct consw *sw);
+int con_debug_enter(struct vc_data *vc);
+int con_debug_leave(void);
+
 /* scroll */
 #define SM_UP       (1)
 #define SM_DOWN     (2)
-- 
cgit v1.2.3-70-g09d2


From 81d4450732c68aa728f2c86c0c2993c6cfc3d032 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 5 Aug 2010 09:22:30 -0500
Subject: vt,console,kdb: automatically set kdb LINES variable

The kernel console interface stores the number of lines it is
configured to use. The kdb debugger can greatly benefit by knowing how
many lines there are on the console for the pager functionality
without having the end user compile in the setting or have to
repeatedly change it at run time.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
CC: David Airlie <airlied@linux.ie>
CC: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/char/vt.c              | 17 +++++++++++++++++
 include/linux/kdb.h            |  4 ++++
 kernel/debug/kdb/kdb_private.h |  2 --
 3 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 117ce99115d..4a9eb3044e5 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -104,6 +104,7 @@
 #include <linux/io.h>
 #include <asm/system.h>
 #include <linux/uaccess.h>
+#include <linux/kdb.h>
 
 #define MAX_NR_CON_DRIVER 16
 
@@ -3442,6 +3443,22 @@ int con_debug_enter(struct vc_data *vc)
 	console_blanked = 0;
 	if (vc->vc_sw->con_debug_enter)
 		ret = vc->vc_sw->con_debug_enter(vc);
+#ifdef CONFIG_KGDB_KDB
+	/* Set the initial LINES variable if it is not already set */
+	if (vc->vc_rows < 999) {
+		int linecount;
+		char lns[4];
+		const char *setargs[3] = {
+			"set",
+			"LINES",
+			lns,
+		};
+		if (kdbgetintenv(setargs[0], &linecount)) {
+			snprintf(lns, 4, "%i", vc->vc_rows);
+			kdb_set(2, setargs);
+		}
+	}
+#endif /* CONFIG_KGDB_KDB */
 	return ret;
 }
 EXPORT_SYMBOL_GPL(con_debug_enter);
diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index ccb2b3ec0fe..ea6e5244ed3 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -114,4 +114,8 @@ enum {
 	KDB_INIT_EARLY,
 	KDB_INIT_FULL,
 };
+
+extern int kdbgetintenv(const char *, int *);
+extern int kdb_set(int, const char **);
+
 #endif	/* !_KDB_H */
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index 97d3ba69775..c438f545a32 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -144,9 +144,7 @@ extern int kdb_getword(unsigned long *, unsigned long, size_t);
 extern int kdb_putword(unsigned long, unsigned long, size_t);
 
 extern int kdbgetularg(const char *, unsigned long *);
-extern int kdb_set(int, const char **);
 extern char *kdbgetenv(const char *);
-extern int kdbgetintenv(const char *, int *);
 extern int kdbgetaddrarg(int, const char **, int*, unsigned long *,
 			 long *, char **);
 extern int kdbgetsymval(const char *, kdb_symtab_t *);
-- 
cgit v1.2.3-70-g09d2


From d219adc1228a3887486b58a430e736b0831f192c Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Mon, 2 Aug 2010 12:05:41 -0700
Subject: fb: add hooks to handle KDB enter/exit

Add fb ops to handle enter/exit of the kernel debugger.  If present, the
fb core will register them with KGDB and they'll be called when the
debugger is entered and exited.  The new functions are responsible for
switching to an appropriate debug framebuffer and restoring the
interrupted state at exit time.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 drivers/video/console/fbcon.c | 26 ++++++++++++++++++++++++++
 drivers/video/console/fbcon.h |  1 +
 include/linux/fb.h            | 13 +++++++++++++
 3 files changed, 40 insertions(+)

(limited to 'include')

diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index b0a3fa00706..3b3f5749af9 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -2342,6 +2342,30 @@ static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch)
 	return 0;
 }
 
+static int fbcon_debug_enter(struct vc_data *vc)
+{
+	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+	struct fbcon_ops *ops = info->fbcon_par;
+
+	ops->save_graphics = ops->graphics;
+	ops->graphics = 0;
+	if (info->fbops->fb_debug_enter)
+		info->fbops->fb_debug_enter(info);
+	fbcon_set_palette(vc, color_table);
+	return 0;
+}
+
+static int fbcon_debug_leave(struct vc_data *vc)
+{
+	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+	struct fbcon_ops *ops = info->fbcon_par;
+
+	ops->graphics = ops->save_graphics;
+	if (info->fbops->fb_debug_leave)
+		info->fbops->fb_debug_leave(info);
+	return 0;
+}
+
 static int fbcon_get_font(struct vc_data *vc, struct console_font *font)
 {
 	u8 *fontdata = vc->vc_font.data;
@@ -3276,6 +3300,8 @@ static const struct consw fb_con = {
 	.con_screen_pos 	= fbcon_screen_pos,
 	.con_getxy 		= fbcon_getxy,
 	.con_resize             = fbcon_resize,
+	.con_debug_enter	= fbcon_debug_enter,
+	.con_debug_leave	= fbcon_debug_leave,
 };
 
 static struct notifier_block fbcon_event_notifier = {
diff --git a/drivers/video/console/fbcon.h b/drivers/video/console/fbcon.h
index 89a346880ec..6bd2e0c7f20 100644
--- a/drivers/video/console/fbcon.h
+++ b/drivers/video/console/fbcon.h
@@ -74,6 +74,7 @@ struct fbcon_ops {
 	int    cursor_reset;
 	int    blank_state;
 	int    graphics;
+	int    save_graphics; /* for debug enter/leave */
 	int    flags;
 	int    rotate;
 	int    cur_rotate;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index e7445df44d6..0c5659c41b0 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -3,6 +3,9 @@
 
 #include <linux/types.h>
 #include <linux/i2c.h>
+#ifdef __KERNEL__
+#include <linux/kgdb.h>
+#endif /* __KERNEL__ */
 
 /* Definitions of frame buffers						*/
 
@@ -607,6 +610,12 @@ struct fb_deferred_io {
  * LOCKING NOTE: those functions must _ALL_ be called with the console
  * semaphore held, this is the only suitable locking mechanism we have
  * in 2.6. Some may be called at interrupt time at this point though.
+ *
+ * The exception to this is the debug related hooks.  Putting the fb
+ * into a debug state (e.g. flipping to the kernel console) and restoring
+ * it must be done in a lock-free manner, so low level drivers should
+ * keep track of the initial console (if applicable) and may need to
+ * perform direct, unlocked hardware writes in these hooks.
  */
 
 struct fb_ops {
@@ -676,6 +685,10 @@ struct fb_ops {
 
 	/* teardown any resources to do with this framebuffer */
 	void (*fb_destroy)(struct fb_info *info);
+
+	/* called at KDB enter and leave time to prepare the console */
+	int (*fb_debug_enter)(struct fb_info *info);
+	int (*fb_debug_leave)(struct fb_info *info);
 };
 
 #ifdef CONFIG_FB_TILEBLITTING
-- 
cgit v1.2.3-70-g09d2


From 1a7aba7f4e45014c5a4741164b1ecb4ffe616fb7 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Thu, 5 Aug 2010 09:22:31 -0500
Subject: drm: add KGDB/KDB support

Implement the callbacks for KDB entry/exit via the drm helpers.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 drivers/gpu/drm/drm_fb_helper.c | 74 +++++++++++++++++++++++++++++++++++++++++
 include/drm/drm_crtc_helper.h   |  2 ++
 include/drm/drm_fb_helper.h     |  5 +++
 3 files changed, 81 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 719662034bb..6245add3768 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -241,6 +241,80 @@ static int drm_fb_helper_parse_command_line(struct drm_fb_helper *fb_helper)
 	return 0;
 }
 
+int drm_fb_helper_debug_enter(struct fb_info *info)
+{
+	struct drm_fb_helper *helper = info->par;
+	struct drm_crtc_helper_funcs *funcs;
+	int i;
+
+	if (list_empty(&kernel_fb_helper_list))
+		return false;
+
+	list_for_each_entry(helper, &kernel_fb_helper_list, kernel_fb_list) {
+		for (i = 0; i < helper->crtc_count; i++) {
+			struct drm_mode_set *mode_set =
+				&helper->crtc_info[i].mode_set;
+
+			if (!mode_set->crtc->enabled)
+				continue;
+
+			funcs =	mode_set->crtc->helper_private;
+			funcs->mode_set_base_atomic(mode_set->crtc,
+						    mode_set->fb,
+						    mode_set->x,
+						    mode_set->y);
+
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_fb_helper_debug_enter);
+
+/* Find the real fb for a given fb helper CRTC */
+static struct drm_framebuffer *drm_mode_config_fb(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_crtc *c;
+
+	list_for_each_entry(c, &dev->mode_config.crtc_list, head) {
+		if (crtc->base.id == c->base.id)
+			return c->fb;
+	}
+
+	return NULL;
+}
+
+int drm_fb_helper_debug_leave(struct fb_info *info)
+{
+	struct drm_fb_helper *helper = info->par;
+	struct drm_crtc *crtc;
+	struct drm_crtc_helper_funcs *funcs;
+	struct drm_framebuffer *fb;
+	int i;
+
+	for (i = 0; i < helper->crtc_count; i++) {
+		struct drm_mode_set *mode_set = &helper->crtc_info[i].mode_set;
+		crtc = mode_set->crtc;
+		funcs = crtc->helper_private;
+		fb = drm_mode_config_fb(crtc);
+
+		if (!crtc->enabled)
+			continue;
+
+		if (!fb) {
+			DRM_ERROR("no fb to restore??\n");
+			continue;
+		}
+
+		funcs->mode_set_base_atomic(mode_set->crtc, fb, crtc->x,
+					    crtc->y);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_fb_helper_debug_leave);
+
 bool drm_fb_helper_force_kernel_mode(void)
 {
 	int i = 0;
diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
index 1121f7799c6..10f7d03e58a 100644
--- a/include/drm/drm_crtc_helper.h
+++ b/include/drm/drm_crtc_helper.h
@@ -60,6 +60,8 @@ struct drm_crtc_helper_funcs {
 	/* Move the crtc on the current fb to the given position *optional* */
 	int (*mode_set_base)(struct drm_crtc *crtc, int x, int y,
 			     struct drm_framebuffer *old_fb);
+	int (*mode_set_base_atomic)(struct drm_crtc *crtc,
+				    struct drm_framebuffer *fb, int x, int y);
 
 	/* reload the current crtc LUT */
 	void (*load_lut)(struct drm_crtc *crtc);
diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index f0a6afc47e7..f22e7fe4b6d 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -32,6 +32,8 @@
 
 struct drm_fb_helper;
 
+#include <linux/kgdb.h>
+
 struct drm_fb_helper_crtc {
 	uint32_t crtc_id;
 	struct drm_mode_set mode_set;
@@ -78,6 +80,7 @@ struct drm_fb_helper_connector {
 
 struct drm_fb_helper {
 	struct drm_framebuffer *fb;
+	struct drm_framebuffer *saved_fb;
 	struct drm_device *dev;
 	struct drm_display_mode *mode;
 	int crtc_count;
@@ -126,5 +129,7 @@ int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info);
 bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper);
 bool drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper, int bpp_sel);
 int drm_fb_helper_single_add_all_connectors(struct drm_fb_helper *fb_helper);
+int drm_fb_helper_debug_enter(struct fb_info *info);
+int drm_fb_helper_debug_leave(struct fb_info *info);
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From 1a4240f4764ac78adbf4b0ebb49b3bd8c72ffa11 Mon Sep 17 00:00:00 2001
From: Wang Lei <wang840925@gmail.com>
Date: Wed, 4 Aug 2010 15:16:33 +0100
Subject: DNS: Separate out CIFS DNS Resolver code

Separate out the DNS resolver key type from the CIFS filesystem into its own
module so that it can be made available for general use, including the AFS
filesystem module.

This facility makes it possible for the kernel to upcall to userspace to have
it issue DNS requests, package up the replies and present them to the kernel
in a useful form.  The kernel is then able to cache the DNS replies as keys
can be retained in keyrings.

Resolver keys are of type "dns_resolver" and have a case-insensitive
description that is of the form "[<type>:]<domain_name>".  The optional <type>
indicates the particular DNS lookup and packaging that's required.  The
<domain_name> is the query to be made.

If <type> isn't given, a basic hostname to IP address lookup is made, and the
result is stored in the key in the form of a printable string consisting of a
comma-separated list of IPv4 and IPv6 addresses.

This key type is supported by userspace helpers driven from /sbin/request-key
and configured through /etc/request-key.conf.  The cifs.upcall utility is
invoked for UNC path server name to IP address resolution.

The CIFS functionality is encapsulated by the dns_resolve_unc_to_ip() function,
which is used to resolve a UNC path to an IP address for CIFS filesystem.  This
part remains in the CIFS module for now.

See the added Documentation/networking/dns_resolver.txt for more information.

Signed-off-by: Wang Lei <wang840925@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 Documentation/networking/dns_resolver.txt | 146 +++++++++++++++++++
 fs/cifs/Kconfig                           |  17 +--
 fs/cifs/cifsfs.c                          |  13 +-
 fs/cifs/dns_resolve.c                     | 229 ++++++------------------------
 fs/cifs/dns_resolve.h                     |   2 -
 include/keys/dns_resolver-type.h          |  23 +++
 include/linux/dns_resolver.h              |  34 +++++
 net/Kconfig                               |   1 +
 net/Makefile                              |   1 +
 net/dns_resolver/Kconfig                  |  27 ++++
 net/dns_resolver/Makefile                 |   7 +
 net/dns_resolver/dns_key.c                | 210 +++++++++++++++++++++++++++
 net/dns_resolver/dns_query.c              | 159 +++++++++++++++++++++
 net/dns_resolver/internal.h               |  44 ++++++
 14 files changed, 708 insertions(+), 205 deletions(-)
 create mode 100644 Documentation/networking/dns_resolver.txt
 create mode 100644 include/keys/dns_resolver-type.h
 create mode 100644 include/linux/dns_resolver.h
 create mode 100644 net/dns_resolver/Kconfig
 create mode 100644 net/dns_resolver/Makefile
 create mode 100644 net/dns_resolver/dns_key.c
 create mode 100644 net/dns_resolver/dns_query.c
 create mode 100644 net/dns_resolver/internal.h

(limited to 'include')

diff --git a/Documentation/networking/dns_resolver.txt b/Documentation/networking/dns_resolver.txt
new file mode 100644
index 00000000000..d8e0ce1d38c
--- /dev/null
+++ b/Documentation/networking/dns_resolver.txt
@@ -0,0 +1,146 @@
+			     ===================
+			     DNS Resolver Module
+			     ===================
+
+Contents:
+
+ - Overview.
+ - Compilation.
+ - Setting up.
+ - Usage.
+ - Mechanism.
+ - Debugging.
+
+
+========
+OVERVIEW
+========
+
+The DNS resolver module provides a way for kernel services to make DNS queries
+by way of requesting a key of key type dns_resolver.  These queries are
+upcalled to userspace through /sbin/request-key.
+
+These routines must be supported by userspace tools dns.upcall, cifs.upcall and
+request-key.  The module is under development and does not yet provide the full
+feature set.  The features it does support include:
+
+ (*) Implements the dns_resolver key_type to contact userspace.
+
+It does not yet support the following AFS features:
+
+ (*) DNS query support for the AFSDB resource record.
+
+This code is extracted from the CIFS filesystem.
+
+
+===========
+COMPILATION
+===========
+
+The module should be enabled by turning on the kernel configuration options:
+
+	CONFIG_DNS_RESOLVER	- tristate "DNS Resolver support"
+
+
+==========
+SETTING UP
+==========
+
+To set up this facility, the /etc/request-key.conf file must be altered so that
+/sbin/request-key can appropriately direct the upcalls.  For example, to handle
+basic dname to IPv4/IPv6 address resolution, the following line should be
+added:
+
+	#OP	TYPE		DESC	CO-INFO	PROGRAM ARG1 ARG2 ARG3 ...
+	#======	============	=======	=======	==========================
+	create	dns_resolver  	*	*	/usr/sbin/cifs.upcall %k
+
+To direct a query for query type 'foo', a line of the following form should be
+added before the more general line above, as the first match is the one taken.
+
+	create	dns_resolver  	foo:*	*	/usr/sbin/dns.foo %k
+
+
+
+=====
+USAGE
+=====
+
+To make use of this facility, one of the following functions that are
+implemented in the module can be called after doing:
+
+	#include <linux/dns_resolver.h>
+
+ (1) int dns_query(const char *type, const char *name, size_t namelen,
+		   const char *options, char **_result, time_t *_expiry);
+
+     This is the basic access function.  It looks for a cached DNS query and if
+     it doesn't find it, it upcalls to userspace to make a new DNS query, which
+     may then be cached.  The key description is constructed as a string of the
+     form:
+
+		[<type>:]<name>
+
+     where <type> optionally specifies the particular upcall program to invoke,
+     and thus the type of query to do, and <name> specifies the string to be
+     looked up.  The default query type is a straight hostname to IP address
+     set lookup.
+
+     The name parameter is not required to be a NUL-terminated string, and its
+     length should be given by the namelen argument.
+
+     The options parameter may be NULL or it may be a set of options
+     appropriate to the query type.
+
+     The return value is a string appropriate to the query type.  For instance,
+     for the default query type it is just a list of comma-separated IPv4 and
+     IPv6 addresses.  The caller must free the result.
+
+     The length of the result string is returned on success, and a -ve error
+     code is returned otherwise.  -EKEYREJECTED will be returned if the DNS
+     lookup failed.
+
+     If _expiry is non-NULL, the expiry time (TTL) of the result will be
+     returned also.
+
+
+=========
+MECHANISM
+=========
+
+The dnsresolver module registers a key type called "dns_resolver".  Keys of
+this type are used to transport and cache DNS lookup results from userspace.
+
+When dns_query() is invoked, it calls request_key() to search the local
+keyrings for a cached DNS result.  If that fails to find one, it upcalls to
+userspace to get a new result.
+
+Upcalls to userspace are made through the request_key() upcall vector, and are
+directed by means of configuration lines in /etc/request-key.conf that tell
+/sbin/request-key what program to run to instantiate the key.
+
+The upcall handler program is responsible for querying the DNS, processing the
+result into a form suitable for passing to the keyctl_instantiate_key()
+routine.  This then passes the data to dns_resolver_instantiate() which strips
+off and processes any options included in the data, and then attaches the
+remainder of the string to the key as its payload.
+
+The upcall handler program should set the expiry time on the key to that of the
+lowest TTL of all the records it has extracted a result from.  This means that
+the key will be discarded and recreated when the data it holds has expired.
+
+dns_query() returns a copy of the value attached to the key, or an error if
+that is indicated instead.
+
+See <file:Documentation/keys-request-key.txt> for further information about
+the request-key function.
+
+
+=========
+DEBUGGING
+=========
+
+Debugging messages can be turned on dynamically by writing a 1 into the
+following file:
+
+        /sys/module/dnsresolver/parameters/debug
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 5739fd7f88b..57f0aa9f141 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -71,14 +71,14 @@ config CIFS_WEAK_PW_HASH
 	  If unsure, say N.
 
 config CIFS_UPCALL
-	  bool "Kerberos/SPNEGO advanced session setup"
-	  depends on CIFS && KEYS
-	  help
-	    Enables an upcall mechanism for CIFS which accesses
-	    userspace helper utilities to provide SPNEGO packaged (RFC 4178)
-	    Kerberos tickets which are needed to mount to certain secure servers
-	    (for which more secure Kerberos authentication is required). If
-	    unsure, say N.
+	bool "Kerberos/SPNEGO advanced session setup"
+	depends on CIFS && KEYS
+	select DNS_RESOLVER
+	help
+	  Enables an upcall mechanism for CIFS which accesses userspace helper
+	  utilities to provide SPNEGO packaged (RFC 4178) Kerberos tickets
+	  which are needed to mount to certain secure servers (for which more
+	  secure Kerberos authentication is required). If unsure, say N.
 
 config CIFS_XATTR
         bool "CIFS extended attributes"
@@ -122,6 +122,7 @@ config CIFS_DEBUG2
 config CIFS_DFS_UPCALL
 	  bool "DFS feature support"
 	  depends on CIFS && KEYS
+	  select DNS_RESOLVER
 	  help
 	    Distributed File System (DFS) support is used to access shares
 	    transparently in an enterprise name space, even if the share
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8a2cf129e53..2a0c892959f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -45,7 +45,6 @@
 #include "cifs_fs_sb.h"
 #include <linux/mm.h>
 #include <linux/key-type.h>
-#include "dns_resolve.h"
 #include "cifs_spnego.h"
 #include "fscache.h"
 #define CIFS_MAGIC_NUMBER 0xFF534D42	/* the first four bytes of SMB PDUs */
@@ -933,23 +932,14 @@ init_cifs(void)
 	rc = register_key_type(&cifs_spnego_key_type);
 	if (rc)
 		goto out_unregister_filesystem;
-#endif
-#ifdef CONFIG_CIFS_DFS_UPCALL
-	rc = cifs_init_dns_resolver();
-	if (rc)
-		goto out_unregister_key_type;
 #endif
 	rc = slow_work_register_user(THIS_MODULE);
 	if (rc)
-		goto out_unregister_resolver_key;
+		goto out_unregister_key_type;
 
 	return 0;
 
- out_unregister_resolver_key:
-#ifdef CONFIG_CIFS_DFS_UPCALL
-	cifs_exit_dns_resolver();
  out_unregister_key_type:
-#endif
 #ifdef CONFIG_CIFS_UPCALL
 	unregister_key_type(&cifs_spnego_key_type);
  out_unregister_filesystem:
@@ -976,7 +966,6 @@ exit_cifs(void)
 	cifs_fscache_unregister();
 #ifdef CONFIG_CIFS_DFS_UPCALL
 	cifs_dfs_release_automount_timer();
-	cifs_exit_dns_resolver();
 #endif
 #ifdef CONFIG_CIFS_UPCALL
 	unregister_key_type(&cifs_spnego_key_type);
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index aa967e7917f..0eb87026cad 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -4,6 +4,8 @@
  *   Copyright (c) 2007 Igor Mammedov
  *   Author(s): Igor Mammedov (niallain@gmail.com)
  *              Steve French (sfrench@us.ibm.com)
+ *              Wang Lei (wang840925@gmail.com)
+ *		David Howells (dhowells@redhat.com)
  *
  *   Contains the CIFS DFS upcall routines used for hostname to
  *   IP address translation.
@@ -24,212 +26,73 @@
  */
 
 #include <linux/slab.h>
-#include <linux/keyctl.h>
-#include <linux/key-type.h>
-#include <keys/user-type.h>
+#include <linux/dns_resolver.h>
 #include "dns_resolve.h"
 #include "cifsglob.h"
 #include "cifsproto.h"
 #include "cifs_debug.h"
 
-static const struct cred *dns_resolver_cache;
-
-/* Checks if supplied name is IP address
- * returns:
- * 		1 - name is IP
- * 		0 - name is not IP
- */
-static int
-is_ip(const char *name, int len)
-{
-	struct sockaddr_storage ss;
-
-	return cifs_convert_address((struct sockaddr *)&ss, name, len);
-}
-
-static int
-dns_resolver_instantiate(struct key *key, const void *data,
-		size_t datalen)
-{
-	int rc = 0;
-	char *ip;
-
-	/* make sure this looks like an address */
-	if (!is_ip(data, datalen))
-		return -EINVAL;
-
-	ip = kmalloc(datalen + 1, GFP_KERNEL);
-	if (!ip)
-		return -ENOMEM;
-
-	memcpy(ip, data, datalen);
-	ip[datalen] = '\0';
-
-	key->type_data.x[0] = datalen;
-	key->payload.data = ip;
-
-	return rc;
-}
-
-static void
-dns_resolver_destroy(struct key *key)
-{
-	kfree(key->payload.data);
-}
-
-struct key_type key_type_dns_resolver = {
-	.name        = "dns_resolver",
-	.def_datalen = sizeof(struct in_addr),
-	.describe    = user_describe,
-	.instantiate = dns_resolver_instantiate,
-	.destroy     = dns_resolver_destroy,
-	.match       = user_match,
-};
-
-/* Resolves server name to ip address.
- * input:
- * 	unc - server UNC
- * output:
- * 	*ip_addr - pointer to server ip, caller responcible for freeing it.
- * return the length of the returned string on success
+/**
+ * dns_resolve_server_name_to_ip - Resolve UNC server name to ip address.
+ * @unc: UNC path specifying the server
+ * @ip_addr: Where to return the IP address.
+ *
+ * The IP address will be returned in string form, and the caller is
+ * responsible for freeing it.
+ *
+ * Returns the dns_query() result length, 0 for a literal IP, -ve on error.
  */
 int
 dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
 {
-	const struct cred *saved_cred;
-	int rc = -EAGAIN;
-	struct key *rkey = ERR_PTR(-EAGAIN);
+	struct sockaddr_storage ss;
+	const char *hostname, *sep;
 	char *name;
-	char *data = NULL;
-	int len;
+	int len, rc;
 
 	if (!ip_addr || !unc)
 		return -EINVAL;
 
-	/* search for server name delimiter */
 	len = strlen(unc);
 	if (len < 3) {
 		cFYI(1, "%s: unc is too short: %s", __func__, unc);
 		return -EINVAL;
 	}
-	len -= 2;
-	name = memchr(unc+2, '\\', len);
-	if (!name) {
-		cFYI(1, "%s: probably server name is whole unc: %s",
-					__func__, unc);
-	} else {
-		len = (name - unc) - 2/* leading // */;
-	}
-
-	name = kmalloc(len+1, GFP_KERNEL);
-	if (!name) {
-		rc = -ENOMEM;
-		return rc;
-	}
-	memcpy(name, unc+2, len);
-	name[len] = 0;
-
-	if (is_ip(name, len)) {
-		cFYI(1, "%s: it is IP, skipping dns upcall: %s",
-					__func__, name);
-		data = name;
-		goto skip_upcall;
-	}
 
-	saved_cred = override_creds(dns_resolver_cache);
-	rkey = request_key(&key_type_dns_resolver, name, "");
-	revert_creds(saved_cred);
-	if (!IS_ERR(rkey)) {
-		if (!(rkey->perm & KEY_USR_VIEW)) {
-			down_read(&rkey->sem);
-			rkey->perm |= KEY_USR_VIEW;
-			up_read(&rkey->sem);
-		}
-		len = rkey->type_data.x[0];
-		data = rkey->payload.data;
-	} else {
-		cERROR(1, "%s: unable to resolve: %s", __func__, name);
-		goto out;
-	}
-
-skip_upcall:
-	if (data) {
-		*ip_addr = kmalloc(len + 1, GFP_KERNEL);
-		if (*ip_addr) {
-			memcpy(*ip_addr, data, len + 1);
-			if (!IS_ERR(rkey))
-				cFYI(1, "%s: resolved: %s to %s", __func__,
-							name,
-							*ip_addr
-					);
-			rc = len;
-		} else {
-			rc = -ENOMEM;
-		}
-		if (!IS_ERR(rkey))
-			key_put(rkey);
-	}
+	/* Discount leading slashes for cifs */
+	len -= 2;
+	hostname = unc + 2;
 
-out:
-	kfree(name);
+	/* Find the server name delimiter; len is relative to hostname */
+	sep = memchr(hostname, '\\', len);
+	if (sep)
+		len = sep - hostname;
+	else
+		cFYI(1, "%s: probably server name is whole unc: %s",
+		     __func__, unc);
+
+	/* Try to interpret hostname as an IPv4 or IPv6 address */
+	rc = cifs_convert_address((struct sockaddr *)&ss, hostname, len);
+	if (rc > 0)
+		goto name_is_IP_address;
+
+	/* Perform the upcall */
+	rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL);
+	if (rc < 0)
+		cERROR(1, "%s: unable to resolve: %*.*s",
+		       __func__, len, len, hostname);
+	else
+		cFYI(1, "%s: resolved: %*.*s to %s",
+		     __func__, len, len, hostname, *ip_addr);
 	return rc;
-}
 
-int __init cifs_init_dns_resolver(void)
-{
-	struct cred *cred;
-	struct key *keyring;
-	int ret;
-
-	printk(KERN_NOTICE "Registering the %s key type\n",
-	       key_type_dns_resolver.name);
-
-	/* create an override credential set with a special thread keyring in
-	 * which DNS requests are cached
-	 *
-	 * this is used to prevent malicious redirections from being installed
-	 * with add_key().
-	 */
-	cred = prepare_kernel_cred(NULL);
-	if (!cred)
+name_is_IP_address:
+	name = kmalloc(len + 1, GFP_KERNEL);
+	if (!name)
 		return -ENOMEM;
-
-	keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred,
-			    (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-			    KEY_USR_VIEW | KEY_USR_READ,
-			    KEY_ALLOC_NOT_IN_QUOTA);
-	if (IS_ERR(keyring)) {
-		ret = PTR_ERR(keyring);
-		goto failed_put_cred;
-	}
-
-	ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
-	if (ret < 0)
-		goto failed_put_key;
-
-	ret = register_key_type(&key_type_dns_resolver);
-	if (ret < 0)
-		goto failed_put_key;
-
-	/* instruct request_key() to use this special keyring as a cache for
-	 * the results it looks up */
-	cred->thread_keyring = keyring;
-	cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
-	dns_resolver_cache = cred;
+	memcpy(name, hostname, len);
+	name[len] = 0;
+	cFYI(1, "%s: unc is IP, skipping dns upcall: %s", __func__, name);
+	*ip_addr = name;
 	return 0;
-
-failed_put_key:
-	key_put(keyring);
-failed_put_cred:
-	put_cred(cred);
-	return ret;
-}
-
-void cifs_exit_dns_resolver(void)
-{
-	key_revoke(dns_resolver_cache->thread_keyring);
-	unregister_key_type(&key_type_dns_resolver);
-	put_cred(dns_resolver_cache);
-	printk(KERN_NOTICE "Unregistered %s key type\n",
-	       key_type_dns_resolver.name);
 }
diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h
index 5d7f291df16..d3f5d27f4d0 100644
--- a/fs/cifs/dns_resolve.h
+++ b/fs/cifs/dns_resolve.h
@@ -24,8 +24,6 @@
 #define _DNS_RESOLVE_H
 
 #ifdef __KERNEL__
-extern int __init cifs_init_dns_resolver(void);
-extern void cifs_exit_dns_resolver(void);
 extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr);
 #endif /* KERNEL */
 
diff --git a/include/keys/dns_resolver-type.h b/include/keys/dns_resolver-type.h
new file mode 100644
index 00000000000..9284a19393a
--- /dev/null
+++ b/include/keys/dns_resolver-type.h
@@ -0,0 +1,23 @@
+/* DNS resolver key type
+ *
+ * Copyright (C) 2010 Wang Lei. All Rights Reserved.
+ * Written by Wang Lei (wang840925@gmail.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _KEYS_DNS_RESOLVER_TYPE_H
+#define _KEYS_DNS_RESOLVER_TYPE_H
+
+#include <linux/key-type.h>
+
+extern struct key_type key_type_dns_resolver;
+
+extern int request_dns_resolver_key(const char *description,
+				    const char *callout_info,
+				    char **data);
+
+#endif /* _KEYS_DNS_RESOLVER_TYPE_H */
diff --git a/include/linux/dns_resolver.h b/include/linux/dns_resolver.h
new file mode 100644
index 00000000000..cc92268af89
--- /dev/null
+++ b/include/linux/dns_resolver.h
@@ -0,0 +1,34 @@
+/*
+ *   DNS Resolver upcall management for CIFS DFS and AFS
+ *   Handles host name to IP address resolution and DNS query for AFSDB RR.
+ *
+ *   Copyright (c) International Business Machines  Corp., 2008
+ *   Author(s): Steve French (sfrench@us.ibm.com)
+ *              Wang Lei (wang840925@gmail.com)
+ *
+ *   This library is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU Lesser General Public License as published
+ *   by the Free Software Foundation; either version 2.1 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This library is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public License
+ *   along with this library; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LINUX_DNS_RESOLVER_H
+#define _LINUX_DNS_RESOLVER_H
+
+#ifdef __KERNEL__
+
+extern int dns_query(const char *type, const char *name, size_t namelen,
+		     const char *options, char **_result, time_t *_expiry);
+
+#endif /* KERNEL */
+
+#endif /* _LINUX_DNS_RESOLVER_H */
diff --git a/net/Kconfig b/net/Kconfig
index e24fa0873f3..e330594d370 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -213,6 +213,7 @@ source "net/phonet/Kconfig"
 source "net/ieee802154/Kconfig"
 source "net/sched/Kconfig"
 source "net/dcb/Kconfig"
+source "net/dns_resolver/Kconfig"
 
 config RPS
 	boolean
diff --git a/net/Makefile b/net/Makefile
index 41d420070a3..ea60fbce9b1 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -67,3 +67,4 @@ ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
 endif
 obj-$(CONFIG_WIMAX)		+= wimax/
+obj-$(CONFIG_DNS_RESOLVER)	+= dns_resolver/
diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig
new file mode 100644
index 00000000000..2ec47cb5d0d
--- /dev/null
+++ b/net/dns_resolver/Kconfig
@@ -0,0 +1,27 @@
+#
+# Configuration for DNS Resolver
+#
+config DNS_RESOLVER
+	tristate "DNS Resolver support"
+	depends on NET && KEYS
+	help
+	  Saying Y here will include support for the DNS Resolver key type
+	  which can be used to make upcalls to perform DNS lookups in
+	  userspace.
+
+	  DNS Resolver is used to query DNS server for information.  Examples
+	  being resolving a UNC hostname element to an IP address for CIFS or
+	  performing a DNS query for AFSDB records so that AFS can locate a
+	  cell's volume location database servers.
+
+	  DNS Resolver is used by the CIFS and AFS modules, and would support
+	  samba4 later.  DNS Resolver is supported by the userspace upcall
+	  helper "/sbin/dns.resolver" via /etc/request-key.conf.
+
+	  See <file:Documentation/networking/dns_resolver.txt> for further
+	  information.
+
+	  To compile this as a module, choose M here: the module will be called
+	  dns_resolver.
+
+	  If unsure, say N.
diff --git a/net/dns_resolver/Makefile b/net/dns_resolver/Makefile
new file mode 100644
index 00000000000..c0ef4e71dc4
--- /dev/null
+++ b/net/dns_resolver/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the Linux DNS Resolver.
+#
+
+obj-$(CONFIG_DNS_RESOLVER) += dns_resolver.o
+
+dns_resolver-objs :=  dns_key.o dns_query.o
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
new file mode 100644
index 00000000000..1b1b411adcf
--- /dev/null
+++ b/net/dns_resolver/dns_key.c
@@ -0,0 +1,210 @@
+/* Key type used to cache DNS lookups made by the kernel
+ *
+ * See Documentation/networking/dns_resolver.txt
+ *
+ *   Copyright (c) 2007 Igor Mammedov
+ *   Author(s): Igor Mammedov (niallain@gmail.com)
+ *              Steve French (sfrench@us.ibm.com)
+ *              Wang Lei (wang840925@gmail.com)
+ *		David Howells (dhowells@redhat.com)
+ *
+ *   This library is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU Lesser General Public License as published
+ *   by the Free Software Foundation; either version 2.1 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This library is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public License
+ *   along with this library; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/keyctl.h>
+#include <keys/dns_resolver-type.h>
+#include <keys/user-type.h>
+#include "internal.h"
+
+MODULE_DESCRIPTION("DNS Resolver");
+MODULE_AUTHOR("Wang Lei");
+MODULE_LICENSE("GPL");
+
+unsigned dns_resolver_debug;
+module_param_named(debug, dns_resolver_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(debug, "DNS Resolver debugging mask");
+
+const struct cred *dns_resolver_cache;
+
+/*
+ * Instantiate a user defined key for dns_resolver.
+ *
+ * The data must be a NUL-terminated string, with the NUL char accounted in
+ * datalen; the NUL is dropped from the length stored in the payload.
+ *
+ * A '#' in the data introduces option clauses of the form 'key=value', with
+ * the result proper being the string leading up to the first '#', e.g.
+ * "ip1,ip2,...#foo=bar".  No options are supported yet, so data containing
+ * a '#' is rejected and only option-free data is accepted as the payload.
+ * Returns 0, -EINVAL (bad data or payload reservation failed) or -ENOMEM.
+ */
+static int
+dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen)
+{
+	struct user_key_payload *upayload;
+	int ret;
+	size_t result_len = 0;
+	const char *data = _data, *opt;
+
+	kenter("%%%d,%s,'%s',%zu",
+	       key->serial, key->description, data, datalen);
+
+	if (datalen <= 1 || !data || data[datalen - 1] != '\0')
+		return -EINVAL;
+	datalen--;
+
+	/* no options are supported yet, so reject data that carries any */
+	opt = memchr(data, '#', datalen);
+	if (opt) {
+		kdebug("no options currently supported");
+		return -EINVAL;
+	}
+
+	result_len = datalen;
+	ret = key_payload_reserve(key, result_len);
+	if (ret < 0)
+		return -EINVAL;
+
+	upayload = kmalloc(sizeof(*upayload) + result_len + 1, GFP_KERNEL);
+	if (!upayload) {
+		kleave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	upayload->datalen = result_len;
+	memcpy(upayload->data, data, result_len);
+	upayload->data[result_len] = '\0';
+	rcu_assign_pointer(key->payload.data, upayload);
+
+	kleave(" = 0");
+	return 0;
+}
+
+/*
+ * Match a key against a description of the form "[<type>:]<domain_name>".
+ *
+ * The comparison is case-independent and ignores one trailing period (as
+ * ends an absolute domain name) on either side.  Returns 1 on match, else 0.
+ */
+static int
+dns_resolver_match(const struct key *key, const void *description)
+{
+	int slen, dlen, ret = 0;
+	const char *src = key->description, *dsp = description;
+
+	kenter("%s,%s", src, dsp);
+
+	if (!src || !dsp)
+		goto no_match;
+
+	if (strcasecmp(src, dsp) == 0)
+		goto matched;
+
+	slen = strlen(src);
+	dlen = strlen(dsp);
+	if (slen <= 0 || dlen <= 0)
+		goto no_match;
+	if (src[slen - 1] == '.')
+		slen--;
+	if (dsp[dlen - 1] == '.')
+		dlen--;
+	if (slen != dlen || strncasecmp(src, dsp, slen) != 0)
+		goto no_match;
+
+matched:
+	ret = 1;
+no_match:
+	kleave(" = %d", ret);
+	return ret;
+}
+
+struct key_type key_type_dns_resolver = {
+	.name		= "dns_resolver",
+	.instantiate	= dns_resolver_instantiate,
+	.match		= dns_resolver_match,
+	.revoke		= user_revoke,
+	.destroy	= user_destroy,
+	.describe	= user_describe,
+	.read		= user_read,
+};
+
+static int __init init_dns_resolver(void)
+{
+	struct cred *cred;
+	struct key *keyring;
+	int ret;
+
+	printk(KERN_NOTICE "Registering the %s key type\n",
+	       key_type_dns_resolver.name);
+
+	/* create an override credential set with a special thread keyring in
+	 * which DNS requests are cached
+	 *
+	 * this is used to prevent malicious redirections from being installed
+	 * with add_key().
+	 */
+	cred = prepare_kernel_cred(NULL);
+	if (!cred)
+		return -ENOMEM;
+
+	keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred,
+			    (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+			    KEY_USR_VIEW | KEY_USR_READ,
+			    KEY_ALLOC_NOT_IN_QUOTA);
+	if (IS_ERR(keyring)) {
+		ret = PTR_ERR(keyring);
+		goto failed_put_cred;
+	}
+
+	ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
+	if (ret < 0)
+		goto failed_put_key;
+
+	ret = register_key_type(&key_type_dns_resolver);
+	if (ret < 0)
+		goto failed_put_key;
+
+	/* instruct request_key() to use this special keyring as a cache for
+	 * the results it looks up */
+	cred->thread_keyring = keyring;
+	cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
+	dns_resolver_cache = cred;
+
+	kdebug("DNS resolver keyring: %d\n", key_serial(keyring));
+	return 0;
+
+failed_put_key:
+	key_put(keyring);
+failed_put_cred:
+	put_cred(cred);
+	return ret;
+}
+
+static void __exit exit_dns_resolver(void)
+{
+	key_revoke(dns_resolver_cache->thread_keyring);
+	unregister_key_type(&key_type_dns_resolver);
+	put_cred(dns_resolver_cache);
+	printk(KERN_NOTICE "Unregistered %s key type\n",
+	       key_type_dns_resolver.name);
+}
+
+module_init(init_dns_resolver)
+module_exit(exit_dns_resolver)
+MODULE_LICENSE("GPL");
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
new file mode 100644
index 00000000000..6c0cf31ea00
--- /dev/null
+++ b/net/dns_resolver/dns_query.c
@@ -0,0 +1,159 @@
+/* Upcall routine, designed to work as a key type and working through
+ * /sbin/request-key to contact userspace when handling DNS queries.
+ *
+ * See Documentation/networking/dns_resolver.txt
+ *
+ *   Copyright (c) 2007 Igor Mammedov
+ *   Author(s): Igor Mammedov (niallain@gmail.com)
+ *              Steve French (sfrench@us.ibm.com)
+ *              Wang Lei (wang840925@gmail.com)
+ *		David Howells (dhowells@redhat.com)
+ *
+ *   The upcall wrapper used to make an arbitrary DNS query.
+ *
+ *   This function requires the appropriate userspace tool dns.upcall to be
+ *   installed and something like the following lines should be added to the
+ *   /etc/request-key.conf file:
+ *
+ *	create dns_resolver * * /sbin/dns.upcall %k
+ *
+ *   For example to use this module to query AFSDB RR:
+ *
+ *	create dns_resolver afsdb:* * /sbin/dns.afsdb %k
+ *
+ *   This library is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU Lesser General Public License as published
+ *   by the Free Software Foundation; either version 2.1 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This library is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public License
+ *   along with this library; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/dns_resolver.h>
+#include <keys/dns_resolver-type.h>
+#include <keys/user-type.h>
+
+#include "internal.h"
+
+/*
+ * dns_query - Query the DNS
+ * @type: Query type (or NULL for straight host->IP lookup)
+ * @name: Name to look up
+ * @namelen: Length of name (must be at least 3, else -EINVAL is returned)
+ * @options: Request options (or NULL if no options)
+ * @_result: Where to place the returned data.
+ * @_expiry: Where to store the result expiry time (or NULL)
+ *
+ * The data will be returned in the pointer at *_result, and the caller is
+ * responsible for freeing it.
+ *
+ * The description should be of the form "[<query_type>:]<domain_name>", and
+ * the options need to be appropriate for the query type requested.  If no
+ * query_type is given, then the query is a straight hostname to IP address
+ * lookup.
+ *
+ * The DNS resolution lookup is performed by upcalling to userspace by way of
+ * requesting a key of type dns_resolver.
+ *
+ * Returns the size of the result on success, -ve error code otherwise.
+ */
+int dns_query(const char *type, const char *name, size_t namelen,
+	      const char *options, char **_result, time_t *_expiry)
+{
+	struct key *rkey;
+	struct user_key_payload *upayload;
+	const struct cred *saved_cred;
+	size_t typelen, desclen;
+	char *desc, *cp;
+	int ret, len;
+
+	kenter("%s,%*.*s,%zu,%s",
+	       type, (int)namelen, (int)namelen, name, namelen, options);
+
+	if (!name || namelen == 0 || !_result)
+		return -EINVAL;
+
+	/* construct the query key description as "[<type>:]<name>" */
+	typelen = 0;
+	desclen = 0;
+	if (type) {
+		typelen = strlen(type);
+		if (typelen < 1)
+			return -EINVAL;
+		desclen += typelen + 1;
+	}
+
+	if (!namelen)
+		namelen = strlen(name);
+	if (namelen < 3)
+		return -EINVAL;
+	desclen += namelen + 1;
+
+	desc = kmalloc(desclen, GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+
+	cp = desc;
+	if (type) {
+		memcpy(cp, type, typelen);
+		cp += typelen;
+		*cp++ = ':';
+	}
+	memcpy(cp, name, namelen);
+	cp += namelen;
+	*cp = '\0';
+
+	if (!options)
+		options = "";
+	kdebug("call request_key(,%s,%s)", desc, options);
+
+	/* make the upcall, using special credentials to prevent the use of
+	 * add_key() to preinstall malicious redirections
+	 */
+	saved_cred = override_creds(dns_resolver_cache);
+	rkey = request_key(&key_type_dns_resolver, desc, options);
+	revert_creds(saved_cred);
+	kfree(desc);
+	if (IS_ERR(rkey)) {
+		ret = PTR_ERR(rkey);
+		goto out;
+	}
+
+	down_read(&rkey->sem);
+	rkey->perm |= KEY_USR_VIEW;
+
+	ret = key_validate(rkey);
+	if (ret < 0)
+		goto put;
+
+	upayload = rcu_dereference_protected(rkey->payload.data,
+					     lockdep_is_held(&rkey->sem));
+	len = upayload->datalen;
+
+	ret = -ENOMEM;
+	*_result = kmalloc(len + 1, GFP_KERNEL);
+	if (!*_result)
+		goto put;
+
+	memcpy(*_result, upayload->data, len + 1);
+	if (_expiry)
+		*_expiry = rkey->expiry;
+
+	ret = len;
+put:
+	up_read(&rkey->sem);
+	key_put(rkey);
+out:
+	kleave(" = %d", ret);
+	return ret;
+}
+EXPORT_SYMBOL(dns_query);
diff --git a/net/dns_resolver/internal.h b/net/dns_resolver/internal.h
new file mode 100644
index 00000000000..189ca9e9b78
--- /dev/null
+++ b/net/dns_resolver/internal.h
@@ -0,0 +1,44 @@
+/*
+ *   Copyright (c) 2010 Wang Lei
+ *   Author(s): Wang Lei (wang840925@gmail.com). All Rights Reserved.
+ *
+ *   Internal DNS Resolver stuff
+ *
+ *   This library is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU Lesser General Public License as published
+ *   by the Free Software Foundation; either version 2.1 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This library is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public License
+ *   along with this library; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+/*
+ * dns_key.c
+ */
+extern const struct cred *dns_resolver_cache;
+
+/*
+ * debug tracing
+ */
+extern unsigned dns_resolver_debug;
+
+#define	kdebug(FMT, ...)				\
+do {							\
+	if (unlikely(dns_resolver_debug))		\
+		printk(KERN_DEBUG "[%-6.6s] "FMT"\n",	\
+		       current->comm, ##__VA_ARGS__);	\
+} while (0)
+
+#define kenter(FMT, ...) kdebug("==> %s("FMT")", __func__, ##__VA_ARGS__)
+#define kleave(FMT, ...) kdebug("<== %s()"FMT"", __func__, ##__VA_ARGS__)
-- 
cgit v1.2.3-70-g09d2


From cc7447a5fa92759b0856d6a83ba2539c6a94e67e Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Wed, 16 Jun 2010 11:44:18 +0200
Subject: Driver core: Drop __must_check from bus_for_each_drv()

There is little rationale for marking bus_for_each_drv() __must_check.
It is more of an iteration helper than a real function. You don't know
in advance which callback it will be used on, so you have no clue how
important it can be to check the returned value. In practice, this
helper function can be used for best-effort tasks.

As a matter of fact, bus_for_each_dev() is not marked __must_check.
So remove it from bus_for_each_drv() as well. This is the same that
was done back in October 2006 by Russell King for
device_for_each_child(), for exactly the same reasons.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index 6a8276f683b..ddffdf7da39 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -84,9 +84,8 @@ struct device *bus_find_device_by_name(struct bus_type *bus,
 				       struct device *start,
 				       const char *name);
 
-int __must_check bus_for_each_drv(struct bus_type *bus,
-				  struct device_driver *start, void *data,
-				  int (*fn)(struct device_driver *, void *));
+int bus_for_each_drv(struct bus_type *bus, struct device_driver *start,
+		     void *data, int (*fn)(struct device_driver *, void *));
 
 void bus_sort_breadthfirst(struct bus_type *bus,
 			   int (*compare)(const struct device *a,
-- 
cgit v1.2.3-70-g09d2


From 44f28bdea09415d40b4d73a7668db5961362ec53 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Mon, 21 Jun 2010 16:11:44 +0200
Subject: Driver core: reduce duplicated code for platform_device creation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes the two similar functions platform_device_register_simple
and platform_device_register_data one line inline functions using a new
generic function platform_device_register_resndata.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/DocBook/device-drivers.tmpl |   1 +
 drivers/base/platform.c                   | 104 ++++++++----------------------
 include/linux/platform_device.h           |  62 ++++++++++++++++--
 3 files changed, 85 insertions(+), 82 deletions(-)

(limited to 'include')

diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl
index 1b2dd4fc3db..ecd35e9d441 100644
--- a/Documentation/DocBook/device-drivers.tmpl
+++ b/Documentation/DocBook/device-drivers.tmpl
@@ -111,6 +111,7 @@ X!Edrivers/base/attribute_container.c
 <!--
 X!Edrivers/base/interface.c
 -->
+!Iinclude/linux/platform_device.h
 !Edrivers/base/platform.c
 !Edrivers/base/bus.c
      </sect1>
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 26eb69d88eb..ffcfd73fe8a 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -344,108 +344,56 @@ void platform_device_unregister(struct platform_device *pdev)
 EXPORT_SYMBOL_GPL(platform_device_unregister);
 
 /**
- * platform_device_register_simple - add a platform-level device and its resources
- * @name: base name of the device we're adding
- * @id: instance id
- * @res: set of resources that needs to be allocated for the device
- * @num: number of resources
- *
- * This function creates a simple platform device that requires minimal
- * resource and memory management. Canned release function freeing memory
- * allocated for the device allows drivers using such devices to be
- * unloaded without waiting for the last reference to the device to be
- * dropped.
+ * platform_device_register_resndata - add a platform-level device with
+ * resources and platform-specific data
  *
- * This interface is primarily intended for use with legacy drivers which
- * probe hardware directly.  Because such drivers create sysfs device nodes
- * themselves, rather than letting system infrastructure handle such device
- * enumeration tasks, they don't fully conform to the Linux driver model.
- * In particular, when such drivers are built as modules, they can't be
- * "hotplugged".
- *
- * Returns &struct platform_device pointer on success, or ERR_PTR() on error.
- */
-struct platform_device *platform_device_register_simple(const char *name,
-							int id,
-							const struct resource *res,
-							unsigned int num)
-{
-	struct platform_device *pdev;
-	int retval;
-
-	pdev = platform_device_alloc(name, id);
-	if (!pdev) {
-		retval = -ENOMEM;
-		goto error;
-	}
-
-	if (num) {
-		retval = platform_device_add_resources(pdev, res, num);
-		if (retval)
-			goto error;
-	}
-
-	retval = platform_device_add(pdev);
-	if (retval)
-		goto error;
-
-	return pdev;
-
-error:
-	platform_device_put(pdev);
-	return ERR_PTR(retval);
-}
-EXPORT_SYMBOL_GPL(platform_device_register_simple);
-
-/**
- * platform_device_register_data - add a platform-level device with platform-specific data
  * @parent: parent device for the device we're adding
  * @name: base name of the device we're adding
  * @id: instance id
+ * @res: set of resources that needs to be allocated for the device
+ * @num: number of resources
  * @data: platform specific data for this platform device
  * @size: size of platform specific data
  *
- * This function creates a simple platform device that requires minimal
- * resource and memory management. Canned release function freeing memory
- * allocated for the device allows drivers using such devices to be
- * unloaded without waiting for the last reference to the device to be
- * dropped.
- *
  * Returns &struct platform_device pointer on success, or ERR_PTR() on error.
  */
-struct platform_device *platform_device_register_data(
+struct platform_device *platform_device_register_resndata(
 		struct device *parent,
 		const char *name, int id,
+		const struct resource *res, unsigned int num,
 		const void *data, size_t size)
 {
+	int ret = -ENOMEM;
 	struct platform_device *pdev;
-	int retval;
 
 	pdev = platform_device_alloc(name, id);
-	if (!pdev) {
-		retval = -ENOMEM;
-		goto error;
-	}
+	if (!pdev)
+		goto err;
 
 	pdev->dev.parent = parent;
 
-	if (size) {
-		retval = platform_device_add_data(pdev, data, size);
-		if (retval)
-			goto error;
+	if (res) {
+		ret = platform_device_add_resources(pdev, res, num);
+		if (ret)
+			goto err;
 	}
 
-	retval = platform_device_add(pdev);
-	if (retval)
-		goto error;
+	if (data) {
+		ret = platform_device_add_data(pdev, data, size);
+		if (ret)
+			goto err;
+	}
 
-	return pdev;
+	ret = platform_device_add(pdev);
+	if (ret) {
+err:
+		platform_device_put(pdev);
+		return ERR_PTR(ret);
+	}
 
-error:
-	platform_device_put(pdev);
-	return ERR_PTR(retval);
+	return pdev;
 }
-EXPORT_SYMBOL_GPL(platform_device_register_data);
+EXPORT_SYMBOL_GPL(platform_device_register_resndata);
 
 static int platform_drv_probe(struct device *_dev)
 {
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 5417944d368..d7ecad0093b 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -43,10 +43,64 @@ extern struct resource *platform_get_resource_byname(struct platform_device *, u
 extern int platform_get_irq_byname(struct platform_device *, const char *);
 extern int platform_add_devices(struct platform_device **, int);
 
-extern struct platform_device *platform_device_register_simple(const char *, int id,
-					const struct resource *, unsigned int);
-extern struct platform_device *platform_device_register_data(struct device *,
-		const char *, int, const void *, size_t);
+extern struct platform_device *platform_device_register_resndata(
+		struct device *parent, const char *name, int id,
+		const struct resource *res, unsigned int num,
+		const void *data, size_t size);
+
+/**
+ * platform_device_register_simple - add a platform-level device and its resources
+ * @name: base name of the device we're adding
+ * @id: instance id
+ * @res: set of resources that needs to be allocated for the device
+ * @num: number of resources
+ *
+ * This function creates a simple platform device that requires minimal
+ * resource and memory management. Canned release function freeing memory
+ * allocated for the device allows drivers using such devices to be
+ * unloaded without waiting for the last reference to the device to be
+ * dropped.
+ *
+ * This interface is primarily intended for use with legacy drivers which
+ * probe hardware directly.  Because such drivers create sysfs device nodes
+ * themselves, rather than letting system infrastructure handle such device
+ * enumeration tasks, they don't fully conform to the Linux driver model.
+ * In particular, when such drivers are built as modules, they can't be
+ * "hotplugged".
+ *
+ * Returns &struct platform_device pointer on success, or ERR_PTR() on error.
+ */
+static inline struct platform_device *platform_device_register_simple(
+		const char *name, int id,
+		const struct resource *res, unsigned int num)
+{
+	return platform_device_register_resndata(NULL, name, id,
+			res, num, NULL, 0);
+}
+
+/**
+ * platform_device_register_data - add a platform-level device with platform-specific data
+ * @parent: parent device for the device we're adding
+ * @name: base name of the device we're adding
+ * @id: instance id
+ * @data: platform specific data for this platform device
+ * @size: size of platform specific data
+ *
+ * This function creates a simple platform device that requires minimal
+ * resource and memory management. Canned release function freeing memory
+ * allocated for the device allows drivers using such devices to be
+ * unloaded without waiting for the last reference to the device to be
+ * dropped.
+ *
+ * Returns &struct platform_device pointer on success, or ERR_PTR() on error.
+ */
+static inline struct platform_device *platform_device_register_data(
+		struct device *parent, const char *name, int id,
+		const void *data, size_t size)
+{
+	return platform_device_register_resndata(parent, name, id,
+			NULL, 0, data, size);
+}
 
 extern struct platform_device *platform_device_alloc(const char *name, int id);
 extern int platform_device_add_resources(struct platform_device *pdev,
-- 
cgit v1.2.3-70-g09d2


From 49c19400f60bbe362202d7e7b3e68cc66040d0fa Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 2 Jul 2010 16:54:05 +0200
Subject: sysfs: sysfs_chmod_file's attr can be const

sysfs_chmod_file doesn't change the attribute it operates on, so this
attribute can be marked const.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/file.c       | 3 ++-
 include/linux/sysfs.h | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1beaa739d0a..1b27b5688f6 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -593,7 +593,8 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
  * @mode: file permissions.
  *
  */
-int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
+int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
+		     mode_t mode)
 {
 	struct sysfs_dirent *sd;
 	struct iattr newattrs;
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index f2694eb4dd3..8bf06b64487 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -136,8 +136,8 @@ int __must_check sysfs_create_file(struct kobject *kobj,
 				   const struct attribute *attr);
 int __must_check sysfs_create_files(struct kobject *kobj,
 				   const struct attribute **attr);
-int __must_check sysfs_chmod_file(struct kobject *kobj, struct attribute *attr,
-				  mode_t mode);
+int __must_check sysfs_chmod_file(struct kobject *kobj,
+				  const struct attribute *attr, mode_t mode);
 void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr);
 void sysfs_remove_files(struct kobject *kobj, const struct attribute **attr);
 
@@ -225,7 +225,7 @@ static inline int sysfs_create_files(struct kobject *kobj,
 }
 
 static inline int sysfs_chmod_file(struct kobject *kobj,
-				   struct attribute *attr, mode_t mode)
+				   const struct attribute *attr, mode_t mode)
 {
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 45daef0fdcc44f6af86fdebc4fc7eb7c79375398 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Fri, 23 Jul 2010 19:56:18 +0900
Subject: Driver core: Add BUS_NOTIFY_BIND_DRIVER

Add BUS_NOTIFY_BIND_DRIVER as a bus notifier event.

With this in place we have the following bus notifier
events for driver binding/unbinding:
 - BUS_NOTIFY_BIND_DRIVER - before ->probe()
 - BUS_NOTIFY_BOUND_DRIVER - after ->probe()
 - BUS_NOTIFY_UNBIND_DRIVER - before ->remove()
 - BUS_NOTIFY_UNBOUND_DRIVER - after ->remove()

The event BUS_NOTIFY_BIND_DRIVER allows bus code
to be notified that ->probe() is about to be called.

This is useful for bus code that needs to set up hardware
before the driver gets to run. With this in place, platform
drivers can be loaded and unloaded as modules, and the
new BIND event allows bus code to control, for instance,
device clocks that must be enabled before the driver
can be executed.

Without this patch there is no way for the bus code to
get notified that a modular driver is about to be probed.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/dd.c      | 4 ++++
 include/linux/device.h | 8 +++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 503c2620bbc..da57ee9d63f 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -51,6 +51,10 @@ static int driver_sysfs_add(struct device *dev)
 {
 	int ret;
 
+	if (dev->bus)
+		blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
+					     BUS_NOTIFY_BIND_DRIVER, dev);
+
 	ret = sysfs_create_link(&dev->driver->p->kobj, &dev->kobj,
 			  kobject_name(&dev->kobj));
 	if (ret == 0) {
diff --git a/include/linux/device.h b/include/linux/device.h
index ddffdf7da39..0ca24e93304 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -109,10 +109,12 @@ extern int bus_unregister_notifier(struct bus_type *bus,
  */
 #define BUS_NOTIFY_ADD_DEVICE		0x00000001 /* device added */
 #define BUS_NOTIFY_DEL_DEVICE		0x00000002 /* device removed */
-#define BUS_NOTIFY_BOUND_DRIVER		0x00000003 /* driver bound to device */
-#define BUS_NOTIFY_UNBIND_DRIVER	0x00000004 /* driver about to be
+#define BUS_NOTIFY_BIND_DRIVER		0x00000003 /* driver about to be
+						      bound */
+#define BUS_NOTIFY_BOUND_DRIVER		0x00000004 /* driver bound to device */
+#define BUS_NOTIFY_UNBIND_DRIVER	0x00000005 /* driver about to be
 						      unbound */
-#define BUS_NOTIFY_UNBOUND_DRIVER	0x00000005 /* driver is unbound
+#define BUS_NOTIFY_UNBOUND_DRIVER	0x00000006 /* driver is unbound
 						      from the device */
 
 extern struct kset *bus_get_kset(struct bus_type *bus);
-- 
cgit v1.2.3-70-g09d2


From 6fd69dc578fa0b1bbc3aad70ae3af9a137211707 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <guenter.roeck@ericsson.com>
Date: Wed, 28 Jul 2010 22:09:26 -0700
Subject: sysfs: Remove owner field from sysfs struct attribute

Signed-off-by: Guenter Roeck <guenter.roeck@ericsson.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 8bf06b64487..3c92121ba9a 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -22,14 +22,8 @@ struct kobject;
 struct module;
 enum kobj_ns_type;
 
-/* FIXME
- * The *owner field is no longer used.
- * x86 tree has been cleaned up. The owner
- * attribute is still left for other arches.
- */
 struct attribute {
 	const char		*name;
-	struct module		*owner;
 	mode_t			mode;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lock_class_key	*key;
-- 
cgit v1.2.3-70-g09d2


From 6937e8f8c0135f2325194c372ada6dc655499992 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 5 Aug 2010 17:38:18 +0200
Subject: driver core: device_rename's new_name can be const

The new_name argument to device_rename() can be
const as kobject_rename's new_name argument is.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/core.c    | 2 +-
 include/linux/device.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index f8e72724dd4..d1b2c9adc27 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1599,7 +1599,7 @@ EXPORT_SYMBOL_GPL(device_destroy);
  * on the same device to ensure that new_name is valid and
  * won't conflict with other devices.
  */
-int device_rename(struct device *dev, char *new_name)
+int device_rename(struct device *dev, const char *new_name)
 {
 	char *old_class_name = NULL;
 	char *new_class_name = NULL;
diff --git a/include/linux/device.h b/include/linux/device.h
index 0ca24e93304..516fecacf27 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -552,7 +552,7 @@ extern int device_for_each_child(struct device *dev, void *data,
 		     int (*fn)(struct device *dev, void *data));
 extern struct device *device_find_child(struct device *dev, void *data,
 				int (*match)(struct device *dev, void *data));
-extern int device_rename(struct device *dev, char *new_name);
+extern int device_rename(struct device *dev, const char *new_name);
 extern int device_move(struct device *dev, struct device *new_parent,
 		       enum dpm_order dpm_order);
 extern const char *device_get_devnode(struct device *dev,
-- 
cgit v1.2.3-70-g09d2


From 31d1d48e199e99077fb30f6fb9a793be7bec756f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 6 Aug 2010 16:34:43 +0100
Subject: Fix init ordering of /dev/console vs callers of modprobe

Make /dev/console get initialised before any initialisation routine that
invokes modprobe because if modprobe fails, it's going to want to open
/dev/console, presumably to write an error message to.

The problem with that is that if the /dev/console driver is not yet
initialised, the chardev handler will call request_module() to invoke
modprobe, which will fail, because we never compile /dev/console as a
module.

This will lead to a modprobe loop, showing the following in the kernel
log:

	request_module: runaway loop modprobe char-major-5-1
	request_module: runaway loop modprobe char-major-5-1
	request_module: runaway loop modprobe char-major-5-1
	request_module: runaway loop modprobe char-major-5-1
	request_module: runaway loop modprobe char-major-5-1

This can happen, for example, when the built in md5 module can't find
the built in cryptomgr module (because the latter fails to initialise).
The md5 module comes before the call to tty_init(), presumably because
'crypto' comes before 'drivers' alphabetically.

Fix this by calling tty_init() from chr_dev_init().

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/mem.c    | 2 +-
 drivers/char/tty_io.c | 4 ++--
 fs/char_dev.c         | 1 +
 include/linux/tty.h   | 3 +++
 4 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index f54dab8acdc..a398ecdbd75 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -916,7 +916,7 @@ static int __init chr_dev_init(void)
 			      NULL, devlist[minor].name);
 	}
 
-	return 0;
+	return tty_init();
 }
 
 fs_initcall(chr_dev_init);
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index d71f0fc34b4..507441ac6ed 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -3128,7 +3128,7 @@ static struct cdev tty_cdev, console_cdev;
  * Ok, now we can initialize the rest of the tty devices and can count
  * on memory allocations, interrupts etc..
  */
-static int __init tty_init(void)
+int __init tty_init(void)
 {
 	cdev_init(&tty_cdev, &tty_fops);
 	if (cdev_add(&tty_cdev, MKDEV(TTYAUX_MAJOR, 0), 1) ||
@@ -3149,4 +3149,4 @@ static int __init tty_init(void)
 #endif
 	return 0;
 }
-module_init(tty_init);
+
diff --git a/fs/char_dev.c b/fs/char_dev.c
index d6db933df2b..f80a4f25123 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -20,6 +20,7 @@
 #include <linux/cdev.h>
 #include <linux/mutex.h>
 #include <linux/backing-dev.h>
+#include <linux/tty.h>
 
 #include "internal.h"
 
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 931078b7322..7802a243ee1 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -552,6 +552,9 @@ static inline void tty_audit_push_task(struct task_struct *tsk,
 }
 #endif
 
+/* tty_io.c */
+extern int __init tty_init(void);
+
 /* tty_ioctl.c */
 extern int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file,
 		       unsigned int cmd, unsigned long arg);
-- 
cgit v1.2.3-70-g09d2


From d5eff1a3412f6d75bf28f423c5015ece8055407a Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Tue, 3 Aug 2010 13:04:00 -0400
Subject: NFS: Fix /proc/mount for legacy binary interface

Add a flag so we know if we mounted the NFS server using the legacy
binary interface.  If we used the legacy interface, then we should not
show the mountd options.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c            | 4 ++++
 include/linux/nfs_mount.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f9df16de4a5..f1ae39f6cb0 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -546,6 +546,9 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
 {
 	struct sockaddr *sap = (struct sockaddr *)&nfss->mountd_address;
 
+	if (nfss->flags & NFS_MOUNT_LEGACY_INTERFACE)
+		return;
+
 	switch (sap->sa_family) {
 	case AF_INET: {
 		struct sockaddr_in *sin = (struct sockaddr_in *)sap;
@@ -1780,6 +1783,7 @@ static int nfs_validate_mount_data(void *options,
 		 * can deal with.
 		 */
 		args->flags		= data->flags & NFS_MOUNT_FLAGMASK;
+		args->flags		|= NFS_MOUNT_LEGACY_INTERFACE;
 		args->rsize		= data->rsize;
 		args->wsize		= data->wsize;
 		args->timeo		= data->timeo;
diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h
index 4499016e6d0..5d59ae861aa 100644
--- a/include/linux/nfs_mount.h
+++ b/include/linux/nfs_mount.h
@@ -69,5 +69,6 @@ struct nfs_mount_data {
 #define NFS_MOUNT_LOOKUP_CACHE_NONEG	0x10000
 #define NFS_MOUNT_LOOKUP_CACHE_NONE	0x20000
 #define NFS_MOUNT_NORESVPORT		0x40000
+#define NFS_MOUNT_LEGACY_INTERFACE	0x80000
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From 9261ec1a8d7b17e2540bef7cad3470870d13b61e Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Fri, 6 Aug 2010 15:36:47 -0500
Subject: console: Fix compilation regression

A regression of building without CONFIG_HW_CONSOLE was introduced with
commit b45cfba4e9005d64d419718e7ff7f7cab44c1994 (vt,console,kdb:
implement atomic console enter/leave functions).

ERROR: "con_debug_enter" [drivers/serial/kgdboc.ko] undefined!
ERROR: "vc_cons" [drivers/serial/kgdboc.ko] undefined!
ERROR: "fg_console" [drivers/serial/kgdboc.ko] undefined!
ERROR: "con_debug_leave" [drivers/serial/kgdboc.ko] undefined!

When there is no HW console the con_debug_enter and con_debug_leave
functions should have no code.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
CC: Jesse Barnes <jbarnes@virtuousgeek.org>
Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
---
 include/linux/console.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/console.h b/include/linux/console.h
index f76fc297322..95cf6f08a59 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -79,8 +79,13 @@ int register_con_driver(const struct consw *csw, int first, int last);
 int unregister_con_driver(const struct consw *csw);
 int take_over_console(const struct consw *sw, int first, int last, int deflt);
 void give_up_console(const struct consw *sw);
+#ifdef CONFIG_HW_CONSOLE
 int con_debug_enter(struct vc_data *vc);
 int con_debug_leave(void);
+#else
+#define con_debug_enter(vc) (0)
+#define con_debug_leave() (0)
+#endif
 
 /* scroll */
 #define SM_UP       (1)
-- 
cgit v1.2.3-70-g09d2