From 1813a68457bb45b378d5bbec615b167deff3bcfc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 20 Jul 2010 15:22:54 -0700 Subject: x86: Move alloc_desk_mask variables inside ifdef They are only useful with CONFIG_CPUMASK_OFFSTACK Avoids hundreds of warnings with a gcc 4.6 -Wall build. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index c03243ad84b..fff1d77c375 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -439,12 +439,12 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); static inline bool alloc_desc_masks(struct irq_desc *desc, int node, bool boot) { +#ifdef CONFIG_CPUMASK_OFFSTACK gfp_t gfp = GFP_ATOMIC; if (boot) gfp = GFP_NOWAIT; -#ifdef CONFIG_CPUMASK_OFFSTACK if (!alloc_cpumask_var_node(&desc->affinity, gfp, node)) return false; -- cgit v1.2.3-18-g5258 From e3239ff92a17976ac5d26fa0fe40ef3a9daf2523 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 14:06:41 +1000 Subject: memblock: Rename memblock_region to memblock_type and memblock_property to memblock_region Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index a59faf2b5ed..86e7daf742f 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -18,22 +18,22 @@ #define MAX_MEMBLOCK_REGIONS 128 -struct memblock_property { +struct memblock_region { u64 base; u64 size; }; -struct memblock_region { +struct memblock_type { unsigned long cnt; u64 size; - struct memblock_property region[MAX_MEMBLOCK_REGIONS+1]; + struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; }; struct memblock { unsigned long debug; u64 rmo_size; - struct memblock_region memory; - struct memblock_region reserved; + struct memblock_type memory; + struct memblock_type reserved; }; extern struct memblock memblock; @@ -56,27 +56,27 @@ extern u64 memblock_end_of_DRAM(void); extern void __init memblock_enforce_memory_limit(u64 memory_limit); extern int __init memblock_is_reserved(u64 addr); extern int memblock_is_region_reserved(u64 base, u64 size); -extern int memblock_find(struct memblock_property *res); +extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); static inline u64 -memblock_size_bytes(struct memblock_region *type, unsigned long region_nr) +memblock_size_bytes(struct memblock_type *type, unsigned long region_nr) { - return type->region[region_nr].size; + return type->regions[region_nr].size; } static inline u64 -memblock_size_pages(struct memblock_region *type, unsigned long region_nr) +memblock_size_pages(struct memblock_type *type, unsigned long region_nr) { return memblock_size_bytes(type, region_nr) >> PAGE_SHIFT; } static inline u64 -memblock_start_pfn(struct memblock_region *type, unsigned long region_nr) +memblock_start_pfn(struct memblock_type *type, unsigned long region_nr) { - return type->region[region_nr].base >> PAGE_SHIFT; + return type->regions[region_nr].base >> PAGE_SHIFT; } static inline u64 -memblock_end_pfn(struct memblock_region *type, unsigned long region_nr) +memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) { return memblock_start_pfn(type, region_nr) + memblock_size_pages(type, region_nr); -- cgit v1.2.3-18-g5258 From 411a25a80da328f5ae6b6c037872ffe867fcc130 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:38:56 -0700 Subject: memblock: No reason to include asm/memblock.h late Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 86e7daf742f..4b6931327b2 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -16,6 +16,8 @@ #include #include +#include + #define MAX_MEMBLOCK_REGIONS 128 struct memblock_region { @@ -82,8 +84,6 @@ memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) memblock_size_pages(type, region_nr); } -#include - #endif /* __KERNEL__ */ #endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.2.3-18-g5258 From 72d4b0b4e0e7fa858767e03972771a9f7c02b689 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 14:38:47 +1000 Subject: memblock: Implement memblock_is_memory and memblock_is_region_memory To make it fast, we steal ARM's binary search for memblock_is_memory() and we use that to also the replace existing implementation of memblock_is_reserved(). Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 4b6931327b2..47bceb18705 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -56,6 +56,8 @@ extern u64 __init __memblock_alloc_base(u64 size, extern u64 __init memblock_phys_mem_size(void); extern u64 memblock_end_of_DRAM(void); extern void __init memblock_enforce_memory_limit(u64 memory_limit); +extern int memblock_is_memory(u64 addr); +extern int memblock_is_region_memory(u64 base, u64 size); extern int __init memblock_is_reserved(u64 addr); extern int memblock_is_region_reserved(u64 base, u64 size); extern int memblock_find(struct memblock_region *res); -- cgit v1.2.3-18-g5258 From 5b385f259fa4d356452e3b4729cbaf5213f4f55b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:40:38 +1000 Subject: memblock: Introduce for_each_memblock() and new accessors Walk memblock's using for_each_memblock() and use memblock_region_base/end_pfn() for getting to PFNs. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 47bceb18705..c914112cd24 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -64,6 +64,7 @@ extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); +/* Obsolete accessors */ static inline u64 memblock_size_bytes(struct memblock_type *type, unsigned long region_nr) { @@ -86,6 +87,57 @@ memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) memblock_size_pages(type, region_nr); } +/* + * pfn conversion functions + * + * While the memory MEMBLOCKs should always be page aligned, the reserved + * MEMBLOCKs may not be. This accessor attempt to provide a very clear + * idea of what they return for such non aligned MEMBLOCKs. + */ + +/** + * memblock_region_base_pfn - Return the lowest pfn intersecting with the region + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_base_pfn(const struct memblock_region *reg) +{ + return reg->base >> PAGE_SHIFT; +} + +/** + * memblock_region_last_pfn - Return the highest pfn intersecting with the region + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_last_pfn(const struct memblock_region *reg) +{ + return (reg->base + reg->size - 1) >> PAGE_SHIFT; +} + +/** + * memblock_region_end_pfn - Return the pfn of the first page following the region + * but not intersecting it + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_end_pfn(const struct memblock_region *reg) +{ + return memblock_region_last_pfn(reg) + 1; +} + +/** + * memblock_region_pages - Return the number of pages covering a region + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_pages(const struct memblock_region *reg) +{ + return memblock_region_end_pfn(reg) - memblock_region_end_pfn(reg); +} + +#define for_each_memblock(memblock_type, region) \ + for (region = memblock.memblock_type.regions; \ + region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ + region++) + + #endif /* __KERNEL__ */ #endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.2.3-18-g5258 From 1e2b904026e9debf95f500b8980a00c43ac0f31c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:52:25 +1000 Subject: memblock: Remove obsolete accessors Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c914112cd24..7d70fdd43db 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -64,29 +64,6 @@ extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); -/* Obsolete accessors */ -static inline u64 -memblock_size_bytes(struct memblock_type *type, unsigned long region_nr) -{ - return type->regions[region_nr].size; -} -static inline u64 -memblock_size_pages(struct memblock_type *type, unsigned long region_nr) -{ - return memblock_size_bytes(type, region_nr) >> PAGE_SHIFT; -} -static inline u64 -memblock_start_pfn(struct memblock_type *type, unsigned long region_nr) -{ - return type->regions[region_nr].base >> PAGE_SHIFT; -} -static inline u64 -memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) -{ - return memblock_start_pfn(type, region_nr) + - memblock_size_pages(type, region_nr); -} - /* * pfn conversion functions * -- cgit v1.2.3-18-g5258 From b693fffb189fbfe7e1e8317ce5838808be8666a0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:52:55 +1000 Subject: memblock: Remove memblock_find() Nobody uses it anymore. It's semantics were ... weird Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 7d70fdd43db..776c7d945dc 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -60,7 +60,6 @@ extern int memblock_is_memory(u64 addr); extern int memblock_is_region_memory(u64 base, u64 size); extern int __init memblock_is_reserved(u64 addr); extern int memblock_is_region_reserved(u64 base, u64 size); -extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); -- cgit v1.2.3-18-g5258 From 35a1f0bd07015dde66501b47cfb6ddc72ebe7346 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:38:58 -0700 Subject: memblock: Remove nid_range argument, arch provides memblock_nid_range() instead Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 776c7d945dc..367dea6e95a 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -46,8 +46,7 @@ extern long memblock_add(u64 base, u64 size); extern long memblock_remove(u64 base, u64 size); extern long __init memblock_free(u64 base, u64 size); extern long __init memblock_reserve(u64 base, u64 size); -extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, - u64 (*nid_range)(u64, u64, int *)); +extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); extern u64 __init memblock_alloc(u64 size, u64 align); extern u64 __init memblock_alloc_base(u64 size, u64, u64 max_addr); @@ -63,6 +62,10 @@ extern int memblock_is_region_reserved(u64 base, u64 size); extern void memblock_dump_all(void); +/* Provided by the architecture */ +extern u64 memblock_nid_range(u64 start, u64 end, int *nid); + + /* * pfn conversion functions * -- cgit v1.2.3-18-g5258 From 27f574c223d2c09610058b3ec7a29582d63a3e06 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:00 -0700 Subject: memblock: Expose MEMBLOCK_ALLOC_ANYWHERE Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 367dea6e95a..3cf3304e901 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -50,6 +50,7 @@ extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); extern u64 __init memblock_alloc(u64 size, u64 align); extern u64 __init memblock_alloc_base(u64 size, u64, u64 max_addr); +#define MEMBLOCK_ALLOC_ANYWHERE 0 extern u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr); extern u64 __init memblock_phys_mem_size(void); -- cgit v1.2.3-18-g5258 From e63075a3c9377536d085bc013cd3fe6323162449 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:01 -0700 Subject: memblock: Introduce default allocation limit and use it to replace explicit ones This introduce memblock.current_limit which is used to limit allocations from memblock_alloc() or memblock_alloc_base(..., MEMBLOCK_ALLOC_ACCESSIBLE). The old MEMBLOCK_ALLOC_ANYWHERE changes value from 0 to ~(u64)0 and can still be used with memblock_alloc_base() to allocate really anywhere. It is -no-longer- cropped to MEMBLOCK_REAL_LIMIT which disappears. Note to archs: I'm leaving the default limit to MEMBLOCK_ALLOC_ANYWHERE. I strongly recommend that you ensure that you set an appropriate limit during boot in order to guarantee that an memblock_alloc() at any time results in something that is accessible with a simple __va(). The reason is that a subsequent patch will introduce the ability for the array to resize itself by reallocating itself. The MEMBLOCK core will honor the current limit when performing those allocations. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 3cf3304e901..c4f6e53264e 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -34,6 +34,7 @@ struct memblock_type { struct memblock { unsigned long debug; u64 rmo_size; + u64 current_limit; struct memblock_type memory; struct memblock_type reserved; }; @@ -46,11 +47,16 @@ extern long memblock_add(u64 base, u64 size); extern long memblock_remove(u64 base, u64 size); extern long __init memblock_free(u64 base, u64 size); extern long __init memblock_reserve(u64 base, u64 size); + extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); extern u64 __init memblock_alloc(u64 size, u64 align); + +/* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ +#define MEMBLOCK_ALLOC_ANYWHERE (~(u64)0) +#define MEMBLOCK_ALLOC_ACCESSIBLE 0 + extern u64 __init memblock_alloc_base(u64 size, u64, u64 max_addr); -#define MEMBLOCK_ALLOC_ANYWHERE 0 extern u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr); extern u64 __init memblock_phys_mem_size(void); @@ -66,6 +72,14 @@ extern void memblock_dump_all(void); /* Provided by the architecture */ extern u64 memblock_nid_range(u64 start, u64 end, int *nid); +/** + * memblock_set_current_limit - Set the current allocation limit to allow + * limiting allocations to what is currently + * accessible during boot + * @limit: New limit value (physical address) + */ +extern void memblock_set_current_limit(u64 limit); + /* * pfn conversion functions -- cgit v1.2.3-18-g5258 From cd3db0c4ca3d237e7ad20f7107216e575705d2b0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:02 -0700 Subject: memblock: Remove rmo_size, burry it in arch/powerpc where it belongs The RMA (RMO is a misnomer) is a concept specific to ppc64 (in fact server ppc64 though I hijack it on embedded ppc64 for similar purposes) and represents the area of memory that can be accessed in real mode (aka with MMU off), or on embedded, from the exception vectors (which is bolted in the TLB) which pretty much boils down to the same thing. We take that out of the generic MEMBLOCK data structure and move it into arch/powerpc where it belongs, renaming it to "RMA" while at it. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c4f6e53264e..71b8edc6ede 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -33,7 +33,6 @@ struct memblock_type { struct memblock { unsigned long debug; - u64 rmo_size; u64 current_limit; struct memblock_type memory; struct memblock_type reserved; -- cgit v1.2.3-18-g5258 From 2898cc4cdf208f15246b7a1c6951d2b126a70fd6 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:34:42 +1000 Subject: memblock: Change u64 to phys_addr_t Let's not waste space and cycles on archs that don't support >32-bit physical address space. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 71b8edc6ede..b65045a4ed0 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -21,19 +21,19 @@ #define MAX_MEMBLOCK_REGIONS 128 struct memblock_region { - u64 base; - u64 size; + phys_addr_t base; + phys_addr_t size; }; struct memblock_type { unsigned long cnt; - u64 size; + phys_addr_t size; struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; }; struct memblock { unsigned long debug; - u64 current_limit; + phys_addr_t current_limit; struct memblock_type memory; struct memblock_type reserved; }; @@ -42,34 +42,34 @@ extern struct memblock memblock; extern void __init memblock_init(void); extern void __init memblock_analyze(void); -extern long memblock_add(u64 base, u64 size); -extern long memblock_remove(u64 base, u64 size); -extern long __init memblock_free(u64 base, u64 size); -extern long __init memblock_reserve(u64 base, u64 size); +extern long memblock_add(phys_addr_t base, phys_addr_t size); +extern long memblock_remove(phys_addr_t base, phys_addr_t size); +extern long __init memblock_free(phys_addr_t base, phys_addr_t size); +extern long __init memblock_reserve(phys_addr_t base, phys_addr_t size); -extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); -extern u64 __init memblock_alloc(u64 size, u64 align); +extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); +extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ -#define MEMBLOCK_ALLOC_ANYWHERE (~(u64)0) +#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) #define MEMBLOCK_ALLOC_ACCESSIBLE 0 -extern u64 __init memblock_alloc_base(u64 size, - u64, u64 max_addr); -extern u64 __init __memblock_alloc_base(u64 size, - u64 align, u64 max_addr); -extern u64 __init memblock_phys_mem_size(void); -extern u64 memblock_end_of_DRAM(void); -extern void __init memblock_enforce_memory_limit(u64 memory_limit); -extern int memblock_is_memory(u64 addr); -extern int memblock_is_region_memory(u64 base, u64 size); -extern int __init memblock_is_reserved(u64 addr); -extern int memblock_is_region_reserved(u64 base, u64 size); +extern phys_addr_t __init memblock_alloc_base(phys_addr_t size, + phys_addr_t, phys_addr_t max_addr); +extern phys_addr_t __init __memblock_alloc_base(phys_addr_t size, + phys_addr_t align, phys_addr_t max_addr); +extern phys_addr_t __init memblock_phys_mem_size(void); +extern phys_addr_t memblock_end_of_DRAM(void); +extern void __init memblock_enforce_memory_limit(phys_addr_t memory_limit); +extern int memblock_is_memory(phys_addr_t addr); +extern int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); +extern int __init memblock_is_reserved(phys_addr_t addr); +extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); extern void memblock_dump_all(void); /* Provided by the architecture */ -extern u64 memblock_nid_range(u64 start, u64 end, int *nid); +extern phys_addr_t memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid); /** * memblock_set_current_limit - Set the current allocation limit to allow @@ -77,7 +77,7 @@ extern u64 memblock_nid_range(u64 start, u64 end, int *nid); * accessible during boot * @limit: New limit value (physical address) */ -extern void memblock_set_current_limit(u64 limit); +extern void memblock_set_current_limit(phys_addr_t limit); /* -- cgit v1.2.3-18-g5258 From 9d3c30f5a17ec35894eadb7171f724643dce19c3 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:04 -0700 Subject: memblock: Remove unused memblock.debug struct member Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b65045a4ed0..0fe6dd56a4b 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -32,7 +32,6 @@ struct memblock_type { }; struct memblock { - unsigned long debug; phys_addr_t current_limit; struct memblock_type memory; struct memblock_type reserved; @@ -55,9 +54,11 @@ extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); #define MEMBLOCK_ALLOC_ACCESSIBLE 0 extern phys_addr_t __init memblock_alloc_base(phys_addr_t size, - phys_addr_t, phys_addr_t max_addr); + phys_addr_t align, + phys_addr_t max_addr); extern phys_addr_t __init __memblock_alloc_base(phys_addr_t size, - phys_addr_t align, phys_addr_t max_addr); + phys_addr_t align, + phys_addr_t max_addr); extern phys_addr_t __init memblock_phys_mem_size(void); extern phys_addr_t memblock_end_of_DRAM(void); extern void __init memblock_enforce_memory_limit(phys_addr_t memory_limit); -- cgit v1.2.3-18-g5258 From 4734b594c6ca1be796d30c82d93fdf5160f45124 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 28 Jul 2010 14:31:29 +1000 Subject: memblock: Remove memblock_type.size and add memblock.memory_size instead Right now, both the "memory" and "reserved" memblock_type structures have a "size" member. It represents the calculated memory size in the former case and is unused in the latter. This moves it out to the main memblock structure instead Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 0fe6dd56a4b..c9c7b0f344a 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -27,12 +27,12 @@ struct memblock_region { struct memblock_type { unsigned long cnt; - phys_addr_t size; struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; }; struct memblock { phys_addr_t current_limit; + phys_addr_t memory_size; /* Updated by memblock_analyze() */ struct memblock_type memory; struct memblock_type reserved; }; -- cgit v1.2.3-18-g5258 From bf23c51f1f49d3960f3cd8e3d2e7f943d9c41042 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:06 -0700 Subject: memblock: Move memblock arrays to static storage in memblock.c and make their size a variable This is in preparation for having resizable arrays. Note that we still allocate one more than needed, this is unchanged from the previous implementation. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c9c7b0f344a..150be938b91 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -18,7 +18,7 @@ #include -#define MAX_MEMBLOCK_REGIONS 128 +#define INIT_MEMBLOCK_REGIONS 128 struct memblock_region { phys_addr_t base; @@ -26,8 +26,9 @@ struct memblock_region { }; struct memblock_type { - unsigned long cnt; - struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; + unsigned long cnt; /* number of regions */ + unsigned long max; /* size of the allocated array */ + struct memblock_region *regions; }; struct memblock { -- cgit v1.2.3-18-g5258 From d2cd563ba82c424083b78e0ce97d68bfb04d1242 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:14 -0700 Subject: memblock: Add arch function to control coalescing of memblock memory regions Some archs such as ARM want to avoid coalescing accross things such as the lowmem/highmem boundary or similar. This provides the option to control it via an arch callback for which a weak default is provided which always allows coalescing. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 150be938b91..e5e8f9db3a8 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -72,6 +72,8 @@ extern void memblock_dump_all(void); /* Provided by the architecture */ extern phys_addr_t memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid); +extern int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, + phys_addr_t addr2, phys_addr_t size2); /** * memblock_set_current_limit - Set the current allocation limit to allow -- cgit v1.2.3-18-g5258 From c196f76fd5ece716ee3b7fa5dda3576961c0cecc Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:16 -0700 Subject: memblock: NUMA allocate can now use early_pfn_map We now provide a default (weak) implementation of memblock_nid_range() which uses the early_pfn_map[] if CONFIG_ARCH_POPULATES_NODE_MAP is set. Sparc still needs to use its own method due to the way the pages can be scattered between nodes. This implementation is inefficient due to our main algorithm and callback construct wanting to work on an ascending addresses bases while early_pfn_map[] would rather work with nid's (it's unsorted at that stage). But it should work and we can look into improving it subsequently, possibly using arch compile options to chose a different algorithm alltogether. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e5e8f9db3a8..82b030244aa 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -47,6 +47,9 @@ extern long memblock_remove(phys_addr_t base, phys_addr_t size); extern long __init memblock_free(phys_addr_t base, phys_addr_t size); extern long __init memblock_reserve(phys_addr_t base, phys_addr_t size); +/* The numa aware allocator is only available if + * CONFIG_ARCH_POPULATES_NODE_MAP is set + */ extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); -- cgit v1.2.3-18-g5258 From 9d1e24928e6a0728d1c7c76818ccbd11b93e7ac9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:17 -0700 Subject: memblock: Separate memblock_alloc_nid() and memblock_alloc_try_nid() The former is now strict, it will fail if it cannot honor the allocation within the node, while the later implements the previous semantic which falls back to allocating anywhere. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 82b030244aa..c8da03eb7ba 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -50,7 +50,11 @@ extern long __init memblock_reserve(phys_addr_t base, phys_addr_t size); /* The numa aware allocator is only available if * CONFIG_ARCH_POPULATES_NODE_MAP is set */ -extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); +extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, + int nid); +extern phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, + int nid); + extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ -- cgit v1.2.3-18-g5258 From 5e63cf43af844ed30acc278b38b8c9bc51eba493 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:07:21 +1000 Subject: memblock: Expose some memblock bits for use by x86 This exposes memblock_debug and associated memblock_dbg() macro, along with memblock_can_resize so that x86 can use these when ported to use memblock Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c8da03eb7ba..eed0f9b8e52 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -39,6 +39,11 @@ struct memblock { }; extern struct memblock memblock; +extern int memblock_debug; +extern int memblock_can_resize; + +#define memblock_dbg(fmt, ...) \ + if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) extern void __init memblock_init(void); extern void __init memblock_analyze(void); -- cgit v1.2.3-18-g5258 From 37d8d4bf489e39eedc9537f8616fe87879b13cb0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:20:58 +1000 Subject: memblock: Export MEMBLOCK_ERROR will used by x86 memblock_x86_find_in_range_node and nobootmem replacement Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index eed0f9b8e52..1a9c29cc92f 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -18,7 +18,8 @@ #include -#define INIT_MEMBLOCK_REGIONS 128 +#define INIT_MEMBLOCK_REGIONS 128 +#define MEMBLOCK_ERROR (~(phys_addr_t)0) struct memblock_region { phys_addr_t base; -- cgit v1.2.3-18-g5258 From 25818f0f288cd5333ba5a90ad6dde3def4c4ff58 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 28 Jul 2010 15:25:10 +1000 Subject: memblock: Make MEMBLOCK_ERROR be 0 And ensure we don't hand out 0 as a valid allocation. We put the low limit at PAGE_SIZE arbitrarily. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 1a9c29cc92f..dfa64494ced 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -19,7 +19,7 @@ #include #define INIT_MEMBLOCK_REGIONS 128 -#define MEMBLOCK_ERROR (~(phys_addr_t)0) +#define MEMBLOCK_ERROR 0 struct memblock_region { phys_addr_t base; -- cgit v1.2.3-18-g5258 From f0b37fad9a63217c39997b2d2b31f44e3d8be727 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:28:21 +1000 Subject: memblock: Protect memblock.h with CONFIG_HAVE_MEMBLOCK This should make it easier to catch/debug incorrect use when the CONFIG_ option isn't set. Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index dfa64494ced..c24b2784909 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -2,6 +2,7 @@ #define _LINUX_MEMBLOCK_H #ifdef __KERNEL__ +#ifdef CONFIG_HAVE_MEMBLOCK /* * Logical memory blocks. * @@ -148,6 +149,8 @@ static inline unsigned long memblock_region_pages(const struct memblock_region * region++) +#endif /* CONFIG_HAVE_MEMBLOCK */ + #endif /* __KERNEL__ */ #endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.2.3-18-g5258 From 10d0643988e976360eb3497dcafb55b393b8e480 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:43:02 +1000 Subject: memblock: Option for the architecture to put memblock into the .init section Arch code can define ARCH_DISCARD_MEMBLOCK in asm/memblock.h, which in turns causes memblock code and data to go respectively into the .init and .initdata sections. This will be used by the x86 architecture. If ARCH_DISCARD_MEMBLOCK is defined, the debugfs files to inspect the memblock arrays after boot are not created. Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c24b2784909..3978e6a8e82 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -149,6 +149,14 @@ static inline unsigned long memblock_region_pages(const struct memblock_region * region++) +#ifdef ARCH_DISCARD_MEMBLOCK +#define __init_memblock __init +#define __initdata_memblock __initdata +#else +#define __init_memblock +#define __initdata_memblock +#endif + #endif /* CONFIG_HAVE_MEMBLOCK */ #endif /* __KERNEL__ */ -- cgit v1.2.3-18-g5258 From 5303b68f57c227c27193a14e57dd12be27cd670f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:38:40 +1000 Subject: memblock: Add memblock_find_in_range() This is a wrapper for memblock_find_base() using slightly different arguments (start,end instead of start,size for example) in order to make it easier to convert existing arch/x86 code. Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 3978e6a8e82..4df09bdcae4 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -47,6 +47,8 @@ extern int memblock_can_resize; #define memblock_dbg(fmt, ...) \ if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align); + extern void __init memblock_init(void); extern void __init memblock_analyze(void); extern long memblock_add(phys_addr_t base, phys_addr_t size); -- cgit v1.2.3-18-g5258 From 73e4008ddddc84d5f2499c17012b340a0dae153e Mon Sep 17 00:00:00 2001 From: Nikolai Kondrashov Date: Fri, 6 Aug 2010 23:03:06 +0400 Subject: HID: allow resizing and replacing report descriptors Update hid_driver's report_fixup prototype to allow changing report descriptor size and/or returning completely different report descriptor. Update existing usage accordingly. This is to give more freedom in descriptor fixup and to allow having a whole fixed descriptor in the code for the sake of readability. Signed-off-by: Nikolai Kondrashov Signed-off-by: Jiri Kosina --- include/linux/hid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 42a0f1d1136..0a34fb07137 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -626,8 +626,8 @@ struct hid_driver { int (*event)(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value); - void (*report_fixup)(struct hid_device *hdev, __u8 *buf, - unsigned int size); + __u8 *(*report_fixup)(struct hid_device *hdev, __u8 *buf, + unsigned int *size); int (*input_mapping)(struct hid_device *hdev, struct hid_input *hidinput, struct hid_field *field, -- cgit v1.2.3-18-g5258 From c84e032e145775032fa9078b55e6333dd866603b Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Sat, 14 Aug 2010 09:43:22 -0700 Subject: include/video/vga.h: update web address. The below updates a broken web address to one(hopefully) thats the new correct address. Signed-off-by: Justin P. Mattock Signed-off-by: Jiri Kosina --- include/video/vga.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/video/vga.h b/include/video/vga.h index b49a5120ca2..2b8691f7d25 100644 --- a/include/video/vga.h +++ b/include/video/vga.h @@ -5,7 +5,7 @@ * * Copyright history from vga16fb.c: * Copyright 1999 Ben Pfaff and Petr Vandrovec - * Based on VGA info at http://www.goodnet.com/~tinara/FreeVGA/home.htm + * Based on VGA info at http://www.osdever.net/FreeVGA/home.htm * Based on VESA framebuffer (c) 1998 Gerd Knorr * * This file is subject to the terms and conditions of the GNU General -- cgit v1.2.3-18-g5258 From 4e6cbfd09c66893e5134c9896e9af353c2322b66 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 29 Jul 2010 16:14:13 -0400 Subject: mac80211: support use of NAPI for bottom-half processing This patch implement basic infrastructure to support use of NAPI by mac80211-based hardware drivers. Because mac80211 devices can support multiple netdevs, a dummy netdev is used for interfacing with the NAPI code in the core of the network stack. That structure is hidden from the hardware drivers, but the actual napi_struct is exposed in the ieee80211_hw structure so that the poll routines in drivers can retrieve that structure. Hardware drivers can also specify their own weight value for NAPI polling. Signed-off-by: John W. Linville --- include/net/mac80211.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b0787a1dea9..3f1e03b521e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1102,6 +1102,10 @@ enum ieee80211_hw_flags { * * @max_rates: maximum number of alternate rate retry stages * @max_rate_tries: maximum number of tries for each stage + * + * @napi_weight: weight used for NAPI polling. You must specify an + * appropriate value here if a napi_poll operation is provided + * by your driver. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -1113,6 +1117,7 @@ struct ieee80211_hw { int channel_change_time; int vif_data_size; int sta_data_size; + int napi_weight; u16 queues; u16 max_listen_interval; s8 max_signal; @@ -1687,6 +1692,8 @@ enum ieee80211_ampdu_mlme_action { * switch operation for CSAs received from the AP may implement this * callback. They must then call ieee80211_chswitch_done() to indicate * completion of the channel switch. + * + * @napi_poll: Poll Rx queue for incoming data frames. */ struct ieee80211_ops { int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -1752,6 +1759,7 @@ struct ieee80211_ops { void (*flush)(struct ieee80211_hw *hw, bool drop); void (*channel_switch)(struct ieee80211_hw *hw, struct ieee80211_channel_switch *ch_switch); + int (*napi_poll)(struct ieee80211_hw *hw, int budget); }; /** @@ -1897,6 +1905,22 @@ void ieee80211_free_hw(struct ieee80211_hw *hw); */ void ieee80211_restart_hw(struct ieee80211_hw *hw); +/** ieee80211_napi_schedule - schedule NAPI poll + * + * Use this function to schedule NAPI polling on a device. + * + * @hw: the hardware to start polling + */ +void ieee80211_napi_schedule(struct ieee80211_hw *hw); + +/** ieee80211_napi_complete - complete NAPI polling + * + * Use this function to finish NAPI polling on a device. + * + * @hw: the hardware to stop polling + */ +void ieee80211_napi_complete(struct ieee80211_hw *hw); + /** * ieee80211_rx - receive frame * -- cgit v1.2.3-18-g5258 From 7da7cc1d42d8ce02cca16df8c021e6d657f1f8fd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:02:38 +0200 Subject: mac80211: per interface idle notification Sometimes we don't just need to know whether or not the device is idle, but also per interface. This adds that reporting capability to mac80211. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3f1e03b521e..3a3c26f647b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -149,6 +149,7 @@ struct ieee80211_low_level_stats { * @BSS_CHANGED_ARP_FILTER: Hardware ARP filter address list or state changed. * @BSS_CHANGED_QOS: QoS for this association was enabled/disabled. Note * that it is only ever disabled for station mode. + * @BSS_CHANGED_IDLE: Idle changed for this BSS/interface. */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -165,6 +166,7 @@ enum ieee80211_bss_change { BSS_CHANGED_IBSS = 1<<11, BSS_CHANGED_ARP_FILTER = 1<<12, BSS_CHANGED_QOS = 1<<13, + BSS_CHANGED_IDLE = 1<<14, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -223,6 +225,9 @@ enum ieee80211_bss_change { * hardware must not perform any ARP filtering. Note, that the filter will * be enabled also in promiscuous mode. * @qos: This is a QoS-enabled BSS. + * @idle: This interface is idle. There's also a global idle flag in the + * hardware config which may be more appropriate depending on what + * your driver/device needs to do. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -247,6 +252,7 @@ struct ieee80211_bss_conf { u8 arp_addr_cnt; bool arp_filter_enabled; bool qos; + bool idle; }; /** -- cgit v1.2.3-18-g5258 From d1f5b7a34aa5ff703c4966ea2652d4212ac75940 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:05:55 +0200 Subject: mac80211: allow drivers to request SM PS mode change Sometimes drivers have more information than the stack about how their antennas/chains are used, and may require that the SM PS mode be changed. This could happen, for example, when detecting that the user disconnected an antenna. Thus this patch introduces API to allow drivers to request SM PS mode changes. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3a3c26f647b..871ed1de736 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2548,6 +2548,18 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, */ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); +/** + * ieee80211_request_smps - request SM PS transition + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @mode: new SM PS mode + * + * This allows the driver to request an SM PS transition in managed + * mode. This is useful when the driver has more information than + * the stack about possible interference, for example by bluetooth. + */ +void ieee80211_request_smps(struct ieee80211_vif *vif, + enum ieee80211_smps_mode smps_mode); + /* Rate control API */ /** -- cgit v1.2.3-18-g5258 From 04600794958f1833f5571c6cde40f260ab557f55 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:45:15 +0200 Subject: cfg80211: support sysfs namespaces Enable using network namespaces with wireless devices even when sysfs is enabled using the same infrastructure that was built for netdevs. Signed-off-by: Johannes Berg Acked-by: "Eric W. Biederman" Signed-off-by: John W. Linville --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 46c36ffe20e..a4b14fd81c6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2171,6 +2171,8 @@ extern void dev_seq_stop(struct seq_file *seq, void *v); extern int netdev_class_create_file(struct class_attribute *class_attr); extern void netdev_class_remove_file(struct class_attribute *class_attr); +extern struct kobj_ns_type_operations net_ns_type_operations; + extern char *netdev_drivername(const struct net_device *dev, char *buffer, int len); extern void linkwatch_run_queue(void); -- cgit v1.2.3-18-g5258 From 97359d1235eaf634fe706c9faa6e40181cc95fb8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Aug 2010 09:46:38 +0200 Subject: mac80211: use cipher suite selectors Currently, mac80211 translates the cfg80211 cipher suite selectors into ALG_* values. That isn't all too useful, and some drivers benefit from the distinction between WEP40 and WEP104 as well. Therefore, convert it all to use the cipher suite selectors. Signed-off-by: Johannes Berg Acked-by: Gertjan van Wingerde Signed-off-by: John W. Linville --- include/net/mac80211.h | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 871ed1de736..914c747ac3a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -788,20 +788,6 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) return false; } -/** - * enum ieee80211_key_alg - key algorithm - * @ALG_WEP: WEP40 or WEP104 - * @ALG_TKIP: TKIP - * @ALG_CCMP: CCMP (AES) - * @ALG_AES_CMAC: AES-128-CMAC - */ -enum ieee80211_key_alg { - ALG_WEP, - ALG_TKIP, - ALG_CCMP, - ALG_AES_CMAC, -}; - /** * enum ieee80211_key_flags - key flags * @@ -839,7 +825,7 @@ enum ieee80211_key_flags { * @hw_key_idx: To be set by the driver, this is the key index the driver * wants to be given when a frame is transmitted and needs to be * encrypted in hardware. - * @alg: The key algorithm. + * @cipher: The key's cipher suite selector. * @flags: key flags, see &enum ieee80211_key_flags. * @keyidx: the key index (0-3) * @keylen: key material length @@ -852,7 +838,7 @@ enum ieee80211_key_flags { * @iv_len: The IV length for this key type */ struct ieee80211_key_conf { - enum ieee80211_key_alg alg; + u32 cipher; u8 icv_len; u8 iv_len; u8 hw_key_idx; -- cgit v1.2.3-18-g5258 From bfb564e7391340638afe4ad67744a8f3858e7566 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Wed, 4 Aug 2010 06:15:52 +0000 Subject: core: Factor out flow calculation from get_rps_cpu Factor out flow calculation code from get_rps_cpu, since other functions can use the same code. Revisions: v2 (Ben): Separate flow calcuation out and use in select queue. v3 (Arnd): Don't re-implement MIN. v4 (Changli): skb->data points to ethernet header in macvtap, and make a fast path. Tested macvtap with this patch. v5 (Changli): - Cache skb->rxhash in skb_get_rxhash - macvtap may not have pow(2) queues, so change code for queue selection. (Arnd): - Use first available queue if all fails. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 77eb60d2b49..d8050382b18 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -558,6 +558,15 @@ extern unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config, struct ts_state *state); +extern __u32 __skb_get_rxhash(struct sk_buff *skb); +static inline __u32 skb_get_rxhash(struct sk_buff *skb) +{ + if (!skb->rxhash) + skb->rxhash = __skb_get_rxhash(skb); + + return skb->rxhash; +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { -- cgit v1.2.3-18-g5258 From 1565c7c1c4c8e931bdba66abc8aa6f141a406872 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Wed, 4 Aug 2010 06:15:59 +0000 Subject: macvtap: Implement multiqueue for macvtap driver Implement multiqueue facility for macvtap driver. The idea is that a macvtap device can be opened multiple times and the fd's can be used to register eg, as backend for vhost. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 35280b30229..8a2fd66a8b5 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -40,6 +40,12 @@ struct macvlan_rx_stats { unsigned long rx_errors; }; +/* + * Maximum times a macvtap device can be opened. This can be used to + * configure the number of receive queue, e.g. for multiqueue virtio. + */ +#define MAX_MACVTAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16) + struct macvlan_dev { struct net_device *dev; struct list_head list; @@ -50,7 +56,8 @@ struct macvlan_dev { enum macvlan_mode mode; int (*receive)(struct sk_buff *skb); int (*forward)(struct net_device *dev, struct sk_buff *skb); - struct macvtap_queue *tap; + struct macvtap_queue *taps[MAX_MACVTAP_QUEUES]; + int numvtaps; }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, -- cgit v1.2.3-18-g5258 From f1b499f029c5dde85d46a8811353c62f29157541 Mon Sep 17 00:00:00 2001 From: John Kacur Date: Thu, 5 Aug 2010 17:10:53 +0200 Subject: lockdep: Remove __debug_show_held_locks There is no longer any functional difference between __debug_show_held_locks() and debug_show_held_locks(), so remove the former. Signed-off-by: John Kacur Cc: Peter Zijlstra LKML-Reference: <1281021054-4228-1-git-send-email-jkacur@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/debug_locks.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 29b3ce3f2a1..2833452ea01 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -49,7 +49,6 @@ struct task_struct; #ifdef CONFIG_LOCKDEP extern void debug_show_all_locks(void); -extern void __debug_show_held_locks(struct task_struct *task); extern void debug_show_held_locks(struct task_struct *task); extern void debug_check_no_locks_freed(const void *from, unsigned long len); extern void debug_check_no_locks_held(struct task_struct *task); @@ -58,10 +57,6 @@ static inline void debug_show_all_locks(void) { } -static inline void __debug_show_held_locks(struct task_struct *task) -{ -} - static inline void debug_show_held_locks(struct task_struct *task) { } -- cgit v1.2.3-18-g5258 From 3d529946ce292336793b85198bd59afc75e16bd4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 18 Aug 2010 09:48:31 +1000 Subject: Fix spelling mistake in jhash Fix a spelling mistake. Signed-off-by: Anton Blanchard Signed-off-by: Jiri Kosina --- include/linux/jhash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jhash.h b/include/linux/jhash.h index 2a2f99fbcb1..ced1159fa4f 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -116,7 +116,7 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) /* A special ultra-optimized versions that knows they are hashing exactly * 3, 2 or 1 word(s). * - * NOTE: In partilar the "c += length; __jhash_mix(a,b,c);" normally + * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally * done at the end is not done here. */ static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) -- cgit v1.2.3-18-g5258 From 70791ce9ba68a5921c9905ef05d23f62a90bc10c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 29 Jun 2010 19:34:05 +0200 Subject: perf: Generalize callchain_store() callchain_store() is the same on every archs, inline it in perf_event.h and rename it to perf_callchain_store() to avoid any collision. This removes repetitive code. Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- include/linux/perf_event.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 937495c2507..358880404b4 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -978,6 +978,13 @@ extern void perf_event_fork(struct task_struct *tsk); extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); +static inline void +perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + if (entry->nr < PERF_MAX_STACK_DEPTH) + entry->ip[entry->nr++] = ip; +} + extern int sysctl_perf_event_paranoid; extern int sysctl_perf_event_mlock; extern int sysctl_perf_event_sample_rate; -- cgit v1.2.3-18-g5258 From 56962b4449af34070bb1994621ef4f0265eed4d8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Jun 2010 23:03:51 +0200 Subject: perf: Generalize some arch callchain code - Most archs use one callchain buffer per cpu, except x86 that needs to deal with NMIs. Provide a default perf_callchain_buffer() implementation that x86 overrides. - Centralize all the kernel/user regs handling and invoke new arch handlers from there: perf_callchain_user() / perf_callchain_kernel() That avoid all the user_mode(), current->mm checks and so... - Invert some parameters in perf_callchain_*() helpers: entry to the left, regs to the right, following the traditional (dst, src). Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- include/linux/perf_event.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 358880404b4..4db61dded38 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -976,7 +976,15 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_comm(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); -extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); +/* Callchains */ +DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); + +extern void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs); +extern struct perf_callchain_entry *perf_callchain_buffer(void); + static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) -- cgit v1.2.3-18-g5258 From 927c7a9e92c4f69097a6e9e086d11fc2f8a5b40b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 1 Jul 2010 16:20:36 +0200 Subject: perf: Fix race in callchains Now that software events don't have interrupt disabled anymore in the event path, callchains can nest on any context. So seperating nmi and others contexts in two buffers has become racy. Fix this by providing one buffer per nesting level. Given the size of the callchain entries (2040 bytes * 4), we now need to allocate them dynamically. v2: Fixed put_callchain_entry call after recursion. Fix the type of the recursion, it must be an array. v3: Use a manual pr cpu allocation (temporary solution until NMIs can safely access vmalloc'ed memory). Do a better separation between callchain reference tracking and allocation. Make the "put" path lockless for non-release cases. v4: Protect the callchain buffers with rcu. v5: Do the cpu buffers allocations node affine. Signed-off-by: Frederic Weisbecker Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian Cc: Paul Mundt Cc: David Miller Cc: Borislav Petkov --- include/linux/perf_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4db61dded38..d7e8ea69086 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -983,7 +983,6 @@ extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); -extern struct perf_callchain_entry *perf_callchain_buffer(void); static inline void -- cgit v1.2.3-18-g5258 From 7ae07ea3a48d30689ee037cb136bc21f0b37d8ae Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 14 Aug 2010 20:45:13 +0200 Subject: perf: Humanize the number of contexts Instead of hardcoding the number of contexts for the recursions barriers, define a cpp constant to make the code more self-explanatory. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian --- include/linux/perf_event.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d7e8ea69086..ae6fa605092 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -808,6 +808,12 @@ struct perf_event_context { struct rcu_head rcu_head; }; +/* + * Number of contexts where an event can trigger: + * task, softirq, hardirq, nmi. + */ +#define PERF_NR_CONTEXTS 4 + /** * struct perf_event_cpu_context - per cpu event context structure */ @@ -821,12 +827,8 @@ struct perf_cpu_context { struct mutex hlist_mutex; int hlist_refcount; - /* - * Recursion avoidance: - * - * task, softirq, irq, nmi context - */ - int recursion[4]; + /* Recursion avoidance in each contexts */ + int recursion[PERF_NR_CONTEXTS]; }; struct perf_output_handle { -- cgit v1.2.3-18-g5258 From 6016ee13db518ab1cd0cbf43fc2ad5712021e338 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Aug 2010 12:47:59 +0900 Subject: perf, tracing: add missing __percpu markups ftrace_event_call->perf_events, perf_trace_buf, fgraph_data->cpu_data and some local variables are percpu pointers missing __percpu markups. Add them. Signed-off-by: Namhyung Kim Acked-by: Tejun Heo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian LKML-Reference: <1281498479-28551-1-git-send-email-namhyung@gmail.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 02b8b24f8f5..5f8ad7bec63 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -191,8 +191,8 @@ struct ftrace_event_call { unsigned int flags; #ifdef CONFIG_PERF_EVENTS - int perf_refcount; - struct hlist_head *perf_events; + int perf_refcount; + struct hlist_head __percpu *perf_events; #endif }; -- cgit v1.2.3-18-g5258 From 2244d07bfa2097cb00600da91c715a8aa547917e Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 17 Aug 2010 08:59:14 +0000 Subject: net: simplify flags for tx timestamping This patch removes the abstraction introduced by the union skb_shared_tx in the shared skb data. The access of the different union elements at several places led to some confusion about accessing the shared tx_flags e.g. in skb_orphan_try(). http://marc.info/?l=linux-netdev&m=128084897415886&w=2 Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- include/linux/skbuff.h | 44 ++++++++++++++++---------------------------- include/net/ip.h | 2 +- include/net/sock.h | 8 ++------ 3 files changed, 19 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d8050382b18..f067c95cf18 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -163,26 +163,19 @@ struct skb_shared_hwtstamps { ktime_t syststamp; }; -/** - * struct skb_shared_tx - instructions for time stamping of outgoing packets - * @hardware: generate hardware time stamp - * @software: generate software time stamp - * @in_progress: device driver is going to provide - * hardware time stamp - * @prevent_sk_orphan: make sk reference available on driver level - * @flags: all shared_tx flags - * - * These flags are attached to packets as part of the - * &skb_shared_info. Use skb_tx() to get a pointer. - */ -union skb_shared_tx { - struct { - __u8 hardware:1, - software:1, - in_progress:1, - prevent_sk_orphan:1; - }; - __u8 flags; +/* Definitions for tx_flags in struct skb_shared_info */ +enum { + /* generate hardware time stamp */ + SKBTX_HW_TSTAMP = 1 << 0, + + /* generate software time stamp */ + SKBTX_SW_TSTAMP = 1 << 1, + + /* device driver is going to provide hardware time stamp */ + SKBTX_IN_PROGRESS = 1 << 2, + + /* ensure the originating sk reference is available on driver level */ + SKBTX_DRV_NEEDS_SK_REF = 1 << 3, }; /* This data is invariant across clones and lives at @@ -195,7 +188,7 @@ struct skb_shared_info { unsigned short gso_segs; unsigned short gso_type; __be32 ip6_frag_id; - union skb_shared_tx tx_flags; + __u8 tx_flags; struct sk_buff *frag_list; struct skb_shared_hwtstamps hwtstamps; @@ -587,11 +580,6 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) return &skb_shinfo(skb)->hwtstamps; } -static inline union skb_shared_tx *skb_tx(struct sk_buff *skb) -{ - return &skb_shinfo(skb)->tx_flags; -} - /** * skb_queue_empty - check if a queue is empty * @list: queue head @@ -1996,8 +1984,8 @@ extern void skb_tstamp_tx(struct sk_buff *orig_skb, static inline void sw_tx_timestamp(struct sk_buff *skb) { - union skb_shared_tx *shtx = skb_tx(skb); - if (shtx->software && !shtx->in_progress) + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP && + !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) skb_tstamp_tx(skb, NULL); } diff --git a/include/net/ip.h b/include/net/ip.h index 890f9725d68..7691aca133d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -53,7 +53,7 @@ struct ipcm_cookie { __be32 addr; int oif; struct ip_options *opt; - union skb_shared_tx shtx; + __u8 tx_flags; }; #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) diff --git a/include/net/sock.h b/include/net/sock.h index ac53bfbdfe1..100e43bf95f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1669,17 +1669,13 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, /** * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped - * @msg: outgoing packet * @sk: socket sending this packet - * @shtx: filled with instructions for time stamping + * @tx_flags: filled with instructions for time stamping * * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if * parameters are invalid. */ -extern int sock_tx_timestamp(struct msghdr *msg, - struct sock *sk, - union skb_shared_tx *shtx); - +extern int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); /** * sk_eat_skb - Release a skb if it is no longer needed -- cgit v1.2.3-18-g5258 From e760702ed8333588f9f21e7bf6597873993006f1 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 17 Aug 2010 19:03:44 +0000 Subject: net: introduce proto_ports_offset() Introduce proto_ports_offset() for getting the position of the ports or SPI in the message of a protocol. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/in.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/in.h b/include/linux/in.h index 41d88a4689a..beeb6dee2b4 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -250,6 +250,25 @@ struct sockaddr_in { #ifdef __KERNEL__ +#include + +static inline int proto_ports_offset(int proto) +{ + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: /* SPI */ + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + return 0; + case IPPROTO_AH: /* SPI */ + return 4; + default: + return -EINVAL; + } +} + static inline bool ipv4_is_loopback(__be32 addr) { return (addr & htonl(0xff000000)) == htonl(0x7f000000); -- cgit v1.2.3-18-g5258 From d34a16661ed0fed433c9469d7cfa3ca4d30ca42e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 14 Jun 2010 17:06:21 -0700 Subject: net: convert to rcu_dereference_index_check() The task_cls_classid() function applies rcu_dereference() to integers, which does not work with the shiny new sparse-based checking in rcu_dereference(). This commit therefore moves to the new RCU API rcu_dereference_index_check(). Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Acked-by: David S. Miller Acked-by: Herbert Xu --- include/net/cls_cgroup.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 726cc353640..dd1fdb8293f 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -45,7 +45,8 @@ static inline u32 task_cls_classid(struct task_struct *p) return 0; rcu_read_lock(); - id = rcu_dereference(net_cls_subsys_id); + id = rcu_dereference_index_check(net_cls_subsys_id, + rcu_read_lock_held()); if (id >= 0) classid = container_of(task_subsys_state(p, id), struct cgroup_cls_state, css)->classid; -- cgit v1.2.3-18-g5258 From ca5ecddfa8fcbd948c95530e7e817cee9fb43a3d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 28 Apr 2010 14:39:09 -0700 Subject: rcu: define __rcu address space modifier for sparse This commit provides definitions for the __rcu annotation defined earlier. This annotation permits sparse to check for correct use of RCU-protected pointers. If a pointer that is annotated with __rcu is accessed directly (as opposed to via rcu_dereference(), rcu_assign_pointer(), or one of their variants), sparse can be made to complain. To enable such complaints, use the new default-disabled CONFIG_SPARSE_RCU_POINTER kernel configuration option. Please note that these sparse complaints are intended to be a debugging aid, -not- a code-style-enforcement mechanism. There are special rcu_dereference_protected() and rcu_access_pointer() accessors for use when RCU read-side protection is not required, for example, when no other CPU has access to the data structure in question or while the current CPU hold the update-side lock. This patch also updates a number of docbook comments that were showing their age. Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Christopher Li Reviewed-by: Josh Triplett --- include/linux/compiler.h | 4 + include/linux/rcupdate.h | 352 ++++++++++++++++++++++++++++------------------- include/linux/srcu.h | 27 +++- 3 files changed, 240 insertions(+), 143 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c1a62c56a66..320d6c94ff8 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -16,7 +16,11 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) +#ifdef CONFIG_SPARSE_RCU_POINTER +# define __rcu __attribute__((noderef, address_space(4))) +#else # define __rcu +#endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); #else diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9fbc54a2585..b973dea2d6b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -41,6 +41,7 @@ #include #include #include +#include #ifdef CONFIG_RCU_TORTURE_TEST extern int rcutorture_runnable; /* for sysctl */ @@ -120,14 +121,15 @@ extern struct lockdep_map rcu_sched_lock_map; extern int debug_lockdep_rcu_enabled(void); /** - * rcu_read_lock_held - might we be in RCU read-side critical section? + * rcu_read_lock_held() - might we be in RCU read-side critical section? * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, * this assumes we are in an RCU read-side critical section unless it can - * prove otherwise. + * prove otherwise. This is useful for debug checks in functions that + * require that they be called within an RCU read-side critical section. * - * Check debug_lockdep_rcu_enabled() to prevent false positives during boot + * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. */ static inline int rcu_read_lock_held(void) @@ -144,14 +146,16 @@ static inline int rcu_read_lock_held(void) extern int rcu_read_lock_bh_held(void); /** - * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section? + * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an * RCU-sched read-side critical section. In absence of * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side * critical section unless it can prove otherwise. Note that disabling * of preemption (including disabling irqs) counts as an RCU-sched - * read-side critical section. + * read-side critical section. This is useful for debug checks in functions + * that required that they be called within an RCU-sched read-side + * critical section. * * Check debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. @@ -220,41 +224,155 @@ extern int rcu_my_thread_group_empty(void); } \ } while (0) +#else /* #ifdef CONFIG_PROVE_RCU */ + +#define __do_rcu_dereference_check(c) do { } while (0) + +#endif /* #else #ifdef CONFIG_PROVE_RCU */ + +/* + * Helper functions for rcu_dereference_check(), rcu_dereference_protected() + * and rcu_assign_pointer(). Some of these could be folded into their + * callers, but they are left separate in order to ease introduction of + * multiple flavors of pointers to match the multiple flavors of RCU + * (e.g., __rcu_bh, * __rcu_sched, and __srcu), should this make sense in + * the future. + */ +#define __rcu_access_pointer(p, space) \ + ({ \ + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ + (void) (((typeof (*p) space *)p) == p); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ + }) +#define __rcu_dereference_check(p, c, space) \ + ({ \ + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ + __do_rcu_dereference_check(c); \ + (void) (((typeof (*p) space *)p) == p); \ + smp_read_barrier_depends(); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ + }) +#define __rcu_dereference_protected(p, c, space) \ + ({ \ + __do_rcu_dereference_check(c); \ + (void) (((typeof (*p) space *)p) == p); \ + ((typeof(*p) __force __kernel *)(p)); \ + }) + +#define __rcu_dereference_index_check(p, c) \ + ({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + __do_rcu_dereference_check(c); \ + smp_read_barrier_depends(); \ + (_________p1); \ + }) +#define __rcu_assign_pointer(p, v, space) \ + ({ \ + if (!__builtin_constant_p(v) || \ + ((v) != NULL)) \ + smp_wmb(); \ + (p) = (typeof(*v) __force space *)(v); \ + }) + + +/** + * rcu_access_pointer() - fetch RCU pointer with no dereferencing + * @p: The pointer to read + * + * Return the value of the specified RCU-protected pointer, but omit the + * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful + * when the value of this pointer is accessed, but the pointer is not + * dereferenced, for example, when testing an RCU-protected pointer against + * NULL. Although rcu_access_pointer() may also be used in cases where + * update-side locks prevent the value of the pointer from changing, you + * should instead use rcu_dereference_protected() for this use case. + */ +#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) + /** - * rcu_dereference_check - rcu_dereference with debug checking + * rcu_dereference_check() - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Do an rcu_dereference(), but check that the conditions under which the - * dereference will take place are correct. Typically the conditions indicate - * the various locking conditions that should be held at that point. The check - * should return true if the conditions are satisfied. + * dereference will take place are correct. Typically the conditions + * indicate the various locking conditions that should be held at that + * point. The check should return true if the conditions are satisfied. + * An implicit check for being in an RCU read-side critical section + * (rcu_read_lock()) is included. * * For example: * - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || - * lockdep_is_held(&foo->lock)); + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); * * could be used to indicate to lockdep that foo->bar may only be dereferenced - * if either the RCU read lock is held, or that the lock required to replace + * if either rcu_read_lock() is held, or that the lock required to replace * the bar struct at foo->bar is held. * * Note that the list of conditions may also include indications of when a lock * need not be held, for example during initialisation or destruction of the * target struct: * - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || - * lockdep_is_held(&foo->lock) || + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || * atomic_read(&foo->usage) == 0); + * + * Inserts memory barriers on architectures that require them + * (currently only the Alpha), prevents the compiler from refetching + * (and from merging fetches), and, more importantly, documents exactly + * which pointers are protected by RCU and checks that the pointer is + * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ - ({ \ - __do_rcu_dereference_check(c); \ - rcu_dereference_raw(p); \ - }) + __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) + +/** + * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-bh counterpart to rcu_dereference_check(). + */ +#define rcu_dereference_bh_check(p, c) \ + __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) /** - * rcu_dereference_protected - fetch RCU pointer when updates prevented + * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-sched counterpart to rcu_dereference_check(). + */ +#define rcu_dereference_sched_check(p, c) \ + __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ + __rcu) + +#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ + +/** + * rcu_dereference_index_check() - rcu_dereference for indices with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * Similar to rcu_dereference_check(), but omits the sparse checking. + * This allows rcu_dereference_index_check() to be used on integers, + * which can then be used as array indices. Attempting to use + * rcu_dereference_check() on an integer will give compiler warnings + * because the sparse address-space mechanism relies on dereferencing + * the RCU-protected pointer. Dereferencing integers is not something + * that even gcc will put up with. + * + * Note that this function does not implicitly check for RCU read-side + * critical sections. If this function gains lots of uses, it might + * make sense to provide versions for each flavor of RCU, but it does + * not make sense as of early 2010. + */ +#define rcu_dereference_index_check(p, c) \ + __rcu_dereference_index_check((p), (c)) + +/** + * rcu_dereference_protected() - fetch RCU pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place * * Return the value of the specified RCU-protected pointer, but omit * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This @@ -263,35 +381,61 @@ extern int rcu_my_thread_group_empty(void); * prevent the compiler from repeating this reference or combining it * with other references, so it should not be used without protection * of appropriate locks. + * + * This function is only for update-side use. Using this function + * when protected only by rcu_read_lock() will result in infrequent + * but very ugly failures. */ #define rcu_dereference_protected(p, c) \ - ({ \ - __do_rcu_dereference_check(c); \ - (p); \ - }) + __rcu_dereference_protected((p), (c), __rcu) -#else /* #ifdef CONFIG_PROVE_RCU */ +/** + * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-bh counterpart to rcu_dereference_protected(). + */ +#define rcu_dereference_bh_protected(p, c) \ + __rcu_dereference_protected((p), (c), __rcu) -#define rcu_dereference_check(p, c) rcu_dereference_raw(p) -#define rcu_dereference_protected(p, c) (p) +/** + * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-sched counterpart to rcu_dereference_protected(). + */ +#define rcu_dereference_sched_protected(p, c) \ + __rcu_dereference_protected((p), (c), __rcu) -#endif /* #else #ifdef CONFIG_PROVE_RCU */ /** - * rcu_access_pointer - fetch RCU pointer with no dereferencing + * rcu_dereference() - fetch RCU-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing * - * Return the value of the specified RCU-protected pointer, but omit the - * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful - * when the value of this pointer is accessed, but the pointer is not - * dereferenced, for example, when testing an RCU-protected pointer against - * NULL. This may also be used in cases where update-side locks prevent - * the value of the pointer from changing, but rcu_dereference_protected() - * is a lighter-weight primitive for this use case. + * This is a simple wrapper around rcu_dereference_check(). */ -#define rcu_access_pointer(p) ACCESS_ONCE(p) +#define rcu_dereference(p) rcu_dereference_check(p, 0) /** - * rcu_read_lock - mark the beginning of an RCU read-side critical section. + * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) + +/** + * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) + +/** + * rcu_read_lock() - mark the beginning of an RCU read-side critical section * * When synchronize_rcu() is invoked on one CPU while other CPUs * are within RCU read-side critical sections, then the @@ -337,7 +481,7 @@ static inline void rcu_read_lock(void) */ /** - * rcu_read_unlock - marks the end of an RCU read-side critical section. + * rcu_read_unlock() - marks the end of an RCU read-side critical section. * * See rcu_read_lock() for more information. */ @@ -349,15 +493,16 @@ static inline void rcu_read_unlock(void) } /** - * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section + * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section * * This is equivalent of rcu_read_lock(), but to be used when updates - * are being done using call_rcu_bh(). Since call_rcu_bh() callbacks - * consider completion of a softirq handler to be a quiescent state, - * a process in RCU read-side critical section must be protected by - * disabling softirqs. Read-side critical sections in interrupt context - * can use just rcu_read_lock(). - * + * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since + * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a + * softirq handler to be a quiescent state, a process in RCU read-side + * critical section must be protected by disabling softirqs. Read-side + * critical sections in interrupt context can use just rcu_read_lock(), + * though this should at least be commented to avoid confusing people + * reading the code. */ static inline void rcu_read_lock_bh(void) { @@ -379,13 +524,12 @@ static inline void rcu_read_unlock_bh(void) } /** - * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section + * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section * - * Should be used with either - * - synchronize_sched() - * or - * - call_rcu_sched() and rcu_barrier_sched() - * on the write-side to insure proper synchronization. + * This is equivalent of rcu_read_lock(), but to be used when updates + * are being done using call_rcu_sched() or synchronize_rcu_sched(). + * Read-side critical sections can also be introduced by anything that + * disables preemption, including local_irq_disable() and friends. */ static inline void rcu_read_lock_sched(void) { @@ -420,54 +564,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) preempt_enable_notrace(); } - /** - * rcu_dereference_raw - fetch an RCU-protected pointer + * rcu_assign_pointer() - assign to RCU-protected pointer + * @p: pointer to assign to + * @v: value to assign (publish) * - * The caller must be within some flavor of RCU read-side critical - * section, or must be otherwise preventing the pointer from changing, - * for example, by holding an appropriate lock. This pointer may later - * be safely dereferenced. It is the caller's responsibility to have - * done the right thing, as this primitive does no checking of any kind. - * - * Inserts memory barriers on architectures that require them - * (currently only the Alpha), and, more importantly, documents - * exactly which pointers are protected by RCU. - */ -#define rcu_dereference_raw(p) ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) - -/** - * rcu_dereference - fetch an RCU-protected pointer, checking for RCU - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference(p) \ - rcu_dereference_check(p, rcu_read_lock_held()) - -/** - * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference_bh(p) \ - rcu_dereference_check(p, rcu_read_lock_bh_held()) - -/** - * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference_sched(p) \ - rcu_dereference_check(p, rcu_read_lock_sched_held()) - -/** - * rcu_assign_pointer - assign (publicize) a pointer to a newly - * initialized structure that will be dereferenced by RCU read-side - * critical sections. Returns the value assigned. + * Assigns the specified value to the specified RCU-protected + * pointer, ensuring that any concurrent RCU readers will see + * any prior initialization. Returns the value assigned. * * Inserts memory barriers on architectures that require them * (pretty much all of them other than x86), and also prevents @@ -476,14 +580,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * call documents which pointers will be dereferenced by RCU read-side * code. */ - #define rcu_assign_pointer(p, v) \ - ({ \ - if (!__builtin_constant_p(v) || \ - ((v) != NULL)) \ - smp_wmb(); \ - (p) = (v); \ - }) + __rcu_assign_pointer((p), (v), __rcu) + +/** + * RCU_INIT_POINTER() - initialize an RCU protected pointer + * + * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep + * splats. + */ +#define RCU_INIT_POINTER(p, v) \ + p = (typeof(*v) __force __rcu *)(v) /* Infrastructure to implement the synchronize_() primitives. */ @@ -495,7 +602,7 @@ struct rcu_synchronize { extern void wakeme_after_rcu(struct rcu_head *head); /** - * call_rcu - Queue an RCU callback for invocation after a grace period. + * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. * @func: actual update function to be invoked after the grace period * @@ -509,7 +616,7 @@ extern void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)); /** - * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. * @func: actual update function to be invoked after the grace period * @@ -566,37 +673,4 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -#ifndef CONFIG_PROVE_RCU -#define __do_rcu_dereference_check(c) do { } while (0) -#endif /* #ifdef CONFIG_PROVE_RCU */ - -#define __rcu_dereference_index_check(p, c) \ - ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - __do_rcu_dereference_check(c); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) - -/** - * rcu_dereference_index_check() - rcu_dereference for indices with debug checking - * @p: The pointer to read, prior to dereferencing - * @c: The conditions under which the dereference will take place - * - * Similar to rcu_dereference_check(), but omits the sparse checking. - * This allows rcu_dereference_index_check() to be used on integers, - * which can then be used as array indices. Attempting to use - * rcu_dereference_check() on an integer will give compiler warnings - * because the sparse address-space mechanism relies on dereferencing - * the RCU-protected pointer. Dereferencing integers is not something - * that even gcc will put up with. - * - * Note that this function does not implicitly check for RCU read-side - * critical sections. If this function gains lots of uses, it might - * make sense to provide versions for each flavor of RCU, but it does - * not make sense as of early 2010. - */ -#define rcu_dereference_index_check(p, c) \ - __rcu_dereference_index_check((p), (c)) - #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4d5d2f546db..6f456a720ff 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -108,12 +108,31 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ /** - * srcu_dereference - fetch SRCU-protected pointer with checking + * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing + * @p: the pointer to fetch and protect for later dereferencing + * @sp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. + * @c: condition to check for update-side use * - * Makes rcu_dereference_check() do the dirty work. + * If PROVE_RCU is enabled, invoking this outside of an RCU read-side + * critical section will result in an RCU-lockdep splat, unless @c evaluates + * to 1. The @c argument will normally be a logical expression containing + * lockdep_is_held() calls. */ -#define srcu_dereference(p, sp) \ - rcu_dereference_check(p, srcu_read_lock_held(sp)) +#define srcu_dereference_check(p, sp, c) \ + __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu) + +/** + * srcu_dereference - fetch SRCU-protected pointer for later dereferencing + * @p: the pointer to fetch and protect for later dereferencing + * @sp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. + * + * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU + * is enabled, invoking this outside of an RCU read-side critical + * section will result in an RCU-lockdep splat. + */ +#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0) /** * srcu_read_lock - register a new reader for an SRCU-protected structure. -- cgit v1.2.3-18-g5258 From 67bdbffd696f29a0b68aa8daa285783a06651583 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 25 Feb 2010 16:55:13 +0100 Subject: rculist: avoid __rcu annotations This avoids warnings from missing __rcu annotations in the rculist implementation, making it possible to use the same lists in both RCU and non-RCU cases. We can add rculist annotations later, together with lockdep support for rculist, which is missing as well, but that may involve changing all the users. Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Pavel Emelyanov Cc: Sukadev Bhattiprolu Reviewed-by: Josh Triplett --- include/linux/rculist.h | 53 +++++++++++++++++++++++++++---------------- include/linux/rculist_nulls.h | 16 +++++++++---- 2 files changed, 45 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 4ec3b38ce9c..c10b1050dbe 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -9,6 +9,12 @@ #include #include +/* + * return the ->next pointer of a list_head in an rcu safe + * way, we must not access it directly + */ +#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) + /* * Insert a new entry between two known consecutive entries. * @@ -20,7 +26,7 @@ static inline void __list_add_rcu(struct list_head *new, { new->next = next; new->prev = prev; - rcu_assign_pointer(prev->next, new); + rcu_assign_pointer(list_next_rcu(prev), new); next->prev = new; } @@ -138,7 +144,7 @@ static inline void list_replace_rcu(struct list_head *old, { new->next = old->next; new->prev = old->prev; - rcu_assign_pointer(new->prev->next, new); + rcu_assign_pointer(list_next_rcu(new->prev), new); new->next->prev = new; old->prev = LIST_POISON2; } @@ -193,7 +199,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ last->next = at; - rcu_assign_pointer(head->next, first); + rcu_assign_pointer(list_next_rcu(head), first); first->prev = head; at->prev = last; } @@ -208,7 +214,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ - container_of(rcu_dereference_raw(ptr), type, member) + ({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \ + container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ + }) /** * list_first_entry_rcu - get the first element from a list @@ -225,9 +233,9 @@ static inline void list_splice_init_rcu(struct list_head *list, list_entry_rcu((ptr)->next, type, member) #define __list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference_raw((head)->next); \ + for (pos = rcu_dereference_raw(list_next_rcu(head)); \ pos != (head); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(list_next_rcu((pos))) /** * list_for_each_entry_rcu - iterate over rcu list of given type @@ -257,9 +265,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_continue_rcu(pos, head) \ - for ((pos) = rcu_dereference_raw((pos)->next); \ + for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \ prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference_raw((pos)->next)) + (pos) = rcu_dereference_raw(list_next_rcu(pos))) /** * list_for_each_entry_continue_rcu - continue iteration over list of given type @@ -314,12 +322,19 @@ static inline void hlist_replace_rcu(struct hlist_node *old, new->next = next; new->pprev = old->pprev; - rcu_assign_pointer(*new->pprev, new); + rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new); if (next) new->next->pprev = &new->next; old->pprev = LIST_POISON2; } +/* + * return the first or the next element in an RCU protected hlist + */ +#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first))) +#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next))) +#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) + /** * hlist_add_head_rcu * @n: the element to add to the hash list. @@ -346,7 +361,7 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, n->next = first; n->pprev = &h->first; - rcu_assign_pointer(h->first, n); + rcu_assign_pointer(hlist_first_rcu(h), n); if (first) first->pprev = &n->next; } @@ -374,7 +389,7 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, { n->pprev = next->pprev; n->next = next; - rcu_assign_pointer(*(n->pprev), n); + rcu_assign_pointer(hlist_pprev_rcu(n), n); next->pprev = &n->next; } @@ -401,15 +416,15 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, { n->next = prev->next; n->pprev = &prev->next; - rcu_assign_pointer(prev->next, n); + rcu_assign_pointer(hlist_next_rcu(prev), n); if (n->next) n->next->pprev = &n->next; } -#define __hlist_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ prefetch(pos->next); 1; }); \ - pos = rcu_dereference(pos->next)) +#define __hlist_for_each_rcu(pos, head) \ + for (pos = rcu_dereference(hlist_first_rcu(head)); \ + pos && ({ prefetch(pos->next); 1; }); \ + pos = rcu_dereference(hlist_next_rcu(pos))) /** * hlist_for_each_entry_rcu - iterate over rcu list of given type @@ -422,11 +437,11 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw((head)->first); \ +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ pos && ({ prefetch(pos->next); 1; }) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_next_rcu(pos))) /** * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index b70ffe53cb9..2ae13714828 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -37,6 +37,12 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) } } +#define hlist_nulls_first_rcu(head) \ + (*((struct hlist_nulls_node __rcu __force **)&(head)->first)) + +#define hlist_nulls_next_rcu(node) \ + (*((struct hlist_nulls_node __rcu __force **)&(node)->next)) + /** * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. @@ -88,7 +94,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, n->next = first; n->pprev = &h->first; - rcu_assign_pointer(h->first, n); + rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) first->pprev = &n->next; } @@ -100,11 +106,11 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, * @member: the name of the hlist_nulls_node within the struct. * */ -#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw((head)->first); \ - (!is_a_nulls(pos)) && \ +#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ + (!is_a_nulls(pos)) && \ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) #endif #endif -- cgit v1.2.3-18-g5258 From 2c392b8c3450ceb69ba1b93cb0cddb3998fb8cdc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 24 Feb 2010 19:41:39 +0100 Subject: cgroups: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: Paul Menage Cc: Li Zefan Reviewed-by: Josh Triplett --- include/linux/cgroup.h | 4 ++-- include/linux/sched.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e41c6..3cb7d04308c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -75,7 +75,7 @@ struct cgroup_subsys_state { unsigned long flags; /* ID for this css, if possible */ - struct css_id *id; + struct css_id __rcu *id; }; /* bits in struct cgroup_subsys_state flags field */ @@ -205,7 +205,7 @@ struct cgroup { struct list_head children; /* my children */ struct cgroup *parent; /* my parent */ - struct dentry *dentry; /* cgroup fs entry, RCU protected */ + struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ /* Private pointers for each registered subsystem */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7d..bbffd087476 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1418,7 +1418,7 @@ struct task_struct { #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock */ - struct css_set *cgroups; + struct css_set __rcu *cgroups; /* cg_list protected by css_set_lock and tsk->alloc_lock */ struct list_head cg_list; #endif -- cgit v1.2.3-18-g5258 From 1b0ba1c9037b2265d6e5d0165d31e4c0269b603b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 24 Feb 2010 19:45:09 +0100 Subject: credentials: rcu annotation Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Ingo Molnar Acked-by: David Howells Reviewed-by: Josh Triplett --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index bbffd087476..2c756666c11 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1288,9 +1288,9 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - const struct cred *real_cred; /* objective and real subjective task + const struct cred __rcu *real_cred; /* objective and real subjective task * credentials (COW) */ - const struct cred *cred; /* effective (overridable) subjective task + const struct cred __rcu *cred; /* effective (overridable) subjective task * credentials (COW) */ struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations -- cgit v1.2.3-18-g5258 From e63ba744a64d234c8a07c469ab1806443cb0a6ff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Feb 2010 18:01:20 +0100 Subject: keys: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: David Howells Reviewed-by: Josh Triplett --- include/linux/cred.h | 2 +- include/linux/key.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index 4d2c39573f3..4aaeab37644 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -84,7 +84,7 @@ struct thread_group_cred { atomic_t usage; pid_t tgid; /* thread group process ID */ spinlock_t lock; - struct key *session_keyring; /* keyring inherited over fork */ + struct key __rcu *session_keyring; /* keyring inherited over fork */ struct key *process_keyring; /* keyring private to this process */ struct rcu_head rcu; /* RCU deletion hook */ }; diff --git a/include/linux/key.h b/include/linux/key.h index cd50dfa1d4c..3db0adce1fd 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -178,8 +178,9 @@ struct key { */ union { unsigned long value; + void __rcu *rcudata; void *data; - struct keyring_list *subscriptions; + struct keyring_list __rcu *subscriptions; } payload; }; -- cgit v1.2.3-18-g5258 From 5b22216e11f717adc344abc7f97b42e03127c6c0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Mar 2010 10:20:10 +0100 Subject: nfs: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 +- include/linux/sunrpc/auth_gss.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 508f8cf6da3..d0edf7d823a 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -185,7 +185,7 @@ struct nfs_inode { struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ struct list_head open_states; - struct nfs_delegation *delegation; + struct nfs_delegation __rcu *delegation; fmode_t delegation_state; struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 671538d25bc..8eee9dbbfe7 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -69,7 +69,7 @@ struct gss_cl_ctx { enum rpc_gss_proc gc_proc; u32 gc_seq; spinlock_t gc_seq_lock; - struct gss_ctx *gc_gss_ctx; + struct gss_ctx __rcu *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; u32 gc_win; unsigned long gc_expiry; @@ -80,7 +80,7 @@ struct gss_upcall_msg; struct gss_cred { struct rpc_cred gc_base; enum rpc_gss_svc gc_service; - struct gss_cl_ctx *gc_ctx; + struct gss_cl_ctx __rcu *gc_ctx; struct gss_upcall_msg *gc_upcall; unsigned long gc_upcall_timestamp; unsigned char gc_machine_cred : 1; -- cgit v1.2.3-18-g5258 From 2be85279281bafe7de808ca99de59af4fd474c49 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Mar 2010 15:50:28 +0100 Subject: input: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Dmitry Torokhov Acked-by: Dmitry Torokhov Reviewed-by: Josh Triplett --- include/linux/input.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index 896a92227bc..d6ae1761be9 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1196,7 +1196,7 @@ struct input_dev { int (*flush)(struct input_dev *dev, struct file *file); int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value); - struct input_handle *grab; + struct input_handle __rcu *grab; spinlock_t event_lock; struct mutex mutex; -- cgit v1.2.3-18-g5258 From 0906a372f2aa0fec1e59bd12b896883b6e41307a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 9 Mar 2010 20:59:15 +0100 Subject: net/netfilter: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: Patrick McHardy Cc: "David S. Miller" Cc: Eric Dumazet Reviewed-by: Josh Triplett --- include/net/netfilter/nf_conntrack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index e624dae54fa..caf17db87db 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -75,7 +75,7 @@ struct nf_conntrack_helper; /* nf_conn feature for connections that have a helper */ struct nf_conn_help { /* Helper. if any */ - struct nf_conntrack_helper *helper; + struct nf_conntrack_helper __rcu *helper; union nf_conntrack_help help; -- cgit v1.2.3-18-g5258 From 4b6a2872a2a00042ee50024822ab706e5456aad8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Mar 2010 15:59:23 +0100 Subject: kvm: add __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Avi Kivity Cc: Marcelo Tosatti Reviewed-by: Josh Triplett --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c13cc48697a..ac740b26eb1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -205,7 +205,7 @@ struct kvm { struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP - struct kvm_irq_routing_table *irq_routing; + struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; struct hlist_head irq_ack_notifier_list; #endif -- cgit v1.2.3-18-g5258 From 4221a9918e38b7494cee341dda7b7b4bb8c04bde Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 26 Jun 2010 01:08:19 +0900 Subject: Add RCU check for find_task_by_vpid(). find_task_by_vpid() says "Must be called under rcu_read_lock().". But due to commit 3120438 "rcu: Disable lockdep checking in RCU list-traversal primitives", we are currently unable to catch "find_task_by_vpid() with tasklist_lock held but RCU lock not held" errors due to the RCU-lockdep checks being suppressed in the RCU variants of the struct list_head traversals. This commit therefore places an explicit check for being in an RCU read-side critical section in find_task_by_pid_ns(). =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- kernel/pid.c:386 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 1 1 lock held by rc.sysinit/1102: #0: (tasklist_lock){.+.+..}, at: [] sys_setpgid+0x40/0x160 stack backtrace: Pid: 1102, comm: rc.sysinit Not tainted 2.6.35-rc3-dirty #1 Call Trace: [] lockdep_rcu_dereference+0x94/0xb0 [] find_task_by_pid_ns+0x6d/0x70 [] find_task_by_vpid+0x18/0x20 [] sys_setpgid+0x47/0x160 [] sysenter_do_call+0x12/0x36 Commit updated to use a new rcu_lockdep_assert() exported API rather than the old internal __do_rcu_dereference(). Signed-off-by: Tetsuo Handa Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b973dea2d6b..b124bc6a75a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -215,7 +215,11 @@ static inline int rcu_read_lock_sched_held(void) extern int rcu_my_thread_group_empty(void); -#define __do_rcu_dereference_check(c) \ +/** + * rcu_lockdep_assert - emit lockdep splat if specified condition not met + * @c: condition to check + */ +#define rcu_lockdep_assert(c) \ do { \ static bool __warned; \ if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ @@ -226,7 +230,7 @@ extern int rcu_my_thread_group_empty(void); #else /* #ifdef CONFIG_PROVE_RCU */ -#define __do_rcu_dereference_check(c) do { } while (0) +#define rcu_lockdep_assert(c) do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ @@ -247,14 +251,14 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_check(p, c, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ - __do_rcu_dereference_check(c); \ + rcu_lockdep_assert(c); \ (void) (((typeof (*p) space *)p) == p); \ smp_read_barrier_depends(); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ - __do_rcu_dereference_check(c); \ + rcu_lockdep_assert(c); \ (void) (((typeof (*p) space *)p) == p); \ ((typeof(*p) __force __kernel *)(p)); \ }) @@ -262,7 +266,7 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_index_check(p, c) \ ({ \ typeof(p) _________p1 = ACCESS_ONCE(p); \ - __do_rcu_dereference_check(c); \ + rcu_lockdep_assert(c); \ smp_read_barrier_depends(); \ (_________p1); \ }) -- cgit v1.2.3-18-g5258 From 77d8485a8b5416c615b6acd95f01bfcacd7d81ff Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 8 Jul 2010 17:38:59 -0700 Subject: rcu: improve kerneldoc for rcu_read_lock(), call_rcu(), and synchronize_rcu() Make it explicit that new RCU read-side critical sections that start after call_rcu() and synchronize_rcu() start might still be running after the end of the relevant grace period. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b124bc6a75a..3e1b6625553 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -450,7 +450,7 @@ extern int rcu_my_thread_group_empty(void); * until after the all the other CPUs exit their critical sections. * * Note, however, that RCU callbacks are permitted to run concurrently - * with RCU read-side critical sections. One way that this can happen + * with new RCU read-side critical sections. One way that this can happen * is via the following sequence of events: (1) CPU 0 enters an RCU * read-side critical section, (2) CPU 1 invokes call_rcu() to register * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, @@ -608,11 +608,13 @@ extern void wakeme_after_rcu(struct rcu_head *head); /** * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period + * @func: actual callback function to be invoked after the grace period * - * The update function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. However, the callback function + * might well execute concurrently with RCU read-side critical sections + * that started after call_rcu() was invoked. RCU read-side critical * sections are delimited by rcu_read_lock() and rcu_read_unlock(), * and may be nested. */ @@ -622,9 +624,9 @@ extern void call_rcu(struct rcu_head *head, /** * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period + * @func: actual callback function to be invoked after the grace period * - * The update function will be invoked some time after a full grace + * The callback function will be invoked some time after a full grace * period elapses, in other words after all currently executing RCU * read-side critical sections have completed. call_rcu_bh() assumes * that the read-side critical sections end on completion of a softirq -- cgit v1.2.3-18-g5258 From 374a8e0dc33c984fac284de7d57d77af3cfdbfb7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 24 Feb 2010 20:00:13 +0100 Subject: notifiers: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Alan Cox Reviewed-by: Josh Triplett --- include/linux/notifier.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index b2f1a4d8355..2026f9e1ceb 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -49,28 +49,28 @@ struct notifier_block { int (*notifier_call)(struct notifier_block *, unsigned long, void *); - struct notifier_block *next; + struct notifier_block __rcu *next; int priority; }; struct atomic_notifier_head { spinlock_t lock; - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct blocking_notifier_head { struct rw_semaphore rwsem; - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct raw_notifier_head { - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct srcu_notifier_head { struct mutex mutex; struct srcu_struct srcu; - struct notifier_block *head; + struct notifier_block __rcu *head; }; #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ -- cgit v1.2.3-18-g5258 From a1115570b31091f3e3ab9e6cf7ee8d320a42be84 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 25 Feb 2010 23:43:52 +0100 Subject: radix-tree: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Nick Piggin Reviewed-by: Josh Triplett --- include/linux/radix-tree.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 634b8e674ac..a39cbed9ee1 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -47,6 +47,8 @@ static inline void *radix_tree_indirect_to_ptr(void *ptr) { return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } +#define radix_tree_indirect_to_ptr(ptr) \ + radix_tree_indirect_to_ptr((void __force *)(ptr)) static inline int radix_tree_is_indirect_ptr(void *ptr) { @@ -61,7 +63,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) struct radix_tree_root { unsigned int height; gfp_t gfp_mask; - struct radix_tree_node *rnode; + struct radix_tree_node __rcu *rnode; }; #define RADIX_TREE_INIT(mask) { \ -- cgit v1.2.3-18-g5258 From d2c2486bc8e185548490e8edbc84d185de9eaff1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Feb 2010 14:53:26 +0100 Subject: idr: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Manfred Spraul Reviewed-by: Josh Triplett --- include/linux/idr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index e968db71e33..cdb715e58e3 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -50,14 +50,14 @@ struct idr_layer { unsigned long bitmap; /* A zero bit means "space here" */ - struct idr_layer *ary[1< Date: Wed, 24 Feb 2010 20:01:56 +0100 Subject: kernel: __rcu annotations This adds annotations for RCU operations in core kernel components Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Al Viro Cc: Jens Axboe Cc: Andrew Morton Reviewed-by: Josh Triplett --- include/linux/fdtable.h | 6 +++--- include/linux/fs.h | 2 +- include/linux/genhd.h | 6 +++--- include/linux/init_task.h | 4 ++-- include/linux/iocontext.h | 2 +- include/linux/mm_types.h | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index f59ed297b66..133c0ba25e3 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -31,7 +31,7 @@ struct embedded_fd_set { struct fdtable { unsigned int max_fds; - struct file ** fd; /* current fd array */ + struct file __rcu **fd; /* current fd array */ fd_set *close_on_exec; fd_set *open_fds; struct rcu_head rcu; @@ -46,7 +46,7 @@ struct files_struct { * read mostly part */ atomic_t count; - struct fdtable *fdt; + struct fdtable __rcu *fdt; struct fdtable fdtab; /* * written part on a separate cache line in SMP @@ -55,7 +55,7 @@ struct files_struct { int next_fd; struct embedded_fd_set close_on_exec_init; struct embedded_fd_set open_fds_init; - struct file * fd_array[NR_OPEN_DEFAULT]; + struct file __rcu * fd_array[NR_OPEN_DEFAULT]; }; #define rcu_dereference_check_fdtable(files, fdtfd) \ diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b61475..aa3dc8d2043 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1380,7 +1380,7 @@ struct super_block { * Saved mount options for lazy filesystems using * generic_show_options() */ - char *s_options; + char __rcu *s_options; }; extern struct timespec current_fs_time(struct super_block *sb); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5f2f4c4d8fb..af3f06b41dc 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -129,8 +129,8 @@ struct blk_scsi_cmd_filter { struct disk_part_tbl { struct rcu_head rcu_head; int len; - struct hd_struct *last_lookup; - struct hd_struct *part[]; + struct hd_struct __rcu *last_lookup; + struct hd_struct __rcu *part[]; }; struct gendisk { @@ -149,7 +149,7 @@ struct gendisk { * non-critical accesses use RCU. Always access through * helpers. */ - struct disk_part_tbl *part_tbl; + struct disk_part_tbl __rcu *part_tbl; struct hd_struct part0; const struct block_device_operations *fops; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 1f43fa56f60..6460fc65ed6 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -137,8 +137,8 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - .real_cred = &init_cred, \ - .cred = &init_cred, \ + RCU_INIT_POINTER(.real_cred, &init_cred), \ + RCU_INIT_POINTER(.cred, &init_cred), \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ .comm = "swapper", \ diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 64d52913303..3e70b21884a 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -53,7 +53,7 @@ struct io_context { struct radix_tree_root radix_root; struct hlist_head cic_list; - void *ioc_data; + void __rcu *ioc_data; }; static inline struct io_context *ioc_task_link(struct io_context *ioc) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b8bb9a6a1f3..05537a5eb85 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -299,7 +299,7 @@ struct mm_struct { * new_owner->mm == mm * new_owner->alloc_lock is held */ - struct task_struct *owner; + struct task_struct __rcu *owner; #endif #ifdef CONFIG_PROC_FS -- cgit v1.2.3-18-g5258 From 5e8067adfdbaf97039a97540765b1e16eb8d61cc Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 17 Apr 2010 08:48:41 -0400 Subject: rcu head remove init RCU heads really don't need to be initialized. Their state before call_rcu() really does not matter. We need to keep init/destroy_rcu_head_on_stack() though, since we want debugobjects to be able to keep track of these objects. Signed-off-by: Alexey Dobriyan Signed-off-by: Mathieu Desnoyers CC: David S. Miller CC: "Paul E. McKenney" CC: akpm@linux-foundation.org CC: mingo@elte.hu CC: laijs@cn.fujitsu.com CC: dipankar@in.ibm.com CC: josh@joshtriplett.org CC: dvhltc@us.ibm.com CC: niv@us.ibm.com CC: tglx@linutronix.de CC: peterz@infradead.org CC: rostedt@goodmis.org CC: Valdis.Kletnieks@vt.edu CC: dhowells@redhat.com CC: eric.dumazet@gmail.com CC: Alexey Dobriyan Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3e1b6625553..27b44b3e302 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -75,12 +75,6 @@ extern void rcu_init(void); #error "Unknown RCU implementation specified to kernel configuration" #endif -#define RCU_HEAD_INIT { .next = NULL, .func = NULL } -#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT -#define INIT_RCU_HEAD(ptr) do { \ - (ptr)->next = NULL; (ptr)->func = NULL; \ -} while (0) - /* * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic * initialization and destruction of rcu_head on the stack. rcu_head structures -- cgit v1.2.3-18-g5258 From eb4d40654505e47aa9d2035bb97f631fa61d14b4 Mon Sep 17 00:00:00 2001 From: Grégoire Baron Date: Wed, 18 Aug 2010 13:10:35 +0000 Subject: net/sched: add ACT_CSUM action to update packets checksums net/sched: add ACT_CSUM action to update packets checksums ACT_CSUM can be called just after ACT_PEDIT in order to re-compute some altered checksums in IPv4 and IPv6 packets. The following checksums are supported by this patch: - IPv4: IPv4 header, ICMP, IGMP, TCP, UDP & UDPLite - IPv6: ICMPv6, TCP, UDP & UDPLite It's possible to request in the same action to update different kind of checksums, if the packets flow mix TCP, UDP and UDPLite, ... An example of usage is done in the associated iproute2 patch. Version 3 changes: - remove useless goto instructions - improve IPv6 hop options decoding Version 2 changes: - coding style correction - remove useless arguments of some functions - use stack in tcf_csum_dump() - add tcf_csum_skb_nextlayer() to factor code Signed-off-by: Gregoire Baron Acked-by: jamal Signed-off-by: David S. Miller --- include/linux/tc_act/Kbuild | 1 + include/linux/tc_act/tc_csum.h | 32 ++++++++++++++++++++++++++++++++ include/net/tc_act/tc_csum.h | 15 +++++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 include/linux/tc_act/tc_csum.h create mode 100644 include/net/tc_act/tc_csum.h (limited to 'include') diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild index 76990937f4c..67b501c302b 100644 --- a/include/linux/tc_act/Kbuild +++ b/include/linux/tc_act/Kbuild @@ -4,3 +4,4 @@ header-y += tc_mirred.h header-y += tc_pedit.h header-y += tc_nat.h header-y += tc_skbedit.h +header-y += tc_csum.h diff --git a/include/linux/tc_act/tc_csum.h b/include/linux/tc_act/tc_csum.h new file mode 100644 index 00000000000..a047c49a315 --- /dev/null +++ b/include/linux/tc_act/tc_csum.h @@ -0,0 +1,32 @@ +#ifndef __LINUX_TC_CSUM_H +#define __LINUX_TC_CSUM_H + +#include +#include + +#define TCA_ACT_CSUM 16 + +enum { + TCA_CSUM_UNSPEC, + TCA_CSUM_PARMS, + TCA_CSUM_TM, + __TCA_CSUM_MAX +}; +#define TCA_CSUM_MAX (__TCA_CSUM_MAX - 1) + +enum { + TCA_CSUM_UPDATE_FLAG_IPV4HDR = 1, + TCA_CSUM_UPDATE_FLAG_ICMP = 2, + TCA_CSUM_UPDATE_FLAG_IGMP = 4, + TCA_CSUM_UPDATE_FLAG_TCP = 8, + TCA_CSUM_UPDATE_FLAG_UDP = 16, + TCA_CSUM_UPDATE_FLAG_UDPLITE = 32 +}; + +struct tc_csum { + tc_gen; + + __u32 update_flags; +}; + +#endif /* __LINUX_TC_CSUM_H */ diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h new file mode 100644 index 00000000000..9e8710be7a0 --- /dev/null +++ b/include/net/tc_act/tc_csum.h @@ -0,0 +1,15 @@ +#ifndef __NET_TC_CSUM_H +#define __NET_TC_CSUM_H + +#include +#include + +struct tcf_csum { + struct tcf_common common; + + u32 update_flags; +}; +#define to_tcf_csum(pc) \ + container_of(pc,struct tcf_csum,common) + +#endif /* __NET_TC_CSUM_H */ -- cgit v1.2.3-18-g5258 From a57eb940d130477a799dfb24a570ee04979c0f7f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 Jun 2010 16:49:16 -0700 Subject: rcu: Add a TINY_PREEMPT_RCU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement a small-memory-footprint uniprocessor-only implementation of preemptible RCU. This implementation uses but a single blocked-tasks list rather than the combinatorial number used per leaf rcu_node by TREE_PREEMPT_RCU, which reduces memory consumption and greatly simplifies processing. This version also takes advantage of uniprocessor execution to accelerate grace periods in the case where there are no readers. The general design is otherwise broadly similar to that of TREE_PREEMPT_RCU. This implementation is a step towards having RCU implementation driven off of the SMP and PREEMPT kernel configuration variables, which can happen once this implementation has accumulated sufficient experience. Removed ACCESS_ONCE() from __rcu_read_unlock() and added barrier() as suggested by Steve Rostedt in order to avoid the compiler-reordering issue noted by Mathieu Desnoyers (http://lkml.org/lkml/2010/8/16/183). As can be seen below, CONFIG_TINY_PREEMPT_RCU represents almost 5Kbyte savings compared to CONFIG_TREE_PREEMPT_RCU. Of course, for non-real-time workloads, CONFIG_TINY_RCU is even better. CONFIG_TREE_PREEMPT_RCU text data bss dec filename 13 0 0 13 kernel/rcupdate.o 6170 825 28 7023 kernel/rcutree.o ---- 7026 Total CONFIG_TINY_PREEMPT_RCU text data bss dec filename 13 0 0 13 kernel/rcupdate.o 2081 81 8 2170 kernel/rcutiny.o ---- 2183 Total CONFIG_TINY_RCU (non-preemptible) text data bss dec filename 13 0 0 13 kernel/rcupdate.o 719 25 0 744 kernel/rcutiny.o --- 757 Total Requested-by: Loïc Minier Signed-off-by: Paul E. McKenney --- include/linux/hardirq.h | 2 +- include/linux/init_task.h | 10 +++- include/linux/rcupdate.h | 3 +- include/linux/rcutiny.h | 126 ++++++++++++++++++++++++++++++++-------------- include/linux/rcutree.h | 2 + include/linux/sched.h | 10 ++-- 6 files changed, 108 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index d5b387669da..1f4517d55b1 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -139,7 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk) #endif #if defined(CONFIG_NO_HZ) -#if defined(CONFIG_TINY_RCU) +#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) extern void rcu_enter_nohz(void); extern void rcu_exit_nohz(void); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6460fc65ed6..2fea6c8ef6b 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -82,11 +82,17 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_FULL_SET #ifdef CONFIG_TREE_PREEMPT_RCU +#define INIT_TASK_RCU_TREE_PREEMPT() \ + .rcu_blocked_node = NULL, +#else +#define INIT_TASK_RCU_TREE_PREEMPT(tsk) +#endif +#ifdef CONFIG_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ .rcu_read_lock_nesting = 0, \ .rcu_read_unlock_special = 0, \ - .rcu_blocked_node = NULL, \ - .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), + .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ + INIT_TASK_RCU_TREE_PREEMPT() #else #define INIT_TASK_RCU_PREEMPT(tsk) #endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 27b44b3e302..24b89664938 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -58,7 +58,6 @@ struct rcu_head { }; /* Exported common interfaces */ -extern void rcu_barrier(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); extern void synchronize_sched_expedited(void); @@ -69,7 +68,7 @@ extern void rcu_init(void); #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include -#elif defined(CONFIG_TINY_RCU) +#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) #include #else #error "Unknown RCU implementation specified to kernel configuration" diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index e2e893144a8..4cc5eba4161 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -29,66 +29,51 @@ void rcu_sched_qs(int cpu); void rcu_bh_qs(int cpu); -static inline void rcu_note_context_switch(int cpu) -{ - rcu_sched_qs(cpu); -} +#ifdef CONFIG_TINY_RCU #define __rcu_read_lock() preempt_disable() #define __rcu_read_unlock() preempt_enable() +#else /* #ifdef CONFIG_TINY_RCU */ +void __rcu_read_lock(void); +void __rcu_read_unlock(void); +#endif /* #else #ifdef CONFIG_TINY_RCU */ #define __rcu_read_lock_bh() local_bh_disable() #define __rcu_read_unlock_bh() local_bh_enable() -#define call_rcu_sched call_rcu +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)); #define rcu_init_sched() do { } while (0) -extern void rcu_check_callbacks(int cpu, int user); -static inline int rcu_needs_cpu(int cpu) -{ - return 0; -} +extern void synchronize_sched(void); -/* - * Return the number of grace periods. - */ -static inline long rcu_batches_completed(void) -{ - return 0; -} +#ifdef CONFIG_TINY_RCU -/* - * Return the number of bottom-half grace periods. - */ -static inline long rcu_batches_completed_bh(void) -{ - return 0; -} +#define call_rcu call_rcu_sched -static inline void rcu_force_quiescent_state(void) +static inline void synchronize_rcu(void) { + synchronize_sched(); } -static inline void rcu_bh_force_quiescent_state(void) +static inline void synchronize_rcu_expedited(void) { + synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */ } -static inline void rcu_sched_force_quiescent_state(void) +static inline void rcu_barrier(void) { + rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */ } -extern void synchronize_sched(void); +#else /* #ifdef CONFIG_TINY_RCU */ -static inline void synchronize_rcu(void) -{ - synchronize_sched(); -} +void synchronize_rcu(void); +void rcu_barrier(void); +void synchronize_rcu_expedited(void); -static inline void synchronize_rcu_bh(void) -{ - synchronize_sched(); -} +#endif /* #else #ifdef CONFIG_TINY_RCU */ -static inline void synchronize_rcu_expedited(void) +static inline void synchronize_rcu_bh(void) { synchronize_sched(); } @@ -117,15 +102,82 @@ static inline void rcu_exit_nohz(void) #endif /* #else #ifdef CONFIG_NO_HZ */ +#ifdef CONFIG_TINY_RCU + +static inline void rcu_preempt_note_context_switch(void) +{ +} + static inline void exit_rcu(void) { } +static inline int rcu_needs_cpu(int cpu) +{ + return 0; +} + static inline int rcu_preempt_depth(void) { return 0; } +#else /* #ifdef CONFIG_TINY_RCU */ + +void rcu_preempt_note_context_switch(void); +extern void exit_rcu(void); +int rcu_preempt_needs_cpu(void); + +static inline int rcu_needs_cpu(int cpu) +{ + return rcu_preempt_needs_cpu(); +} + +/* + * Defined as macro as it is a very low level header + * included from areas that don't even know about current + * FIXME: combine with include/linux/rcutree.h into rcupdate.h. + */ +#define rcu_preempt_depth() (current->rcu_read_lock_nesting) + +#endif /* #else #ifdef CONFIG_TINY_RCU */ + +static inline void rcu_note_context_switch(int cpu) +{ + rcu_sched_qs(cpu); + rcu_preempt_note_context_switch(); +} + +extern void rcu_check_callbacks(int cpu, int user); + +/* + * Return the number of grace periods. + */ +static inline long rcu_batches_completed(void) +{ + return 0; +} + +/* + * Return the number of bottom-half grace periods. + */ +static inline long rcu_batches_completed_bh(void) +{ + return 0; +} + +static inline void rcu_force_quiescent_state(void) +{ +} + +static inline void rcu_bh_force_quiescent_state(void) +{ +} + +static inline void rcu_sched_force_quiescent_state(void) +{ +} + #ifdef CONFIG_DEBUG_LOCK_ALLOC extern int rcu_scheduler_active __read_mostly; diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c0ed1c056f2..c13b85dd22b 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -95,6 +95,8 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched_expedited(); } +extern void rcu_barrier(void); + extern void rcu_check_callbacks(int cpu, int user); extern long rcu_batches_completed(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c756666c11..e18473f0eb7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1202,11 +1202,13 @@ struct task_struct { unsigned int policy; cpumask_t cpus_allowed; -#ifdef CONFIG_TREE_PREEMPT_RCU +#ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; - struct rcu_node *rcu_blocked_node; struct list_head rcu_node_entry; +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TREE_PREEMPT_RCU + struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) @@ -1740,7 +1742,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -#ifdef CONFIG_TREE_PREEMPT_RCU +#ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ @@ -1749,7 +1751,9 @@ static inline void rcu_copy_process(struct task_struct *p) { p->rcu_read_lock_nesting = 0; p->rcu_read_unlock_special = 0; +#ifdef CONFIG_TREE_PREEMPT_RCU p->rcu_blocked_node = NULL; +#endif INIT_LIST_HEAD(&p->rcu_node_entry); } -- cgit v1.2.3-18-g5258 From 9079fd7c2e06a92cf27d05224a1f478581916c5b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 7 Aug 2010 21:59:54 -0700 Subject: rcu: update obsolete rcu_read_lock() comment. The comment says that blocking is illegal in rcu_read_lock()-style RCU read-side critical sections, which is no longer entirely true given preemptible RCU. This commit provides a fix. Suggested-by: David Miller Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 24b89664938..d7af96ef6fc 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -458,7 +458,20 @@ extern int rcu_my_thread_group_empty(void); * will be deferred until the outermost RCU read-side critical section * completes. * - * It is illegal to block while in an RCU read-side critical section. + * You can avoid reading and understanding the next paragraph by + * following this rule: don't put anything in an rcu_read_lock() RCU + * read-side critical section that would block in a !PREEMPT kernel. + * But if you want the full story, read on! + * + * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it + * is illegal to block while in an RCU read-side critical section. In + * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) + * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may + * be preempted, but explicit blocking is illegal. Finally, in preemptible + * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds, + * RCU read-side critical sections may be preempted and they may also + * block, but only when acquiring spinlocks that are subject to priority + * inheritance. */ static inline void rcu_read_lock(void) { -- cgit v1.2.3-18-g5258 From 53d84e004d5e8c018be395c4330dc72fd60bd13e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Aug 2010 14:28:53 -0700 Subject: rcu: permit suppressing current grace period's CPU stall warnings When using a kernel debugger, a long sojourn in the debugger can get you lots of RCU CPU stall warnings once you resume. This might not be helpful, especially if you are using the system console. This patch therefore allows RCU CPU stall warnings to be suppressed, but only for the duration of the current set of grace periods. This differs from Jason's original patch in that it adds support for tiny RCU and preemptible RCU, and uses a slightly different method for suppressing the RCU CPU stall warning messages. Signed-off-by: Jason Wessel Signed-off-by: Paul E. McKenney Tested-by: Jason Wessel --- include/linux/rcutiny.h | 4 ++++ include/linux/rcutree.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 4cc5eba4161..3fa179784e1 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -178,6 +178,10 @@ static inline void rcu_sched_force_quiescent_state(void) { } +static inline void rcu_cpu_stall_reset(void) +{ +} + #ifdef CONFIG_DEBUG_LOCK_ALLOC extern int rcu_scheduler_active __read_mostly; diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c13b85dd22b..0726809497b 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -36,6 +36,7 @@ extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); +extern void rcu_cpu_stall_reset(void); #ifdef CONFIG_TREE_PREEMPT_RCU -- cgit v1.2.3-18-g5258 From a3dc3fb161f9b4066c0fce22db72638af8baf83b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 13 Aug 2010 16:16:25 -0700 Subject: rcu: repair code-duplication FIXMEs Combine the duplicate definitions of ULONG_CMP_GE(), ULONG_CMP_LT(), and rcu_preempt_depth() into include/linux/rcupdate.h. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 15 +++++++++++++++ include/linux/rcutiny.h | 7 ------- include/linux/rcutree.h | 6 ------ 3 files changed, 15 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d7af96ef6fc..325bad7bbca 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -47,6 +47,9 @@ extern int rcutorture_runnable; /* for sysctl */ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ +#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) +#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) + /** * struct rcu_head - callback structure for use with RCU * @next: next update requests in a list @@ -66,6 +69,18 @@ extern int sched_expedited_torture_stats(char *page); /* Internal to kernel */ extern void rcu_init(void); +#ifdef CONFIG_PREEMPT_RCU + +/* + * Defined as a macro as it is a very low level header included from + * areas that don't even know about current. This gives the rcu_read_lock() + * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other + * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. + */ +#define rcu_preempt_depth() (current->rcu_read_lock_nesting) + +#endif /* #ifdef CONFIG_PREEMPT_RCU */ + #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include #elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 3fa179784e1..c6b11dc5ba0 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -133,13 +133,6 @@ static inline int rcu_needs_cpu(int cpu) return rcu_preempt_needs_cpu(); } -/* - * Defined as macro as it is a very low level header - * included from areas that don't even know about current - * FIXME: combine with include/linux/rcutree.h into rcupdate.h. - */ -#define rcu_preempt_depth() (current->rcu_read_lock_nesting) - #endif /* #else #ifdef CONFIG_TINY_RCU */ static inline void rcu_note_context_switch(int cpu) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 0726809497b..54a20c11f98 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -45,12 +45,6 @@ extern void __rcu_read_unlock(void); extern void synchronize_rcu(void); extern void exit_rcu(void); -/* - * Defined as macro as it is a very low level header - * included from areas that don't even know about current - */ -#define rcu_preempt_depth() (current->rcu_read_lock_nesting) - #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ static inline void __rcu_read_lock(void) -- cgit v1.2.3-18-g5258 From 73d4da4d360136826b36f78f5cf72b29da82c8a6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Aug 2010 10:50:54 -0700 Subject: rcu: Upgrade srcu_read_lock() docbook about SRCU grace periods It is illegal to wait for an SRCU grace period while within the corresponding flavor of SRCU read-side critical section. Therefore, this commit updates the srcu_read_lock() docbook accordingly. Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 6f456a720ff..58971e891f4 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -139,7 +139,12 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) * @sp: srcu_struct in which to register the new reader. * * Enter an SRCU read-side critical section. Note that SRCU read-side - * critical sections may be nested. + * critical sections may be nested. However, it is illegal to + * call anything that waits on an SRCU grace period for the same + * srcu_struct, whether directly or indirectly. Please note that + * one way to indirectly wait on an SRCU grace period is to acquire + * a mutex that is held elsewhere while calling synchronize_srcu() or + * synchronize_srcu_expedited(). */ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) { -- cgit v1.2.3-18-g5258 From 7b0b759b65247cbc66384a912be9acf8d4800636 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 17 Aug 2010 14:18:46 -0700 Subject: rcu: combine duplicate code, courtesy of CONFIG_PREEMPT_RCU The CONFIG_PREEMPT_RCU kernel configuration parameter was recently re-introduced, but as an indication of the type of RCU (preemptible vs. non-preemptible) instead of as selecting a given implementation. This commit uses CONFIG_PREEMPT_RCU to combine duplicate code from include/linux/rcutiny.h and include/linux/rcutree.h into include/linux/rcupdate.h. This commit also combines a few other pieces of duplicate code that have accumulated. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 75 ++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/rcutiny.h | 51 -------------------------------- include/linux/rcutree.h | 50 -------------------------------- 3 files changed, 72 insertions(+), 104 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 325bad7bbca..89414d67d96 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -61,16 +61,30 @@ struct rcu_head { }; /* Exported common interfaces */ +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)); +extern void synchronize_sched(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); extern void synchronize_sched_expedited(void); extern int sched_expedited_torture_stats(char *page); -/* Internal to kernel */ -extern void rcu_init(void); +static inline void __rcu_read_lock_bh(void) +{ + local_bh_disable(); +} + +static inline void __rcu_read_unlock_bh(void) +{ + local_bh_enable(); +} #ifdef CONFIG_PREEMPT_RCU +extern void __rcu_read_lock(void); +extern void __rcu_read_unlock(void); +void synchronize_rcu(void); + /* * Defined as a macro as it is a very low level header included from * areas that don't even know about current. This gives the rcu_read_lock() @@ -79,7 +93,53 @@ extern void rcu_init(void); */ #define rcu_preempt_depth() (current->rcu_read_lock_nesting) -#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#else /* #ifdef CONFIG_PREEMPT_RCU */ + +static inline void __rcu_read_lock(void) +{ + preempt_disable(); +} + +static inline void __rcu_read_unlock(void) +{ + preempt_enable(); +} + +static inline void synchronize_rcu(void) +{ + synchronize_sched(); +} + +static inline int rcu_preempt_depth(void) +{ + return 0; +} + +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + +/* Internal to kernel */ +extern void rcu_init(void); +extern void rcu_sched_qs(int cpu); +extern void rcu_bh_qs(int cpu); +extern void rcu_check_callbacks(int cpu, int user); +struct notifier_block; + +#ifdef CONFIG_NO_HZ + +extern void rcu_enter_nohz(void); +extern void rcu_exit_nohz(void); + +#else /* #ifdef CONFIG_NO_HZ */ + +static inline void rcu_enter_nohz(void) +{ +} + +static inline void rcu_exit_nohz(void) +{ +} + +#endif /* #else #ifdef CONFIG_NO_HZ */ #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include @@ -626,6 +686,8 @@ struct rcu_synchronize { extern void wakeme_after_rcu(struct rcu_head *head); +#ifdef CONFIG_PREEMPT_RCU + /** * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. @@ -642,6 +704,13 @@ extern void wakeme_after_rcu(struct rcu_head *head); extern void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)); +#else /* #ifdef CONFIG_PREEMPT_RCU */ + +/* In classic RCU, call_rcu() is just call_rcu_sched(). */ +#define call_rcu call_rcu_sched + +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + /** * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index c6b11dc5ba0..13877cb93a6 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -27,34 +27,10 @@ #include -void rcu_sched_qs(int cpu); -void rcu_bh_qs(int cpu); - -#ifdef CONFIG_TINY_RCU -#define __rcu_read_lock() preempt_disable() -#define __rcu_read_unlock() preempt_enable() -#else /* #ifdef CONFIG_TINY_RCU */ -void __rcu_read_lock(void); -void __rcu_read_unlock(void); -#endif /* #else #ifdef CONFIG_TINY_RCU */ -#define __rcu_read_lock_bh() local_bh_disable() -#define __rcu_read_unlock_bh() local_bh_enable() -extern void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)); - #define rcu_init_sched() do { } while (0) -extern void synchronize_sched(void); - #ifdef CONFIG_TINY_RCU -#define call_rcu call_rcu_sched - -static inline void synchronize_rcu(void) -{ - synchronize_sched(); -} - static inline void synchronize_rcu_expedited(void) { synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */ @@ -67,7 +43,6 @@ static inline void rcu_barrier(void) #else /* #ifdef CONFIG_TINY_RCU */ -void synchronize_rcu(void); void rcu_barrier(void); void synchronize_rcu_expedited(void); @@ -83,25 +58,6 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched(); } -struct notifier_block; - -#ifdef CONFIG_NO_HZ - -extern void rcu_enter_nohz(void); -extern void rcu_exit_nohz(void); - -#else /* #ifdef CONFIG_NO_HZ */ - -static inline void rcu_enter_nohz(void) -{ -} - -static inline void rcu_exit_nohz(void) -{ -} - -#endif /* #else #ifdef CONFIG_NO_HZ */ - #ifdef CONFIG_TINY_RCU static inline void rcu_preempt_note_context_switch(void) @@ -117,11 +73,6 @@ static inline int rcu_needs_cpu(int cpu) return 0; } -static inline int rcu_preempt_depth(void) -{ - return 0; -} - #else /* #ifdef CONFIG_TINY_RCU */ void rcu_preempt_note_context_switch(void); @@ -141,8 +92,6 @@ static inline void rcu_note_context_switch(int cpu) rcu_preempt_note_context_switch(); } -extern void rcu_check_callbacks(int cpu, int user); - /* * Return the number of grace periods. */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 54a20c11f98..95518e62879 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,59 +30,23 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H -struct notifier_block; - -extern void rcu_sched_qs(int cpu); -extern void rcu_bh_qs(int cpu); extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); extern void rcu_cpu_stall_reset(void); #ifdef CONFIG_TREE_PREEMPT_RCU -extern void __rcu_read_lock(void); -extern void __rcu_read_unlock(void); -extern void synchronize_rcu(void); extern void exit_rcu(void); #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -static inline void __rcu_read_lock(void) -{ - preempt_disable(); -} - -static inline void __rcu_read_unlock(void) -{ - preempt_enable(); -} - -#define synchronize_rcu synchronize_sched - static inline void exit_rcu(void) { } -static inline int rcu_preempt_depth(void) -{ - return 0; -} - #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ -static inline void __rcu_read_lock_bh(void) -{ - local_bh_disable(); -} -static inline void __rcu_read_unlock_bh(void) -{ - local_bh_enable(); -} - -extern void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)); extern void synchronize_rcu_bh(void); -extern void synchronize_sched(void); extern void synchronize_rcu_expedited(void); static inline void synchronize_rcu_bh_expedited(void) @@ -92,8 +56,6 @@ static inline void synchronize_rcu_bh_expedited(void) extern void rcu_barrier(void); -extern void rcu_check_callbacks(int cpu, int user); - extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); extern long rcu_batches_completed_sched(void); @@ -101,18 +63,6 @@ extern void rcu_force_quiescent_state(void); extern void rcu_bh_force_quiescent_state(void); extern void rcu_sched_force_quiescent_state(void); -#ifdef CONFIG_NO_HZ -void rcu_enter_nohz(void); -void rcu_exit_nohz(void); -#else /* CONFIG_NO_HZ */ -static inline void rcu_enter_nohz(void) -{ -} -static inline void rcu_exit_nohz(void) -{ -} -#endif /* CONFIG_NO_HZ */ - /* A context switch is a grace period for RCU-sched and RCU-bh. */ static inline int rcu_blocking_is_gp(void) { -- cgit v1.2.3-18-g5258 From 65e6bf484c497f02d47a0faae69ee398cd59cfda Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 19 Aug 2010 21:43:09 -0700 Subject: rcu: add comment stating that list_empty() applies to RCU-protected lists Because list_empty() does not dereference any RCU-protected pointers, and further does not pass such pointers to the caller (so that the caller does not dereference them either), it is safe to use list_empty() on RCU-protected lists. There is no need for a list_empty_rcu(). This commit adds a comment stating this explicitly. Requested-by: Andrew Morton Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index c10b1050dbe..f31ef61f1c6 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -9,6 +9,15 @@ #include #include +/* + * Why is there no list_empty_rcu()? Because list_empty() serves this + * purpose. The list_empty() function fetches the RCU-protected pointer + * and compares it to the address of the list head, but neither dereferences + * this pointer itself nor provides this pointer to the caller. Therefore, + * it is not necessary to use rcu_dereference(), so that list_empty() can + * be used anywhere you would want to use a list_empty_rcu(). + */ + /* * return the ->next pointer of a list_head in an rcu safe * way, we must not access it directly -- cgit v1.2.3-18-g5258 From 00959ade36acadc00e757f87060bf6e4501d545f Mon Sep 17 00:00:00 2001 From: Dmitry Kozlov Date: Sat, 21 Aug 2010 23:05:39 -0700 Subject: PPTP: PPP over IPv4 (Point-to-Point Tunneling Protocol) PPP: introduce "pptp" module which implements point-to-point tunneling protocol using pppox framework NET: introduce the "gre" module for demultiplexing GRE packets on version criteria (required to pptp and ip_gre may coexists) NET: ip_gre: update to use the "gre" module This patch introduces then pptp support to the linux kernel which dramatically speeds up pptp vpn connections and decreases cpu usage in comparison of existing user-space implementation (poptop/pptpclient). There is accel-pptp project (https://sourceforge.net/projects/accel-pptp/) to utilize this module, it contains plugin for pppd to use pptp in client-mode and modified pptpd (poptop) to build high-performance pptp NAS. There was many changes from initial submitted patch, most important are: 1. using rcu instead of read-write locks 2. using static bitmap instead of dynamically allocated 3. using vmalloc for memory allocation instead of BITS_PER_LONG + __get_free_pages 4. fixed many coding style issues Thanks to Eric Dumazet. Signed-off-by: Dmitry Kozlov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/if_pppox.h | 52 ++++++++++++++++++++++++++++++++---------------- include/net/gre.h | 18 +++++++++++++++++ 2 files changed, 53 insertions(+), 17 deletions(-) create mode 100644 include/net/gre.h (limited to 'include') diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 1925e0c3f16..1525b2156b2 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -40,25 +40,35 @@ * PPPoE addressing definition */ typedef __be16 sid_t; -struct pppoe_addr{ - sid_t sid; /* Session identifier */ - unsigned char remote[ETH_ALEN]; /* Remote address */ - char dev[IFNAMSIZ]; /* Local device to use */ +struct pppoe_addr { + sid_t sid; /* Session identifier */ + unsigned char remote[ETH_ALEN]; /* Remote address */ + char dev[IFNAMSIZ]; /* Local device to use */ }; /************************************************************************ - * Protocols supported by AF_PPPOX - */ + * PPTP addressing definition + */ +struct pptp_addr { + u16 call_id; + struct in_addr sin_addr; +}; + +/************************************************************************ + * Protocols supported by AF_PPPOX + */ #define PX_PROTO_OE 0 /* Currently just PPPoE */ #define PX_PROTO_OL2TP 1 /* Now L2TP also */ -#define PX_MAX_PROTO 2 - -struct sockaddr_pppox { - sa_family_t sa_family; /* address family, AF_PPPOX */ - unsigned int sa_protocol; /* protocol identifier */ - union{ - struct pppoe_addr pppoe; - }sa_addr; +#define PX_PROTO_PPTP 2 +#define PX_MAX_PROTO 3 + +struct sockaddr_pppox { + sa_family_t sa_family; /* address family, AF_PPPOX */ + unsigned int sa_protocol; /* protocol identifier */ + union { + struct pppoe_addr pppoe; + struct pptp_addr pptp; + } sa_addr; } __packed; /* The use of the above union isn't viable because the size of this @@ -101,7 +111,7 @@ struct pppoe_tag { __be16 tag_type; __be16 tag_len; char tag_data[0]; -} __attribute ((packed)); +} __packed; /* Tag identifiers */ #define PTT_EOL __cpu_to_be16(0x0000) @@ -150,15 +160,23 @@ struct pppoe_opt { relayed to (PPPoE relaying) */ }; +struct pptp_opt { + struct pptp_addr src_addr; + struct pptp_addr dst_addr; + u32 ack_sent, ack_recv; + u32 seq_sent, seq_recv; + int ppp_flags; +}; #include struct pppox_sock { /* struct sock must be the first member of pppox_sock */ - struct sock sk; - struct ppp_channel chan; + struct sock sk; + struct ppp_channel chan; struct pppox_sock *next; /* for hash table */ union { struct pppoe_opt pppoe; + struct pptp_opt pptp; } proto; __be16 num; }; diff --git a/include/net/gre.h b/include/net/gre.h new file mode 100644 index 00000000000..82665474bcb --- /dev/null +++ b/include/net/gre.h @@ -0,0 +1,18 @@ +#ifndef __LINUX_GRE_H +#define __LINUX_GRE_H + +#include + +#define GREPROTO_CISCO 0 +#define GREPROTO_PPTP 1 +#define GREPROTO_MAX 2 + +struct gre_protocol { + int (*handler)(struct sk_buff *skb); + void (*err_handler)(struct sk_buff *skb, u32 info); +}; + +int gre_add_protocol(const struct gre_protocol *proto, u8 version); +int gre_del_protocol(const struct gre_protocol *proto, u8 version); + +#endif -- cgit v1.2.3-18-g5258 From 81ce790bd75d49a0d119f5d7b27405e1d9b1bd57 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Aug 2010 23:51:33 +0000 Subject: irda: use net_device_stats from struct net_device struct net_device has its own struct net_device_stats member, so use this one instead of a private copy in the irlan_cb struct. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/irda/irlan_common.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/irda/irlan_common.h b/include/net/irda/irlan_common.h index 73cacb3ac16..0af8b8dfbc2 100644 --- a/include/net/irda/irlan_common.h +++ b/include/net/irda/irlan_common.h @@ -171,7 +171,6 @@ struct irlan_cb { int magic; struct list_head dev_list; struct net_device *dev; /* Ethernet device structure*/ - struct net_device_stats stats; __u32 saddr; /* Source device address */ __u32 daddr; /* Destination device address */ -- cgit v1.2.3-18-g5258 From 739a91ef0625e0e4a40b835f4f891313c47915df Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 21 Aug 2010 06:23:15 +0000 Subject: net_sched: cls_flow: add key rxhash We can use rxhash to classify the traffic into flows. As rxhash maybe supplied by NIC or RPS, it is cheaper. Signed-off-by: Changli Gao Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/pkt_cls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 7f6ba8658ab..defbde203d0 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -332,6 +332,7 @@ enum { FLOW_KEY_SKUID, FLOW_KEY_SKGID, FLOW_KEY_VLAN_TAG, + FLOW_KEY_RXHASH, __FLOW_KEY_MAX, }; -- cgit v1.2.3-18-g5258 From d8287fc864643beaf1623c92aceb1ab38eae0648 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 18:37:27 -0700 Subject: net: use __be16 instead of u16 for the userspace code Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/if_pppox.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 1525b2156b2..770e8fa669d 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -50,8 +50,8 @@ struct pppoe_addr { * PPTP addressing definition */ struct pptp_addr { - u16 call_id; - struct in_addr sin_addr; + __be16 call_id; + struct in_addr sin_addr; }; /************************************************************************ -- cgit v1.2.3-18-g5258 From 05532121da0728eaedac2a0a5c3cecad3a95d765 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 21:03:33 -0700 Subject: net: 802.1q: make vlan_hwaccel_do_receive() return void vlan_hwaccel_do_receive() always returns 0, so make it return void. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 3d870fda8c4..a52320751bf 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -119,7 +119,7 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); -extern int vlan_hwaccel_do_receive(struct sk_buff *skb); +extern void vlan_hwaccel_do_receive(struct sk_buff *skb); extern gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb); @@ -147,9 +147,8 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, return NET_XMIT_SUCCESS; } -static inline int vlan_hwaccel_do_receive(struct sk_buff *skb) +static inline void vlan_hwaccel_do_receive(struct sk_buff *skb) { - return 0; } static inline gro_result_t -- cgit v1.2.3-18-g5258 From fcb12fd2236f49aa8fdc1568ed4ebdfe4fddc6b5 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 16:41:59 +0000 Subject: net: rds: remove duplication type definitions __be* are defined in linux/types.h now, and in fact, rds.h isn't exported to user space even. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/rds.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 24bce3ded9e..7f3971d9fc5 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -36,15 +36,6 @@ #include -/* These sparse annotated types shouldn't be in any user - * visible header file. We should clean this up rather - * than kludging around them. */ -#ifndef __KERNEL__ -#define __be16 u_int16_t -#define __be32 u_int32_t -#define __be64 u_int64_t -#endif - #define RDS_IB_ABI_VERSION 0x301 /* -- cgit v1.2.3-18-g5258 From 21dc330157454046dd7c494961277d76e1c957fe Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 23 Aug 2010 00:13:46 -0700 Subject: net: Rename skb_has_frags to skb_has_frag_list SKBs can be "fragmented" in two ways, via a page array (called skb_shinfo(skb)->frags[]) and via a list of SKBs (called skb_shinfo(skb)->frag_list). Since skb_has_frags() tests the latter, it's name is confusing since it sounds more like it's testing the former. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- include/linux/skbuff.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 46c36ffe20e..ce2de8b6408 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2191,7 +2191,7 @@ static inline int net_gso_ok(int features, int gso_type) static inline int skb_gso_ok(struct sk_buff *skb, int features) { return net_gso_ok(features, skb_shinfo(skb)->gso_type) && - (!skb_has_frags(skb) || (features & NETIF_F_FRAGLIST)); + (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); } static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f067c95cf18..f900ffcd847 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1120,7 +1120,7 @@ extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size); #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) -#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frags(skb)) +#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb)) #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) #ifdef NET_SKBUFF_DATA_USES_OFFSET @@ -1784,7 +1784,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb = skb->prev) -static inline bool skb_has_frags(const struct sk_buff *skb) +static inline bool skb_has_frag_list(const struct sk_buff *skb) { return skb_shinfo(skb)->frag_list != NULL; } -- cgit v1.2.3-18-g5258 From 61c77326d1df079f202fa79403c3ccd8c5966a81 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 16 Aug 2010 09:16:55 +0800 Subject: x86, mm: Avoid unnecessary TLB flush In x86, access and dirty bits are set automatically by CPU when CPU accesses memory. When we go into the code path of below flush_tlb_fix_spurious_fault(), we already set dirty bit for pte and don't need flush tlb. This might mean tlb entry in some CPUs hasn't dirty bit set, but this doesn't matter. When the CPUs do page write, they will automatically check the bit and no software involved. On the other hand, flush tlb in below position is harmful. Test creates CPU number of threads, each thread writes to a same but random address in same vma range and we measure the total time. Under a 4 socket system, original time is 1.96s, while with the patch, the time is 0.8s. Under a 2 socket system, there is 20% time cut too. perf shows a lot of time are taking to send ipi/handle ipi for tlb flush. Signed-off-by: Shaohua Li LKML-Reference: <20100816011655.GA362@sli10-desk.sh.intel.com> Acked-by: Suresh Siddha Cc: Andrea Archangeli Signed-off-by: H. Peter Anvin --- include/asm-generic/pgtable.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index e2bd73e8f9c..f4d4120e512 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres #define move_pte(pte, prot, old_addr, new_addr) (pte) #endif +#ifndef flush_tlb_fix_spurious_fault +#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address) +#endif + #ifndef pgprot_noncached #define pgprot_noncached(prot) (prot) #endif -- cgit v1.2.3-18-g5258 From 8b230ed8ec96c933047dd0625cf95f739e4939a6 Mon Sep 17 00:00:00 2001 From: Rasesh Mody Date: Mon, 23 Aug 2010 20:24:12 -0700 Subject: bna: Brocade 10Gb Ethernet device driver This is patch 1/6 which contains linux driver source for Brocade's BR1010/BR1020 10Gb CEE capable ethernet adapter. Signed-off-by: Debashis Dutt Signed-off-by: Rasesh Mody Signed-off-by: David S. Miller --- include/linux/pci_ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36ca..1f730de0df0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2189,6 +2189,9 @@ #define PCI_VENDOR_ID_ARIMA 0x161f #define PCI_VENDOR_ID_BROCADE 0x1657 +#define PCI_DEVICE_ID_BROCADE_CT 0x0014 +#define PCI_DEVICE_ID_BROCADE_FC_8G1P 0x0017 +#define PCI_DEVICE_ID_BROCADE_CT_FC 0x0021 #define PCI_VENDOR_ID_SIBYTE 0x166d #define PCI_DEVICE_ID_BCM1250_PCI 0x0001 -- cgit v1.2.3-18-g5258 From 1726442e115a9e58f40747d009a5b4f303e0840a Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 23 Aug 2010 16:26:41 +0000 Subject: net: increase the size of priv_flags and add IFF_OVS_DATAPATH IFF_OVS_DATAPATH is a place-holder for the Open vSwitch datapath which I am preparing to submit for merging. As all 16 bits of priv_flags are already assigned flags, also increase the size of priv_flags to 32 bits. Unfortunately, by my calculations this increases the size of struct net_device by 4 bytes on 32bit architectures and 8 bytes on 64 bit architectures. I couldn't see an obvious way to avoid that. Cc: Jesse Gross Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/if.h | 2 ++ include/linux/netdevice.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if.h b/include/linux/if.h index 53558ec59e1..6ed43c1f07a 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -75,6 +75,8 @@ #define IFF_DISABLE_NETPOLL 0x2000 /* disable netpoll at run-time */ #define IFF_MACVLAN_PORT 0x4000 /* device used as macvlan port */ #define IFF_BRIDGE_PORT 0x8000 /* device used as bridge port */ +#define IFF_OVS_DATAPATH 0x10000 /* device used as Open vSwitch + * dapath port */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ce2de8b6408..59962dbc275 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -901,7 +901,7 @@ struct net_device { unsigned int flags; /* interface flags (a la BSD) */ unsigned short gflags; - unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */ + unsigned int priv_flags; /* Like 'flags' but invisible to userspace. */ unsigned short padded; /* How much padding added by alloc_netdev() */ unsigned char operstate; /* RFC2863 operstate */ -- cgit v1.2.3-18-g5258 From 633adf1ad1c92c02bd3f10bbd73737a969179378 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Aug 2010 14:49:58 +0200 Subject: cfg80211: mark ieee80211_hdrlen const This function analyses only its single, value-passed argument, and has no side effects. Thus it can be const, which makes mac80211 smaller, for example: text data bss dec hex filename 362518 16720 884 380122 5ccda mac80211.ko (before) 362358 16720 884 379962 5cc3a mac80211.ko (after) a 160 byte saving in text size, and an optimisation because the function won't be called as often. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2fd06c60ffb..2b403c7ee32 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1715,7 +1715,7 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); * ieee80211_hdrlen - get header length in bytes from frame control * @fc: frame control field in little-endian format */ -unsigned int ieee80211_hdrlen(__le16 fc); +unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 -- cgit v1.2.3-18-g5258 From 2e161f78e5f63a7f9fd25a766bb7f816a01eb14a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Aug 2010 15:38:38 +0200 Subject: cfg80211/mac80211: extensible frame processing Allow userspace to register for more than just action frames by giving the frame subtype, and make it possible to use this in various modes as well. With some tweaks and some added functionality this will, in the future, also be usable in AP mode and be able to replace the cooked monitor interface currently used in that case. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 93 +++++++++++++++++++++++++++++++++++++++---------- include/net/cfg80211.h | 56 +++++++++++++++++------------ 2 files changed, 108 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 2c870168733..8af1e66c3cf 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -39,6 +39,43 @@ * TODO: need more info? */ +/** + * DOC: Frame transmission/registration support + * + * Frame transmission and registration support exists to allow userspace + * management entities such as wpa_supplicant react to management frames + * that are not being handled by the kernel. This includes, for example, + * certain classes of action frames that cannot be handled in the kernel + * for various reasons. + * + * Frame registration is done on a per-interface basis and registrations + * cannot be removed other than by closing the socket. It is possible to + * specify a registration filter to register, for example, only for a + * certain type of action frame. In particular with action frames, those + * that userspace registers for will not be returned as unhandled by the + * driver, so that the registered application has to take responsibility + * for doing that. + * + * The type of frame that can be registered for is also dependent on the + * driver and interface type. The frame types are advertised in wiphy + * attributes so applications know what to expect. + * + * NOTE: When an interface changes type while registrations are active, + * these registrations are ignored until the interface type is + * changed again. This means that changing the interface type can + * lead to a situation that couldn't otherwise be produced, but + * any such registrations will be dormant in the sense that they + * will not be serviced, i.e. they will not receive any frames. + * + * Frame transmission allows userspace to send for example the required + * responses to action frames. It is subject to some sanity checking, + * but many frames can be transmitted. When a frame was transmitted, its + * status is indicated to the sending socket. + * + * For more technical details, see the corresponding command descriptions + * below. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -301,16 +338,18 @@ * rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface * and @NL80211_ATTR_TX_RATES the set of allowed rates. * - * @NL80211_CMD_REGISTER_ACTION: Register for receiving certain action frames - * (via @NL80211_CMD_ACTION) for processing in userspace. This command - * requires an interface index and a match attribute containing the first - * few bytes of the frame that should match, e.g. a single byte for only - * a category match or four bytes for vendor frames including the OUI. - * The registration cannot be dropped, but is removed automatically - * when the netlink socket is closed. Multiple registrations can be made. - * @NL80211_CMD_ACTION: Action frame TX request and RX notification. This - * command is used both as a request to transmit an Action frame and as an - * event indicating reception of an Action frame that was not processed in + * @NL80211_CMD_REGISTER_FRAME: Register for receiving certain mgmt frames + * (via @NL80211_CMD_FRAME) for processing in userspace. This command + * requires an interface index, a frame type attribute (optional for + * backward compatibility reasons, if not given assumes action frames) + * and a match attribute containing the first few bytes of the frame + * that should match, e.g. a single byte for only a category match or + * four bytes for vendor frames including the OUI. The registration + * cannot be dropped, but is removed automatically when the netlink + * socket is closed. Multiple registrations can be made. + * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This + * command is used both as a request to transmit a management frame and + * as an event indicating reception of a frame that was not processed in * kernel code, but is for us (i.e., which may need to be processed in a * user space application). %NL80211_ATTR_FRAME is used to specify the * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ (and @@ -320,8 +359,8 @@ * operational channel). When called, this operation returns a cookie * (%NL80211_ATTR_COOKIE) that will be included with the TX status event * pertaining to the TX request. - * @NL80211_CMD_ACTION_TX_STATUS: Report TX status of an Action frame - * transmitted with %NL80211_CMD_ACTION. %NL80211_ATTR_COOKIE identifies + * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame + * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies * the TX command and %NL80211_ATTR_FRAME includes the contents of the * frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged * the frame. @@ -429,9 +468,12 @@ enum nl80211_commands { NL80211_CMD_SET_TX_BITRATE_MASK, - NL80211_CMD_REGISTER_ACTION, - NL80211_CMD_ACTION, - NL80211_CMD_ACTION_TX_STATUS, + NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_REGISTER_ACTION = NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_FRAME, + NL80211_CMD_ACTION = NL80211_CMD_FRAME, + NL80211_CMD_FRAME_TX_STATUS, + NL80211_CMD_ACTION_TX_STATUS = NL80211_CMD_FRAME_TX_STATUS, NL80211_CMD_SET_POWER_SAVE, NL80211_CMD_GET_POWER_SAVE, @@ -708,7 +750,16 @@ enum nl80211_commands { * is used with %NL80211_CMD_SET_TX_BITRATE_MASK. * * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain - * at least one byte, currently used with @NL80211_CMD_REGISTER_ACTION. + * at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME. + * @NL80211_ATTR_FRAME_TYPE: A u16 indicating the frame type/subtype for the + * @NL80211_CMD_REGISTER_FRAME command. + * @NL80211_ATTR_TX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be transmitted with + * %NL80211_CMD_FRAME. + * @NL80211_ATTR_RX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be registered for RX. * * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was * acknowledged by the recipient. @@ -891,6 +942,10 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_TX_POWER_SETTING, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, + NL80211_ATTR_TX_FRAME_TYPES, + NL80211_ATTR_RX_FRAME_TYPES, + NL80211_ATTR_FRAME_TYPE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -947,7 +1002,7 @@ enum nl80211_attrs { * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames * @NL80211_IFTYPE_MESH_POINT: mesh point * @NL80211_IFTYPE_MAX: highest interface type number currently defined - * @__NL80211_IFTYPE_AFTER_LAST: internal use + * @NUM_NL80211_IFTYPES: number of defined interface types * * These values are used with the %NL80211_ATTR_IFTYPE * to set the type of an interface. @@ -964,8 +1019,8 @@ enum nl80211_iftype { NL80211_IFTYPE_MESH_POINT, /* keep last */ - __NL80211_IFTYPE_AFTER_LAST, - NL80211_IFTYPE_MAX = __NL80211_IFTYPE_AFTER_LAST - 1 + NUM_NL80211_IFTYPES, + NL80211_IFTYPE_MAX = NUM_NL80211_IFTYPES - 1 }; /** diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2b403c7ee32..6a98b1b3bfd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1020,7 +1020,7 @@ struct cfg80211_pmksa { * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation. * This allows the operation to be terminated prior to timeout based on * the duration value. - * @action: Transmit an action frame + * @mgmt_tx: Transmit a management frame * * @testmode_cmd: run a test mode command * @@ -1172,7 +1172,7 @@ struct cfg80211_ops { struct net_device *dev, u64 cookie); - int (*action)(struct wiphy *wiphy, struct net_device *dev, + int (*mgmt_tx)(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, bool channel_type_valid, @@ -1236,6 +1236,10 @@ struct mac_address { u8 addr[ETH_ALEN]; }; +struct ieee80211_txrx_stypes { + u16 tx, rx; +}; + /** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback @@ -1286,6 +1290,10 @@ struct mac_address { * @privid: a pointer that drivers can use to identify if an arbitrary * wiphy is theirs, e.g. in global notifiers * @bands: information about bands/channels supported by this device + * + * @mgmt_stypes: bitmasks of frame subtypes that can be subscribed to or + * transmitted through nl80211, points to an array indexed by interface + * type */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -1294,9 +1302,12 @@ struct wiphy { u8 perm_addr[ETH_ALEN]; u8 addr_mask[ETH_ALEN]; - u16 n_addresses; struct mac_address *addresses; + const struct ieee80211_txrx_stypes *mgmt_stypes; + + u16 n_addresses; + /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ u16 interface_modes; @@ -1492,8 +1503,8 @@ struct cfg80211_cached_keys; * set by driver (if supported) on add_interface BEFORE registering the * netdev and may otherwise be used by driver read-only, will be update * by cfg80211 on change_interface - * @action_registrations: list of registrations for action frames - * @action_registrations_lock: lock for the list + * @mgmt_registrations: list of registrations for management frames + * @mgmt_registrations_lock: lock for the list * @mtx: mutex used to lock data in this struct * @cleanup_work: work struct used for cleanup that can't be done directly */ @@ -1505,8 +1516,8 @@ struct wireless_dev { struct list_head list; struct net_device *netdev; - struct list_head action_registrations; - spinlock_t action_registrations_lock; + struct list_head mgmt_registrations; + spinlock_t mgmt_registrations_lock; struct mutex mtx; @@ -2373,38 +2384,39 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); /** - * cfg80211_rx_action - notification of received, unprocessed Action frame + * cfg80211_rx_mgmt - notification of received, unprocessed management frame * @dev: network device * @freq: Frequency on which the frame was received in MHz - * @buf: Action frame (header + body) + * @buf: Management frame (header + body) * @len: length of the frame data * @gfp: context flags - * Returns %true if a user space application is responsible for rejecting the - * unrecognized Action frame; %false if no such application is registered - * (i.e., the driver is responsible for rejecting the unrecognized Action - * frame) + * + * Returns %true if a user space application has registered for this frame. + * For action frames, that makes it responsible for rejecting unrecognized + * action frames; %false otherwise, in which case for action frames the + * driver is responsible for rejecting the frame. * * This function is called whenever an Action frame is received for a station * mode interface, but is not processed in kernel. */ -bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, - size_t len, gfp_t gfp); +bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, + size_t len, gfp_t gfp); /** - * cfg80211_action_tx_status - notification of TX status for Action frame + * cfg80211_mgmt_tx_status - notification of TX status for management frame * @dev: network device - * @cookie: Cookie returned by cfg80211_ops::action() - * @buf: Action frame (header + body) + * @cookie: Cookie returned by cfg80211_ops::mgmt_tx() + * @buf: Management frame (header + body) * @len: length of the frame data * @ack: Whether frame was acknowledged * @gfp: context flags * - * This function is called whenever an Action frame was requested to be - * transmitted with cfg80211_ops::action() to report the TX status of the + * This function is called whenever a management frame was requested to be + * transmitted with cfg80211_ops::mgmt_tx() to report the TX status of the * transmission attempt. */ -void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp); +void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp); /** -- cgit v1.2.3-18-g5258 From d2730b2a6a019d14455556019d744ab051e6554b Mon Sep 17 00:00:00 2001 From: Gábor Stefanik Date: Mon, 16 Aug 2010 22:39:16 +0200 Subject: b43: N-PHY: Implement MAC PHY clock set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Gábor Stefanik Signed-off-by: John W. Linville --- include/linux/ssb/ssb_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index a6d5225b927..11daf9c140e 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -97,6 +97,7 @@ #define SSB_TMSLOW_RESET 0x00000001 /* Reset */ #define SSB_TMSLOW_REJECT_22 0x00000002 /* Reject (Backplane rev 2.2) */ #define SSB_TMSLOW_REJECT_23 0x00000004 /* Reject (Backplane rev 2.3) */ +#define SSB_TMSLOW_PHYCLK 0x00000010 /* MAC PHY Clock Control Enable */ #define SSB_TMSLOW_CLOCK 0x00010000 /* Clock Enable */ #define SSB_TMSLOW_FGC 0x00020000 /* Force Gated Clocks On */ #define SSB_TMSLOW_PE 0x40000000 /* Power Management Enable */ -- cgit v1.2.3-18-g5258 From 633dd1ea683d907af944bcd9814092efe9869b05 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Aug 2010 15:01:23 +0200 Subject: mac80211: fix docbook Fix a small problem in the documentation for ieee80211_request_smps, and a now erroneous inclusion of enum ieee80211_key_alg, which no longer exists after the change to ciphers. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 914c747ac3a..2a181136607 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2537,7 +2537,7 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); /** * ieee80211_request_smps - request SM PS transition * @vif: &struct ieee80211_vif pointer from the add_interface callback. - * @mode: new SM PS mode + * @smps_mode: new SM PS mode * * This allows the driver to request an SM PS transition in managed * mode. This is useful when the driver has more information than -- cgit v1.2.3-18-g5258 From 2a5fb7b088f8418958775774dda9427d6c73c522 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Aug 2010 17:44:36 +0200 Subject: nl80211: some documentation fixes The nl80211 documentation is currently never generated, so problems have accumulated. Fix most of the trivial ones. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 57 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 8af1e66c3cf..ec1690da784 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -347,6 +347,8 @@ * four bytes for vendor frames including the OUI. The registration * cannot be dropped, but is removed automatically when the netlink * socket is closed. Multiple registrations can be made. + * @NL80211_CMD_REGISTER_ACTION: Alias for @NL80211_CMD_REGISTER_FRAME for + * backward compatibility * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This * command is used both as a request to transmit a management frame and * as an event indicating reception of a frame that was not processed in @@ -359,11 +361,14 @@ * operational channel). When called, this operation returns a cookie * (%NL80211_ATTR_COOKIE) that will be included with the TX status event * pertaining to the TX request. + * @NL80211_CMD_ACTION: Alias for @NL80211_CMD_FRAME for backward compatibility. * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies * the TX command and %NL80211_ATTR_FRAME includes the contents of the * frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged * the frame. + * @NL80211_CMD_ACTION_TX_STATUS: Alias for @NL80211_CMD_FRAME_TX_STATUS for + * backward compatibility. * @NL80211_CMD_SET_CQM: Connection quality monitor configuration. This command * is used to configure connection quality monitoring notification trigger * levels. @@ -1029,11 +1034,14 @@ enum nl80211_iftype { * Station flags. When a station is added to an AP interface, it is * assumed to be already associated (and hence authenticated.) * + * @__NL80211_STA_FLAG_INVALID: attribute number 0 is reserved * @NL80211_STA_FLAG_AUTHORIZED: station is authorized (802.1X) * @NL80211_STA_FLAG_SHORT_PREAMBLE: station is capable of receiving frames * with short barker preamble * @NL80211_STA_FLAG_WME: station is WME/QoS capable * @NL80211_STA_FLAG_MFP: station uses management frame protection + * @NL80211_STA_FLAG_MAX: highest station flag number currently defined + * @__NL80211_STA_FLAG_AFTER_LAST: internal use */ enum nl80211_sta_flags { __NL80211_STA_FLAG_INVALID, @@ -1146,14 +1154,17 @@ enum nl80211_mpath_flags { * information about a mesh path. * * @__NL80211_MPATH_INFO_INVALID: attribute number 0 is reserved - * @NL80211_ATTR_MPATH_FRAME_QLEN: number of queued frames for this destination - * @NL80211_ATTR_MPATH_SN: destination sequence number - * @NL80211_ATTR_MPATH_METRIC: metric (cost) of this mesh path - * @NL80211_ATTR_MPATH_EXPTIME: expiration time for the path, in msec from now - * @NL80211_ATTR_MPATH_FLAGS: mesh path flags, enumerated in + * @NL80211_MPATH_INFO_FRAME_QLEN: number of queued frames for this destination + * @NL80211_MPATH_INFO_SN: destination sequence number + * @NL80211_MPATH_INFO_METRIC: metric (cost) of this mesh path + * @NL80211_MPATH_INFO_EXPTIME: expiration time for the path, in msec from now + * @NL80211_MPATH_INFO_FLAGS: mesh path flags, enumerated in * &enum nl80211_mpath_flags; - * @NL80211_ATTR_MPATH_DISCOVERY_TIMEOUT: total path discovery timeout, in msec - * @NL80211_ATTR_MPATH_DISCOVERY_RETRIES: mesh path discovery retries + * @NL80211_MPATH_INFO_DISCOVERY_TIMEOUT: total path discovery timeout, in msec + * @NL80211_MPATH_INFO_DISCOVERY_RETRIES: mesh path discovery retries + * @NL80211_MPATH_INFO_MAX: highest mesh path information attribute number + * currently defind + * @__NL80211_MPATH_INFO_AFTER_LAST: internal use */ enum nl80211_mpath_info { __NL80211_MPATH_INFO_INVALID, @@ -1182,6 +1193,8 @@ enum nl80211_mpath_info { * @NL80211_BAND_ATTR_HT_CAPA: HT capabilities, as in the HT information IE * @NL80211_BAND_ATTR_HT_AMPDU_FACTOR: A-MPDU factor, as in 11n * @NL80211_BAND_ATTR_HT_AMPDU_DENSITY: A-MPDU density, as in 11n + * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined + * @__NL80211_BAND_ATTR_AFTER_LAST: internal use */ enum nl80211_band_attr { __NL80211_BAND_ATTR_INVALID, @@ -1202,6 +1215,7 @@ enum nl80211_band_attr { /** * enum nl80211_frequency_attr - frequency attributes + * @__NL80211_FREQUENCY_ATTR_INVALID: attribute number 0 is reserved * @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz * @NL80211_FREQUENCY_ATTR_DISABLED: Channel is disabled in current * regulatory domain. @@ -1213,6 +1227,9 @@ enum nl80211_band_attr { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm * (100 * dBm). + * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number + * currently defined + * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use */ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, @@ -1232,9 +1249,13 @@ enum nl80211_frequency_attr { /** * enum nl80211_bitrate_attr - bitrate attributes + * @__NL80211_BITRATE_ATTR_INVALID: attribute number 0 is reserved * @NL80211_BITRATE_ATTR_RATE: Bitrate in units of 100 kbps * @NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE: Short preamble supported * in 2.4 GHz band. + * @NL80211_BITRATE_ATTR_MAX: highest bitrate attribute number + * currently defined + * @__NL80211_BITRATE_ATTR_AFTER_LAST: internal use */ enum nl80211_bitrate_attr { __NL80211_BITRATE_ATTR_INVALID, @@ -1290,6 +1311,7 @@ enum nl80211_reg_type { /** * enum nl80211_reg_rule_attr - regulatory rule attributes + * @__NL80211_REG_RULE_ATTR_INVALID: attribute number 0 is reserved * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional * considerations for a given frequency range. These are the * &enum nl80211_reg_rule_flags. @@ -1306,6 +1328,9 @@ enum nl80211_reg_type { * If you don't have one then don't send this. * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for * a given frequency range. The value is in mBm (100 * dBm). + * @NL80211_REG_RULE_ATTR_MAX: highest regulatory rule attribute number + * currently defined + * @__NL80211_REG_RULE_ATTR_AFTER_LAST: internal use */ enum nl80211_reg_rule_attr { __NL80211_REG_RULE_ATTR_INVALID, @@ -1357,6 +1382,9 @@ enum nl80211_reg_rule_flags { * @__NL80211_SURVEY_INFO_INVALID: attribute number 0 is reserved * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) + * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number + * currently defined + * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use */ enum nl80211_survey_info { __NL80211_SURVEY_INFO_INVALID, @@ -1521,6 +1549,7 @@ enum nl80211_channel_type { * enum nl80211_bss - netlink attributes for a BSS * * @__NL80211_BSS_INVALID: invalid + * @NL80211_BSS_BSSID: BSSID of the BSS (6 octets) * @NL80211_BSS_FREQUENCY: frequency in MHz (u32) * @NL80211_BSS_TSF: TSF of the received probe response/beacon (u64) * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16) @@ -1564,6 +1593,12 @@ enum nl80211_bss { /** * enum nl80211_bss_status - BSS "status" + * @NL80211_BSS_STATUS_AUTHENTICATED: Authenticated with this BSS. + * @NL80211_BSS_STATUS_ASSOCIATED: Associated with this BSS. + * @NL80211_BSS_STATUS_IBSS_JOINED: Joined to this IBSS. + * + * The BSS status is a BSS attribute in scan dumps, which + * indicates the status the interface has wrt. this BSS. */ enum nl80211_bss_status { NL80211_BSS_STATUS_AUTHENTICATED, @@ -1674,8 +1709,8 @@ enum nl80211_tx_rate_attributes { /** * enum nl80211_band - Frequency band - * @NL80211_BAND_2GHZ - 2.4 GHz ISM band - * @NL80211_BAND_5GHZ - around 5 GHz band (4.9 - 5.7 GHz) + * @NL80211_BAND_2GHZ: 2.4 GHz ISM band + * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz) */ enum nl80211_band { NL80211_BAND_2GHZ, @@ -1713,9 +1748,9 @@ enum nl80211_attr_cqm { /** * enum nl80211_cqm_rssi_threshold_event - RSSI threshold event - * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW - The RSSI level is lower than the + * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW: The RSSI level is lower than the * configured threshold - * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH - The RSSI is higher than the + * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH: The RSSI is higher than the * configured threshold */ enum nl80211_cqm_rssi_threshold_event { -- cgit v1.2.3-18-g5258 From d70e96932de55fb2c05b1c0af1dff178651a9b77 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Aug 2010 16:11:27 +0200 Subject: cfg80211: add some documentation Add some documentation for cfg80211. I'm hoping some of the regulatory documentation will be filled by somebody more familiar with it, hint hint! :) Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 121 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 115 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6a98b1b3bfd..f2740537b5d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -25,6 +25,43 @@ #include +/** + * DOC: Introduction + * + * cfg80211 is the configuration API for 802.11 devices in Linux. It bridges + * userspace and drivers, and offers some utility functionality associated + * with 802.11. cfg80211 must, directly or indirectly via mac80211, be used + * by all modern wireless drivers in Linux, so that they offer a consistent + * API through nl80211. For backward compatibility, cfg80211 also offers + * wireless extensions to userspace, but hides them from drivers completely. + * + * Additionally, cfg80211 contains code to help enforce regulatory spectrum + * use restrictions. + */ + + +/** + * DOC: Device registration + * + * In order for a driver to use cfg80211, it must register the hardware device + * with cfg80211. This happens through a number of hardware capability structs + * described below. + * + * The fundamental structure for each device is the 'wiphy', of which each + * instance describes a physical wireless device connected to the system. Each + * such wiphy can have zero, one, or many virtual interfaces associated with + * it, which need to be identified as such by pointing the network interface's + * @ieee80211_ptr pointer to a &struct wireless_dev which further describes + * the wireless part of the interface, normally this struct is embedded in the + * network interface's private data area. Drivers can optionally allow creating + * or destroying virtual interfaces on the fly, but without at least one or the + * ability to create some the wireless device isn't useful. + * + * Each wiphy structure contains device capability information, and also has + * a pointer to the various operations the driver offers. The definitions and + * structures here describe these capabilities in detail. + */ + /* * wireless hardware capability structures */ @@ -204,6 +241,21 @@ struct ieee80211_supported_band { * Wireless hardware/device configuration structures and methods */ +/** + * DOC: Actions and configuration + * + * Each wireless device and each virtual interface offer a set of configuration + * operations and other actions that are invoked by userspace. Each of these + * actions is described in the operations structure, and the parameters these + * operations use are described separately. + * + * Additionally, some operations are asynchronous and expect to get status + * information via some functions that drivers need to call. + * + * Scanning and BSS list handling with its associated functionality is described + * in a separate chapter. + */ + /** * struct vif_params - describes virtual interface parameters * @mesh_id: mesh ID to use @@ -570,8 +622,28 @@ struct ieee80211_txq_params { /* from net/wireless.h */ struct wiphy; -/* from net/ieee80211.h */ -struct ieee80211_channel; +/** + * DOC: Scanning and BSS list handling + * + * The scanning process itself is fairly simple, but cfg80211 offers quite + * a bit of helper functionality. To start a scan, the scan operation will + * be invoked with a scan definition. This scan definition contains the + * channels to scan, and the SSIDs to send probe requests for (including the + * wildcard, if desired). A passive scan is indicated by having no SSIDs to + * probe. Additionally, a scan request may contain extra information elements + * that should be added to the probe request. The IEs are guaranteed to be + * well-formed, and will not exceed the maximum length the driver advertised + * in the wiphy structure. + * + * When scanning finds a BSS, cfg80211 needs to be notified of that, because + * it is responsible for maintaining the BSS list; the driver should not + * maintain a list itself. For this notification, various functions exist. + * + * Since drivers do not maintain a BSS list, there are also a number of + * functions to search for a BSS and obtain information about it from the + * BSS structure cfg80211 maintains. The BSS list is also made available + * to userspace. + */ /** * struct cfg80211_ssid - SSID description @@ -1574,8 +1646,10 @@ static inline void *wdev_priv(struct wireless_dev *wdev) return wiphy_priv(wdev->wiphy); } -/* - * Utility functions +/** + * DOC: Utility functions + * + * cfg80211 offers a number of utility functions that can be useful. */ /** @@ -1728,6 +1802,14 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); */ unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); +/** + * DOC: Data path helpers + * + * In addition to generic utilities, cfg80211 also offers + * functions that help implement the data path for devices + * that do not do the 802.11/802.3 conversion on the device. + */ + /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 * @skb: the 802.11 data frame @@ -1788,8 +1870,10 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb); */ const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len); -/* - * Regulatory helper functions for wiphys +/** + * DOC: Regulatory enforcement infrastructure + * + * TODO */ /** @@ -2191,6 +2275,20 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, */ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp); +/** + * DOC: RFkill integration + * + * RFkill integration in cfg80211 is almost invisible to drivers, + * as cfg80211 automatically registers an rfkill instance for each + * wireless device it knows about. Soft kill is also translated + * into disconnecting and turning all interfaces off, drivers are + * expected to turn off the device when all interfaces are down. + * + * However, devices may have a hard RFkill line, in which case they + * also need to interact with the rfkill subsystem, via cfg80211. + * They can do this with a few helper functions documented here. + */ + /** * wiphy_rfkill_set_hw_state - notify cfg80211 about hw block state * @wiphy: the wiphy @@ -2211,6 +2309,17 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy); void wiphy_rfkill_stop_polling(struct wiphy *wiphy); #ifdef CONFIG_NL80211_TESTMODE +/** + * DOC: Test mode + * + * Test mode is a set of utility functions to allow drivers to + * interact with driver-specific tools to aid, for instance, + * factory programming. + * + * This chapter describes how drivers interact with it, for more + * information see the nl80211 book's chapter on it. + */ + /** * cfg80211_testmode_alloc_reply_skb - allocate testmode reply * @wiphy: the wiphy -- cgit v1.2.3-18-g5258 From 5a46790ca4c40fdb6ed5336d7d6b593c96326b31 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 24 Aug 2010 14:46:53 -0700 Subject: include/linux/if_ether.h: Remove unused #define MAC_FMT Last use was removed, so remove the #define. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/if_ether.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index bed7a4682b9..f9c3df03db0 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -137,8 +137,6 @@ extern struct ctl_table ether_table[]; extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); -#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x" - #endif #endif /* _LINUX_IF_ETHER_H */ -- cgit v1.2.3-18-g5258 From c2e3143e3c46ede22336316b3ff4746727c0d93a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 24 Aug 2010 14:48:10 -0700 Subject: tc: add meta match on receive hash Trivial extension to existing meta data match rules to allow matching on skb receive hash value. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/tc_ematch/tc_em_meta.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index 0864206ec1a..7138962664f 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -79,6 +79,7 @@ enum { TCF_META_ID_SK_SENDMSG_OFF, TCF_META_ID_SK_WRITE_PENDING, TCF_META_ID_VLAN_TAG, + TCF_META_ID_RXHASH, __TCF_META_ID_MAX }; #define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1) -- cgit v1.2.3-18-g5258 From e7c1c2c46201e46f8ce817196507d2ffd3dafd8e Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 24 Aug 2010 03:46:18 +0000 Subject: mlx4_en: Added self diagnostics test implementation The selftest includes 5 features: 1. Interrupt test: Executing commands and receiving command completion on all our interrupt vectors. 2. Link test: Verifying we are connected to valid link partner. 3. Speed test: Check that we negotiated link speed correctly. 4. Registers test: Activate HW health check command. 5. Loopback test: Send a packet on loopback interface and catch it on RX side. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 1 + include/linux/mlx4/device.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 0f82293a82e..78a1b967175 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -56,6 +56,7 @@ enum { MLX4_CMD_QUERY_HCA = 0xb, MLX4_CMD_QUERY_PORT = 0x43, MLX4_CMD_SENSE_PORT = 0x4d, + MLX4_CMD_HW_HEALTH_CHECK = 0x50, MLX4_CMD_SET_PORT = 0xc, MLX4_CMD_ACCESS_DDR = 0x2e, MLX4_CMD_MAP_ICM = 0xffa, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7a7f9c1e679..2cec5872273 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -229,6 +229,7 @@ struct mlx4_caps { u32 bmme_flags; u32 reserved_lkey; u16 stat_rate_support; + int loopback_support; u8 port_width_cap[MLX4_MAX_PORTS + 1]; int max_gso_sz; int reserved_qps_cnt[MLX4_NUM_QP_REGION]; @@ -480,5 +481,6 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u32 *lkey, u32 *rkey); int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); +int mlx4_test_interrupts(struct mlx4_dev *dev); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3-18-g5258 From 7699517db435fd24143bd32dd644275e3eeb4c86 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 24 Aug 2010 03:46:23 +0000 Subject: mlx4_en: Fixing report in Ethtool get_settings The report now based on query from FW, giving the correct tranciever type and link speed. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 2cec5872273..2a36a344fb3 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -186,6 +186,10 @@ struct mlx4_caps { int eth_mtu_cap[MLX4_MAX_PORTS + 1]; int gid_table_len[MLX4_MAX_PORTS + 1]; int pkey_table_len[MLX4_MAX_PORTS + 1]; + int trans_type[MLX4_MAX_PORTS + 1]; + int vendor_oui[MLX4_MAX_PORTS + 1]; + int wavelength[MLX4_MAX_PORTS + 1]; + u64 trans_code[MLX4_MAX_PORTS + 1]; int local_ca_ack_delay; int num_uars; int bf_reg_size; -- cgit v1.2.3-18-g5258 From 0533943c5c45cce2e26432bf0a6b8e114757c897 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 24 Aug 2010 03:46:42 +0000 Subject: mlx4_en: UDP RSS support Adding capability for RSS for UDP traffic, hashing is done based on IP addresses and UDP port number. The support depends on HW/FW capabilities. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 2a36a344fb3..7338654c02b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -233,6 +233,7 @@ struct mlx4_caps { u32 bmme_flags; u32 reserved_lkey; u16 stat_rate_support; + int udp_rss; int loopback_support; u8 port_width_cap[MLX4_MAX_PORTS + 1]; int max_gso_sz; -- cgit v1.2.3-18-g5258 From 2738bd682df546f34654ed3d59dfc9ebe8d04979 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sat, 21 Aug 2010 16:39:01 -0400 Subject: mac80211: trivial spelling fixes Fix spelling and readability of a few lines of kernel doc: s/issueing/issuing/g s/approriate/appropriate/g s/supported by simply/supported simply by/ s/IEEE80211_HW_BEACON_FILTERING/IEEE80211_HW_BEACON_FILTER/g Signed-off-by: Bob Copeland Signed-off-by: John W. Linville --- include/net/mac80211.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2a181136607..dcc8c2bf986 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1242,8 +1242,8 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * %IEEE80211_CONF_PS flag enabled means that the powersave mode defined in * IEEE 802.11-2007 section 11.2 is enabled. This is not to be confused * with hardware wakeup and sleep states. Driver is responsible for waking - * up the hardware before issueing commands to the hardware and putting it - * back to sleep at approriate times. + * up the hardware before issuing commands to the hardware and putting it + * back to sleep at appropriate times. * * When PS is enabled, hardware needs to wakeup for beacons and receive the * buffered multicast/broadcast frames after the beacon. Also it must be @@ -1264,7 +1264,7 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * there's data traffic and still saving significantly power in idle * periods. * - * Dynamic powersave is supported by simply mac80211 enabling and disabling + * Dynamic powersave is simply supported by mac80211 enabling and disabling * PS based on traffic. Driver needs to only set %IEEE80211_HW_SUPPORTS_PS * flag and mac80211 will handle everything automatically. Additionally, * hardware having support for the dynamic PS feature may set the @@ -2458,7 +2458,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * - * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING and + * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTER and * %IEEE80211_CONF_PS is set, the driver needs to inform whenever the * hardware is not receiving beacons with this function. */ @@ -2469,7 +2469,7 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif); * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * - * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING, and + * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTER, and * %IEEE80211_CONF_PS and %IEEE80211_HW_CONNECTION_MONITOR are set, the driver * needs to inform if the connection to the AP has been lost. * -- cgit v1.2.3-18-g5258 From 4c5f7d7a1e6cf20ad515dad8a63c0813fac5bcea Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 22 Aug 2010 22:46:28 +0300 Subject: wl12xx: change contact person for the include file Luciano should be the contact person for the include/linux/spi/wl12xx.h file. Signed-off-by: Kalle Valo Acked-by: Luciano Coelho Signed-off-by: John W. Linville --- include/linux/spi/wl12xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/wl12xx.h b/include/linux/spi/wl12xx.h index a223ecbc71e..a20bccf0b5c 100644 --- a/include/linux/spi/wl12xx.h +++ b/include/linux/spi/wl12xx.h @@ -3,7 +3,7 @@ * * Copyright (C) 2009 Nokia Corporation * - * Contact: Kalle Valo + * Contact: Luciano Coelho * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License -- cgit v1.2.3-18-g5258 From ad01b7d480a4a135f974afd5c617c417e0b0542f Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 23 Aug 2010 20:40:42 +0000 Subject: stmmac: make ioaddr 'void __iomem *' rather than unsigned long This avoids unnecessary casting and adds the ioaddr in the private structure. This patch also removes many warning when compile the driver. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- include/linux/stmmac.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 632ff7c0328..a4adf0de6ed 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -35,7 +35,7 @@ struct plat_stmmacenet_data { int has_gmac; int enh_desc; void (*fix_mac_speed)(void *priv, unsigned int speed); - void (*bus_setup)(unsigned long ioaddr); + void (*bus_setup)(void __iomem *ioaddr); #ifdef CONFIG_STM_DRIVERS struct stm_pad_config *pad_config; #endif -- cgit v1.2.3-18-g5258 From 2971944582ff43b7dedbb460777052243ac9915a Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Mon, 9 Aug 2010 12:54:43 +0100 Subject: ARM: 6307/1: mmci: allow the card detect GPIO value not to be inverted On some platforms, the GPIO value from the gpio_cd pin doesn't need to be inverted to get it active high. Add a cd_invert platform data parameter and change existing platforms using GPIO for CD (only Realview) to enable it. Acked-by: Linus Walleij Signed-off-by: Rabin Vincent Signed-off-by: Russell King --- include/linux/amba/mmci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h index ca84ce70d5d..f4ee9acc972 100644 --- a/include/linux/amba/mmci.h +++ b/include/linux/amba/mmci.h @@ -24,6 +24,7 @@ * whether a card is present in the MMC slot or not * @gpio_wp: read this GPIO pin to see if the card is write protected * @gpio_cd: read this GPIO pin to detect card insertion + * @cd_invert: true if the gpio_cd pin value is active low * @capabilities: the capabilities of the block as implemented in * this platform, signify anything MMC_CAP_* from mmc/host.h */ @@ -35,6 +36,7 @@ struct mmci_platform_data { unsigned int (*status)(struct device *); int gpio_wp; int gpio_cd; + bool cd_invert; unsigned long capabilities; }; -- cgit v1.2.3-18-g5258 From 145ce502e44b57c074c72cfdc855557e19026999 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 24 Aug 2010 13:21:08 +0000 Subject: net/sctp: Use pr_fmt and pr_ Change SCTP_DEBUG_PRINTK and SCTP_DEBUG_PRINTK_IPADDR to use do { print } while (0) guards. Add SCTP_DEBUG_PRINTK_CONT to fix errors in log when lines were continued. Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt Add a missing newline in "Failed bind hash alloc" Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 48 ++++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 65946bc43d0..2cb3980b161 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -275,24 +275,35 @@ struct sctp_mib { /* Print debugging messages. */ #if SCTP_DEBUG extern int sctp_debug_flag; -#define SCTP_DEBUG_PRINTK(whatever...) \ - ((void) (sctp_debug_flag && printk(KERN_DEBUG whatever))) -#define SCTP_DEBUG_PRINTK_IPADDR(lead, trail, leadparm, saddr, otherparms...) \ - if (sctp_debug_flag) { \ - if (saddr->sa.sa_family == AF_INET6) { \ - printk(KERN_DEBUG \ - lead "%pI6" trail, \ - leadparm, \ - &saddr->v6.sin6_addr, \ - otherparms); \ - } else { \ - printk(KERN_DEBUG \ - lead "%pI4" trail, \ - leadparm, \ - &saddr->v4.sin_addr.s_addr, \ - otherparms); \ - } \ - } +#define SCTP_DEBUG_PRINTK(fmt, args...) \ +do { \ + if (sctp_debug_flag) \ + printk(KERN_DEBUG pr_fmt(fmt), ##args); \ +} while (0) +#define SCTP_DEBUG_PRINTK_CONT(fmt, args...) \ +do { \ + if (sctp_debug_flag) \ + pr_cont(fmt, ##args); \ +} while (0) +#define SCTP_DEBUG_PRINTK_IPADDR(fmt_lead, fmt_trail, \ + args_lead, saddr, args_trail...) \ +do { \ + if (sctp_debug_flag) { \ + if (saddr->sa.sa_family == AF_INET6) { \ + printk(KERN_DEBUG \ + pr_fmt(fmt_lead "%pI6" fmt_trail), \ + args_lead, \ + &saddr->v6.sin6_addr, \ + args_trail); \ + } else { \ + printk(KERN_DEBUG \ + pr_fmt(fmt_lead "%pI4" fmt_trail), \ + args_lead, \ + &saddr->v4.sin_addr.s_addr, \ + args_trail); \ + } \ + } \ +} while (0) #define SCTP_ENABLE_DEBUG { sctp_debug_flag = 1; } #define SCTP_DISABLE_DEBUG { sctp_debug_flag = 0; } @@ -306,6 +317,7 @@ extern int sctp_debug_flag; #else /* SCTP_DEBUG */ #define SCTP_DEBUG_PRINTK(whatever...) +#define SCTP_DEBUG_PRINTK_CONT(fmt, args...) #define SCTP_DEBUG_PRINTK_IPADDR(whatever...) #define SCTP_ENABLE_DEBUG #define SCTP_DISABLE_DEBUG -- cgit v1.2.3-18-g5258 From 480125ba49ba62be93beea37770f266846e077ab Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 26 Aug 2010 13:57:57 -0400 Subject: x86, iommu: Make all IOMMU's detection routines return a value. We return 1 if the IOMMU has been detected. Zero or an error number if we failed to find it. This is in preperation of using the IOMMU_INIT so that we can detect whether an IOMMU is present. I have not tested this for regression on Calgary, nor on AMD Vi chipsets as I don't have that hardware. CC: Muli Ben-Yehuda CC: "Jon D. Mason" CC: "Darrick J. Wong" CC: Jesse Barnes CC: David Woodhouse CC: Chris Wright CC: Yinghai Lu CC: Joerg Roedel CC: H. Peter Anvin CC: Fujita Tomonori Signed-off-by: Konrad Rzeszutek Wilk LKML-Reference: <1282845485-8991-3-git-send-email-konrad.wilk@oracle.com> Signed-off-by: H. Peter Anvin --- include/linux/dmar.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index d7cecc90ed3..a2060204151 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -57,15 +57,15 @@ extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); /* Intel IOMMU detection */ -extern void detect_intel_iommu(void); +extern int detect_intel_iommu(void); extern int enable_drhd_fault_handling(void); extern int parse_ioapics_under_ir(void); extern int alloc_iommu(struct dmar_drhd_unit *); #else -static inline void detect_intel_iommu(void) +static inline int detect_intel_iommu(void) { - return; + return -ENODEV; } static inline int dmar_table_init(void) -- cgit v1.2.3-18-g5258 From 40d0802b3eb47d57e2d57a5244a18cbbe9632e13 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Aug 2010 22:03:08 -0700 Subject: gro: __napi_gro_receive() optimizations compare_ether_header() can have a special implementation on 64 bit arches if CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is defined. __napi_gro_receive() and vlan_gro_common() can avoid a conditional branch to perform device match. On x86_64, __napi_gro_receive() has now 38 instructions instead of 53 As gcc-4.4.3 still choose to not inline it, add inline keyword to this performance critical function. Signed-off-by: Eric Dumazet CC: Herbert Xu Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 2308fbb4523..fb6aa607092 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -237,13 +237,29 @@ static inline bool is_etherdev_addr(const struct net_device *dev, * entry points. */ -static inline int compare_ether_header(const void *a, const void *b) +static inline unsigned long compare_ether_header(const void *a, const void *b) { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + unsigned long fold; + + /* + * We want to compare 14 bytes: + * [a0 ... a13] ^ [b0 ... b13] + * Use two long XOR, ORed together, with an overlap of two bytes. + * [a0 a1 a2 a3 a4 a5 a6 a7 ] ^ [b0 b1 b2 b3 b4 b5 b6 b7 ] | + * [a6 a7 a8 a9 a10 a11 a12 a13] ^ [b6 b7 b8 b9 b10 b11 b12 b13] + * This means the [a6 a7] ^ [b6 b7] part is done two times. + */ + fold = *(unsigned long *)a ^ *(unsigned long *)b; + fold |= *(unsigned long *)(a + 6) ^ *(unsigned long *)(b + 6); + return fold; +#else u32 *a32 = (u32 *)((u8 *)a + 2); u32 *b32 = (u32 *)((u8 *)b + 2); return (*(u16 *)a ^ *(u16 *)b) | (a32[0] ^ b32[0]) | (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]); +#endif } #endif /* _LINUX_ETHERDEVICE_H */ -- cgit v1.2.3-18-g5258 From 8789d459bc5e837bf37d261453df96ef54018d7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Aug 2010 13:30:26 +0200 Subject: mac80211: allow scan to complete from any context The ieee80211_scan_completed() function was a frequent source of potential deadlocks, since it is called by drivers but may call back into drivers, so drivers had to make sure to call it without any locks held, which frequently lead to more complex code in drivers. Avoid that problem by allowing the function to be called in any context, and queueing the actual work it does. Also update the documentation for it to indicate this. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dcc8c2bf986..8f97548b6d8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2268,7 +2268,8 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw); * * When hardware scan offload is used (i.e. the hw_scan() callback is * assigned) this function needs to be called by the driver to notify - * mac80211 that the scan finished. + * mac80211 that the scan finished. This function can be called from + * any context, including hardirq context. * * @hw: the hardware that finished the scan * @aborted: set to true if scan was aborted -- cgit v1.2.3-18-g5258 From c0692b8fe29fb4d4dad33487aabf3ed7e1e880c0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Aug 2010 14:26:53 +0300 Subject: cfg80211: allow changing port control protocol Some vendor specified mechanisms for 802.1X-style functionality use a different protocol than EAP (even if EAP is vendor-extensible). Allow setting the ethertype for the protocol when a driver has support for this. The default if unspecified is EAP, of course. Note: This is suitable only for station mode, not for AP implementation. Signed-off-by: Johannes Berg Signed-off-by: Juuso Oikarinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 16 +++++++++++++++- include/net/cfg80211.h | 24 +++++++++++++++++------- 2 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index ec1690da784..31603e8b558 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -295,7 +295,9 @@ * auth and assoc steps. For this, you need to specify the SSID in a * %NL80211_ATTR_SSID attribute, and can optionally specify the association * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_MAC, - * %NL80211_ATTR_WIPHY_FREQ and %NL80211_ATTR_CONTROL_PORT. + * %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT, + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE and + * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT. * It is also sent as an event, with the BSSID and response IEs when the * connection is established or failed to be established. This can be * determined by the STATUS_CODE attribute. @@ -686,6 +688,15 @@ enum nl80211_commands { * request, the driver will assume that the port is unauthorized until * authorized by user space. Otherwise, port is marked authorized by * default in station mode. + * @NL80211_ATTR_CONTROL_PORT_ETHERTYPE: A 16-bit value indicating the + * ethertype that will be used for key negotiation. It can be + * specified with the associate and connect commands. If it is not + * specified, the value defaults to 0x888E (PAE, 802.1X). This + * attribute is also used as a flag in the wiphy information to + * indicate that protocols other than PAE are supported. + * @NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT: When included along with + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, indicates that the custom + * ethertype frames used for key negotiation must not be encrypted. * * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver. * We recommend using nested, driver-specific attributes within this. @@ -951,6 +962,9 @@ enum nl80211_attrs { NL80211_ATTR_RX_FRAME_TYPES, NL80211_ATTR_FRAME_TYPE, + NL80211_ATTR_CONTROL_PORT_ETHERTYPE, + NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f2740537b5d..4c8c727d0cc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -763,6 +763,10 @@ const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie); * sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is * required to assume that the port is unauthorized until authorized by * user space. Otherwise, port is marked authorized by default. + * @control_port_ethertype: the control port protocol that should be + * allowed through even on unauthorized ports + * @control_port_no_encrypt: TRUE to prevent encryption of control port + * protocol frames. */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -772,6 +776,8 @@ struct cfg80211_crypto_settings { int n_akm_suites; u32 akm_suites[NL80211_MAX_NR_AKM_SUITES]; bool control_port; + __be16 control_port_ethertype; + bool control_port_no_encrypt; }; /** @@ -1293,15 +1299,19 @@ struct cfg80211_ops { * @WIPHY_FLAG_4ADDR_AP: supports 4addr mode even on AP (with a single station * on a VLAN interface) * @WIPHY_FLAG_4ADDR_STATION: supports 4addr mode even as a station + * @WIPHY_FLAG_CONTROL_PORT_PROTOCOL: This device supports setting the + * control port protocol ethertype. The device also honours the + * control_port_no_encrypt flag. */ enum wiphy_flags { - WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), - WIPHY_FLAG_STRICT_REGULATORY = BIT(1), - WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), - WIPHY_FLAG_NETNS_OK = BIT(3), - WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), - WIPHY_FLAG_4ADDR_AP = BIT(5), - WIPHY_FLAG_4ADDR_STATION = BIT(6), + WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), + WIPHY_FLAG_STRICT_REGULATORY = BIT(1), + WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), + WIPHY_FLAG_NETNS_OK = BIT(3), + WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), + WIPHY_FLAG_4ADDR_AP = BIT(5), + WIPHY_FLAG_4ADDR_STATION = BIT(6), + WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), }; struct mac_address { -- cgit v1.2.3-18-g5258 From 34d4bc4d41d282a66dafe1b01a7d46bad468cefb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Aug 2010 12:35:58 +0200 Subject: mac80211: support runtime interface type changes Add support to mac80211 for changing the interface type even when the interface is UP, if the driver supports it. To achieve this * add a new driver callback for switching, * split some of the interface up/down code out into new functions (do_open/do_stop), and * maintain an own __SDATA_RUNNING bit that will not be set during interface type, so that any other code doesn't use the interface. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8f97548b6d8..f91fc331369 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1537,6 +1537,12 @@ enum ieee80211_ampdu_mlme_action { * negative error code (which will be seen in userspace.) * Must be implemented and can sleep. * + * @change_interface: Called when a netdevice changes type. This callback + * is optional, but only if it is supported can interface types be + * switched while the interface is UP. The callback may sleep. + * Note that while an interface is being switched, it will not be + * found by the interface iteration callbacks. + * * @remove_interface: Notifies a driver that an interface is going down. * The @stop callback is called after this if it is the last interface * and no monitor interfaces are present. @@ -1693,6 +1699,9 @@ struct ieee80211_ops { void (*stop)(struct ieee80211_hw *hw); int (*add_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); + int (*change_interface)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + enum nl80211_iftype new_type); void (*remove_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*config)(struct ieee80211_hw *hw, u32 changed); -- cgit v1.2.3-18-g5258 From 7950c407c0288b223a200c1bba8198941599ca37 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:14 -0700 Subject: memblock: Add memblock_free/reserve_reserved_regions() So we can avoid export memblock_reserved_init_regions() Suggested by Ben. -v2: use __init_memblock attribute Signed-off-by: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 4df09bdcae4..7d285271130 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -48,6 +48,8 @@ extern int memblock_can_resize; if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align); +int memblock_free_reserved_regions(void); +int memblock_reserve_reserved_regions(void); extern void __init memblock_init(void); extern void __init memblock_analyze(void); -- cgit v1.2.3-18-g5258 From edbe7d23b4482e7f33179290bcff3b1feae1c5f3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:16 -0700 Subject: memblock: Add find_memory_core_early() According to node range in early_node_map[] with __memblock_find_in_range to find free range. Will be used by memblock_x86_find_in_range_node() memblock_x86_find_in_range_node will be used to find right buffer for NODE_DATA Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a2b48041b91..993e85f0afc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1164,6 +1164,8 @@ extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); int add_from_early_node_map(struct range *range, int az, int nr_range, int nid); +u64 __init find_memory_core_early(int nid, u64 size, u64 align, + u64 goal, u64 limit); void *__alloc_memory_core_early(int nodeid, u64 size, u64 align, u64 goal, u64 limit); typedef int (*work_fn_t)(unsigned long, unsigned long, void *); -- cgit v1.2.3-18-g5258 From a587d2daebcd2bc159d4348b6a7b028950a6d803 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:18 -0700 Subject: x86: Remove not used early_res code and some functions in e820.c that are not used anymore Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- include/linux/early_res.h | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 include/linux/early_res.h (limited to 'include') diff --git a/include/linux/early_res.h b/include/linux/early_res.h deleted file mode 100644 index 29c09f57a13..00000000000 --- a/include/linux/early_res.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef _LINUX_EARLY_RES_H -#define _LINUX_EARLY_RES_H -#ifdef __KERNEL__ - -extern void reserve_early(u64 start, u64 end, char *name); -extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); -extern void free_early(u64 start, u64 end); -void free_early_partial(u64 start, u64 end); -extern void early_res_to_bootmem(u64 start, u64 end); - -void reserve_early_without_check(u64 start, u64 end, char *name); -u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, - u64 size, u64 align); -u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start, - u64 *sizep, u64 align); -u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align); -u64 get_max_mapped(void); -#include -int get_free_all_memory_range(struct range **rangep, int nodeid); - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_EARLY_RES_H */ -- cgit v1.2.3-18-g5258 From 409456b10f87b28303643fec37543103f9ada00c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 29 Aug 2010 21:57:55 -0700 Subject: net: fix datapath typo Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if.h b/include/linux/if.h index 6ed43c1f07a..12395992774 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -76,7 +76,7 @@ #define IFF_MACVLAN_PORT 0x4000 /* device used as macvlan port */ #define IFF_BRIDGE_PORT 0x8000 /* device used as bridge port */ #define IFF_OVS_DATAPATH 0x10000 /* device used as Open vSwitch - * dapath port */ + * datapath port */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 -- cgit v1.2.3-18-g5258 From fcaf780b2ad352edaeb1d1c07a6da053266b1eed Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 24 Aug 2010 23:22:57 -0300 Subject: i7300_edac: start a driver for i7300 chipset (Clarksboro) Signed-off-by: Mauro Carvalho Chehab --- include/linux/pci_ids.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36ca..2eabe311f0d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -815,7 +815,7 @@ #define PCI_VENDOR_ID_ANIGMA 0x1051 #define PCI_DEVICE_ID_ANIGMA_MC145575 0x0100 - + #define PCI_VENDOR_ID_EFAR 0x1055 #define PCI_DEVICE_ID_EFAR_SLC90E66_1 0x9130 #define PCI_DEVICE_ID_EFAR_SLC90E66_3 0x9463 @@ -1446,7 +1446,7 @@ #define PCI_VENDOR_ID_ZIATECH 0x1138 #define PCI_DEVICE_ID_ZIATECH_5550_HC 0x5550 - + #define PCI_VENDOR_ID_SYSKONNECT 0x1148 #define PCI_DEVICE_ID_SYSKONNECT_TR 0x4200 @@ -1600,8 +1600,8 @@ #define PCI_DEVICE_ID_RP8OCTA 0x0005 #define PCI_DEVICE_ID_RP8J 0x0006 #define PCI_DEVICE_ID_RP4J 0x0007 -#define PCI_DEVICE_ID_RP8SNI 0x0008 -#define PCI_DEVICE_ID_RP16SNI 0x0009 +#define PCI_DEVICE_ID_RP8SNI 0x0008 +#define PCI_DEVICE_ID_RP16SNI 0x0009 #define PCI_DEVICE_ID_RPP4 0x000A #define PCI_DEVICE_ID_RPP8 0x000B #define PCI_DEVICE_ID_RP4M 0x000D @@ -1611,9 +1611,9 @@ #define PCI_DEVICE_ID_URP8INTF 0x0802 #define PCI_DEVICE_ID_URP16INTF 0x0803 #define PCI_DEVICE_ID_URP8OCTA 0x0805 -#define PCI_DEVICE_ID_UPCI_RM3_8PORT 0x080C +#define PCI_DEVICE_ID_UPCI_RM3_8PORT 0x080C #define PCI_DEVICE_ID_UPCI_RM3_4PORT 0x080D -#define PCI_DEVICE_ID_CRP16INTF 0x0903 +#define PCI_DEVICE_ID_CRP16INTF 0x0903 #define PCI_VENDOR_ID_CYCLADES 0x120e #define PCI_DEVICE_ID_CYCLOM_Y_Lo 0x0100 @@ -2139,7 +2139,7 @@ #define PCI_DEVICE_ID_RASTEL_2PORT 0x2000 #define PCI_VENDOR_ID_ZOLTRIX 0x15b0 -#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0 +#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0 #define PCI_VENDOR_ID_MELLANOX 0x15b3 #define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 @@ -2413,7 +2413,7 @@ #define PCI_DEVICE_ID_INTEL_82815_MC 0x1130 #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 -#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 +#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 #define PCI_DEVICE_ID_INTEL_7205_0 0x255d #define PCI_DEVICE_ID_INTEL_82437 0x122d #define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e @@ -2616,6 +2616,9 @@ #define PCI_DEVICE_ID_INTEL_MCH_PC 0x3599 #define PCI_DEVICE_ID_INTEL_MCH_PC1 0x359a #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e +#define PCI_DEVICE_ID_INTEL_I7300_MCH_ERR 0x360c +#define PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 0x360f +#define PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 0x3610 #define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b #define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c #define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710 -- cgit v1.2.3-18-g5258 From dca43c75e7e545694a9dd6288553f55c53e2a3a3 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Fri, 27 Aug 2010 19:13:28 +0000 Subject: tcp: Add TCP_USER_TIMEOUT socket option. This patch provides a "user timeout" support as described in RFC793. The socket option is also needed for the the local half of RFC5482 "TCP User Timeout Option". TCP_USER_TIMEOUT is a TCP level socket option that takes an unsigned int, when > 0, to specify the maximum amount of time in ms that transmitted data may remain unacknowledged before TCP will forcefully close the corresponding connection and return ETIMEDOUT to the application. If 0 is given, TCP will continue to use the system default. Increasing the user timeouts allows a TCP connection to survive extended periods without end-to-end connectivity. Decreasing the user timeouts allows applications to "fail fast" if so desired. Otherwise it may take upto 20 minutes with the current system defaults in a normal WAN environment. The socket option can be made during any state of a TCP connection, but is only effective during the synchronized states of a connection (ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, or LAST-ACK). Moreover, when used with the TCP keepalive (SO_KEEPALIVE) option, TCP_USER_TIMEOUT will overtake keepalive to determine when to close a connection due to keepalive failure. The option does not change in anyway when TCP retransmits a packet, nor when a keepalive probe will be sent. This option, like many others, will be inherited by an acceptor from its listener. Signed-off-by: H.K. Jerry Chu Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/net/inet_connection_sock.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a778ee02459..e64f4c67d0e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -105,6 +105,7 @@ enum { #define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ #define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ #define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ +#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ /* for TCP_INFO socket option */ #define TCPI_OPT_TIMESTAMPS 1 diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b6d3b55da19..e4f494b42e0 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -125,6 +125,7 @@ struct inet_connection_sock { int probe_size; } icsk_mtup; u32 icsk_ca_priv[16]; + u32 icsk_user_timeout; #define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) }; -- cgit v1.2.3-18-g5258 From 22b71c8f4f3db8df92f5e7b081c265bc56c0bd2f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 29 Aug 2010 19:23:12 +0000 Subject: tcp/dccp: Consolidate common code for RFC 3390 conversion This patch consolidates initial-window code common to TCP and CCID-2: * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341). Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/tcp.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index df6a2eb2019..a64022199b6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -779,6 +779,21 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * + * John Heffner states: + * + * The RFC specifies a window of no more than 4380 bytes + * unless 2*MSS > 4380. Reading the pseudocode in the RFC + * is a bit misleading because they use a clamp at 4380 bytes + * rather than a multiplier in the relevant range. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3); +} + extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); -- cgit v1.2.3-18-g5258 From 3d5b99ae82f8742e3bb1f8634fd11ac36ea19ee1 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 29 Aug 2010 19:27:34 +0000 Subject: TCP: update initial windows according to RFC 5681 This updates the use of larger initial windows, as originally specified in RFC 3390, to use the newer IW values specified in RFC 5681, section 3.1. The changes made in RFC 5681 are: a) the setting now is more clearly specified in units of segments (as the comments by John Heffner emphasized, this was not very clear in RFC 3390); b) for connections with 1095 < SMSS <= 2190 there is now a change: - RFC 3390 says that IW <= 4380, - RFC 5681 says that IW = 3 * SMSS <= 6570. Since RFC 3390 is older and "only" proposed standard, whereas the newer RFC 5681 is already draft standard, it seems preferable to use the newer IW variant. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/tcp.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index a64022199b6..11dbacc886c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -781,17 +781,11 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* * Convert RFC 3390 larger initial window into an equivalent number of packets. - * - * John Heffner states: - * - * The RFC specifies a window of no more than 4380 bytes - * unless 2*MSS > 4380. Reading the pseudocode in the RFC - * is a bit misleading because they use a clamp at 4380 bytes - * rather than a multiplier in the relevant range. + * This is based on the numbers specified in RFC 5681, 3.1. */ static inline u32 rfc3390_bytes_to_packets(const u32 smss) { - return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3); + return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); } extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); -- cgit v1.2.3-18-g5258 From 4dc89133f49b8cfd77ba7e83f5960aed63aaa99e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Aug 2010 07:40:16 +0000 Subject: net: add a comment on netdev->last_rx As some driver authors seem to reintroduce dev->last_rx use, add a comment to strongly discourage this. Since commit 6cf3f41e6c0 (bonding, net: Move last_rx update into bonding recv logic), network drivers dont need to update last_rx themselves, unless they use this field to implement a timeout. Not updating last_rx helps not dirtying a cache line, improving performance in SMP. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0cf9448a32c..c82220a9f3d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -953,7 +953,14 @@ struct net_device { /* * Cache line mostly used on receive path (including eth_type_trans()) */ - unsigned long last_rx; /* Time of last Rx */ + unsigned long last_rx; /* Time of last Rx + * This should not be set in + * drivers, unless really needed, + * because network stack (bonding) + * use it if/when necessary, to + * avoid dirtying this cache line. + */ + /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; /* hw address, (before bcast because most packets are -- cgit v1.2.3-18-g5258 From 86cac58b71227cc34a3d0e78f19585c0eff49ea3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Aug 2010 18:25:32 +0000 Subject: skge: add GRO support - napi_gro_flush() is exported from net/core/dev.c, to avoid an irq_save/irq_restore in the packet receive path. - use napi_gro_receive() instead of netif_receive_skb() - use napi_gro_flush() before calling __napi_complete() - turn on NETIF_F_GRO by default - Tested on a Marvell 88E8001 Gigabit NIC Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c82220a9f3d..af05186d5b3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1702,6 +1702,7 @@ extern gro_result_t dev_gro_receive(struct napi_struct *napi, extern gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb); extern gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); +extern void napi_gro_flush(struct napi_struct *napi); extern void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb); extern struct sk_buff * napi_get_frags(struct napi_struct *napi); -- cgit v1.2.3-18-g5258 From c68839963426d42bdb2c915b435f9860d060e645 Mon Sep 17 00:00:00 2001 From: Peter Meerwald Date: Thu, 2 Sep 2010 04:06:24 +0000 Subject: net: Improve comments in include/linux/phy.h Correct state range of PHY bus addresses (i.e. 0-31) in comment, make spelling of PHY consistent in comments. Signed-off-by: Peter Meerwald Signed-off-by: David S. Miller --- include/linux/phy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 6b0a782c622..a6e047a04f7 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -116,7 +116,7 @@ struct mii_bus { /* list of all PHYs on bus */ struct phy_device *phy_map[PHY_MAX_ADDR]; - /* Phy addresses to be ignored when probing */ + /* PHY addresses to be ignored when probing */ u32 phy_mask; /* @@ -283,7 +283,7 @@ struct phy_device { phy_interface_t interface; - /* Bus address of the PHY (0-32) */ + /* Bus address of the PHY (0-31) */ int addr; /* -- cgit v1.2.3-18-g5258 From bc8acf2c8c3e43fcc192762a9f964b3e9a17748b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Sep 2010 13:07:41 -0700 Subject: drivers/net: avoid some skb->ip_summed initializations fresh skbs have ip_summed set to CHECKSUM_NONE (0) We can avoid setting again skb->ip_summed to CHECKSUM_NONE in drivers. Introduce skb_checksum_none_assert() helper so that we keep this assertion documented in driver sources. Change most occurrences of : skb->ip_summed = CHECKSUM_NONE; by : skb_checksum_none_assert(skb); Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f900ffcd847..9e8085a8958 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2206,6 +2206,21 @@ static inline void skb_forward_csum(struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; } +/** + * skb_checksum_none_assert - make sure skb ip_summed is CHECKSUM_NONE + * @skb: skb to check + * + * fresh skbs have their ip_summed set to CHECKSUM_NONE. + * Instead of forcing ip_summed to CHECKSUM_NONE, we can + * use this helper, to document places where we make this assertion. + */ +static inline void skb_checksum_none_assert(struct sk_buff *skb) +{ +#ifdef DEBUG + BUG_ON(skb->ip_summed != CHECKSUM_NONE); +#endif +} + bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.3-18-g5258 From 57a2ce5f54f3120467be760662c6ef3bea3f9579 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 3 Sep 2010 19:09:46 +0800 Subject: padata: add missing __percpu markup in include/linux/padata.h parallel_data->queue is a percpu pointer but was missing __percpu markup. Add it. Signed-off-by: Namhyung Kim Acked-by: Steffen Klassert Signed-off-by: Herbert Xu --- include/linux/padata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/padata.h b/include/linux/padata.h index bdcd1e9eace..4633b2f726b 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -127,8 +127,8 @@ struct padata_cpumask { */ struct parallel_data { struct padata_instance *pinst; - struct padata_parallel_queue *pqueue; - struct padata_serial_queue *squeue; + struct padata_parallel_queue __percpu *pqueue; + struct padata_serial_queue __percpu *squeue; atomic_t seq_nr; atomic_t reorder_objects; atomic_t refcnt; -- cgit v1.2.3-18-g5258 From b8ef3204f460912a46659cdc74d237adbe705053 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 6 Aug 2010 03:02:37 -0500 Subject: [SCSI] fc class: add fc host default default dev loss setting This patch adds a fc_host setting to store the default dev_loss_tmo. It is used if the driver has a callack to get the value from the LLD. If the callback is not set, then we use the fc class module default value. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- include/scsi/scsi_transport_fc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 87d81b3ce56..9f98fca9b76 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -496,6 +496,7 @@ struct fc_host_attrs { u64 fabric_name; char symbolic_name[FC_SYMBOLIC_NAME_SIZE]; char system_hostname[FC_SYMBOLIC_NAME_SIZE]; + u32 def_dev_loss_tmo; /* Private (Transport-managed) Attributes */ enum fc_tgtid_binding_type tgtid_bind_type; @@ -580,6 +581,8 @@ struct fc_host_attrs { (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q_name) #define fc_host_devloss_work_q(x) \ (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q) +#define fc_host_def_dev_loss_tmo(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->def_dev_loss_tmo) struct fc_bsg_buffer { @@ -640,6 +643,7 @@ struct fc_function_template { void (*get_host_fabric_name)(struct Scsi_Host *); void (*get_host_symbolic_name)(struct Scsi_Host *); void (*set_host_system_hostname)(struct Scsi_Host *); + void (*get_host_def_dev_loss_tmo)(struct Scsi_Host *); struct fc_host_statistics * (*get_fc_host_stats)(struct Scsi_Host *); void (*reset_fc_host_stats)(struct Scsi_Host *); -- cgit v1.2.3-18-g5258 From 144c0f8833d0458e4369a27a53aea8856c665c41 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 3 Sep 2010 10:31:05 -0700 Subject: Input: fix a few typos Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index 896a92227bc..78926512353 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -67,7 +67,7 @@ struct input_absinfo { #define EVIOCGPHYS(len) _IOC(_IOC_READ, 'E', 0x07, len) /* get physical location */ #define EVIOCGUNIQ(len) _IOC(_IOC_READ, 'E', 0x08, len) /* get unique identifier */ -#define EVIOCGKEY(len) _IOC(_IOC_READ, 'E', 0x18, len) /* get global keystate */ +#define EVIOCGKEY(len) _IOC(_IOC_READ, 'E', 0x18, len) /* get global key state */ #define EVIOCGLED(len) _IOC(_IOC_READ, 'E', 0x19, len) /* get all LEDs */ #define EVIOCGSND(len) _IOC(_IOC_READ, 'E', 0x1a, len) /* get all sounds status */ #define EVIOCGSW(len) _IOC(_IOC_READ, 'E', 0x1b, len) /* get all switch states */ -- cgit v1.2.3-18-g5258 From fe8e0c25cad28e8858ecfa5863333c70685a6811 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Mon, 6 Sep 2010 20:53:42 +0200 Subject: x86, 32-bit: Align percpu area and irq stacks to THREAD_SIZE The irq stacks, located in the percpu-area, need to be THREAD_SIZE aligned. Add the infrastucture to align percpu variables to larger-than-pagesize amounts within the percpu area, and use it to specify the alignment for the irq stacks. Also align the percpu area itself to THREAD_SIZE. This should make irq stacks work with 8K THREAD_SIZE. Signed-off-by: Alexander van Heukelum Cc: Tejun Heo Cc: hch@lst.de LKML-Reference: <1283799222.15941.1393621887@webmail.messagingengine.com> Signed-off-by: Ingo Molnar --- include/linux/percpu-defs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index ce2dc655cd1..ab20d119a85 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -138,6 +138,18 @@ DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ __aligned(PAGE_SIZE) +/* + * Declaration/definition used for large per-CPU variables that must be + * aligned to something larger than the pagesize. + */ +#define DECLARE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \ + DECLARE_PER_CPU_SECTION(type, name, "..page_aligned") \ + __aligned(size) + +#define DEFINE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \ + DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ + __aligned(size) + /* * Intermodule exports for per-CPU variables. sparse forgets about * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to -- cgit v1.2.3-18-g5258 From 2bf2160d8805de64308e2e7c3cd97813cb58ed2f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 23 Aug 2010 18:42:48 +0900 Subject: irq: Add tracepoint to softirq_raise Add a tracepoint for tracing when softirq action is raised. This and the existing tracepoints complete softirq's tracepoints: softirq_raise, softirq_entry and softirq_exit. And when this tracepoint is used in combination with the softirq_entry tracepoint we can determine the softirq raise latency. Signed-off-by: Lai Jiangshan Acked-by: Mathieu Desnoyers Acked-by: Neil Horman Cc: David Miller Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C724298.4050509@jp.fujitsu.com> [ factorize softirq events with DECLARE_EVENT_CLASS ] Signed-off-by: Koki Sanagi Signed-off-by: Frederic Weisbecker --- include/linux/interrupt.h | 8 +++++++- include/trace/events/irq.h | 26 ++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a0384a4d1e6..531495db170 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * These correspond to the IORESOURCE_IRQ_* defines in @@ -407,7 +408,12 @@ asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); -#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) +static inline void __raise_softirq_irqoff(unsigned int nr) +{ + trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL); + or_softirq_pending(1UL << nr); +} + extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); extern void wakeup_softirqd(void); diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 0e4cfb694fe..6fa7cbab7d9 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -5,7 +5,9 @@ #define _TRACE_IRQ_H #include -#include + +struct irqaction; +struct softirq_action; #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } #define show_softirq_name(val) \ @@ -93,7 +95,10 @@ DECLARE_EVENT_CLASS(softirq, ), TP_fast_assign( - __entry->vec = (int)(h - vec); + if (vec) + __entry->vec = (int)(h - vec); + else + __entry->vec = (int)(long)h; ), TP_printk("vec=%d [action=%s]", __entry->vec, @@ -136,6 +141,23 @@ DEFINE_EVENT(softirq, softirq_exit, TP_ARGS(h, vec) ); +/** + * softirq_raise - called immediately when a softirq is raised + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter contains a pointer to the softirq vector number which is + * raised. @vec is NULL and it means @h includes vector number not + * softirq_action. When used in combination with the softirq_entry tracepoint + * we can determine the softirq raise latency. + */ +DEFINE_EVENT(softirq, softirq_raise, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec) +); + #endif /* _TRACE_IRQ_H */ /* This part must be outside protection */ -- cgit v1.2.3-18-g5258 From 3e4b10d7a4d2a78af64f8096dc7cdb3bebd65adb Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 23 Aug 2010 18:43:51 +0900 Subject: napi: Convert trace_napi_poll to TRACE_EVENT This patch converts trace_napi_poll from DECLARE_EVENT to TRACE_EVENT to improve the usability of napi_poll tracepoint. -0 [001] 241302.750777: napi_poll: napi poll on napi struct f6acc480 for device eth3 -0 [000] 241302.852389: napi_poll: napi poll on napi struct f5d0d70c for device eth1 The original patch is below: http://marc.info/?l=linux-kernel&m=126021713809450&w=2 [ sanagi.koki@jp.fujitsu.com: And add a fix by Steven Rostedt: http://marc.info/?l=linux-kernel&m=126150506519173&w=2 ] Signed-off-by: Neil Horman Acked-by: David S. Miller Acked-by: Neil Horman Cc: Mathieu Desnoyers Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C7242D7.4050009@jp.fujitsu.com> Signed-off-by: Koki Sanagi Signed-off-by: Frederic Weisbecker --- include/trace/events/napi.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index 188deca2f3c..8fe1e93f531 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -6,10 +6,31 @@ #include #include +#include + +#define NO_DEV "(no_device)" + +TRACE_EVENT(napi_poll, -DECLARE_TRACE(napi_poll, TP_PROTO(struct napi_struct *napi), - TP_ARGS(napi)); + + TP_ARGS(napi), + + TP_STRUCT__entry( + __field( struct napi_struct *, napi) + __string( dev_name, napi->dev ? napi->dev->name : NO_DEV) + ), + + TP_fast_assign( + __entry->napi = napi; + __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV); + ), + + TP_printk("napi poll on napi struct %p for device %s", + __entry->napi, __get_str(dev_name)) +); + +#undef NO_DEV #endif /* _TRACE_NAPI_H_ */ -- cgit v1.2.3-18-g5258 From cf66ba58b5cb8b1526e9dd2fb96ff8db048d4d44 Mon Sep 17 00:00:00 2001 From: Koki Sanagi Date: Mon, 23 Aug 2010 18:45:02 +0900 Subject: netdev: Add tracepoints to netdev layer This patch adds tracepoint to dev_queue_xmit, dev_hard_start_xmit, netif_rx and netif_receive_skb. These tracepoints help you to monitor network driver's input/output. -0 [001] 112447.902030: netif_rx: dev=eth1 skbaddr=f3ef0900 len=84 -0 [001] 112447.902039: netif_receive_skb: dev=eth1 skbaddr=f3ef0900 len=84 sshd-6828 [000] 112447.903257: net_dev_queue: dev=eth4 skbaddr=f3fca538 len=226 sshd-6828 [000] 112447.903260: net_dev_xmit: dev=eth4 skbaddr=f3fca538 len=226 rc=0 Signed-off-by: Koki Sanagi Acked-by: David S. Miller Acked-by: Neil Horman Cc: Mathieu Desnoyers Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C72431E.3000901@jp.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/trace/events/net.h | 82 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 include/trace/events/net.h (limited to 'include') diff --git a/include/trace/events/net.h b/include/trace/events/net.h new file mode 100644 index 00000000000..5f247f5ffc5 --- /dev/null +++ b/include/trace/events/net.h @@ -0,0 +1,82 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM net + +#if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NET_H + +#include +#include +#include +#include + +TRACE_EVENT(net_dev_xmit, + + TP_PROTO(struct sk_buff *skb, + int rc), + + TP_ARGS(skb, rc), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned int, len ) + __field( int, rc ) + __string( name, skb->dev->name ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = skb->len; + __entry->rc = rc; + __assign_str(name, skb->dev->name); + ), + + TP_printk("dev=%s skbaddr=%p len=%u rc=%d", + __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) +); + +DECLARE_EVENT_CLASS(net_dev_template, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned int, len ) + __string( name, skb->dev->name ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = skb->len; + __assign_str(name, skb->dev->name); + ), + + TP_printk("dev=%s skbaddr=%p len=%u", + __get_str(name), __entry->skbaddr, __entry->len) +) + +DEFINE_EVENT(net_dev_template, net_dev_queue, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_template, netif_receive_skb, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_template, netif_rx, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); +#endif /* _TRACE_NET_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3-18-g5258 From 07dc22e7295f25526f110d704655ff0ea7687420 Mon Sep 17 00:00:00 2001 From: Koki Sanagi Date: Mon, 23 Aug 2010 18:46:12 +0900 Subject: skb: Add tracepoints to freeing skb This patch adds tracepoint to consume_skb and add trace_kfree_skb before __kfree_skb in skb_free_datagram_locked and net_tx_action. Combinating with tracepoint on dev_hard_start_xmit, we can check how long it takes to free transmitted packets. And using it, we can calculate how many packets driver had at that time. It is useful when a drop of transmitted packet is a problem. sshd-6828 [000] 112689.258154: consume_skb: skbaddr=f2d99bb8 Signed-off-by: Koki Sanagi Acked-by: David S. Miller Acked-by: Neil Horman Cc: Mathieu Desnoyers Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C724364.50903@jp.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/trace/events/skb.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 4b2be6dc76f..75ce9d500d8 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -35,6 +35,23 @@ TRACE_EVENT(kfree_skb, __entry->skbaddr, __entry->protocol, __entry->location) ); +TRACE_EVENT(consume_skb, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + ), + + TP_printk("skbaddr=%p", __entry->skbaddr) +); + TRACE_EVENT(skb_copy_datagram_iovec, TP_PROTO(const struct sk_buff *skb, int len), -- cgit v1.2.3-18-g5258 From 269cddd44e3588d1c50a7ec055b78de4d6c72cb6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 7 Sep 2010 14:17:10 -0500 Subject: dlm: Fix dlm lock status block comment in dlm.h There is only one place in the dlm where the sb_status is set and that is queue_cast(). Tracing back the callers of that function shows that the listed set of return values is out of date, so here are an updated set. Signed-off-by: Steven Whitehouse Signed-off-by: David Teigland --- include/linux/dlm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dlm.h b/include/linux/dlm.h index 0b3518c4235..d4e02f5353a 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -48,10 +48,10 @@ typedef void dlm_lockspace_t; * * 0 if lock request was successful * -EAGAIN if request would block and is flagged DLM_LKF_NOQUEUE - * -ENOMEM if there is no memory to process request - * -EINVAL if there are invalid parameters * -DLM_EUNLOCK if unlock request was successful * -DLM_ECANCEL if a cancel completed successfully + * -EDEADLK if a deadlock was detected + * -ETIMEDOUT if the lock request was canceled due to a timeout */ #define DLM_SBF_DEMOTED 0x01 -- cgit v1.2.3-18-g5258 From 4f8b02b4e5c6896e073bed736136d420bd44b627 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:47 +0200 Subject: vmalloc: pcpu_get/free_vm_areas() aren't needed on UP These functions are used only by percpu memory allocator on SMP. Don't build them on UP. Signed-off-by: Tejun Heo Cc: Nick Piggin Reviewed-by: Chrsitoph Lameter --- include/linux/vmalloc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 01c2145118d..63a4fe6d51b 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -117,10 +117,12 @@ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); +#ifdef CONFIG_SMP struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align, gfp_t gfp_mask); void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); +#endif #endif /* _LINUX_VMALLOC_H */ -- cgit v1.2.3-18-g5258 From 6abad5acac09921f4944af77d3860f82d49f528d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:47 +0200 Subject: percpu: reduce PCPU_MIN_UNIT_SIZE to 32k In preparation of enabling percpu allocator for UP, reduce PCPU_MIN_UNIT_SIZE to 32k. On UP, the first chunk doesn't have to include static percpu variables and chunk size can be smaller which is important as UP percpu allocator will use contiguous kernel memory to populate chunks. PCPU_MIN_UNIT_SIZE also determines the maximum supported allocation size but 32k should still be enough. Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 49466b13c5c..fc8130a7cac 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -42,7 +42,7 @@ #ifdef CONFIG_SMP /* minimum unit size, also is the maximum supported allocation size */ -#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) +#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) /* * Percpu allocator can serve percpu allocations before slab is -- cgit v1.2.3-18-g5258 From bbddff0545878a8649c091a9dd7c43ce91516734 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:48 +0200 Subject: percpu: use percpu allocator on UP too On UP, percpu allocations were redirected to kmalloc. This has the following problems. * For certain amount of allocations (determined by PERCPU_DYNAMIC_EARLY_SLOTS and PERCPU_DYNAMIC_EARLY_SIZE), percpu allocator can be used before the usual kernel memory allocator is brought online. On SMP, this is used to initialize the kernel memory allocator. * percpu allocator honors alignment upto PAGE_SIZE but kmalloc() doesn't. For example, workqueue makes use of larger alignments for cpu_workqueues. Currently, users of percpu allocators need to handle UP differently, which is somewhat fragile and ugly. Other than small amount of memory, there isn't much to lose by enabling percpu allocator on UP. It can simply use kernel memory based chunk allocation which was added for SMP archs w/o MMUs. This patch removes mm/percpu_up.c, builds mm/percpu.c on UP too and makes UP build use percpu-km. As percpu addresses and kernel addresses are always identity mapped and static percpu variables don't need any special treatment, nothing is arch dependent and mm/percpu.c implements generic setup_per_cpu_areas() for UP. Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter Acked-by: Pekka Enberg --- include/linux/percpu.h | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index fc8130a7cac..aeeeef1093c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -39,8 +39,6 @@ preempt_enable(); \ } while (0) -#ifdef CONFIG_SMP - /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) @@ -137,37 +135,20 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, * dynamically allocated. Non-atomic access to the current CPU's * version should probably be combined with get_cpu()/put_cpu(). */ +#ifdef CONFIG_SMP #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +#else +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) +#endif extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); extern bool is_kernel_percpu_address(unsigned long addr); -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +#if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) extern void __init setup_per_cpu_areas(void); #endif extern void __init percpu_init_late(void); -#else /* CONFIG_SMP */ - -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) - -/* can't distinguish from other static vars, always false */ -static inline bool is_kernel_percpu_address(unsigned long addr) -{ - return false; -} - -static inline void __init setup_per_cpu_areas(void) { } - -static inline void __init percpu_init_late(void) { } - -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} - -#endif /* CONFIG_SMP */ - extern void __percpu *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void __percpu *__pdata); extern phys_addr_t per_cpu_ptr_to_phys(void *addr); -- cgit v1.2.3-18-g5258 From e3634169bcc0cce33c815865d62ab378739f7389 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 7 Sep 2010 05:55:38 +0000 Subject: include/net/raw.h: Convert raw_seq_private macro to inline Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/raw.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/raw.h b/include/net/raw.h index 43c57502659..42ce6fe7a2d 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -45,7 +45,10 @@ struct raw_iter_state { struct raw_hashinfo *h; }; -#define raw_seq_private(seq) ((struct raw_iter_state *)(seq)->private) +static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq) +{ + return seq->private; +} void *raw_seq_start(struct seq_file *seq, loff_t *pos); void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos); void raw_seq_stop(struct seq_file *seq, void *v); -- cgit v1.2.3-18-g5258 From a6e0fc8514d41dfdd98b1d15cacc432cf040f8af Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Sep 2010 14:15:32 -0700 Subject: net: introduce rcu_dereference_rtnl We use rcu_dereference_check(p, rcu_read_lock_held() || lockdep_rtnl_is_held()) several times in network stack. More usages to come too, so its time to create a helper. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 58d44491880..263690d991a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -749,6 +749,17 @@ extern int rtnl_is_locked(void); extern int lockdep_rtnl_is_held(void); #endif /* #ifdef CONFIG_PROVE_LOCKING */ +/** + * rcu_dereference_rtnl - rcu_dereference with debug checking + * @p: The pointer to read, prior to dereferencing + * + * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() + * or RTNL + */ +#define rcu_dereference_rtnl(p) \ + rcu_dereference_check(p, rcu_read_lock_held() || \ + lockdep_rtnl_is_held()) + extern void rtnetlink_init(void); extern void __rtnl_unlock(void); -- cgit v1.2.3-18-g5258 From 15133f6e67d8d646d0744336b4daa3135452cb0d Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 12 Jan 2010 14:33:38 -0800 Subject: RDS: Implement atomic operations Implement a CMSG-based interface to do FADD and CSWP ops. Alter send routines to handle atomic ops. Add atomic counters to stats. Add xmit_atomic() to struct rds_transport Inline rds_ib_send_unmap_rdma into unmap_rm Signed-off-by: Andy Grover --- include/linux/rds.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 7f3971d9fc5..9239152abf7 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -73,6 +73,8 @@ #define RDS_CMSG_RDMA_MAP 3 #define RDS_CMSG_RDMA_STATUS 4 #define RDS_CMSG_CONG_UPDATE 5 +#define RDS_CMSG_ATOMIC_FADD 6 +#define RDS_CMSG_ATOMIC_CSWP 7 #define RDS_INFO_FIRST 10000 #define RDS_INFO_COUNTERS 10000 @@ -237,6 +239,23 @@ struct rds_rdma_args { u_int64_t user_token; }; +struct rds_atomic_args { + rds_rdma_cookie_t cookie; + uint64_t local_addr; + uint64_t remote_addr; + union { + struct { + uint64_t compare; + uint64_t swap; + } cswp; + struct { + uint64_t add; + } fadd; + }; + uint64_t flags; + uint64_t user_token; +}; + struct rds_rdma_notify { u_int64_t user_token; int32_t status; -- cgit v1.2.3-18-g5258 From 2c3a5f9abb1dc5efdab8ba9a568b1661c65fd1e3 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Mon, 1 Mar 2010 16:10:40 -0800 Subject: RDS: Add flag for silent ops. Do atomic op before RDMA Add a flag to the API so users can indicate they want silent operations. This is needed because silent ops cannot be used with USE_ONCE MRs, so we can't just assume silent. Also, change send_xmit to do atomic op before rdma op if both are present, and centralize the hairy logic to determine if we want to attempt silent, or not. Signed-off-by: Andy Grover --- include/linux/rds.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 9239152abf7..109f1d34331 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -276,5 +276,6 @@ struct rds_rdma_notify { #define RDS_RDMA_USE_ONCE 0x0008 /* free MR after use */ #define RDS_RDMA_DONTWAIT 0x0010 /* Don't wait in SET_BARRIER */ #define RDS_RDMA_NOTIFY_ME 0x0020 /* Notify when operation completes */ +#define RDS_RDMA_SILENT 0x0040 /* Do not interrupt remote */ #endif /* IB_RDS_H */ -- cgit v1.2.3-18-g5258 From 20c72bd5f5f902e5a8745d51573699605bf8d21c Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 25 Aug 2010 05:51:28 -0700 Subject: RDS: Implement masked atomic operations Add two CMSGs for masked versions of cswp and fadd. args struct modified to use a union for different atomic op type's arguments. Change IB to do masked atomic ops. Atomic op type in rds_message similarly unionized. Signed-off-by: Andy Grover --- include/linux/rds.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 109f1d34331..a2a5edb4a27 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -75,6 +75,8 @@ #define RDS_CMSG_CONG_UPDATE 5 #define RDS_CMSG_ATOMIC_FADD 6 #define RDS_CMSG_ATOMIC_CSWP 7 +#define RDS_CMSG_MASKED_ATOMIC_FADD 8 +#define RDS_CMSG_MASKED_ATOMIC_CSWP 9 #define RDS_INFO_FIRST 10000 #define RDS_INFO_COUNTERS 10000 @@ -251,6 +253,16 @@ struct rds_atomic_args { struct { uint64_t add; } fadd; + struct { + uint64_t compare; + uint64_t swap; + uint64_t compare_mask; + uint64_t swap_mask; + } m_cswp; + struct { + uint64_t add; + uint64_t nocarry_mask; + } m_fadd; }; uint64_t flags; uint64_t user_token; -- cgit v1.2.3-18-g5258 From fd128dfa50cfc4f2959dc4aa5d7468d33b988332 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 25 Aug 2010 09:32:17 -0700 Subject: RDS: Add rds.h to exported headers list Also, a number of changes were made based on the assumption that rds.h wasn't exported, so roll these back. Signed-off-by: Andy Grover --- include/linux/Kbuild | 1 + include/linux/rds.h | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 626b629429f..c7fbf298ad6 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -302,6 +302,7 @@ header-y += quota.h header-y += radeonfb.h header-y += random.h header-y += raw.h +header-y += rds.h header-y += reboot.h header-y += reiserfs_fs.h header-y += reiserfs_xattr.h diff --git a/include/linux/rds.h b/include/linux/rds.h index a2a5edb4a27..f371f885a35 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -95,7 +95,7 @@ struct rds_info_counter { u_int8_t name[32]; u_int64_t value; -} __packed; +} __attribute__((packed)); #define RDS_INFO_CONNECTION_FLAG_SENDING 0x01 #define RDS_INFO_CONNECTION_FLAG_CONNECTING 0x02 @@ -110,7 +110,7 @@ struct rds_info_connection { __be32 faddr; u_int8_t transport[TRANSNAMSIZ]; /* null term ascii */ u_int8_t flags; -} __packed; +} __attribute__((packed)); struct rds_info_flow { __be32 laddr; @@ -118,7 +118,7 @@ struct rds_info_flow { u_int32_t bytes; __be16 lport; __be16 fport; -} __packed; +} __attribute__((packed)); #define RDS_INFO_MESSAGE_FLAG_ACK 0x01 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 @@ -131,7 +131,7 @@ struct rds_info_message { __be16 lport; __be16 fport; u_int8_t flags; -} __packed; +} __attribute__((packed)); struct rds_info_socket { u_int32_t sndbuf; @@ -141,7 +141,7 @@ struct rds_info_socket { __be16 connected_port; u_int32_t rcvbuf; u_int64_t inum; -} __packed; +} __attribute__((packed)); struct rds_info_tcp_socket { __be32 local_addr; @@ -153,7 +153,7 @@ struct rds_info_tcp_socket { u_int32_t last_sent_nxt; u_int32_t last_expected_una; u_int32_t last_seen_una; -} __packed; +} __attribute__((packed)); #define RDS_IB_GID_LEN 16 struct rds_info_rdma_connection { -- cgit v1.2.3-18-g5258 From a46f561b774d90d8616473d56696e7d44fa1c9f1 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 25 Aug 2010 09:34:10 -0700 Subject: RDS: rds.h: Replace u_int[size]_t with uint[size]_t Replace e.g. u_int32_t types with the more common uint32_t. Reported-by: Matthew Wilcox Signed-off-by: Andy Grover --- include/linux/rds.h | 58 ++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index f371f885a35..3576b31b6b7 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -93,8 +93,8 @@ #define RDS_INFO_LAST 10010 struct rds_info_counter { - u_int8_t name[32]; - u_int64_t value; + uint8_t name[32]; + uint64_t value; } __attribute__((packed)); #define RDS_INFO_CONNECTION_FLAG_SENDING 0x01 @@ -104,18 +104,18 @@ struct rds_info_counter { #define TRANSNAMSIZ 16 struct rds_info_connection { - u_int64_t next_tx_seq; - u_int64_t next_rx_seq; + uint64_t next_tx_seq; + uint64_t next_rx_seq; __be32 laddr; __be32 faddr; - u_int8_t transport[TRANSNAMSIZ]; /* null term ascii */ - u_int8_t flags; + uint8_t transport[TRANSNAMSIZ]; /* null term ascii */ + uint8_t flags; } __attribute__((packed)); struct rds_info_flow { __be32 laddr; __be32 faddr; - u_int32_t bytes; + uint32_t bytes; __be16 lport; __be16 fport; } __attribute__((packed)); @@ -124,23 +124,23 @@ struct rds_info_flow { #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 struct rds_info_message { - u_int64_t seq; - u_int32_t len; + uint64_t seq; + uint32_t len; __be32 laddr; __be32 faddr; __be16 lport; __be16 fport; - u_int8_t flags; + uint8_t flags; } __attribute__((packed)); struct rds_info_socket { - u_int32_t sndbuf; + uint32_t sndbuf; __be32 bound_addr; __be32 connected_addr; __be16 bound_port; __be16 connected_port; - u_int32_t rcvbuf; - u_int64_t inum; + uint32_t rcvbuf; + uint64_t inum; } __attribute__((packed)); struct rds_info_tcp_socket { @@ -148,11 +148,11 @@ struct rds_info_tcp_socket { __be16 local_port; __be32 peer_addr; __be16 peer_port; - u_int64_t hdr_rem; - u_int64_t data_rem; - u_int32_t last_sent_nxt; - u_int32_t last_expected_una; - u_int32_t last_seen_una; + uint64_t hdr_rem; + uint64_t data_rem; + uint32_t last_sent_nxt; + uint32_t last_expected_una; + uint32_t last_seen_una; } __attribute__((packed)); #define RDS_IB_GID_LEN 16 @@ -207,38 +207,38 @@ struct rds_info_rdma_connection { * (so that the application does not have to worry about * alignment). */ -typedef u_int64_t rds_rdma_cookie_t; +typedef uint64_t rds_rdma_cookie_t; struct rds_iovec { - u_int64_t addr; - u_int64_t bytes; + uint64_t addr; + uint64_t bytes; }; struct rds_get_mr_args { struct rds_iovec vec; - u_int64_t cookie_addr; + uint64_t cookie_addr; uint64_t flags; }; struct rds_get_mr_for_dest_args { struct sockaddr_storage dest_addr; struct rds_iovec vec; - u_int64_t cookie_addr; + uint64_t cookie_addr; uint64_t flags; }; struct rds_free_mr_args { rds_rdma_cookie_t cookie; - u_int64_t flags; + uint64_t flags; }; struct rds_rdma_args { rds_rdma_cookie_t cookie; struct rds_iovec remote_vec; - u_int64_t local_vec_addr; - u_int64_t nr_local; - u_int64_t flags; - u_int64_t user_token; + uint64_t local_vec_addr; + uint64_t nr_local; + uint64_t flags; + uint64_t user_token; }; struct rds_atomic_args { @@ -269,7 +269,7 @@ struct rds_atomic_args { }; struct rds_rdma_notify { - u_int64_t user_token; + uint64_t user_token; int32_t status; }; -- cgit v1.2.3-18-g5258 From 905d64c89e2a9d71d0606904b7c3908633db6072 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 8 Sep 2010 18:03:54 -0700 Subject: RDS: Remove dead struct from rds.h flows are an obsolete date type. Signed-off-by: Andy Grover --- include/linux/rds.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 3576b31b6b7..91950950aa5 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -112,14 +112,6 @@ struct rds_info_connection { uint8_t flags; } __attribute__((packed)); -struct rds_info_flow { - __be32 laddr; - __be32 faddr; - uint32_t bytes; - __be16 lport; - __be16 fport; -} __attribute__((packed)); - #define RDS_INFO_MESSAGE_FLAG_ACK 0x01 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 -- cgit v1.2.3-18-g5258 From 01a08546af311c065f34727787dd0cc8dc0c216f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 31 Aug 2010 10:28:16 +0200 Subject: sched: Add book scheduling domain On top of the SMT and MC scheduling domains this adds the BOOK scheduling domain. This is useful for NUMA like machines which do not have an interface which tells which piece of memory is attached to which node or where the hardware performs striping. Signed-off-by: Heiko Carstens Signed-off-by: Peter Zijlstra LKML-Reference: <20100831082844.253053798@de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + include/linux/topology.h | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7d..b51c53c285b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -875,6 +875,7 @@ enum sched_domain_level { SD_LV_NONE = 0, SD_LV_SIBLING, SD_LV_MC, + SD_LV_BOOK, SD_LV_CPU, SD_LV_NODE, SD_LV_ALLNODES, diff --git a/include/linux/topology.h b/include/linux/topology.h index 64e084ff5e5..b91a40e847d 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -201,6 +201,12 @@ int arch_update_cpu_topology(void); .balance_interval = 64, \ } +#ifdef CONFIG_SCHED_BOOK +#ifndef SD_BOOK_INIT +#error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!! +#endif +#endif /* CONFIG_SCHED_BOOK */ + #ifdef CONFIG_NUMA #ifndef SD_NODE_INIT #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! -- cgit v1.2.3-18-g5258 From 51b0fe39549a04858001922919ab355dee9bdfcf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 13:35:57 +0200 Subject: perf: Deconstify struct pmu sed -ie 's/const struct pmu\>/struct pmu/g' `git grep -l "const struct pmu\>"` Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 000610c4de7..09d048b5211 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -578,19 +578,19 @@ struct pmu { * Start the transaction, after this ->enable() doesn't need * to do schedulability tests. */ - void (*start_txn) (const struct pmu *pmu); + void (*start_txn) (struct pmu *pmu); /* * If ->start_txn() disabled the ->enable() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (const struct pmu *pmu); + int (*commit_txn) (struct pmu *pmu); /* * Will cancel the transaction, assumes ->disable() is called for * each successfull ->enable() during the transaction. */ - void (*cancel_txn) (const struct pmu *pmu); + void (*cancel_txn) (struct pmu *pmu); }; /** @@ -669,7 +669,7 @@ struct perf_event { int nr_siblings; int group_flags; struct perf_event *group_leader; - const struct pmu *pmu; + struct pmu *pmu; enum perf_event_active_state state; unsigned int attach_state; @@ -849,7 +849,7 @@ struct perf_output_handle { */ extern int perf_max_events; -extern const struct pmu *hw_perf_event_init(struct perf_event *event); +extern struct pmu *hw_perf_event_init(struct perf_event *event); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); -- cgit v1.2.3-18-g5258 From b0a873ebbf87bf38bf70b5e39a7cadc96099fa13 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 13:35:08 +0200 Subject: perf: Register PMU implementations Simple registration interface for struct pmu, this provides the infrastructure for removing all the weak functions. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 09d048b5211..ab72f56eb37 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -561,6 +561,13 @@ struct perf_event; * struct pmu - generic performance monitoring unit */ struct pmu { + struct list_head entry; + + /* + * Should return -ENOENT when the @event doesn't match this pmu + */ + int (*event_init) (struct perf_event *event); + int (*enable) (struct perf_event *event); void (*disable) (struct perf_event *event); int (*start) (struct perf_event *event); @@ -849,7 +856,8 @@ struct perf_output_handle { */ extern int perf_max_events; -extern struct pmu *hw_perf_event_init(struct perf_event *event); +extern int perf_pmu_register(struct pmu *pmu); +extern void perf_pmu_unregister(struct pmu *pmu); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); -- cgit v1.2.3-18-g5258 From 24cd7f54a0d47e1d5b3de29e2456bfbd2d8447b7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 17:32:03 +0200 Subject: perf: Reduce perf_disable() usage Since the current perf_disable() usage is only an optimization, remove it for now. This eases the removal of the __weak hw_perf_enable() interface. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ab72f56eb37..243286a8ded 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -564,26 +564,26 @@ struct pmu { struct list_head entry; /* - * Should return -ENOENT when the @event doesn't match this pmu + * Should return -ENOENT when the @event doesn't match this PMU. */ int (*event_init) (struct perf_event *event); - int (*enable) (struct perf_event *event); + int (*enable) (struct perf_event *event); void (*disable) (struct perf_event *event); - int (*start) (struct perf_event *event); + int (*start) (struct perf_event *event); void (*stop) (struct perf_event *event); void (*read) (struct perf_event *event); void (*unthrottle) (struct perf_event *event); /* - * Group events scheduling is treated as a transaction, add group - * events as a whole and perform one schedulability test. If the test - * fails, roll back the whole group + * Group events scheduling is treated as a transaction, add + * group events as a whole and perform one schedulability test. + * If the test fails, roll back the whole group */ /* - * Start the transaction, after this ->enable() doesn't need - * to do schedulability tests. + * Start the transaction, after this ->enable() doesn't need to + * do schedulability tests. */ void (*start_txn) (struct pmu *pmu); /* @@ -594,8 +594,8 @@ struct pmu { */ int (*commit_txn) (struct pmu *pmu); /* - * Will cancel the transaction, assumes ->disable() is called for - * each successfull ->enable() during the transaction. + * Will cancel the transaction, assumes ->disable() is called + * for each successfull ->enable() during the transaction. */ void (*cancel_txn) (struct pmu *pmu); }; -- cgit v1.2.3-18-g5258 From 33696fc0d141bbbcb12f75b69608ea83282e3117 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Jun 2010 08:49:00 +0200 Subject: perf: Per PMU disable Changes perf_disable() into perf_pmu_disable(). Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 243286a8ded..6abf103fb7f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -563,6 +563,11 @@ struct perf_event; struct pmu { struct list_head entry; + int *pmu_disable_count; + + void (*pmu_enable) (struct pmu *pmu); + void (*pmu_disable) (struct pmu *pmu); + /* * Should return -ENOENT when the @event doesn't match this PMU. */ @@ -868,10 +873,8 @@ extern void perf_event_free_task(struct task_struct *task); extern void set_perf_event_pending(void); extern void perf_event_do_pending(void); extern void perf_event_print_debug(void); -extern void __perf_disable(void); -extern bool __perf_enable(void); -extern void perf_disable(void); -extern void perf_enable(void); +extern void perf_pmu_disable(struct pmu *pmu); +extern void perf_pmu_enable(struct pmu *pmu); extern int perf_event_task_disable(void); extern int perf_event_task_enable(void); extern void perf_event_update_userpage(struct perf_event *event); @@ -1056,8 +1059,6 @@ static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_do_pending(void) { } static inline void perf_event_print_debug(void) { } -static inline void perf_disable(void) { } -static inline void perf_enable(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { return -EINVAL; } -- cgit v1.2.3-18-g5258 From ad5133b7030d04ce7701aa7cbe98f561347c79c2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Jun 2010 12:22:39 +0200 Subject: perf: Default PMU ops Provide default implementations for the pmu txn methods, this allows us to remove some conditional code. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6abf103fb7f..bf85733597e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -565,8 +565,8 @@ struct pmu { int *pmu_disable_count; - void (*pmu_enable) (struct pmu *pmu); - void (*pmu_disable) (struct pmu *pmu); + void (*pmu_enable) (struct pmu *pmu); /* optional */ + void (*pmu_disable) (struct pmu *pmu); /* optional */ /* * Should return -ENOENT when the @event doesn't match this PMU. @@ -590,19 +590,19 @@ struct pmu { * Start the transaction, after this ->enable() doesn't need to * do schedulability tests. */ - void (*start_txn) (struct pmu *pmu); + void (*start_txn) (struct pmu *pmu); /* optional */ /* * If ->start_txn() disabled the ->enable() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (struct pmu *pmu); + int (*commit_txn) (struct pmu *pmu); /* optional */ /* * Will cancel the transaction, assumes ->disable() is called * for each successfull ->enable() during the transaction. */ - void (*cancel_txn) (struct pmu *pmu); + void (*cancel_txn) (struct pmu *pmu); /* optional */ }; /** -- cgit v1.2.3-18-g5258 From fa407f35e0298d841e4088f95a7f9cf6e725c6d5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Jun 2010 12:35:12 +0200 Subject: perf: Shrink hw_perf_event Use hw_perf_event::period_left instead of hw_perf_event::remaining and win back 8 bytes. Signed-off-by: Peter Zijlstra Cc: paulus Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index bf85733597e..8cafa15af60 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -529,7 +529,6 @@ struct hw_perf_event { int last_cpu; }; struct { /* software */ - s64 remaining; struct hrtimer hrtimer; }; #ifdef CONFIG_HAVE_HW_BREAKPOINT -- cgit v1.2.3-18-g5258 From a4eaf7f14675cb512d69f0c928055e73d0c6d252 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Jun 2010 14:37:10 +0200 Subject: perf: Rework the PMU methods Replace pmu::{enable,disable,start,stop,unthrottle} with pmu::{add,del,start,stop}, all of which take a flags argument. The new interface extends the capability to stop a counter while keeping it scheduled on the PMU. We replace the throttled state with the generic stopped state. This also allows us to efficiently stop/start counters over certain code paths (like IRQ handlers). It also allows scheduling a counter without it starting, allowing for a generic frozen state (useful for rotating stopped counters). The stopped state is implemented in two different ways, depending on how the architecture implemented the throttled state: 1) We disable the counter: a) the pmu has per-counter enable bits, we flip that b) we program a NOP event, preserving the counter state 2) We store the counter state and ignore all read/overflow events Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 4 ++-- include/linux/perf_event.h | 54 +++++++++++++++++++++++++++++++++----------- 2 files changed, 43 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 5f8ad7bec63..8beabb958f6 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -252,8 +252,8 @@ DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); -extern int perf_trace_enable(struct perf_event *event); -extern void perf_trace_disable(struct perf_event *event); +extern int perf_trace_add(struct perf_event *event, int flags); +extern void perf_trace_del(struct perf_event *event, int flags); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8cafa15af60..402073c6166 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -538,6 +538,7 @@ struct hw_perf_event { }; #endif }; + int state; local64_t prev_count; u64 sample_period; u64 last_period; @@ -549,6 +550,13 @@ struct hw_perf_event { #endif }; +/* + * hw_perf_event::state flags + */ +#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ +#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ +#define PERF_HES_ARCH 0x04 + struct perf_event; /* @@ -564,42 +572,62 @@ struct pmu { int *pmu_disable_count; + /* + * Fully disable/enable this PMU, can be used to protect from the PMI + * as well as for lazy/batch writing of the MSRs. + */ void (*pmu_enable) (struct pmu *pmu); /* optional */ void (*pmu_disable) (struct pmu *pmu); /* optional */ /* + * Try and initialize the event for this PMU. * Should return -ENOENT when the @event doesn't match this PMU. */ int (*event_init) (struct perf_event *event); - int (*enable) (struct perf_event *event); - void (*disable) (struct perf_event *event); - int (*start) (struct perf_event *event); - void (*stop) (struct perf_event *event); +#define PERF_EF_START 0x01 /* start the counter when adding */ +#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ +#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ + + /* + * Adds/Removes a counter to/from the PMU, can be done inside + * a transaction, see the ->*_txn() methods. + */ + int (*add) (struct perf_event *event, int flags); + void (*del) (struct perf_event *event, int flags); + + /* + * Starts/Stops a counter present on the PMU. The PMI handler + * should stop the counter when perf_event_overflow() returns + * !0. ->start() will be used to continue. + */ + void (*start) (struct perf_event *event, int flags); + void (*stop) (struct perf_event *event, int flags); + + /* + * Updates the counter value of the event. + */ void (*read) (struct perf_event *event); - void (*unthrottle) (struct perf_event *event); /* * Group events scheduling is treated as a transaction, add * group events as a whole and perform one schedulability test. * If the test fails, roll back the whole group - */ - - /* - * Start the transaction, after this ->enable() doesn't need to + * + * Start the transaction, after this ->add() doesn't need to * do schedulability tests. */ void (*start_txn) (struct pmu *pmu); /* optional */ /* - * If ->start_txn() disabled the ->enable() schedulability test + * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ int (*commit_txn) (struct pmu *pmu); /* optional */ /* - * Will cancel the transaction, assumes ->disable() is called - * for each successfull ->enable() during the transaction. + * Will cancel the transaction, assumes ->del() is called + * for each successfull ->add() during the transaction. */ void (*cancel_txn) (struct pmu *pmu); /* optional */ }; @@ -680,7 +708,7 @@ struct perf_event { int nr_siblings; int group_flags; struct perf_event *group_leader; - struct pmu *pmu; + struct pmu *pmu; enum perf_event_active_state state; unsigned int attach_state; -- cgit v1.2.3-18-g5258 From 15ac9a395a753cb28c674e7ea80386ffdff21785 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 15:51:45 +0200 Subject: perf: Remove the sysfs bits Neither the overcommit nor the reservation sysfs parameter were actually working, remove them as they'll only get in the way. Signed-off-by: Peter Zijlstra Cc: paulus LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 402073c6166..b22176d3ebd 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -860,7 +860,6 @@ struct perf_cpu_context { struct perf_event_context ctx; struct perf_event_context *task_ctx; int active_oncpu; - int max_pertask; int exclusive; struct swevent_hlist *swevent_hlist; struct mutex hlist_mutex; @@ -883,11 +882,6 @@ struct perf_output_handle { #ifdef CONFIG_PERF_EVENTS -/* - * Set by architecture code: - */ -extern int perf_max_events; - extern int perf_pmu_register(struct pmu *pmu); extern void perf_pmu_unregister(struct pmu *pmu); -- cgit v1.2.3-18-g5258 From b28ab83c595e767f2028276b7398d17f2253cec0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 14:48:15 +0200 Subject: perf: Remove the swevent hash-table from the cpu context Separate the swevent hash-table from the cpu_context bits in preparation for per pmu cpu contexts. This keeps the swevent hash a global entity. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b22176d3ebd..4ab4f0ca09a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -861,12 +861,6 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; - struct swevent_hlist *swevent_hlist; - struct mutex hlist_mutex; - int hlist_refcount; - - /* Recursion avoidance in each contexts */ - int recursion[PERF_NR_CONTEXTS]; }; struct perf_output_handle { -- cgit v1.2.3-18-g5258 From b5ab4cd563e7ab49b27957704112a8ecade54e1f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 16:32:21 +0200 Subject: perf: Per cpu-context rotation timer Give each cpu-context its own timer so that it is a self contained entity, this eases the way for per-pmu-per-cpu contexts as well as provides the basic infrastructure to allow different rotation times per pmu. Things to look at: - folding the tick and these TICK_NSEC timers - separate task context rotation Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4ab4f0ca09a..fa04537df55 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -861,6 +861,8 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; + u64 timer_interval; + struct hrtimer timer; }; struct perf_output_handle { @@ -881,7 +883,6 @@ extern void perf_pmu_unregister(struct pmu *pmu); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); -extern void perf_event_task_tick(struct task_struct *task); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); @@ -1067,8 +1068,6 @@ perf_event_task_sched_in(struct task_struct *task) { } static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { } -static inline void -perf_event_task_tick(struct task_struct *task) { } static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } -- cgit v1.2.3-18-g5258 From 108b02cfce04ee90b0a07ee0b104baffd39f5934 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 14:32:03 +0200 Subject: perf: Per-pmu-per-cpu contexts Allocate per-cpu contexts per pmu. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fa04537df55..22155ef3b36 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -570,7 +570,8 @@ struct perf_event; struct pmu { struct list_head entry; - int *pmu_disable_count; + int * __percpu pmu_disable_count; + struct perf_cpu_context * __percpu pmu_cpu_context; /* * Fully disable/enable this PMU, can be used to protect from the PMI @@ -808,6 +809,7 @@ struct perf_event { * Used as a container for task events and CPU events as well: */ struct perf_event_context { + struct pmu *pmu; /* * Protect the states of the events in the list, * nr_active, and the list: -- cgit v1.2.3-18-g5258 From 8dc85d547285668e509f86c177bcd4ea055bcaaf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Sep 2010 16:50:03 +0200 Subject: perf: Multiple task contexts Provide the infrastructure for multiple task contexts. A more flexible approach would have resulted in more pointer chases in the scheduling hot-paths. This approach has the limitation of a static number of task contexts. Since I expect most external PMUs to be system wide, or at least node wide (as per the intel uncore unit) they won't actually need a task context. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + include/linux/sched.h | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 22155ef3b36..9ecfd856ce6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -572,6 +572,7 @@ struct pmu { int * __percpu pmu_disable_count; struct perf_cpu_context * __percpu pmu_cpu_context; + int task_ctx_nr; /* * Fully disable/enable this PMU, can be used to protect from the PMI diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7d..89d6023c6f8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1160,6 +1160,12 @@ struct sched_rt_entity { struct rcu_node; +enum perf_event_task_context { + perf_invalid_context = -1, + perf_hw_context = 0, + perf_nr_task_contexts, +}; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -1431,7 +1437,7 @@ struct task_struct { struct futex_pi_state *pi_state_cache; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp; + struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; struct mutex perf_event_mutex; struct list_head perf_event_list; #endif -- cgit v1.2.3-18-g5258 From 89a1e18731959e9953fae15ddc1a983eb15a4f19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 Sep 2010 17:34:50 +0200 Subject: perf: Provide a separate task context for swevents Since software events are always schedulable, mixing them up with hardware events (who are not) can lead to funny scheduling oddities. Giving them their own context solves this. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 9 +-------- include/linux/sched.h | 1 + 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9ecfd856ce6..c1173520f14 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -952,14 +952,7 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, */ static inline int is_software_event(struct perf_event *event) { - switch (event->attr.type) { - case PERF_TYPE_SOFTWARE: - case PERF_TYPE_TRACEPOINT: - /* for now the breakpoint stuff also works as software event */ - case PERF_TYPE_BREAKPOINT: - return 1; - } - return 0; + return event->pmu->task_ctx_nr == perf_sw_context; } extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; diff --git a/include/linux/sched.h b/include/linux/sched.h index 89d6023c6f8..eb3c1ceec06 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1163,6 +1163,7 @@ struct rcu_node; enum perf_event_task_context { perf_invalid_context = -1, perf_hw_context = 0, + perf_sw_context, perf_nr_task_contexts, }; -- cgit v1.2.3-18-g5258 From 4e231c7962ce711c7d8c2a4dc23ecd1e8fc28363 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Sep 2010 21:01:59 +0200 Subject: perf: Fix up delayed_put_task_struct() I missed a perf_event_ctxp user when converting it to an array. Pull this last user into perf_event.c as well and fix it up. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c1173520f14..93bf53aa50e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -889,6 +889,7 @@ extern void perf_event_task_sched_out(struct task_struct *task, struct task_stru extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); +extern void perf_event_delayed_put(struct task_struct *task); extern void set_perf_event_pending(void); extern void perf_event_do_pending(void); extern void perf_event_print_debug(void); @@ -1067,6 +1068,7 @@ perf_event_task_sched_out(struct task_struct *task, static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } +static inline void perf_event_delayed_put(struct task_struct *task) { } static inline void perf_event_do_pending(void) { } static inline void perf_event_print_debug(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } -- cgit v1.2.3-18-g5258 From 3b8fad3e2f5f69bfd8e42d099ca8582fb2342edf Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Sep 2010 14:26:00 +0200 Subject: irq: Fix circular headers dependency asm-generic/hardirq.h needs asm/irq.h which might include linux/interrupt.h as in the sparc 32 case. At this point we need irq_cpustat generic definitions, but those are included later in asm-generic/hardirq.h. Then delay a bit the inclusion of irq.h from asm-generic/hardirq.h, it doesn't need to be included early. This fixes: include/linux/interrupt.h: In function '__raise_softirq_irqoff': include/linux/interrupt.h:414: error: implicit declaration of function 'local_softirq_pending' include/linux/interrupt.h:414: error: lvalue required as left operand of assignment Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Lai Jiangshan Cc: Koki Sanagi Cc: mathieu.desnoyers@efficios.com Cc: rostedt@goodmis.org Cc: nhorman@tuxdriver.com Cc: scott.a.mcmillan@intel.com Cc: eric.dumazet@gmail.com Cc: kaneshige.kenji@jp.fujitsu.com Cc: davem@davemloft.net Cc: izumi.taku@jp.fujitsu.com Cc: kosaki.motohiro@jp.fujitsu.com LKML-Reference: <20100908122557.GA5310@nowhere> Signed-off-by: Ingo Molnar --- include/asm-generic/hardirq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h index 62f59080e5c..04d0a977cd4 100644 --- a/include/asm-generic/hardirq.h +++ b/include/asm-generic/hardirq.h @@ -3,13 +3,13 @@ #include #include -#include typedef struct { unsigned int __softirq_pending; } ____cacheline_aligned irq_cpustat_t; #include /* Standard mappings for irq_cpustat_t above */ +#include #ifndef ack_bad_irq static inline void ack_bad_irq(unsigned int irq) -- cgit v1.2.3-18-g5258 From 8613e4c2872a87cc309a42de2c7091744dc54d0e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 9 Sep 2010 21:54:22 -0700 Subject: Input: add support for large scancodes Several devices use a high number of bits for scancodes. One important group is the Remote Controllers. Some new protocols like RC-6 define a scancode space of 64 bits. The current EVIO[CS]GKEYCODE ioctls allow replace the scancode/keycode translation tables, but it is limited to up to 32 bits for scancode. Also, if userspace wants to clean the existing table, replacing it by a new one, it needs to run a loop calling the ioctls over the entire sparse scancode space. To solve those problems, this patch extends the ioctls to allow drivers handle scancodes up to 32 bytes long (the length could be extended in the future should such need arise) and allow userspace to query and set scancode to keycode mappings not only by scancode but also by index. Compatibility code were also added to handle the old format of EVIO[CS]GKEYCODE ioctls. Folded fixes by: - Dan Carpenter: locking fixes for the original implementation - Jarod Wilson: fix crash when setting keycode and wiring up get/set handlers in original implementation. - Dmitry Torokhov: rework to consolidate old and new scancode handling, provide options to act either by index or scancode. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Dan Carpenter Signed-off-by: Jarod Wilson Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 55 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index 78926512353..0057698fd97 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -34,7 +34,7 @@ struct input_event { * Protocol version. */ -#define EV_VERSION 0x010000 +#define EV_VERSION 0x010001 /* * IOCTLs (0x00 - 0x7f) @@ -56,12 +56,37 @@ struct input_absinfo { __s32 resolution; }; +/** + * struct input_keymap_entry - used by EVIOCGKEYCODE/EVIOCSKEYCODE ioctls + * @scancode: scancode represented in machine-endian form. + * @len: length of the scancode that resides in @scancode buffer. + * @index: index in the keymap, may be used instead of scancode + * @flags: allows to specify how kernel should handle the request. For + * example, setting INPUT_KEYMAP_BY_INDEX flag indicates that kernel + * should perform lookup in keymap by @index instead of @scancode + * @keycode: key code assigned to this scancode + * + * The structure is used to retrieve and modify keymap data. Users have + * option of performing lookup either by @scancode itself or by @index + * in keymap entry. EVIOCGKEYCODE will also return scancode or index + * (depending on which element was used to perform lookup). + */ +struct input_keymap_entry { +#define INPUT_KEYMAP_BY_INDEX (1 << 0) + __u8 flags; + __u8 len; + __u16 index; + __u32 keycode; + __u8 scancode[32]; +}; + #define EVIOCGVERSION _IOR('E', 0x01, int) /* get driver version */ #define EVIOCGID _IOR('E', 0x02, struct input_id) /* get device ID */ #define EVIOCGREP _IOR('E', 0x03, unsigned int[2]) /* get repeat settings */ #define EVIOCSREP _IOW('E', 0x03, unsigned int[2]) /* set repeat settings */ -#define EVIOCGKEYCODE _IOR('E', 0x04, unsigned int[2]) /* get keycode */ -#define EVIOCSKEYCODE _IOW('E', 0x04, unsigned int[2]) /* set keycode */ + +#define EVIOCGKEYCODE _IOR('E', 0x04, struct input_keymap_entry) /* get keycode */ +#define EVIOCSKEYCODE _IOW('E', 0x04, struct input_keymap_entry) /* set keycode */ #define EVIOCGNAME(len) _IOC(_IOC_READ, 'E', 0x06, len) /* get device name */ #define EVIOCGPHYS(len) _IOC(_IOC_READ, 'E', 0x07, len) /* get physical location */ @@ -73,8 +98,8 @@ struct input_absinfo { #define EVIOCGSW(len) _IOC(_IOC_READ, 'E', 0x1b, len) /* get all switch states */ #define EVIOCGBIT(ev,len) _IOC(_IOC_READ, 'E', 0x20 + ev, len) /* get event bits */ -#define EVIOCGABS(abs) _IOR('E', 0x40 + abs, struct input_absinfo) /* get abs value/limits */ -#define EVIOCSABS(abs) _IOW('E', 0xc0 + abs, struct input_absinfo) /* set abs value/limits */ +#define EVIOCGABS(abs) _IOR('E', 0x40 + abs, struct input_absinfo) /* get abs value/limits */ +#define EVIOCSABS(abs) _IOW('E', 0xc0 + abs, struct input_absinfo) /* set abs value/limits */ #define EVIOCSFF _IOC(_IOC_WRITE, 'E', 0x80, sizeof(struct ff_effect)) /* send a force effect to a force feedback device */ #define EVIOCRMFF _IOW('E', 0x81, int) /* Erase a force effect */ @@ -1088,13 +1113,13 @@ struct input_mt_slot { * @keycodemax: size of keycode table * @keycodesize: size of elements in keycode table * @keycode: map of scancodes to keycodes for this device + * @getkeycode: optional legacy method to retrieve current keymap. * @setkeycode: optional method to alter current keymap, used to implement * sparse keymaps. If not supplied default mechanism will be used. * The method is being called while holding event_lock and thus must * not sleep - * @getkeycode: optional method to retrieve current keymap. If not supplied - * default mechanism will be used. The method is being called while - * holding event_lock and thus must not sleep + * @getkeycode_new: transition method + * @setkeycode_new: transition method * @ff: force feedback structure associated with the device if device * supports force feedback effects * @repeat_key: stores key code of the last key pressed; used to implement @@ -1168,10 +1193,16 @@ struct input_dev { unsigned int keycodemax; unsigned int keycodesize; void *keycode; + int (*setkeycode)(struct input_dev *dev, unsigned int scancode, unsigned int keycode); int (*getkeycode)(struct input_dev *dev, unsigned int scancode, unsigned int *keycode); + int (*setkeycode_new)(struct input_dev *dev, + const struct input_keymap_entry *ke, + unsigned int *old_keycode); + int (*getkeycode_new)(struct input_dev *dev, + struct input_keymap_entry *ke); struct ff_device *ff; @@ -1478,10 +1509,12 @@ INPUT_GENERATE_ABS_ACCESSORS(fuzz, fuzz) INPUT_GENERATE_ABS_ACCESSORS(flat, flat) INPUT_GENERATE_ABS_ACCESSORS(res, resolution) -int input_get_keycode(struct input_dev *dev, - unsigned int scancode, unsigned int *keycode); +int input_scancode_to_scalar(const struct input_keymap_entry *ke, + unsigned int *scancode); + +int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke); int input_set_keycode(struct input_dev *dev, - unsigned int scancode, unsigned int keycode); + const struct input_keymap_entry *ke); extern struct class input_class; -- cgit v1.2.3-18-g5258 From 9f470095068e415658ccc6977cf4b3f5be418526 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 9 Sep 2010 21:59:11 -0700 Subject: Input: media/IR - switch to using new keycode interface Switch the code to use new style of getkeycode and setkeycode methods to allow retrieving and setting keycodes not only by their scancodes but also by index. Acked-by: Mauro Carvalho Chehab Signed-off-by: Dmitry Torokhov --- include/media/rc-map.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/rc-map.h b/include/media/rc-map.h index a9c041d4966..9b201ec8dc7 100644 --- a/include/media/rc-map.h +++ b/include/media/rc-map.h @@ -35,7 +35,7 @@ struct ir_scancode_table { unsigned int len; /* Used number of entries */ unsigned int alloc; /* Size of *scan in bytes */ u64 ir_type; - char *name; + const char *name; spinlock_t lock; }; -- cgit v1.2.3-18-g5258 From db7829c6cc32f3c0c9a324118d743acb1abff081 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 9 Sep 2010 18:17:26 +0200 Subject: x86, percpu: Optimize this_cpu_ptr Allow arches to implement __this_cpu_ptr, and provide an x86 version. Before: movq $foo, %rax movq %gs:this_cpu_off, %rdx addq %rdx, %rax After: movq $foo, %rax addq %gs:this_cpu_off, %rax The benefit is doing it in one less instruction and not clobbering a temporary register. tj: * Beefed up the comment a bit and renamed in-macro temp variable to match neighboring macros. * Folded fix for const pointer case found in linux-next. * Fixed sparse notation. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 08923b68476..ec643115bb5 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -60,9 +60,14 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define __raw_get_cpu_var(var) \ (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) -#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#ifndef __this_cpu_ptr #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) - +#endif +#ifdef CONFIG_DEBUG_PREEMPT +#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#else +#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) +#endif #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); -- cgit v1.2.3-18-g5258 From 677243d7494d09bfa782425f063a6013de53c35b Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 9 Sep 2010 18:17:26 +0200 Subject: percpu: Optimize __get_cpu_var() Redefine __get_cpu_var() using this_cpu_ptr() which can be arch-optimized. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index ec643115bb5..d17784ea37f 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -55,10 +55,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; */ #define per_cpu(var, cpu) \ (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) -#define __get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&(var), my_cpu_offset)) -#define __raw_get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) #ifndef __this_cpu_ptr #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) @@ -69,6 +65,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) #endif +#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) +#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var))) + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); #endif -- cgit v1.2.3-18-g5258 From 0da2f50944976e890ccc9436ab88c0da87788d02 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: ide: remove unnecessary blk_queue_flushing() test in do_ide_request() Unplugging from a request function doesn't really help much (it's already in the request_fn) and soon block layer will be updated to mix barrier sequence with other commands, so there's no need to treat queue flushing any differently. ide was the only user of blk_queue_flushing(). Remove it. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Acked-by: David S. Miller Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c54906f678..015375c7d03 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -521,7 +521,6 @@ enum { #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) -#define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) -- cgit v1.2.3-18-g5258 From 6958f145459ca7ad9715024de97445addacb8510 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: kill QUEUE_ORDERED_BY_TAG Nobody is making meaningful use of ORDERED_BY_TAG now and queue draining for barrier requests will be removed soon which will render the advantage of tag ordering moot. Kill ORDERED_BY_TAG. The following users are affected. * brd: converted to ORDERED_DRAIN. * virtio_blk: ORDERED_TAG path was already marked deprecated. Removed. * xen-blkfront: ORDERED_TAG case dropped. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Cc: Nick Piggin Cc: Michael S. Tsirkin Cc: Jeremy Fitzhardinge Cc: Chris Wright Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 015375c7d03..7077bc0d613 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -470,12 +470,7 @@ enum { * DRAIN : ordering by draining is enough * DRAIN_FLUSH : ordering by draining w/ pre and post flushes * DRAIN_FUA : ordering by draining w/ pre flush and FUA write - * TAG : ordering by tag is enough - * TAG_FLUSH : ordering by tag w/ pre and post flushes - * TAG_FUA : ordering by tag w/ pre flush and FUA write */ - QUEUE_ORDERED_BY_DRAIN = 0x01, - QUEUE_ORDERED_BY_TAG = 0x02, QUEUE_ORDERED_DO_PREFLUSH = 0x10, QUEUE_ORDERED_DO_BAR = 0x20, QUEUE_ORDERED_DO_POSTFLUSH = 0x40, @@ -483,8 +478,7 @@ enum { QUEUE_ORDERED_NONE = 0x00, - QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN | - QUEUE_ORDERED_DO_BAR, + QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | QUEUE_ORDERED_DO_PREFLUSH | QUEUE_ORDERED_DO_POSTFLUSH, @@ -492,15 +486,6 @@ enum { QUEUE_ORDERED_DO_PREFLUSH | QUEUE_ORDERED_DO_FUA, - QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG | - QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - /* * Ordered operation sequence */ -- cgit v1.2.3-18-g5258 From 4913efe456c987057e5d36a3f0a55422a9072cae Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: deprecate barrier and replace blk_queue_ordered() with blk_queue_flush() Barrier is deemed too heavy and will soon be replaced by FLUSH/FUA requests. Deprecate barrier. All REQ_HARDBARRIERs are failed with -EOPNOTSUPP and blk_queue_ordered() is replaced with simpler blk_queue_flush(). blk_queue_flush() takes combinations of REQ_FLUSH and FUA. If a device has write cache and can flush it, it should set REQ_FLUSH. If the device can handle FUA writes, it should also set REQ_FUA. All blk_queue_ordered() users are converted. * ORDERED_DRAIN is mapped to 0 which is the default value. * ORDERED_DRAIN_FLUSH is mapped to REQ_FLUSH. * ORDERED_DRAIN_FLUSH_FUA is mapped to REQ_FLUSH | REQ_FUA. Signed-off-by: Tejun Heo Acked-by: Boaz Harrosh Cc: Christoph Hellwig Cc: Nick Piggin Cc: Michael S. Tsirkin Cc: Jeremy Fitzhardinge Cc: Chris Wright Cc: FUJITA Tomonori Cc: Geert Uytterhoeven Cc: David S. Miller Cc: Alasdair G Kergon Cc: Pierre Ossman Cc: Stefan Weinhuber Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7077bc0d613..e97911d4dec 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -355,8 +355,10 @@ struct request_queue struct blk_trace *blk_trace; #endif /* - * reserved for flush operations + * for flush operations */ + unsigned int flush_flags; + unsigned int ordered, next_ordered, ordseq; int orderr, ordcolor; struct request pre_flush_rq, bar_rq, post_flush_rq; @@ -865,8 +867,8 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); +extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern int blk_queue_ordered(struct request_queue *, unsigned); extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); -- cgit v1.2.3-18-g5258 From 9cbbdca44ae1a6f512ea1e2be11ced8bbb9d430a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: remove spurious uses of REQ_HARDBARRIER REQ_HARDBARRIER is deprecated. Remove spurious uses in the following users. Please note that other than osdblk, all other uses were already spurious before deprecation. * osdblk: osdblk_rq_fn() won't receive any request with REQ_HARDBARRIER set. Remove the test for it. * pktcdvd: use of REQ_HARDBARRIER in pkt_generic_packet() doesn't mean anything. Removed. * aic7xxx_old: Setting MSG_ORDERED_Q_TAG on REQ_HARDBARRIER is spurious. Removed. * sas_scsi_host: Setting TASK_ATTR_ORDERED on REQ_HARDBARRIER is spurious. Removed. * scsi_tcq: The ordered tag path wasn't being used anyway. Removed. Signed-off-by: Tejun Heo Acked-by: Boaz Harrosh Cc: James Bottomley Cc: Peter Osterlund Signed-off-by: Jens Axboe --- include/scsi/scsi_tcq.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index 17231385cb3..d6e7994aa63 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -97,13 +97,9 @@ static inline void scsi_deactivate_tcq(struct scsi_device *sdev, int depth) static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg) { struct request *req = cmd->request; - struct scsi_device *sdev = cmd->device; if (blk_rq_tagged(req)) { - if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER) - *msg++ = MSG_ORDERED_TAG; - else - *msg++ = MSG_SIMPLE_TAG; + *msg++ = MSG_SIMPLE_TAG; *msg++ = req->tag; return 2; } -- cgit v1.2.3-18-g5258 From dd831006d5be7f74c3fe7aef82380c51c3637960 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: misc cleanups in barrier code Make the following cleanups in preparation of barrier/flush update. * blk_do_ordered() declaration is moved from include/linux/blkdev.h to block/blk.h. * blk_do_ordered() now returns pointer to struct request, with %NULL meaning "try the next request" and ERR_PTR(-EAGAIN) "try again later". The third case will be dropped with further changes. * In the initialization of proxy barrier request, data direction is already set by init_request_from_bio(). Drop unnecessary explicit REQ_WRITE setting and move init_request_from_bio() above REQ_FUA flag setting. * add_request() is collapsed into __make_request(). These changes don't make any functional difference. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e97911d4dec..996549d7192 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -869,7 +869,6 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); -- cgit v1.2.3-18-g5258 From 28e7d1845216538303bb95d679d8fd4de50e2f1a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: drop barrier ordering by queue draining Filesystems will take all the responsibilities for ordering requests around commit writes and will only indicate how the commit writes themselves should be handled by block layers. This patch drops barrier ordering by queue draining from block layer. Ordering by draining implementation was somewhat invasive to request handling. List of notable changes follow. * Each queue has 1 bit color which is flipped on each barrier issue. This is used to track whether a given request is issued before the current barrier or not. REQ_ORDERED_COLOR flag and coloring implementation in __elv_add_request() are removed. * Requests which shouldn't be processed yet for draining were stalled by returning -EAGAIN from blk_do_ordered() according to the test result between blk_ordered_req_seq() and blk_blk_ordered_cur_seq(). This logic is removed. * Draining completion logic in elv_completed_request() removed. * All barrier sequence requests were queued to request queue and then trckled to lower layer according to progress and thus maintaining request orders during requeue was necessary. This is replaced by queueing the next request in the barrier sequence only after the current one is complete from blk_ordered_complete_seq(), which removes the need for multiple proxy requests in struct request_queue and the request sorting logic in the ELEVATOR_INSERT_REQUEUE path of elv_insert(). * As barriers no longer have ordering constraints, there's no need to dump the whole elevator onto the dispatch queue on each barrier. Insert barriers at the front instead. * If other barrier requests come to the front of the dispatch queue while one is already in progress, they are stored in q->pending_barriers and restored to dispatch queue one-by-one after each barrier completion from blk_ordered_complete_seq(). Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 -- include/linux/blkdev.h | 19 ++++++++----------- 2 files changed, 8 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ca83a97c971..9192282b425 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -143,7 +143,6 @@ enum rq_flag_bits { __REQ_FAILED, /* set if the request failed */ __REQ_QUIET, /* don't worry about errors */ __REQ_PREEMPT, /* set for "ide_preempt" requests */ - __REQ_ORDERED_COLOR, /* is before or after barrier */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ __REQ_INTEGRITY, /* integrity metadata has been remapped */ @@ -184,7 +183,6 @@ enum rq_flag_bits { #define REQ_FAILED (1 << __REQ_FAILED) #define REQ_QUIET (1 << __REQ_QUIET) #define REQ_PREEMPT (1 << __REQ_PREEMPT) -#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) #define REQ_ALLOCED (1 << __REQ_ALLOCED) #define REQ_COPY_USER (1 << __REQ_COPY_USER) #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 996549d7192..20a3710a481 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -360,9 +360,10 @@ struct request_queue unsigned int flush_flags; unsigned int ordered, next_ordered, ordseq; - int orderr, ordcolor; - struct request pre_flush_rq, bar_rq, post_flush_rq; + int orderr; + struct request bar_rq; struct request *orig_bar_rq; + struct list_head pending_barriers; struct mutex sysfs_lock; @@ -491,12 +492,11 @@ enum { /* * Ordered operation sequence */ - QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ - QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ - QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ - QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ - QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ - QUEUE_ORDSEQ_DONE = 0x20, + QUEUE_ORDSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_ORDSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_ORDSEQ_BAR = (1 << 2), /* barrier write in progress */ + QUEUE_ORDSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_ORDSEQ_DONE = (1 << 4), }; #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) @@ -869,9 +869,6 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern unsigned blk_ordered_cur_seq(struct request_queue *); -extern unsigned blk_ordered_req_seq(struct request *); -extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); -- cgit v1.2.3-18-g5258 From dd4c133f387c48f526022860ad70354637a80f4c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: rename barrier/ordered to flush With ordering requirements dropped, barrier and ordered are misnomers. Now all block layer does is sequencing FLUSH and FUA. Rename them to flush. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 20a3710a481..1cd83ec077d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,13 +357,13 @@ struct request_queue /* * for flush operations */ + unsigned int ordered, next_ordered; unsigned int flush_flags; - - unsigned int ordered, next_ordered, ordseq; - int orderr; - struct request bar_rq; - struct request *orig_bar_rq; - struct list_head pending_barriers; + unsigned int flush_seq; + int flush_err; + struct request flush_rq; + struct request *orig_flush_rq; + struct list_head pending_flushes; struct mutex sysfs_lock; @@ -490,13 +490,13 @@ enum { QUEUE_ORDERED_DO_FUA, /* - * Ordered operation sequence + * FLUSH/FUA sequences. */ - QUEUE_ORDSEQ_STARTED = (1 << 0), /* flushing in progress */ - QUEUE_ORDSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ - QUEUE_ORDSEQ_BAR = (1 << 2), /* barrier write in progress */ - QUEUE_ORDSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ - QUEUE_ORDSEQ_DONE = (1 << 4), + QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ + QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_FSEQ_DONE = (1 << 4), }; #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) -- cgit v1.2.3-18-g5258 From 4fed947cb311e5aa51781d316cefca836352f6ce Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: implement REQ_FLUSH/FUA based interface for FLUSH/FUA requests Now that the backend conversion is complete, export sequenced FLUSH/FUA capability through REQ_FLUSH/FUA flags. REQ_FLUSH means the device cache should be flushed before executing the request. REQ_FUA means that the data in the request should be on non-volatile media on completion. Block layer will choose the correct way of implementing the semantics and execute it. The request may be passed to the device directly if the device can handle it; otherwise, it will be sequenced using one or more proxy requests. Devices will never see REQ_FLUSH and/or FUA which it doesn't support. Also, unlike the original REQ_HARDBARRIER, REQ_FLUSH/FUA requests are never failed with -EOPNOTSUPP. If the underlying device doesn't support FLUSH/FUA, the block layer simply make those noop. IOW, it no longer distinguishes between writeback cache which doesn't support cache flush and writethrough/no cache. Devices which have WB cache w/o flush are very difficult to come by these days and there's nothing much we can do anyway, so it doesn't make sense to require everyone to implement -EOPNOTSUPP handling. This will simplify filesystems and block drivers as they can drop -EOPNOTSUPP retry logic for barriers. * QUEUE_ORDERED_* are removed and QUEUE_FSEQ_* are moved into blk-flush.c. * REQ_FLUSH w/o data can also be directly passed to drivers without sequencing but some drivers assume that zero length requests don't have rq->bio which isn't true for these requests requiring the use of proxy requests. * REQ_COMMON_MASK now includes REQ_FLUSH | REQ_FUA so that they are copied from bio to request. * WRITE_BARRIER is marked deprecated and WRITE_FLUSH, WRITE_FUA and WRITE_FLUSH_FUA are added. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- include/linux/blkdev.h | 38 ++------------------------------------ include/linux/buffer_head.h | 2 +- include/linux/fs.h | 19 +++++++++++++------ 4 files changed, 17 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 9192282b425..179799479e6 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -167,7 +167,7 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ - REQ_META| REQ_DISCARD | REQ_NOIDLE) + REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) #define REQ_UNPLUG (1 << __REQ_UNPLUG) #define REQ_RAHEAD (1 << __REQ_RAHEAD) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1cd83ec077d..8ef705f800a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,7 +357,6 @@ struct request_queue /* * for flush operations */ - unsigned int ordered, next_ordered; unsigned int flush_flags; unsigned int flush_seq; int flush_err; @@ -465,40 +464,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) __clear_bit(flag, &q->queue_flags); } -enum { - /* - * Hardbarrier is supported with one of the following methods. - * - * NONE : hardbarrier unsupported - * DRAIN : ordering by draining is enough - * DRAIN_FLUSH : ordering by draining w/ pre and post flushes - * DRAIN_FUA : ordering by draining w/ pre flush and FUA write - */ - QUEUE_ORDERED_DO_PREFLUSH = 0x10, - QUEUE_ORDERED_DO_BAR = 0x20, - QUEUE_ORDERED_DO_POSTFLUSH = 0x40, - QUEUE_ORDERED_DO_FUA = 0x80, - - QUEUE_ORDERED_NONE = 0x00, - - QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - - /* - * FLUSH/FUA sequences. - */ - QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ - QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ - QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ - QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ - QUEUE_FSEQ_DONE = (1 << 4), -}; - #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) @@ -578,7 +543,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync) * it already be started by driver. */ #define RQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) + (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \ + REQ_FLUSH | REQ_FUA) #define rq_mergeable(rq) \ (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ (((rq)->cmd_flags & REQ_DISCARD) || \ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index ec94c12f21d..fc999f583fd 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,7 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Eopnotsupp, /* operation not supported (barrier) */ + BH_Eopnotsupp, /* DEPRECATED: operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b61475..352c4862738 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -135,12 +135,13 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all - * previously submitted writes must be safely on storage - * before this one is started. Also guarantees that when - * this write is complete, it itself is also safely on - * storage. Prevents reordering of writes on both sides - * of this IO. + * WRITE_BARRIER DEPRECATED. Always fails. Use FLUSH/FUA instead. + * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. + * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on + * non-volatile media on completion. + * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded + * by a cache flush and data is guaranteed to be on + * non-volatile media on completion. * */ #define RW_MASK REQ_WRITE @@ -158,6 +159,12 @@ struct inodes_stat_t { #define WRITE_META (WRITE | REQ_META) #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_HARDBARRIER) +#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH) +#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FUA) +#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH | REQ_FUA) /* * These aren't really reads or writes, they pass down information about -- cgit v1.2.3-18-g5258 From 3a2edd0d6ddbd5fa3b389ea6db811285415ce6c8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:18 +0200 Subject: block: make __blk_rq_prep_clone() copy most command flags Currently __blk_rq_prep_clone() copies only REQ_WRITE and REQ_DISCARD. There's no reason to omit other command flags and REQ_FUA needs to be copied to implement FUA support in request-based dm. REQ_COMMON_MASK which specifies flags to be copied from bio to request already identifies all the command flags. Define REQ_CLONE_MASK to be the same as REQ_COMMON_MASK for clarity and make __blk_rq_prep_clone() copy all flags in the mask. Signed-off-by: Tejun Heo Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 179799479e6..36edadf5b41 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -168,6 +168,7 @@ enum rq_flag_bits { #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) +#define REQ_CLONE_MASK REQ_COMMON_MASK #define REQ_UNPLUG (1 << __REQ_UNPLUG) #define REQ_RAHEAD (1 << __REQ_RAHEAD) -- cgit v1.2.3-18-g5258 From 2cf6d26a354ab6362e301b5a323832b02867df47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:10 -0400 Subject: block: pass gfp_mask and flags to sb_issue_discard We'll need to get rid of the BLKDEV_IFL_BARRIER flag, and to facilitate that and to make the interface less confusing pass all flags explicitly. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8ef705f800a..6b305eb4a34 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -881,13 +881,12 @@ extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); -static inline int sb_issue_discard(struct super_block *sb, - sector_t block, sector_t nr_blocks) +static inline int sb_issue_discard(struct super_block *sb, sector_t block, + sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { - block <<= (sb->s_blocksize_bits - 9); - nr_blocks <<= (sb->s_blocksize_bits - 9); - return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, - BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); + return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), + nr_blocks << (sb->s_blocksize_bits - 9), + gfp_mask, flags); } extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); -- cgit v1.2.3-18-g5258 From 31725e65c7214b52b607eba705fc4f306be4d5a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:21 -0400 Subject: block: remove the WRITE_BARRIER flag It's unused now. Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 352c4862738..d6add69bc17 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -135,7 +135,6 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * WRITE_BARRIER DEPRECATED. Always fails. Use FLUSH/FUA instead. * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on * non-volatile media on completion. @@ -157,8 +156,6 @@ struct inodes_stat_t { #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) #define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC) #define WRITE_META (WRITE | REQ_META) -#define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ - REQ_HARDBARRIER) #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_FLUSH) #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ -- cgit v1.2.3-18-g5258 From 8c5553678237b7121355108e03c36086037d8975 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:22 -0400 Subject: block: remove the BLKDEV_IFL_BARRIER flag Remove support for barriers on discards, which is unused now. Also remove the DISCARD_NOBARRIER I/O type in favour of just setting the rw flags up locally in blkdev_issue_discard. tj: Also remove DISCARD_SECURE and use REQ_SECURE directly. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- include/linux/fs.h | 8 -------- 2 files changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6b305eb4a34..cfcb3a61060 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -869,11 +869,9 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, } enum{ BLKDEV_WAIT, /* wait for completion */ - BLKDEV_BARRIER, /* issue request with barrier */ BLKDEV_SECURE, /* secure discard */ }; #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) -#define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) #define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, unsigned long); diff --git a/include/linux/fs.h b/include/linux/fs.h index d6add69bc17..6b0f6e9993a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -163,14 +163,6 @@ struct inodes_stat_t { #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_FLUSH | REQ_FUA) -/* - * These aren't really reads or writes, they pass down information about - * parts of device that are now unused by the file system. - */ -#define DISCARD_NOBARRIER (WRITE | REQ_DISCARD) -#define DISCARD_BARRIER (WRITE | REQ_DISCARD | REQ_HARDBARRIER) -#define DISCARD_SECURE (DISCARD_NOBARRIER | REQ_SECURE) - #define SEL_IN 1 #define SEL_OUT 2 #define SEL_EX 4 -- cgit v1.2.3-18-g5258 From 0edd55faea7c8081bc826234b917501738a6218f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:23 -0400 Subject: block: remove the BH_Eopnotsupp flag This flag was only set for barrier buffers, which we don't submit anymore. Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index fc999f583fd..dd1b25b2641 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,6 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Eopnotsupp, /* DEPRECATED: operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ @@ -124,7 +123,6 @@ BUFFER_FNS(Async_Write, async_write) BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) BUFFER_FNS(Write_EIO, write_io_error) -BUFFER_FNS(Eopnotsupp, eopnotsupp) BUFFER_FNS(Unwritten, unwritten) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) -- cgit v1.2.3-18-g5258 From 30ca22c70e3ef0a96ff84de69cd7e8561b416cb2 Mon Sep 17 00:00:00 2001 From: "Patrick J. LoPresti" Date: Thu, 22 Jul 2010 15:03:41 -0700 Subject: ext3/ext4: Factor out disk addressability check As part of adding support for OCFS2 to mount huge volumes, we need to check that the sector_t and page cache of the system are capable of addressing the entire volume. An identical check already appears in ext3 and ext4. This patch moves the addressability check into its own function in fs/libfs.c and modifies ext3 and ext4 to invoke it. [Edited to -EINVAL instead of BUG_ON() for bad blocksize_bits -- Joel] Signed-off-by: Patrick LoPresti Cc: linux-ext4@vger.kernel.org Acked-by: Andreas Dilger Signed-off-by: Joel Becker --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b61475..1a759f40ab9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2374,6 +2374,8 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, extern int generic_file_fsync(struct file *, int); +extern int generic_check_addressable(unsigned, u64); + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *); -- cgit v1.2.3-18-g5258 From c8bf1336824ebd698d37b71763e1c43190f2229a Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 10 Sep 2010 20:07:38 +0200 Subject: Consolidate min_not_zero We have several users of min_not_zero, each of them using their own definition. Move the define to kernel.h. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/kernel.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b0a35e6bc6..f5df2f4acb0 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -640,6 +640,16 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } (void) (&_max1 == &_max2); \ _max1 > _max2 ? _max1 : _max2; }) +/** + * min_not_zero - return the minimum that is _not_ zero, unless both are zero + * @x: value1 + * @y: value2 + */ +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) + /** * clamp - return a value clamped to a given range with strict typechecking * @val: current value -- cgit v1.2.3-18-g5258 From 13f05c8d8e98bbdce89158bfdb2e380940695a88 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 10 Sep 2010 20:50:10 +0200 Subject: block/scsi: Provide a limit on the number of integrity segments Some controllers have a hardware limit on the number of protection information scatter-gather list segments they can handle. Introduce a max_integrity_segments limit in the block layer and provide a new scsi_host_template setting that allows HBA drivers to provide a value suitable for the hardware. Add support for honoring the integrity segment limit when merging both bios and requests. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++++ include/linux/blkdev.h | 33 +++++++++++++++++++++++++++++---- include/scsi/scsi.h | 6 ++++++ include/scsi/scsi_host.h | 7 +++++++ 4 files changed, 46 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 5274103434a..2c3fd742160 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -496,6 +496,10 @@ static inline struct bio *bio_list_get(struct bio_list *bl) #define bip_for_each_vec(bvl, bip, i) \ __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) +#define bio_for_each_integrity_vec(_bvl, _bio, _iter) \ + for_each_bio(_bio) \ + bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) + #define bio_integrity(bio) (bio->bi_integrity != NULL) extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c54906f678..7e661106270 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -124,6 +124,9 @@ struct request { * physical address coalescing is performed. */ unsigned short nr_phys_segments; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + unsigned short nr_integrity_segments; +#endif unsigned short ioprio; @@ -243,6 +246,7 @@ struct queue_limits { unsigned short logical_block_size; unsigned short max_segments; + unsigned short max_integrity_segments; unsigned char misaligned; unsigned char discard_misaligned; @@ -1213,8 +1217,13 @@ struct blk_integrity { extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); extern void blk_integrity_unregister(struct gendisk *); extern int blk_integrity_compare(struct gendisk *, struct gendisk *); -extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); -extern int blk_rq_count_integrity_sg(struct request *); +extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, + struct scatterlist *); +extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); +extern int blk_integrity_merge_rq(struct request_queue *, struct request *, + struct request *); +extern int blk_integrity_merge_bio(struct request_queue *, struct request *, + struct bio *); static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) @@ -1235,16 +1244,32 @@ static inline int blk_integrity_rq(struct request *rq) return bio_integrity(rq->bio); } +static inline void blk_queue_max_integrity_segments(struct request_queue *q, + unsigned int segs) +{ + q->limits.max_integrity_segments = segs; +} + +static inline unsigned short +queue_max_integrity_segments(struct request_queue *q) +{ + return q->limits.max_integrity_segments; +} + #else /* CONFIG_BLK_DEV_INTEGRITY */ #define blk_integrity_rq(rq) (0) -#define blk_rq_count_integrity_sg(a) (0) -#define blk_rq_map_integrity_sg(a, b) (0) +#define blk_rq_count_integrity_sg(a, b) (0) +#define blk_rq_map_integrity_sg(a, b, c) (0) #define bdev_get_integrity(a) (0) #define blk_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) #define blk_integrity_unregister(a) do { } while (0); +#define blk_queue_max_integrity_segments(a, b) do { } while (0); +#define queue_max_integrity_segments(a) (0) +#define blk_integrity_merge_rq(a, b, c) (0) +#define blk_integrity_merge_bio(a, b, c) (0) #endif /* CONFIG_BLK_DEV_INTEGRITY */ diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 8fcb6e0e9e7..d63533a4a59 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -31,6 +31,12 @@ struct scsi_cmnd; #define SCSI_MAX_SG_CHAIN_SEGMENTS SCSI_MAX_SG_SEGMENTS #endif +/* + * DIX-capable adapters effectively support infinite chaining for the + * protection information scatterlist + */ +#define SCSI_MAX_PROT_SG_SEGMENTS 0xFFFF + /* * Special value for scanning to specify scanning or rescanning of all * possible channels, (target) ids, or luns on a given shost. diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index b7bdecb7b76..d0a6a845f20 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -388,6 +388,7 @@ struct scsi_host_template { * of scatter-gather. */ unsigned short sg_tablesize; + unsigned short sg_prot_tablesize; /* * Set this if the host adapter has limitations beside segment count. @@ -599,6 +600,7 @@ struct Scsi_Host { int can_queue; short cmd_per_lun; short unsigned int sg_tablesize; + short unsigned int sg_prot_tablesize; short unsigned int max_sectors; unsigned long dma_boundary; /* @@ -823,6 +825,11 @@ static inline unsigned int scsi_host_get_prot(struct Scsi_Host *shost) return shost->prot_capabilities; } +static inline int scsi_host_prot_dma(struct Scsi_Host *shost) +{ + return shost->prot_capabilities >= SHOST_DIX_TYPE0_PROTECTION; +} + static inline unsigned int scsi_host_dif_capable(struct Scsi_Host *shost, unsigned int target_type) { static unsigned char cap[] = { 0, -- cgit v1.2.3-18-g5258 From 637bbdc5b83615ef9f45f50399d1c7f27473c713 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Mon, 13 Sep 2010 20:19:03 +0800 Subject: sched: Remove unused PF_ALIGNWARN flag PF_ALIGNWARN is not implemented and it is for 486 as the comment. It is not likely someone will implement this flag feature. So here remove this flag and leave the valuable 0x00000001 for future use. Signed-off-by: Dave Young Cc: Peter Zijlstra Cc: Linus Torvalds LKML-Reference: <20100913121903.GB22238@darkstar> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b51c53c285b..cdf56693ecb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1682,8 +1682,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * /* * Per process flags */ -#define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */ - /* Not implemented yet, only for 486*/ #define PF_STARTING 0x00000002 /* being created */ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ -- cgit v1.2.3-18-g5258 From ceee42714cf382e9bb9ab71b846ad49497b29d6c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: serio_driver - mark id_table and description as const Memory pointed to by these fields is not supposed to change. Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/serio.h b/include/linux/serio.h index b5552568178..a31c95a3171 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -55,9 +55,9 @@ struct serio { struct serio_driver { void *private; - char *description; + const char *description; - struct serio_device_id *id_table; + const struct serio_device_id *id_table; bool manual_bind; void (*write_wakeup)(struct serio *); -- cgit v1.2.3-18-g5258 From 7fc49c498c18795d35864bee433caf419bd013b2 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: serio_driver - drop private pointer Nobody uses it anymore. Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/serio.h b/include/linux/serio.h index a31c95a3171..111ad501b05 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -54,7 +54,6 @@ struct serio { #define to_serio_port(d) container_of(d, struct serio, dev) struct serio_driver { - void *private; const char *description; const struct serio_device_id *id_table; -- cgit v1.2.3-18-g5258 From 37b0bb112b7e3ffa5015c4305a934e861b4e2e53 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: gameport_driver - mark description as const pointer Memory pointed to by the pointer should not change. Signed-off-by: Dmitry Torokhov --- include/linux/gameport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 361d1cc288d..632d1265fbe 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -55,7 +55,7 @@ struct gameport { struct gameport_driver { void *private; - char *description; + const char *description; int (*connect)(struct gameport *, struct gameport_driver *drv); int (*reconnect)(struct gameport *); -- cgit v1.2.3-18-g5258 From 528487081aad32da85bf99802bdb7af32f4922b9 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: gameport_driver - drop private pointer Nobody uses it anymore. Signed-off-by: Dmitry Torokhov --- include/linux/gameport.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 632d1265fbe..b65a6f47277 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -53,8 +53,6 @@ struct gameport { #define to_gameport_port(d) container_of(d, struct gameport, dev) struct gameport_driver { - - void *private; const char *description; int (*connect)(struct gameport *, struct gameport_driver *drv); -- cgit v1.2.3-18-g5258 From 2944f45d9db851e186774df7c9cbf075f4a585c6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 14 Sep 2010 18:37:20 +0200 Subject: mac80211: add a note about iterating interfaces during add_interface() Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f91fc331369..19a5cb4a658 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2293,6 +2293,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted); * This function allows the iterator function to sleep, when the iterator * function is atomic @ieee80211_iterate_active_interfaces_atomic can * be used. + * Does not iterate over a new interface during add_interface() * * @hw: the hardware struct of which the interfaces should be iterated over * @iterator: the iterator function to call @@ -2310,6 +2311,7 @@ void ieee80211_iterate_active_interfaces(struct ieee80211_hw *hw, * hardware that are currently active and calls the callback for them. * This function requires the iterator callback function to be atomic, * if that is not desired, use @ieee80211_iterate_active_interfaces instead. + * Does not iterate over a new interface during add_interface() * * @hw: the hardware struct of which the interfaces should be iterated over * @iterator: the iterator function to call, cannot sleep -- cgit v1.2.3-18-g5258 From 55b1804c678e679e1018671bd40b583eab124689 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 13 Sep 2010 18:24:01 +0000 Subject: net/irda: Use static const char * const where possible Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/irda/irlan_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/irda/irlan_event.h b/include/net/irda/irlan_event.h index 6d9539f0580..018b5a77e61 100644 --- a/include/net/irda/irlan_event.h +++ b/include/net/irda/irlan_event.h @@ -67,7 +67,7 @@ typedef enum { IRLAN_WATCHDOG_TIMEOUT, } IRLAN_EVENT; -extern char *irlan_state[]; +extern const char * const irlan_state[]; void irlan_do_client_event(struct irlan_cb *self, IRLAN_EVENT event, struct sk_buff *skb); -- cgit v1.2.3-18-g5258 From 38a81da2205f94e8a2a834b51a6b99c91fc7c2e8 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Mon, 13 Sep 2010 13:01:20 -0700 Subject: perf events: Clean up pid passing The kernel perf event creation path shouldn't use find_task_by_vpid() because a vpid exists in a specific namespace. find_task_by_vpid() uses current's pid namespace which isn't always the correct namespace to use for the vpid in all the places perf_event_create_kernel_counter() (and thus find_get_context()) is called. The goal is to clean up pid namespace handling and prevent bugs like: https://bugzilla.kernel.org/show_bug.cgi?id=17281 Instead of using pids switch find_get_context() to use task struct pointers directly. The syscall is responsible for resolving the pid to a task struct. This moves the pid namespace resolution into the syscall much like every other syscall that takes pid parameters. Signed-off-by: Matt Helsley Signed-off-by: Peter Zijlstra Cc: Robin Green Cc: Prasad Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Will Deacon Cc: Mahesh Salgaonkar LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 93bf53aa50e..39d8860b268 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -902,7 +902,7 @@ extern int perf_event_release_kernel(struct perf_event *event); extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, - pid_t pid, + struct task_struct *task, perf_overflow_handler_t callback); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); -- cgit v1.2.3-18-g5258 From 144177991ca624841ddbd1e7edff958fc0f6d1fe Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 15 Sep 2010 13:08:27 +0200 Subject: block: fix an address space warning in blk-map.c Change type of 2nd parameter of blk_rq_aligned() into unsigned long and remove unnecessary casting. Now we can call it with 'uaddr' instead of 'ubuf' in __blk_rq_map_user() so that it can remove following warnings from sparse: block/blk-map.c:57:31: warning: incorrect type in argument 2 (different address spaces) block/blk-map.c:57:31: expected void *addr block/blk-map.c:57:31: got void [noderef] *ubuf However blk_rq_map_kern() needs one more local variable to handle it. Signed-off-by: Namhyung Kim Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7e661106270..780824edac1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1097,11 +1097,11 @@ static inline int queue_dma_alignment(struct request_queue *q) return q ? q->dma_alignment : 511; } -static inline int blk_rq_aligned(struct request_queue *q, void *addr, +static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, unsigned int len) { unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; - return !((unsigned long)addr & alignment) && !(len & alignment); + return !(addr & alignment) && !(len & alignment); } /* assumes size > 256 */ -- cgit v1.2.3-18-g5258 From 6d1d8050b4bc89d0165d29b58e894aeba2564a97 Mon Sep 17 00:00:00 2001 From: Will Drewry Date: Tue, 31 Aug 2010 15:47:05 -0500 Subject: block, partition: add partition_meta_info to hd_struct I'm reposting this patch series as v4 since there have been no additional comments, and I cleaned up one extra bit of unneeded code (in 3/3). The patches are against Linus's tree: 2bfc96a127bc1cc94d26bfaa40159966064f9c8c (2.6.36-rc3). Would this patchset be suitable for inclusion in an mm branch? This changes adds a partition_meta_info struct which itself contains a union of structures that provide partition table specific metadata. This change leaves the union empty. The subsequent patch includes an implementation for CONFIG_EFI_PARTITION-based metadata. Signed-off-by: Will Drewry Signed-off-by: Jens Axboe --- include/linux/genhd.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5f2f4c4d8fb..66e26b5a153 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_BLOCK @@ -86,7 +87,15 @@ struct disk_stats { unsigned long io_ticks; unsigned long time_in_queue; }; - + +#define PARTITION_META_INFO_VOLNAMELTH 64 +#define PARTITION_META_INFO_UUIDLTH 16 + +struct partition_meta_info { + u8 uuid[PARTITION_META_INFO_UUIDLTH]; /* always big endian */ + u8 volname[PARTITION_META_INFO_VOLNAMELTH]; +}; + struct hd_struct { sector_t start_sect; sector_t nr_sects; @@ -95,6 +104,7 @@ struct hd_struct { struct device __dev; struct kobject *holder_dir; int policy, partno; + struct partition_meta_info *info; #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; #endif @@ -181,6 +191,30 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) return NULL; } +static inline void part_pack_uuid(const u8 *uuid_str, u8 *to) +{ + int i; + for (i = 0; i < 16; ++i) { + *to++ = (hex_to_bin(*uuid_str) << 4) | + (hex_to_bin(*(uuid_str + 1))); + uuid_str += 2; + switch (i) { + case 3: + case 5: + case 7: + case 9: + uuid_str++; + continue; + } + } +} + +static inline char *part_unpack_uuid(const u8 *uuid, char *out) +{ + sprintf(out, "%pU", uuid); + return out; +} + static inline int disk_max_parts(struct gendisk *disk) { if (disk->flags & GENHD_FL_EXT_DEVT) @@ -342,6 +376,19 @@ static inline int part_in_flight(struct hd_struct *part) return part->in_flight[0] + part->in_flight[1]; } +static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk) +{ + if (disk) + return kzalloc_node(sizeof(struct partition_meta_info), + GFP_KERNEL, disk->node_id); + return kzalloc(sizeof(struct partition_meta_info), GFP_KERNEL); +} + +static inline void free_part_info(struct hd_struct *part) +{ + kfree(part->info); +} + /* block/blk-core.c */ extern void part_round_stats(int cpu, struct hd_struct *part); @@ -533,7 +580,9 @@ extern int disk_expand_part_tbl(struct gendisk *disk, int target); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); extern struct hd_struct * __must_check add_partition(struct gendisk *disk, int partno, sector_t start, - sector_t len, int flags); + sector_t len, int flags, + struct partition_meta_info + *info); extern void delete_partition(struct gendisk *, int); extern void printk_all_partitions(void); -- cgit v1.2.3-18-g5258 From 3661ca66a42e306aaf53246fb75aec1ea01be0f0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 15 Sep 2010 13:05:29 -0700 Subject: memblock: Fix section mismatch warnings Stephen found a bunch of section mismatch warnings with the new memblock changes. Use __init_memblock to replace __init in memblock.c and remove __init in memblock.h. We should not use __init in header files. Reported-by: Stephen Rothwell Tested-by: Stephen Rothwell Signed-off-by: Yinghai Lu Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt LKML-Reference: <4C912709.2090201@kernel.org> Signed-off-by: Ingo Molnar --- include/linux/memblock.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 7d285271130..5096458c753 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -51,39 +51,39 @@ u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align); int memblock_free_reserved_regions(void); int memblock_reserve_reserved_regions(void); -extern void __init memblock_init(void); -extern void __init memblock_analyze(void); +extern void memblock_init(void); +extern void memblock_analyze(void); extern long memblock_add(phys_addr_t base, phys_addr_t size); extern long memblock_remove(phys_addr_t base, phys_addr_t size); -extern long __init memblock_free(phys_addr_t base, phys_addr_t size); -extern long __init memblock_reserve(phys_addr_t base, phys_addr_t size); +extern long memblock_free(phys_addr_t base, phys_addr_t size); +extern long memblock_reserve(phys_addr_t base, phys_addr_t size); /* The numa aware allocator is only available if * CONFIG_ARCH_POPULATES_NODE_MAP is set */ -extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, +extern phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); -extern phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, +extern phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); -extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); +extern phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align); /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) #define MEMBLOCK_ALLOC_ACCESSIBLE 0 -extern phys_addr_t __init memblock_alloc_base(phys_addr_t size, +extern phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr); -extern phys_addr_t __init __memblock_alloc_base(phys_addr_t size, +extern phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr); -extern phys_addr_t __init memblock_phys_mem_size(void); +extern phys_addr_t memblock_phys_mem_size(void); extern phys_addr_t memblock_end_of_DRAM(void); -extern void __init memblock_enforce_memory_limit(phys_addr_t memory_limit); +extern void memblock_enforce_memory_limit(phys_addr_t memory_limit); extern int memblock_is_memory(phys_addr_t addr); extern int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); -extern int __init memblock_is_reserved(phys_addr_t addr); +extern int memblock_is_reserved(phys_addr_t addr); extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); extern void memblock_dump_all(void); -- cgit v1.2.3-18-g5258 From e035587305011432ee07f69f9738b3c7ef7f3684 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 14 Sep 2010 09:10:03 +0000 Subject: ethtool: Complete kernel-doc comments for RX flow filter and hash control There are now several interfaces within the ethtool API for getting and setting RX flow filtering and hashing behaviour, most of which are poorly documented. This adds kernel-doc comments for all these interfaces, based on the existing incomplete comments and on the initial implementations. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 129 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 113 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 991269e5b15..4b3ba05b11a 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -314,9 +314,20 @@ enum ethtool_flags { }; /* The following structures are for supporting RX network flow - * classification configuration. Note, all multibyte fields, e.g., - * ip4src, ip4dst, psrc, pdst, spi, etc. are expected to be in network - * byte order. + * classification and RX n-tuple configuration. Note, all multibyte + * fields, e.g., ip4src, ip4dst, psrc, pdst, spi, etc. are expected to + * be in network byte order. + */ + +/** + * struct ethtool_tcpip4_spec - flow specification for TCP/IPv4 etc. + * @ip4src: Source host + * @ip4dst: Destination host + * @psrc: Source port + * @pdst: Destination port + * @tos: Type-of-service + * + * This can be used to specify a TCP/IPv4, UDP/IPv4 or SCTP/IPv4 flow. */ struct ethtool_tcpip4_spec { __be32 ip4src; @@ -326,6 +337,15 @@ struct ethtool_tcpip4_spec { __u8 tos; }; +/** + * struct ethtool_ah_espip4_spec - flow specification for IPsec/IPv4 + * @ip4src: Source host + * @ip4dst: Destination host + * @spi: Security parameters index + * @tos: Type-of-service + * + * This can be used to specify an IPsec transport or tunnel over IPv4. + */ struct ethtool_ah_espip4_spec { __be32 ip4src; __be32 ip4dst; @@ -348,6 +368,15 @@ struct ethtool_ether_spec { #define ETH_RX_NFC_IP4 1 #define ETH_RX_NFC_IP6 2 +/** + * struct ethtool_usrip4_spec - general flow specification for IPv4 + * @ip4src: Source host + * @ip4dst: Destination host + * @l4_4_bytes: First 4 bytes of transport (layer 4) header + * @tos: Type-of-service + * @ip_ver: Value must be %ETH_RX_NFC_IP4; mask must be 0 + * @proto: Transport protocol number; mask must be 0 + */ struct ethtool_usrip4_spec { __be32 ip4src; __be32 ip4dst; @@ -357,6 +386,15 @@ struct ethtool_usrip4_spec { __u8 proto; }; +/** + * struct ethtool_rx_flow_spec - specification for RX flow filter + * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW + * @h_u: Flow fields to match (dependent on @flow_type) + * @m_u: Masks for flow field bits to be ignored + * @ring_cookie: RX ring/queue index to deliver to, or %RX_CLS_FLOW_DISC + * if packets should be discarded + * @location: Index of filter in hardware table + */ struct ethtool_rx_flow_spec { __u32 flow_type; union { @@ -369,32 +407,87 @@ struct ethtool_rx_flow_spec { struct ethtool_ether_spec ether_spec; struct ethtool_usrip4_spec usr_ip4_spec; __u8 hdata[64]; - } h_u, m_u; /* entry, mask */ + } h_u, m_u; __u64 ring_cookie; __u32 location; }; +/** + * struct ethtool_rxnfc - command to get or set RX flow classification rules + * @cmd: Specific command number - %ETHTOOL_GRXFH, %ETHTOOL_SRXFH, + * %ETHTOOL_GRXRINGS, %ETHTOOL_GRXCLSRLCNT, %ETHTOOL_GRXCLSRULE, + * %ETHTOOL_GRXCLSRLALL, %ETHTOOL_SRXCLSRLDEL or %ETHTOOL_SRXCLSRLINS + * @flow_type: Type of flow to be affected, e.g. %TCP_V4_FLOW + * @data: Command-dependent value + * @fs: Flow filter specification + * @rule_cnt: Number of rules to be affected + * @rule_locs: Array of valid rule indices + * + * For %ETHTOOL_GRXFH and %ETHTOOL_SRXFH, @data is a bitmask indicating + * the fields included in the flow hash, e.g. %RXH_IP_SRC. The following + * structure fields must not be used. + * + * For %ETHTOOL_GRXRINGS, @data is set to the number of RX rings/queues + * on return. + * + * For %ETHTOOL_GRXCLSRLCNT, @rule_cnt is set to the number of defined + * rules on return. + * + * For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the index of an + * existing filter rule on entry and @fs contains the rule on return. + * + * For %ETHTOOL_GRXCLSRLALL, @rule_cnt specifies the array size of the + * user buffer for @rule_locs on entry. On return, @data is the size + * of the filter table and @rule_locs contains the indices of the + * defined rules. + * + * For %ETHTOOL_SRXCLSRLINS, @fs specifies the filter rule to add or + * update. @fs.@location specifies the index to use and must not be + * ignored. + * + * For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the index of an + * existing filter rule on entry. + * + * Implementation of indexed classification rules generally requires a + * TCAM. + */ struct ethtool_rxnfc { __u32 cmd; __u32 flow_type; - /* The rx flow hash value or the rule DB size */ __u64 data; - /* The following fields are not valid and must not be used for - * the ETHTOOL_{G,X}RXFH commands. */ struct ethtool_rx_flow_spec fs; __u32 rule_cnt; __u32 rule_locs[0]; }; +/** + * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection + * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR + * @size: On entry, the array size of the user buffer. On return from + * %ETHTOOL_GRXFHINDIR, the array size of the hardware indirection table. + * @ring_index: RX ring/queue index for each hash value + */ struct ethtool_rxfh_indir { __u32 cmd; - /* On entry, this is the array size of the user buffer. On - * return from ETHTOOL_GRXFHINDIR, this is the array size of - * the hardware indirection table. */ __u32 size; - __u32 ring_index[0]; /* ring/queue index for each hash value */ + __u32 ring_index[0]; }; +/** + * struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter + * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW + * @h_u: Flow field values to match (dependent on @flow_type) + * @m_u: Masks for flow field value bits to be ignored + * @vlan_tag: VLAN tag to match + * @vlan_tag_mask: Mask for VLAN tag bits to be ignored + * @data: Driver-dependent data to match + * @data_mask: Mask for driver-dependent data bits to be ignored + * @action: RX ring/queue index to deliver to (non-negative) or other action + * (negative, e.g. %ETHTOOL_RXNTUPLE_ACTION_DROP) + * + * Zero values in @h_u may be ignored, as if all the corresponding + * mask bits were set. + */ struct ethtool_rx_ntuple_flow_spec { __u32 flow_type; union { @@ -407,18 +500,22 @@ struct ethtool_rx_ntuple_flow_spec { struct ethtool_ether_spec ether_spec; struct ethtool_usrip4_spec usr_ip4_spec; __u8 hdata[64]; - } h_u, m_u; /* entry, mask */ + } h_u, m_u; __u16 vlan_tag; __u16 vlan_tag_mask; - __u64 data; /* user-defined flow spec data */ - __u64 data_mask; /* user-defined flow spec mask */ + __u64 data; + __u64 data_mask; - /* signed to distinguish between queue and actions (DROP) */ __s32 action; -#define ETHTOOL_RXNTUPLE_ACTION_DROP -1 +#define ETHTOOL_RXNTUPLE_ACTION_DROP -1 /* drop packet */ }; +/** + * struct ethtool_rx_ntuple - command to set RX flow filter + * @cmd: Command number - %ETHTOOL_SRXNTUPLE + * @fs: Flow filter specification + */ struct ethtool_rx_ntuple { __u32 cmd; struct ethtool_rx_ntuple_flow_spec fs; -- cgit v1.2.3-18-g5258 From e0de7c93b950b9e784894efc4b529c6958cb747a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 14 Sep 2010 09:13:08 +0000 Subject: ethtool: Remove unimplemented flow specification types struct ethtool_rawip4_spec and struct ethtool_ether_spec are neither commented nor used by any driver, so remove them. Adjust padding in the user-visible unions that included these structures. Fix references to struct ethtool_rawip4_spec in ethtool_get_rx_ntuple(), which should use struct ethtool_usrip4_spec. struct ethtool_usrip4_spec cannot hold IPv6 host addresses and there is no separate structure that can, so remove ETH_RX_NFC_IP6 and the reference to it in niu. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 4b3ba05b11a..d64e246a39e 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -353,20 +353,7 @@ struct ethtool_ah_espip4_spec { __u8 tos; }; -struct ethtool_rawip4_spec { - __be32 ip4src; - __be32 ip4dst; - __u8 hdata[64]; -}; - -struct ethtool_ether_spec { - __be16 ether_type; - __u8 frame_size; - __u8 eframe[16]; -}; - #define ETH_RX_NFC_IP4 1 -#define ETH_RX_NFC_IP6 2 /** * struct ethtool_usrip4_spec - general flow specification for IPv4 @@ -403,10 +390,8 @@ struct ethtool_rx_flow_spec { struct ethtool_tcpip4_spec sctp_ip4_spec; struct ethtool_ah_espip4_spec ah_ip4_spec; struct ethtool_ah_espip4_spec esp_ip4_spec; - struct ethtool_rawip4_spec raw_ip4_spec; - struct ethtool_ether_spec ether_spec; struct ethtool_usrip4_spec usr_ip4_spec; - __u8 hdata[64]; + __u8 hdata[72]; } h_u, m_u; __u64 ring_cookie; __u32 location; @@ -496,10 +481,8 @@ struct ethtool_rx_ntuple_flow_spec { struct ethtool_tcpip4_spec sctp_ip4_spec; struct ethtool_ah_espip4_spec ah_ip4_spec; struct ethtool_ah_espip4_spec esp_ip4_spec; - struct ethtool_rawip4_spec raw_ip4_spec; - struct ethtool_ether_spec ether_spec; struct ethtool_usrip4_spec usr_ip4_spec; - __u8 hdata[64]; + __u8 hdata[72]; } h_u, m_u; __u16 vlan_tag; -- cgit v1.2.3-18-g5258 From 7dff59efbb0e8b0f81c95fd40379c0d0c757c808 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Sep 2010 11:07:15 +0000 Subject: net: add rtnl_dereference() We sometime want to dereference an rcu protected pointer while holding RTNL. Use a macro to hide all lockdep details. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 263690d991a..68c436bddc8 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -760,6 +760,15 @@ extern int lockdep_rtnl_is_held(void); rcu_dereference_check(p, rcu_read_lock_held() || \ lockdep_rtnl_is_held()) +/** + * rtnl_dereference - rcu_dereference with debug checking + * @p: The pointer to read, prior to dereferencing + * + * Do an rcu_dereference(p), but check caller holds RTNL + */ +#define rtnl_dereference(p) \ + rcu_dereference_check(p, lockdep_rtnl_is_held()) + extern void rtnetlink_init(void); extern void __rtnl_unlock(void); -- cgit v1.2.3-18-g5258 From 6482f554e2b9cbe733d63124765104f29cf0c9ad Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 15 Sep 2010 12:19:53 +0000 Subject: Phonet: remove dangling pipe if an endpoint is closed early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closing a pipe endpoint is not normally allowed by the Phonet pipe, other than as a side after-effect of removing the pipe between two endpoints. But there is no way to prevent Linux userspace processes from being killed or suffering from bugs, so this can still happen. We might as well forcefully close Phonet pipe endpoints then. The cellular modem supports only a few existing pipes at a time. So we really should not leak them. This change instructs the modem to destroy the pipe if either of the pipe's endpoint (Linux socket) is closed too early. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index 35672b1cf44..37f23dc05de 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -77,6 +77,11 @@ static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb) #define MAX_PNPIPE_HEADER (MAX_PHONET_HEADER + 4) enum { + PNS_PIPE_CREATE_REQ = 0x00, + PNS_PIPE_CREATE_RESP, + PNS_PIPE_REMOVE_REQ, + PNS_PIPE_REMOVE_RESP, + PNS_PIPE_DATA = 0x20, PNS_PIPE_ALIGNED_DATA, -- cgit v1.2.3-18-g5258 From 4e3d16ce5e82648d7f4dfd28b6cf8fe2e9a9efc3 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 15 Sep 2010 12:30:11 +0000 Subject: Phonet: resource routing backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When both destination device and object are nul, Phonet routes the packet according to the resource field. In fact, this is the most common pattern when sending Phonet "request" packets. In this case, the packet is delivered to whichever endpoint (socket) has registered the resource. This adds a new table so that Linux processes can register their Phonet sockets to Phonet resources, if they have adequate privileges. (Namespace support is not implemented at the moment.) Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 7b114079a51..d5df797f954 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -54,6 +54,11 @@ void pn_sock_hash(struct sock *sk); void pn_sock_unhash(struct sock *sk); int pn_sock_get_port(struct sock *sk, unsigned short sport); +struct sock *pn_find_sock_by_res(struct net *net, u8 res); +int pn_sock_bind_res(struct sock *sock, u8 res); +int pn_sock_unbind_res(struct sock *sk, u8 res); +void pn_sock_unbind_all_res(struct sock *sk); + int pn_skb_send(struct sock *sk, struct sk_buff *skb, const struct sockaddr_pn *target); -- cgit v1.2.3-18-g5258 From 7417fa83c1a8b75a03bd9b9b358999f38e771eab Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 15 Sep 2010 12:30:12 +0000 Subject: Phonet: hook resource routing to userspace via ioctl()'s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I wish we could use something cleaner, such as bind(). But that would not work since resource subscription is orthogonal/in addition to the normal object ID allocated via bind(). This is similar to multicasting which also uses ioctl()'s. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 76edadf046d..85e14a83283 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -47,6 +47,8 @@ /* ioctls */ #define SIOCPNGETOBJECT (SIOCPROTOPRIVATE + 0) +#define SIOCPNADDRESOURCE (SIOCPROTOPRIVATE + 14) +#define SIOCPNDELRESOURCE (SIOCPROTOPRIVATE + 15) /* Phonet protocol header */ struct phonethdr { -- cgit v1.2.3-18-g5258 From 507215f8d04f9e61f38c975e61d93bcafd30815f Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 15 Sep 2010 12:30:14 +0000 Subject: Phonet: list subscribed resources via proc_fs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pn_dev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 2d16783d5e2..13649eb5741 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -57,5 +57,6 @@ struct net_device *phonet_route_output(struct net *net, u8 daddr); #define PN_NO_ADDR 0xff extern const struct file_operations pn_sock_seq_fops; +extern const struct file_operations pn_res_seq_fops; #endif -- cgit v1.2.3-18-g5258 From 95ae6b228f814fc0528d0506ee9f18ac333d6851 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Sep 2010 04:04:31 +0000 Subject: ipv4: ip_ptr cleanups dev->ip_ptr is protected by rtnl and rcu. Yet some places dont use appropriate primitives and/or locking rules. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 14 +++++--------- include/linux/netdevice.h | 2 +- 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 2be1a1a2beb..1ec09bb4a3a 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -9,6 +9,7 @@ #include #include #include +#include enum { @@ -198,14 +199,10 @@ static __inline__ int bad_mask(__be32 mask, __be32 addr) static inline struct in_device *__in_dev_get_rcu(const struct net_device *dev) { - struct in_device *in_dev = dev->ip_ptr; - if (in_dev) - in_dev = rcu_dereference(in_dev); - return in_dev; + return rcu_dereference(dev->ip_ptr); } -static __inline__ struct in_device * -in_dev_get(const struct net_device *dev) +static inline struct in_device *in_dev_get(const struct net_device *dev) { struct in_device *in_dev; @@ -217,10 +214,9 @@ in_dev_get(const struct net_device *dev) return in_dev; } -static __inline__ struct in_device * -__in_dev_get_rtnl(const struct net_device *dev) +static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev) { - return (struct in_device*)dev->ip_ptr; + return rcu_dereference_check(dev->ip_ptr, lockdep_rtnl_is_held()); } extern void in_dev_finish_destroy(struct in_device *idev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index af05186d5b3..8992fffb810 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -942,7 +942,7 @@ struct net_device { void *dsa_ptr; /* dsa specific data */ #endif void *atalk_ptr; /* AppleTalk link */ - void *ip_ptr; /* IPv4 specific data */ + struct in_device __rcu *ip_ptr; /* IPv4 specific data */ void *dn_ptr; /* DECnet specific data */ void *ip6_ptr; /* IPv6 specific data */ void *ec_ptr; /* Econet specific data */ -- cgit v1.2.3-18-g5258 From e43473b7f223ec866f7db273697e76c337c390f9 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 15 Sep 2010 17:06:35 -0400 Subject: blkio: Core implementation of throttle policy o Actual implementation of throttling policy in block layer. Currently it implements READ and WRITE bytes per second throttling logic. IOPS throttling comes in later patches. Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 +++ include/linux/blkdev.h | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ca83a97c971..10a0c291b55 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -130,6 +130,8 @@ enum rq_flag_bits { /* bio only flags */ __REQ_UNPLUG, /* unplug the immediately after submission */ __REQ_RAHEAD, /* read ahead, can fail anytime */ + __REQ_THROTTLED, /* This bio has already been subjected to + * throttling rules. Don't do it again. */ /* request only flags */ __REQ_SORTED, /* elevator knows about this request */ @@ -172,6 +174,7 @@ enum rq_flag_bits { #define REQ_UNPLUG (1 << __REQ_UNPLUG) #define REQ_RAHEAD (1 << __REQ_RAHEAD) +#define REQ_THROTTLED (1 << __REQ_THROTTLED) #define REQ_SORTED (1 << __REQ_SORTED) #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 780824edac1..1341df5806d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -371,6 +371,11 @@ struct request_queue #if defined(CONFIG_BLK_DEV_BSG) struct bsg_class_device bsg_dev; #endif + +#ifdef CONFIG_BLK_DEV_THROTTLING + /* Throttle data */ + struct throtl_data *td; +#endif }; #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ @@ -1131,6 +1136,7 @@ static inline void put_dev_sector(Sector p) struct work_struct; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); +int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay); #ifdef CONFIG_BLK_CGROUP /* @@ -1174,6 +1180,24 @@ static inline uint64_t rq_io_start_time_ns(struct request *req) } #endif +#ifdef CONFIG_BLK_DEV_THROTTLING +extern int blk_throtl_init(struct request_queue *q); +extern void blk_throtl_exit(struct request_queue *q); +extern int blk_throtl_bio(struct request_queue *q, struct bio **bio); +extern void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay); +extern void throtl_shutdown_timer_wq(struct request_queue *q); +#else /* CONFIG_BLK_DEV_THROTTLING */ +static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) +{ + return 0; +} + +static inline int blk_throtl_init(struct request_queue *q) { return 0; } +static inline int blk_throtl_exit(struct request_queue *q) { return 0; } +static inline void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) {} +static inline void throtl_shutdown_timer_wq(struct request_queue *q) {} +#endif /* CONFIG_BLK_DEV_THROTTLING */ + #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ -- cgit v1.2.3-18-g5258 From 1ec5584e3edf9c4bf2c88c846534d19cf986ba11 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 15 Aug 2010 21:50:52 +0200 Subject: libfs: use generic_file_llseek for simple_attr Simple attribute files need to be seekable to allow resetting the file for another read. Signed-off-by: Arnd Bergmann --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b61475..c8effc81f0c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2450,6 +2450,7 @@ static const struct file_operations __fops = { \ .release = simple_attr_release, \ .read = simple_attr_read, \ .write = simple_attr_write, \ + .llseek = generic_file_llseek, \ }; static inline void __attribute__((format(printf, 1, 2))) -- cgit v1.2.3-18-g5258 From dd3932eddf428571762596e17b65f5dc92ca361b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Sep 2010 20:51:46 +0200 Subject: block: remove BLKDEV_IFL_WAIT All the blkdev_issue_* helpers can only sanely be used for synchronous caller. To issue cache flushes or barriers asynchronously the caller needs to set up a bio by itself with a completion callback to move the asynchronous state machine ahead. So drop the BLKDEV_IFL_WAIT flag that is always specified when calling blkdev_issue_* and also remove the now unused flags argument to blkdev_issue_flush and blkdev_issue_zeroout. For blkdev_issue_discard we need to keep it for the secure discard flag, which gains a more descriptive name and loses the bitops vs flag confusion. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cfcb3a61060..accbd0e5c89 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -867,18 +867,14 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, return NULL; return bqt->tag_index[tag]; } -enum{ - BLKDEV_WAIT, /* wait for completion */ - BLKDEV_SECURE, /* secure discard */ -}; -#define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) -#define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) -extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, - unsigned long); + +#define BLKDEV_DISCARD_SECURE 0x01 /* secure discard */ + +extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); + sector_t nr_sects, gfp_t gfp_mask); static inline int sb_issue_discard(struct super_block *sb, sector_t block, sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { -- cgit v1.2.3-18-g5258 From 9c376639297d3dd82d40e54c9cdca8da9dfc22f1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 20 Aug 2010 15:13:59 -0700 Subject: include/net/cfg80211.h: wiphy_ messages use dev_printk The output becomes: [ 41.261941] ieee80211 phy0: Selected rate control algorithm 'minstrel_ht' Signed-off-by: Joe Perches Signed-off-by: John W. Linville --- include/net/cfg80211.h | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4c8c727d0cc..a0613ff62c9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2558,49 +2558,36 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, /* wiphy_printk helpers, similar to dev_printk */ #define wiphy_printk(level, wiphy, format, args...) \ - printk(level "%s: " format, wiphy_name(wiphy), ##args) + dev_printk(level, &(wiphy)->dev, format, ##args) #define wiphy_emerg(wiphy, format, args...) \ - wiphy_printk(KERN_EMERG, wiphy, format, ##args) + dev_emerg(&(wiphy)->dev, format, ##args) #define wiphy_alert(wiphy, format, args...) \ - wiphy_printk(KERN_ALERT, wiphy, format, ##args) + dev_alert(&(wiphy)->dev, format, ##args) #define wiphy_crit(wiphy, format, args...) \ - wiphy_printk(KERN_CRIT, wiphy, format, ##args) + dev_crit(&(wiphy)->dev, format, ##args) #define wiphy_err(wiphy, format, args...) \ - wiphy_printk(KERN_ERR, wiphy, format, ##args) + dev_err(&(wiphy)->dev, format, ##args) #define wiphy_warn(wiphy, format, args...) \ - wiphy_printk(KERN_WARNING, wiphy, format, ##args) + dev_warn(&(wiphy)->dev, format, ##args) #define wiphy_notice(wiphy, format, args...) \ - wiphy_printk(KERN_NOTICE, wiphy, format, ##args) + dev_notice(&(wiphy)->dev, format, ##args) #define wiphy_info(wiphy, format, args...) \ - wiphy_printk(KERN_INFO, wiphy, format, ##args) + dev_info(&(wiphy)->dev, format, ##args) -int wiphy_debug(const struct wiphy *wiphy, const char *format, ...) - __attribute__ ((format (printf, 2, 3))); - -#if defined(DEBUG) -#define wiphy_dbg(wiphy, format, args...) \ +#define wiphy_debug(wiphy, format, args...) \ wiphy_printk(KERN_DEBUG, wiphy, format, ##args) -#elif defined(CONFIG_DYNAMIC_DEBUG) + #define wiphy_dbg(wiphy, format, args...) \ - dynamic_pr_debug("%s: " format, wiphy_name(wiphy), ##args) -#else -#define wiphy_dbg(wiphy, format, args...) \ -({ \ - if (0) \ - wiphy_printk(KERN_DEBUG, wiphy, format, ##args); \ - 0; \ -}) -#endif + dev_dbg(&(wiphy)->dev, format, ##args) #if defined(VERBOSE_DEBUG) #define wiphy_vdbg wiphy_dbg #else - #define wiphy_vdbg(wiphy, format, args...) \ ({ \ if (0) \ wiphy_printk(KERN_DEBUG, wiphy, format, ##args); \ - 0; \ + 0; \ }) #endif -- cgit v1.2.3-18-g5258 From 074ac8df9f93f2a35a356d92fd7f16cd846f0a03 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 Sep 2010 14:58:22 +0200 Subject: cfg80211/nl80211: introduce p2p device types This adds P2P-STA and P2P-GO as device types so we can distinguish between those and normal STA or AP (respectively) type interfaces. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 31603e8b558..f0518b0278a 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1020,6 +1020,8 @@ enum nl80211_attrs { * @NL80211_IFTYPE_WDS: wireless distribution interface * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames * @NL80211_IFTYPE_MESH_POINT: mesh point + * @NL80211_IFTYPE_P2P_CLIENT: P2P client + * @NL80211_IFTYPE_P2P_GO: P2P group owner * @NL80211_IFTYPE_MAX: highest interface type number currently defined * @NUM_NL80211_IFTYPES: number of defined interface types * @@ -1036,6 +1038,8 @@ enum nl80211_iftype { NL80211_IFTYPE_WDS, NL80211_IFTYPE_MONITOR, NL80211_IFTYPE_MESH_POINT, + NL80211_IFTYPE_P2P_CLIENT, + NL80211_IFTYPE_P2P_GO, /* keep last */ NUM_NL80211_IFTYPES, -- cgit v1.2.3-18-g5258 From 2ca27bcff7127da1aa7dd39cd2a6f7cb187e327f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 Sep 2010 14:58:23 +0200 Subject: mac80211: add p2p device type support When a driver advertises p2p device support, mac80211 will handle it, but internally it will rewrite the interface type to STA/AP rather than P2P-STA/GO since otherwise a lot of paths need to be touched that are otherwise identical. A p2p boolean tells drivers whether or not a given interface will be used for p2p or not. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 19a5cb4a658..12a49f0ba32 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -769,6 +769,8 @@ struct ieee80211_channel_switch { * @bss_conf: BSS configuration for this interface, either our own * or the BSS we're associated to * @addr: address of this interface + * @p2p: indicates whether this AP or STA interface is a p2p + * interface, i.e. a GO or p2p-sta respectively * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *). */ @@ -776,6 +778,7 @@ struct ieee80211_vif { enum nl80211_iftype type; struct ieee80211_bss_conf bss_conf; u8 addr[ETH_ALEN]; + bool p2p; /* must be last */ u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *)))); }; @@ -1701,7 +1704,7 @@ struct ieee80211_ops { struct ieee80211_vif *vif); int (*change_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - enum nl80211_iftype new_type); + enum nl80211_iftype new_type, bool p2p); void (*remove_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*config)(struct ieee80211_hw *hw, u32 changed); @@ -2721,4 +2724,26 @@ conf_is_ht(struct ieee80211_conf *conf) return conf->channel_type != NL80211_CHAN_NO_HT; } +static inline enum nl80211_iftype +ieee80211_iftype_p2p(enum nl80211_iftype type, bool p2p) +{ + if (p2p) { + switch (type) { + case NL80211_IFTYPE_STATION: + return NL80211_IFTYPE_P2P_CLIENT; + case NL80211_IFTYPE_AP: + return NL80211_IFTYPE_P2P_GO; + default: + break; + } + } + return type; +} + +static inline enum nl80211_iftype +ieee80211_vif_type_p2p(struct ieee80211_vif *vif) +{ + return ieee80211_iftype_p2p(vif->type, vif->p2p); +} + #endif /* MAC80211_H */ -- cgit v1.2.3-18-g5258 From cd13539b8bc9ae884e6d8d9374c594adff4304e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Sep 2010 02:58:13 +0000 Subject: net: shrinks struct net_device commit ab95bfe01 (net: replace hooks in __netif_receive_skb) added rx_handler at wrong place, between two cache line aligned objects, creating a big hole (a full cache line) Move rx_handler and rx_handler_data before rx_queue, filling existing hole. Move master field in the cache line(s) used in receive path. This saves 64 bytes (or L1_CACHE_BYTES), and avoids two possible cache misses in receive path. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8992fffb810..ec17887a5bc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -918,10 +918,6 @@ struct net_device { unsigned short needed_headroom; unsigned short needed_tailroom; - struct net_device *master; /* Pointer to master device of a group, - * which this device is member of. - */ - /* Interface address info. */ unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ unsigned char addr_assign_type; /* hw address assignment type */ @@ -951,7 +947,7 @@ struct net_device { assign before registering */ /* - * Cache line mostly used on receive path (including eth_type_trans()) + * Cache lines mostly used on receive path (including eth_type_trans()) */ unsigned long last_rx; /* Time of last Rx * This should not be set in @@ -961,6 +957,10 @@ struct net_device { * avoid dirtying this cache line. */ + struct net_device *master; /* Pointer to master device of a group, + * which this device is member of. + */ + /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; /* hw address, (before bcast because most packets are @@ -980,10 +980,14 @@ struct net_device { unsigned int num_rx_queues; #endif - struct netdev_queue rx_queue; rx_handler_func_t *rx_handler; void *rx_handler_data; + struct netdev_queue rx_queue; /* use two cache lines */ + +/* + * Cache lines mostly used on transmit path + */ struct netdev_queue *_tx ____cacheline_aligned_in_smp; /* Number of TX queues allocated at alloc_netdev_mq() time */ @@ -997,9 +1001,7 @@ struct net_device { unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; -/* - * One part is mostly used on xmit path (device) - */ + /* These may be needed for future network-power-down code. */ /* -- cgit v1.2.3-18-g5258 From 74704ac6ea402d50c9543cb28247e6d9f521f7ae Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Tue, 7 Sep 2010 09:21:32 +0200 Subject: tracing, perf: Add more power related events This patch adds new generic events for dynamic power management tracing: - clock events class: used for clock enable/disable and for clock rate change, - power_domain events class: used for power domains transitions. The OMAP architecture will be using the new events for PM debugging, however the new events are made generic enough to be used by all platforms. Signed-off-by: Jean Pihet Acked-by: Thomas Renninger Cc: discuss@lesswatts.org Cc: linux-pm@lists.linux-foundation.org Cc: Thomas Renninger Cc: Arjan van de Ven Cc: Peter Zijlstra Cc: Len Brown Cc: Kevin Hilman LKML-Reference: Signed-off-by: Ingo Molnar --- include/trace/events/power.h | 90 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 35a2a6e7bf1..286784d69b8 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -10,12 +10,17 @@ #ifndef _TRACE_POWER_ENUM_ #define _TRACE_POWER_ENUM_ enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, + POWER_NONE = 0, + POWER_CSTATE = 1, /* C-State */ + POWER_PSTATE = 2, /* Fequency change or DVFS */ + POWER_SSTATE = 3, /* Suspend */ }; #endif +/* + * The power events are used for cpuidle & suspend (power_start, power_end) + * and for cpufreq (power_frequency) + */ DECLARE_EVENT_CLASS(power, TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), @@ -70,6 +75,85 @@ TRACE_EVENT(power_end, ); +/* + * The clock events are used for clock enable/disable and for + * clock rate change + */ +DECLARE_EVENT_CLASS(clock, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id), + + TP_STRUCT__entry( + __string( name, name ) + __field( u64, state ) + __field( u64, cpu_id ) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->state = state; + __entry->cpu_id = cpu_id; + ), + + TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), + (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) +); + +DEFINE_EVENT(clock, clock_enable, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + +DEFINE_EVENT(clock, clock_disable, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + +DEFINE_EVENT(clock, clock_set_rate, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + +/* + * The power domain events are used for power domains transitions + */ +DECLARE_EVENT_CLASS(power_domain, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id), + + TP_STRUCT__entry( + __string( name, name ) + __field( u64, state ) + __field( u64, cpu_id ) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->state = state; + __entry->cpu_id = cpu_id; +), + + TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), + (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) +); + +DEFINE_EVENT(power_domain, power_domain_target, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + #endif /* _TRACE_POWER_H */ /* This part must be outside protection */ -- cgit v1.2.3-18-g5258 From b04243ef7006cda301819f54ee7ce0a3632489e3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Sep 2010 11:28:48 +0200 Subject: perf: Complete software pmu grouping Aside from allowing software events into a !software group, allow adding !software events to pure software groups. Once we've moved the software group and attached the first !software event, the group will no longer be a pure software group and hence no longer be eligible for movement, at which point the straight ctx comparison is correct again. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Cc: Robert Richter Cc: Paul Mackerras LKML-Reference: <20100917093009.410784731@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 39d8860b268..165287fd2cc 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -804,12 +804,18 @@ struct perf_event { #endif /* CONFIG_PERF_EVENTS */ }; +enum perf_event_context_type { + task_context, + cpu_context, +}; + /** * struct perf_event_context - event context structure * * Used as a container for task events and CPU events as well: */ struct perf_event_context { + enum perf_event_context_type type; struct pmu *pmu; /* * Protect the states of the events in the list, -- cgit v1.2.3-18-g5258 From e9d2b064149ff7ef4acbc65a1b9374ac8b218d3e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Sep 2010 11:28:50 +0200 Subject: perf: Undo the per cpu-context timer stuff Revert the timer per cpu-context timers because of unfortunate nohz interaction. Fixing that would have been somewhat ugly, so go back to driving things from the regular tick. Provide a jiffies interval feature for people who want slower rotations. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Cc: Robert Richter Cc: Yinghai Lu LKML-Reference: <20100917093009.519845633@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 165287fd2cc..61b1e2d760f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -870,8 +870,8 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; - u64 timer_interval; - struct hrtimer timer; + struct list_head rotation_list; + int jiffies_interval; }; struct perf_output_handle { @@ -1065,6 +1065,7 @@ extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); extern void perf_event_enable(struct perf_event *event); extern void perf_event_disable(struct perf_event *event); +extern void perf_event_task_tick(void); #else static inline void perf_event_task_sched_in(struct task_struct *task) { } @@ -1099,6 +1100,7 @@ static inline int perf_swevent_get_recursion_context(void) { return -1; } static inline void perf_swevent_put_recursion_context(int rctx) { } static inline void perf_event_enable(struct perf_event *event) { } static inline void perf_event_disable(struct perf_event *event) { } +static inline void perf_event_task_tick(void) { } #endif #define perf_output_put(handle, x) \ -- cgit v1.2.3-18-g5258 From 3575792e005dc9994f15ae72c1c6f401d134177d Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 17 Sep 2010 14:18:16 +0200 Subject: ipvs: extend connection flags to 32 bits - the sync protocol supports 16 bits only, so bits 0..15 should be used only for flags that should go to backup server, bits 16 and above should be allocated for flags not sent to backup. - use IP_VS_CONN_F_DEST_MASK as mask of connection flags in destination that can be changed by user space - allow IP_VS_CONN_F_ONE_PACKET to be set in destination Signed-off-by: Julian Anastasov Signed-off-by: Patrick McHardy --- include/linux/ip_vs.h | 8 ++++++++ include/net/ip_vs.h | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index 9708de265bb..003d75f6ffe 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -70,6 +70,7 @@ /* * IPVS Connection Flags + * Only flags 0..15 are sent to backup server */ #define IP_VS_CONN_F_FWD_MASK 0x0007 /* mask for the fwd methods */ #define IP_VS_CONN_F_MASQ 0x0000 /* masquerading/NAT */ @@ -88,6 +89,13 @@ #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ +/* Flags that are not sent to backup server start from bit 16 */ + +/* Connection flags from destination that can be changed by user space */ +#define IP_VS_CONN_F_DEST_MASK (IP_VS_CONN_F_FWD_MASK | \ + IP_VS_CONN_F_ONE_PACKET | \ + 0) + #define IP_VS_SCHEDNAME_MAXLEN 16 #define IP_VS_IFNAME_MAXLEN 16 diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index f976885f686..62698a9c163 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -366,6 +366,7 @@ struct ip_vs_conn { union nf_inet_addr caddr; /* client address */ union nf_inet_addr vaddr; /* virtual address */ union nf_inet_addr daddr; /* destination address */ + volatile __u32 flags; /* status flags */ __be16 cport; __be16 vport; __be16 dport; @@ -378,7 +379,6 @@ struct ip_vs_conn { /* Flags and state transition */ spinlock_t lock; /* lock for state transition */ - volatile __u16 flags; /* status flags */ volatile __u16 state; /* state info */ volatile __u16 old_state; /* old state, to be used for * state transition triggerd -- cgit v1.2.3-18-g5258 From dfb8fb96ae2b5126cd0c08c0ccd7c42e1f46568a Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 17 Sep 2010 03:23:39 +0000 Subject: stmmac: add CSR Clock range selection This patch adds the CSR Clock range selection. Original patch from Johannes Stezenbach fixed the CSR in the stmmac_mdio. We agreed to provide this through the platform instead of. Also thanks to Johannes for having tested it on ARM. Signed-off-by: Giuseppe Cavallaro Signed-off-by: Johannes Stezenbach Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index a4adf0de6ed..c87c88ccffc 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -32,6 +32,7 @@ struct plat_stmmacenet_data { int bus_id; int pbl; + int clk_csr; int has_gmac; int enh_desc; void (*fix_mac_speed)(void *priv, unsigned int speed); -- cgit v1.2.3-18-g5258 From ebbb293f8b3021ae2009fcb7cb3b8a52fb5fd06a Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 17 Sep 2010 03:23:40 +0000 Subject: stmmac: consolidate and tidy-up the COE support The first version of the driver had hard-coded the logic for handling the checksum offloading. This was designed according to the chips included in the STM platforms where: o MAC10/100 supports no COE at all. o GMAC fully supports RX/TX COE. This is not good for other chip configurations where, for example, the mac10/100 supports the tx csum in HW or when the GMAC has no IPC. Thanks to Johannes Stezenbach; he provided me a first draft of this patch that only reviewed the IPC for the GMAC devices. This patch also helps on SPEAr platforms where the MAC10/100 can perform the TX csum in HW. Thanks to Deepak SIKRI for his support on this. In the end, GMAC devices for STM platforms have a bugged Jumbo frame support that needs to have the Tx COE disabled for oversized frames (due to limited buffer sizes). This information is also passed through the driver's platform structure. Signed-off-by: Giuseppe Cavallaro Signed-off-by: Johannes Stezenbach Signed-off-by: Deepak SIKRI Signed-off-by: David S. Miller --- include/linux/stmmac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index c87c88ccffc..1d8baf71921 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -35,6 +35,8 @@ struct plat_stmmacenet_data { int clk_csr; int has_gmac; int enh_desc; + int tx_coe; + int bugged_jumbo; void (*fix_mac_speed)(void *priv, unsigned int speed); void (*bus_setup)(void __iomem *ioaddr); #ifdef CONFIG_STM_DRIVERS -- cgit v1.2.3-18-g5258 From be2902daee80b655cebd482b5ee91ffc29408121 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 16 Sep 2010 11:28:07 +0000 Subject: ethtool, ixgbe: Move RX n-tuple mask fixup to ethtool The ethtool utility does not set masks for flow parameters that are not specified, so if both value and mask are 0 then this must be treated as equivalent to a mask with all bits set. Currently that is done in the only driver that implements RX n-tuple filtering, ixgbe. Move it to the ethtool core. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index d64e246a39e..00334eebbe2 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -470,8 +470,9 @@ struct ethtool_rxfh_indir { * @action: RX ring/queue index to deliver to (non-negative) or other action * (negative, e.g. %ETHTOOL_RXNTUPLE_ACTION_DROP) * - * Zero values in @h_u may be ignored, as if all the corresponding - * mask bits were set. + * For flow types %TCP_V4_FLOW, %UDP_V4_FLOW and %SCTP_V4_FLOW, where + * a field value and mask are both zero this is treated as if all mask + * bits are set i.e. the field is ignored. */ struct ethtool_rx_ntuple_flow_spec { __u32 flow_type; -- cgit v1.2.3-18-g5258 From 07af7a2bfa853db3957a22f9a41f437bf0f10e63 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 16 Sep 2010 11:34:26 +0000 Subject: ethtool: Add comments for valid use of flow types Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 00334eebbe2..b67af60a889 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -841,21 +841,21 @@ struct ethtool_ops { #define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */ /* L3-L4 network traffic flow types */ -#define TCP_V4_FLOW 0x01 -#define UDP_V4_FLOW 0x02 -#define SCTP_V4_FLOW 0x03 -#define AH_ESP_V4_FLOW 0x04 -#define TCP_V6_FLOW 0x05 -#define UDP_V6_FLOW 0x06 -#define SCTP_V6_FLOW 0x07 -#define AH_ESP_V6_FLOW 0x08 -#define AH_V4_FLOW 0x09 -#define ESP_V4_FLOW 0x0a -#define AH_V6_FLOW 0x0b -#define ESP_V6_FLOW 0x0c -#define IP_USER_FLOW 0x0d -#define IPV4_FLOW 0x10 -#define IPV6_FLOW 0x11 +#define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ +#define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ +#define SCTP_V4_FLOW 0x03 /* hash or spec (sctp_ip4_spec) */ +#define AH_ESP_V4_FLOW 0x04 /* hash only */ +#define TCP_V6_FLOW 0x05 /* hash only */ +#define UDP_V6_FLOW 0x06 /* hash only */ +#define SCTP_V6_FLOW 0x07 /* hash only */ +#define AH_ESP_V6_FLOW 0x08 /* hash only */ +#define AH_V4_FLOW 0x09 /* hash or spec (ah_ip4_spec) */ +#define ESP_V4_FLOW 0x0a /* hash or spec (esp_ip4_spec) */ +#define AH_V6_FLOW 0x0b /* hash only */ +#define ESP_V6_FLOW 0x0c /* hash only */ +#define IP_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */ +#define IPV4_FLOW 0x10 /* hash only */ +#define IPV6_FLOW 0x11 /* hash only */ /* L3-L4 network traffic flow hash options */ #define RXH_L2DA (1 << 1) -- cgit v1.2.3-18-g5258 From a3c74c52570c0c4ac90c9a0216de800c39089ba7 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 14 Sep 2010 21:43:47 +0900 Subject: futex: Mark restart_block.futex.uaddr[2] __user @uaddr and @uaddr2 fields in restart_block.futex are user pointers. Add __user and remove unnecessary casts. Signed-off-by: Namhyung Kim Cc: Peter Zijlstra Cc: Darren Hart LKML-Reference: <1284468228-8723-2-git-send-email-namhyung@gmail.com> Signed-off-by: Thomas Gleixner --- include/linux/thread_info.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index a8cc4e13434..c9069654417 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -23,12 +23,12 @@ struct restart_block { }; /* For futex_wait and futex_wait_requeue_pi */ struct { - u32 *uaddr; + u32 __user *uaddr; u32 val; u32 flags; u32 bitset; u64 time; - u32 *uaddr2; + u32 __user *uaddr2; } futex; /* For nanosleep */ struct { -- cgit v1.2.3-18-g5258 From f0a7a98d1d400e2a5fd9a63ed56d30d30f2864cb Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Mon, 13 Sep 2010 13:04:02 +0100 Subject: ARM: 6373/1: tc35892-gpio: add setup/remove callbacks For board-specific initialization. Cc: Samuel Ortiz Cc: linux-kernel@vger.kernel.org Acked-by: Linus Walleij Signed-off-by: Rabin Vincent Signed-off-by: Russell King --- include/linux/mfd/tc35892.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/tc35892.h b/include/linux/mfd/tc35892.h index e47f770d306..eff3094ca84 100644 --- a/include/linux/mfd/tc35892.h +++ b/include/linux/mfd/tc35892.h @@ -111,9 +111,13 @@ extern int tc35892_set_bits(struct tc35892 *tc35892, u8 reg, u8 mask, u8 val); * struct tc35892_gpio_platform_data - TC35892 GPIO platform data * @gpio_base: first gpio number assigned to TC35892. A maximum of * %TC35892_NR_GPIOS GPIOs will be allocated. + * @setup: callback for board-specific initialization + * @remove: callback for board-specific teardown */ struct tc35892_gpio_platform_data { int gpio_base; + void (*setup)(struct tc35892 *tc35892, unsigned gpio_base); + void (*remove)(struct tc35892 *tc35892, unsigned gpio_base); }; /** -- cgit v1.2.3-18-g5258 From 81dcaf6516d8bbd75b894862c8ae7bba04380cfe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 16 Sep 2010 10:17:35 +0200 Subject: workqueue: implement alloc_ordered_workqueue() alloc_ordered_workqueue() creates a workqueue which processes each work itemp one by one in the queued order. This will be used to replace create_freezeable_workqueue() and create_singlethread_workqueue(). Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 25e02c941ba..07c48925a8f 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -306,6 +306,24 @@ __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, __alloc_workqueue_key((name), (flags), (max_active), NULL, NULL) #endif +/** + * alloc_ordered_workqueue - allocate an ordered workqueue + * @name: name of the workqueue + * @flags: WQ_* flags (only WQ_FREEZEABLE and WQ_RESCUER are meaningful) + * + * Allocate an ordered workqueue. An ordered workqueue executes at + * most one work item at any given time in the queued order. They are + * implemented as unbound workqueues with @max_active of one. + * + * RETURNS: + * Pointer to the allocated workqueue on success, %NULL on failure. + */ +static inline struct workqueue_struct * +alloc_ordered_workqueue(const char *name, unsigned int flags) +{ + return alloc_workqueue(name, WQ_UNBOUND | flags, 1); +} + #define create_workqueue(name) \ alloc_workqueue((name), WQ_RESCUER, 1) #define create_freezeable_workqueue(name) \ -- cgit v1.2.3-18-g5258 From 401a8d048eadfbe1b1c1bf53d3b614fcc894c61a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 16 Sep 2010 10:36:00 +0200 Subject: workqueue: cleanup flush/cancel functions Make the following cleanup changes. * Relocate flush/cancel function prototypes and definitions. * Relocate wait_on_cpu_work() and wait_on_work() before try_to_grab_pending(). These will be used to implement flush_work_sync(). * Make all flush/cancel functions return bool instead of int. * Update wait_on_cpu_work() and wait_on_work() to return %true if they actually waited. * Add / update comments. This patch doesn't cause any functional changes. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 07c48925a8f..bb9b683ea6f 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -343,7 +343,6 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, extern void flush_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); -extern void flush_delayed_work(struct delayed_work *work); extern int schedule_work(struct work_struct *work); extern int schedule_work_on(int cpu, struct work_struct *work); @@ -355,8 +354,11 @@ extern int keventd_up(void); int execute_in_process_context(work_func_t fn, struct execute_work *); -extern int flush_work(struct work_struct *work); -extern int cancel_work_sync(struct work_struct *work); +extern bool flush_work(struct work_struct *work); +extern bool cancel_work_sync(struct work_struct *work); + +extern bool flush_delayed_work(struct delayed_work *dwork); +extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); @@ -370,9 +372,9 @@ extern unsigned int work_busy(struct work_struct *work); * it returns 1 and the work doesn't re-arm itself. Run flush_workqueue() or * cancel_work_sync() to wait on it. */ -static inline int cancel_delayed_work(struct delayed_work *work) +static inline bool cancel_delayed_work(struct delayed_work *work) { - int ret; + bool ret; ret = del_timer_sync(&work->timer); if (ret) @@ -385,9 +387,9 @@ static inline int cancel_delayed_work(struct delayed_work *work) * if it returns 0 the timer function may be running and the queueing is in * progress. */ -static inline int __cancel_delayed_work(struct delayed_work *work) +static inline bool __cancel_delayed_work(struct delayed_work *work) { - int ret; + bool ret; ret = del_timer(&work->timer); if (ret) @@ -395,8 +397,6 @@ static inline int __cancel_delayed_work(struct delayed_work *work) return ret; } -extern int cancel_delayed_work_sync(struct delayed_work *work); - /* Obsolete. use cancel_delayed_work_sync() */ static inline void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, -- cgit v1.2.3-18-g5258 From 09383498c5d35262e643bfdbae84826177a3c624 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 16 Sep 2010 10:48:29 +0200 Subject: workqueue: implement flush[_delayed]_work_sync() Implement flush[_delayed]_work_sync(). These are flush functions which also make sure no CPU is still executing the target work from earlier queueing instances. These are similar to cancel[_delayed]_work_sync() except that the target work item is flushed instead of cancelled. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index bb9b683ea6f..e33ff4a9170 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -355,9 +355,11 @@ extern int keventd_up(void); int execute_in_process_context(work_func_t fn, struct execute_work *); extern bool flush_work(struct work_struct *work); +extern bool flush_work_sync(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); extern bool flush_delayed_work(struct delayed_work *dwork); +extern bool flush_delayed_work_sync(struct delayed_work *work); extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, -- cgit v1.2.3-18-g5258 From 8f8f103d8466e627ecef7894248eb79407d9047c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 19 Sep 2010 11:24:02 -0700 Subject: net: reorder struct netdev_hw_addr Move 'synced' and 'global_use' fields before 'refcount', to shrinks struct netdev_hw_addr by 8 bytes (on 64bit arches). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ec17887a5bc..f7f1302138a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -228,9 +228,9 @@ struct netdev_hw_addr { #define NETDEV_HW_ADDR_T_SLAVE 3 #define NETDEV_HW_ADDR_T_UNICAST 4 #define NETDEV_HW_ADDR_T_MULTICAST 5 - int refcount; bool synced; bool global_use; + int refcount; struct rcu_head rcu_head; }; -- cgit v1.2.3-18-g5258 From cabdf8bf488bfa3b565360b9fa1322d2db7747eb Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Sep 2010 15:37:25 +0900 Subject: sh: pci: Move Renesas PCI IDs to a better place. Previously these IDs were only used by one driver, so there was not much need for having them generically defined. Now that this will no longer hold true, move them over. Signed-off-by: Paul Mundt --- include/linux/pci_ids.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36ca..33a5d1c3972 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2260,6 +2260,13 @@ #define PCI_VENDOR_ID_SILAN 0x1904 +#define PCI_VENDOR_ID_RENESAS 0x1912 +#define PCI_DEVICE_ID_RENESAS_SH7781 0x0001 +#define PCI_DEVICE_ID_RENESAS_SH7780 0x0002 +#define PCI_DEVICE_ID_RENESAS_SH7763 0x0004 +#define PCI_DEVICE_ID_RENESAS_SH7785 0x0007 +#define PCI_DEVICE_ID_RENESAS_SH7786 0x0010 + #define PCI_VENDOR_ID_TDI 0x192E #define PCI_DEVICE_ID_TDI_EHCI 0x0101 -- cgit v1.2.3-18-g5258 From 298c926c6d7f50d91d6acb76c33b83bab5b5bd5c Mon Sep 17 00:00:00 2001 From: Adrian Hoban Date: Mon, 20 Sep 2010 16:05:12 +0800 Subject: crypto: cryptd - Adding the AEAD interface type support to cryptd This patch adds AEAD support into the cryptd framework. Having AEAD support in cryptd enables crypto drivers that use the AEAD interface type (such as the patch for AEAD based RFC4106 AES-GCM implementation using Intel New Instructions) to leverage cryptd for asynchronous processing. Signed-off-by: Adrian Hoban Signed-off-by: Tadeusz Struk Signed-off-by: Gabriele Paoloni Signed-off-by: Aidan O'Mahony Signed-off-by: Herbert Xu --- include/crypto/cryptd.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h index 1c96b255017..ba98918bbd9 100644 --- a/include/crypto/cryptd.h +++ b/include/crypto/cryptd.h @@ -1,5 +1,12 @@ /* * Software async crypto daemon + * + * Added AEAD support to cryptd. + * Authors: Tadeusz Struk (tadeusz.struk@intel.com) + * Adrian Hoban + * Gabriele Paoloni + * Aidan O'Mahony (aidan.o.mahony@intel.com) + * Copyright (c) 2010, Intel Corporation. */ #ifndef _CRYPTO_CRYPT_H @@ -42,4 +49,21 @@ struct crypto_shash *cryptd_ahash_child(struct cryptd_ahash *tfm); struct shash_desc *cryptd_shash_desc(struct ahash_request *req); void cryptd_free_ahash(struct cryptd_ahash *tfm); +struct cryptd_aead { + struct crypto_aead base; +}; + +static inline struct cryptd_aead *__cryptd_aead_cast( + struct crypto_aead *tfm) +{ + return (struct cryptd_aead *)tfm; +} + +struct cryptd_aead *cryptd_alloc_aead(const char *alg_name, + u32 type, u32 mask); + +struct crypto_aead *cryptd_aead_child(struct cryptd_aead *tfm); + +void cryptd_free_aead(struct cryptd_aead *tfm); + #endif -- cgit v1.2.3-18-g5258 From a18213d1d2a469956845b437f5d1d0401ab22e8b Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 19 Sep 2010 20:08:00 +0200 Subject: dccp: Replace magic CCID-specific numbers by symbolic constants The constants DCCPO_{MIN,MAX}_CCID_SPECIFIC are nowhere used in the code, but instead for the CCID-specific options numbers are used. This patch unifies the use of CCID-specific option numbers, by adding symbolic names reflecting the definitions in RFC 4340, 10.3. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 7434a8353e2..7187bd8a75f 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -165,8 +165,10 @@ enum { DCCPO_TIMESTAMP_ECHO = 42, DCCPO_ELAPSED_TIME = 43, DCCPO_MAX = 45, - DCCPO_MIN_CCID_SPECIFIC = 128, - DCCPO_MAX_CCID_SPECIFIC = 255, + DCCPO_MIN_RX_CCID_SPECIFIC = 128, /* from sender to receiver */ + DCCPO_MAX_RX_CCID_SPECIFIC = 191, + DCCPO_MIN_TX_CCID_SPECIFIC = 192, /* from receiver to sender */ + DCCPO_MAX_TX_CCID_SPECIFIC = 255, }; /* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */ #define DCCP_SINGLE_OPT_MAXLEN 253 -- cgit v1.2.3-18-g5258 From 8b8e2ec1eeca7f6941bc81cefc9663018d6ceb57 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Sep 2010 19:21:28 +0200 Subject: percpu: Add {get,put}_cpu_ptr These are similar to {get,put}_cpu_var() except for dynamically allocated per-cpu memory. Signed-off-by: Peter Zijlstra Acked-by: Tejun Heo LKML-Reference: <20100917093009.252867712@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/percpu.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 49466b13c5c..0eb50832aa0 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -39,6 +39,15 @@ preempt_enable(); \ } while (0) +#define get_cpu_ptr(var) ({ \ + preempt_disable(); \ + this_cpu_ptr(var); }) + +#define put_cpu_ptr(var) do { \ + (void)(var); \ + preempt_enable(); \ +} while (0) + #ifdef CONFIG_SMP /* minimum unit size, also is the maximum supported allocation size */ -- cgit v1.2.3-18-g5258 From 24750f3e469bef81a96c0036cd4700df5fb48925 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Tue, 24 Aug 2010 10:54:44 +0200 Subject: HID: Add a hid quirk for input sync override As of lately, HID devices which send per-frame data split over several HID reports have started to emerge. This patch adds a quirk which allows the HID driver to take over the input layer synchronization, and hence the control of the frame boundary. Signed-off-by: Henrik Rydberg Signed-off-by: Jiri Kosina --- include/linux/hid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 42a0f1d1136..4cfe02c3fa4 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -316,6 +316,7 @@ struct hid_item { #define HID_QUIRK_FULLSPEED_INTERVAL 0x10000000 #define HID_QUIRK_NO_INIT_REPORTS 0x20000000 #define HID_QUIRK_NO_IGNORE 0x40000000 +#define HID_QUIRK_NO_INPUT_SYNC 0x80000000 /* * This is the global environment of the parser. This information is -- cgit v1.2.3-18-g5258 From a8027073eb127cd207070891374b5c54c2ce3d23 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 20 Sep 2010 15:13:34 -0400 Subject: tracing/sched: Add sched_pi_setprio tracepoint Add a tracepoint that shows the priority of a task being boosted via priority inheritance. Cc: Gregory Haskins Acked-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- include/trace/events/sched.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9208c92aeab..f6334782a59 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -362,6 +362,35 @@ TRACE_EVENT(sched_stat_runtime, (unsigned long long)__entry->vruntime) ); +/* + * Tracepoint for showing priority inheritance modifying a tasks + * priority. + */ +TRACE_EVENT(sched_pi_setprio, + + TP_PROTO(struct task_struct *tsk, int newprio), + + TP_ARGS(tsk, newprio), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, oldprio ) + __field( int, newprio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->oldprio = tsk->prio; + __entry->newprio = newprio; + ), + + TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", + __entry->comm, __entry->pid, + __entry->oldprio, __entry->newprio) +); + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ -- cgit v1.2.3-18-g5258 From c1f9a095600e07fefe64eb94eb711f410100824a Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Thu, 16 Sep 2010 13:16:02 +0200 Subject: wl12xx: make wl12xx.h common to both spi and sdio Move wl12xx.h outside of the spi-specific location, so it can be shared with both spi and sdio solutions. Update all users of spi/wl12xx.h accordingly Signed-off-by: Ohad Ben-Cohen Acked-by: Luciano Coelho Acked-by: Tony Lindgren Signed-off-by: John W. Linville --- include/linux/spi/wl12xx.h | 34 ---------------------------------- include/linux/wl12xx.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 34 deletions(-) delete mode 100644 include/linux/spi/wl12xx.h create mode 100644 include/linux/wl12xx.h (limited to 'include') diff --git a/include/linux/spi/wl12xx.h b/include/linux/spi/wl12xx.h deleted file mode 100644 index a20bccf0b5c..00000000000 --- a/include/linux/spi/wl12xx.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * This file is part of wl12xx - * - * Copyright (C) 2009 Nokia Corporation - * - * Contact: Luciano Coelho - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ - -#ifndef _LINUX_SPI_WL12XX_H -#define _LINUX_SPI_WL12XX_H - -struct wl12xx_platform_data { - void (*set_power)(bool enable); - /* SDIO only: IRQ number if WLAN_IRQ line is used, 0 for SDIO IRQs */ - int irq; - bool use_eeprom; -}; - -#endif diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h new file mode 100644 index 00000000000..015687a1776 --- /dev/null +++ b/include/linux/wl12xx.h @@ -0,0 +1,34 @@ +/* + * This file is part of wl12xx + * + * Copyright (C) 2009 Nokia Corporation + * + * Contact: Luciano Coelho + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef _LINUX_WL12XX_H +#define _LINUX_WL12XX_H + +struct wl12xx_platform_data { + void (*set_power)(bool enable); + /* SDIO only: IRQ number if WLAN_IRQ line is used, 0 for SDIO IRQs */ + int irq; + bool use_eeprom; +}; + +#endif -- cgit v1.2.3-18-g5258 From 817f2c842d6c38acfd58d20d29ba583ec467ae35 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Mon, 20 Sep 2010 11:44:00 +0530 Subject: Fix various typos of valid in comments Fix various typos of valid. Signed-off-by: Nikanth Karthikesan Signed-off-by: Jiri Kosina --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index f010f18a0f8..9eee84b24a4 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -340,7 +340,7 @@ enum { ATA_EHI_DID_HARDRESET = (1 << 17), /* already soft-reset this port */ ATA_EHI_PRINTINFO = (1 << 18), /* print configuration info */ ATA_EHI_SETMODE = (1 << 19), /* configure transfer mode */ - ATA_EHI_POST_SETMODE = (1 << 20), /* revaildating after setmode */ + ATA_EHI_POST_SETMODE = (1 << 20), /* revalidating after setmode */ ATA_EHI_DID_RESET = ATA_EHI_DID_SOFTRESET | ATA_EHI_DID_HARDRESET, -- cgit v1.2.3-18-g5258 From 61ee7007a5d61aa066076da578e8e8084e122d7d Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Thu, 16 Sep 2010 01:31:12 +0200 Subject: wl12xx: add platform data passing support Add a simple mechanism to pass platform data to the SDIO instances of wl12xx. This way there is no confusion over who owns the 'embedded data', typechecking is preserved, and no possibility for the wrong driver to pick up the data. Originally proposed by Russell King. Signed-off-by: Ohad Ben-Cohen Signed-off-by: John W. Linville --- include/linux/wl12xx.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index 015687a1776..bd70563107f 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -31,4 +31,7 @@ struct wl12xx_platform_data { bool use_eeprom; }; +int wl12xx_set_platform_data(const struct wl12xx_platform_data *data); +const struct wl12xx_platform_data *wl12xx_get_platform_data(void); + #endif -- cgit v1.2.3-18-g5258 From 15cea99306ae14ce5f7c3d3989bcc17202e2b0be Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Thu, 16 Sep 2010 01:31:51 +0200 Subject: wl1271: make ref_clock configurable by board The wl1271 device is using a reference clock that may change between board to board. Make the ref_clock parameter configurable by board settings instead of having a hard coded value in the sources. Signed-off-by: Ohad Ben-Cohen Signed-off-by: John W. Linville --- include/linux/wl12xx.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index bd70563107f..95deae3968f 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -29,6 +29,7 @@ struct wl12xx_platform_data { /* SDIO only: IRQ number if WLAN_IRQ line is used, 0 for SDIO IRQs */ int irq; bool use_eeprom; + int board_ref_clock; }; int wl12xx_set_platform_data(const struct wl12xx_platform_data *data); -- cgit v1.2.3-18-g5258 From f4bc17cdd205ebaa3807c2aa973719bb5ce6a5b2 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 21 Sep 2010 17:35:41 +0200 Subject: ipvs: netfilter connection tracking changes Add more code to IPVS to work with Netfilter connection tracking and fix some problems. - Allow IPVS to be compiled without connection tracking as in 2.6.35 and before. This can avoid keeping conntracks for all IPVS connections because this costs memory. ip_vs_ftp still depends on connection tracking and NAT as implemented for 2.6.36. - Add sysctl var "conntrack" to enable connection tracking for all IPVS connections. For loaded IPVS directors it needs tuning of nf_conntrack_max limit. - Add IP_VS_CONN_F_NFCT connection flag to request the connection to use connection tracking. This allows user space to provide this flag, for example, in dest->conn_flags. This can be useful to request connection tracking per real server instead of forcing it for all connections with the "conntrack" sysctl. This flag is set currently only by ip_vs_ftp and of course by "conntrack" sysctl. - Add ip_vs_nfct.c file to hold all connection tracking code, by this way main code should not depend of netfilter conntrack support. - Return back the ip_vs_post_routing handler as in 2.6.35 and use skb->ipvs_property=1 to allow IPVS to work without connection tracking Connection tracking: - most of the code is already in 2.6.36-rc - alter conntrack reply tuple for LVS-NAT connections when first packet from client is forwarded and conntrack state is NEW or RELATED. Additionally, alter reply for RELATED connections from real server, again for packet in original direction. - add IP_VS_XMIT_TUNNEL to confirm conntrack (without altering reply) for LVS-TUN early because we want to call nf_reset. It is needed because we add IPIP header and the original conntrack should be preserved, not destroyed. The transmitted IPIP packets can reuse same conntrack, so we do not set skb->ipvs_property. - try to destroy conntrack when the IPVS connection is destroyed. It is not fatal if conntrack disappears before that, it depends on the used timers. Fix problems from long time: - add skb->ip_summed = CHECKSUM_NONE for the LVS-TUN transmitters Signed-off-by: Julian Anastasov Signed-off-by: Patrick McHardy --- include/linux/ip_vs.h | 2 ++ include/net/ip_vs.h | 44 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index 003d75f6ffe..df772861372 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -90,10 +90,12 @@ #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ /* Flags that are not sent to backup server start from bit 16 */ +#define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ /* Connection flags from destination that can be changed by user space */ #define IP_VS_CONN_F_DEST_MASK (IP_VS_CONN_F_FWD_MASK | \ IP_VS_CONN_F_ONE_PACKET | \ + IP_VS_CONN_F_NFCT | \ 0) #define IP_VS_SCHEDNAME_MAXLEN 16 diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 62698a9c163..e8ec5231eae 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -25,7 +25,9 @@ #include #include /* for struct ipv6hdr */ #include /* for ipv6_addr_copy */ - +#ifdef CONFIG_IP_VS_NFCT +#include +#endif /* Connections' size value needed by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; @@ -798,6 +800,7 @@ extern int sysctl_ip_vs_expire_nodest_conn; extern int sysctl_ip_vs_expire_quiescent_template; extern int sysctl_ip_vs_sync_threshold[2]; extern int sysctl_ip_vs_nat_icmp_send; +extern int sysctl_ip_vs_conntrack; extern struct ip_vs_stats ip_vs_stats; extern const struct ctl_path net_vs_ctl_path[]; @@ -955,8 +958,47 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) return csum_partial(diff, sizeof(diff), oldsum); } +#ifdef CONFIG_IP_VS_NFCT +/* + * Netfilter connection tracking + * (from ip_vs_nfct.c) + */ +static inline int ip_vs_conntrack_enabled(void) +{ + return sysctl_ip_vs_conntrack; +} + extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin); +extern int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp); +extern void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, + struct ip_vs_conn *cp, u_int8_t proto, + const __be16 port, int from_rs); +extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp); + +#else + +static inline int ip_vs_conntrack_enabled(void) +{ + return 0; +} + +static inline void ip_vs_update_conntrack(struct sk_buff *skb, + struct ip_vs_conn *cp, int outin) +{ +} + +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, + struct ip_vs_conn *cp) +{ + return NF_ACCEPT; +} + +static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) +{ +} +/* CONFIG_IP_VS_NFCT */ +#endif #endif /* __KERNEL__ */ -- cgit v1.2.3-18-g5258 From 8a8030407f55a6aaedb51167c1a2383311fcd707 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 21 Sep 2010 17:38:57 +0200 Subject: ipvs: make rerouting optional with snat_reroute Add new sysctl flag "snat_reroute". Recent kernels use ip_route_me_harder() to route LVS-NAT responses properly by VIP when there are multiple paths to client. But setups that do not have alternative default routes can skip this routing lookup by using snat_reroute=0. Signed-off-by: Julian Anastasov Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index e8ec5231eae..3915a4f4cd3 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -801,6 +801,7 @@ extern int sysctl_ip_vs_expire_quiescent_template; extern int sysctl_ip_vs_sync_threshold[2]; extern int sysctl_ip_vs_nat_icmp_send; extern int sysctl_ip_vs_conntrack; +extern int sysctl_ip_vs_snat_reroute; extern struct ip_vs_stats ip_vs_stats; extern const struct ctl_path net_vs_ctl_path[]; -- cgit v1.2.3-18-g5258 From c22ab7816fd81efceefa96b00c4ad62cf657964b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 20 Sep 2010 08:41:47 +0000 Subject: ethtool: Define RX n-tuple action to clear a rule Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index b67af60a889..3350870001f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -492,11 +492,12 @@ struct ethtool_rx_ntuple_flow_spec { __u64 data_mask; __s32 action; -#define ETHTOOL_RXNTUPLE_ACTION_DROP -1 /* drop packet */ +#define ETHTOOL_RXNTUPLE_ACTION_DROP (-1) /* drop packet */ +#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2) /* clear filter */ }; /** - * struct ethtool_rx_ntuple - command to set RX flow filter + * struct ethtool_rx_ntuple - command to set or clear RX flow filter * @cmd: Command number - %ETHTOOL_SRXNTUPLE * @fs: Flow filter specification */ -- cgit v1.2.3-18-g5258 From 6099e3dea9aaa6127cea0610533221c9e956f009 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 20 Sep 2010 08:42:08 +0000 Subject: ethtool: Add Ethernet MAC-level filtering/steering Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 3350870001f..8a3338ceb43 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -14,6 +14,7 @@ #define _LINUX_ETHTOOL_H #include +#include /* This should work for both 32 and 64 bit userland. */ struct ethtool_cmd { @@ -391,6 +392,7 @@ struct ethtool_rx_flow_spec { struct ethtool_ah_espip4_spec ah_ip4_spec; struct ethtool_ah_espip4_spec esp_ip4_spec; struct ethtool_usrip4_spec usr_ip4_spec; + struct ethhdr ether_spec; __u8 hdata[72]; } h_u, m_u; __u64 ring_cookie; @@ -483,6 +485,7 @@ struct ethtool_rx_ntuple_flow_spec { struct ethtool_ah_espip4_spec ah_ip4_spec; struct ethtool_ah_espip4_spec esp_ip4_spec; struct ethtool_usrip4_spec usr_ip4_spec; + struct ethhdr ether_spec; __u8 hdata[72]; } h_u, m_u; @@ -841,7 +844,7 @@ struct ethtool_ops { #define WAKE_MAGIC (1 << 5) #define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */ -/* L3-L4 network traffic flow types */ +/* L2-L4 network traffic flow types */ #define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ #define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ #define SCTP_V4_FLOW 0x03 /* hash or spec (sctp_ip4_spec) */ @@ -857,6 +860,7 @@ struct ethtool_ops { #define IP_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */ #define IPV4_FLOW 0x10 /* hash only */ #define IPV6_FLOW 0x11 /* hash only */ +#define ETHER_FLOW 0x12 /* spec only (ether_spec) */ /* L3-L4 network traffic flow hash options */ #define RXH_L2DA (1 << 1) -- cgit v1.2.3-18-g5258 From 756e64a0b106f1a2ca96889c39ea0d48131105c0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Sep 2010 06:43:54 +0000 Subject: net: constify some ppp/pptp structs Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/if_pppox.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 29bcd55851e..397921b09ef 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -204,7 +204,7 @@ struct pppox_proto { struct module *owner; }; -extern int register_pppox_proto(int proto_num, struct pppox_proto *pp); +extern int register_pppox_proto(int proto_num, const struct pppox_proto *pp); extern void unregister_pppox_proto(int proto_num); extern void pppox_unbind_sock(struct sock *sk);/* delete ppp-channel binding */ extern int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); -- cgit v1.2.3-18-g5258 From 48daa3bb84d547828871534caa51427a3fe90748 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Sep 2010 06:57:39 +0000 Subject: ipv6: addrconf.h cleanups - Use rcu_dereference_rtnl() in __in6_dev_get - kerneldoc for __in6_dev_get() and in6_dev_get() - Use inline functions instead of macros Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/addrconf.h | 63 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 45375b41a2a..7d178a758ac 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -174,20 +174,32 @@ extern int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, extern int register_inet6addr_notifier(struct notifier_block *nb); extern int unregister_inet6addr_notifier(struct notifier_block *nb); -static inline struct inet6_dev * -__in6_dev_get(struct net_device *dev) +/** + * __in6_dev_get - get inet6_dev pointer from netdevice + * @dev: network device + * + * Caller must hold rcu_read_lock or RTNL, because this function + * does not take a reference on the inet6_dev. + */ +static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev) { - return rcu_dereference_check(dev->ip6_ptr, - rcu_read_lock_held() || - lockdep_rtnl_is_held()); + return rcu_dereference_rtnl(dev->ip6_ptr); } -static inline struct inet6_dev * -in6_dev_get(struct net_device *dev) +/** + * in6_dev_get - get inet6_dev pointer from netdevice + * @dev: network device + * + * This version can be used in any context, and takes a reference + * on the inet6_dev. Callers must use in6_dev_put() later to + * release this reference. + */ +static inline struct inet6_dev *in6_dev_get(const struct net_device *dev) { - struct inet6_dev *idev = NULL; + struct inet6_dev *idev; + rcu_read_lock(); - idev = __in6_dev_get(dev); + idev = rcu_dereference(dev->ip6_ptr); if (idev) atomic_inc(&idev->refcnt); rcu_read_unlock(); @@ -196,16 +208,21 @@ in6_dev_get(struct net_device *dev) extern void in6_dev_finish_destroy(struct inet6_dev *idev); -static inline void -in6_dev_put(struct inet6_dev *idev) +static inline void in6_dev_put(struct inet6_dev *idev) { if (atomic_dec_and_test(&idev->refcnt)) in6_dev_finish_destroy(idev); } -#define __in6_dev_put(idev) atomic_dec(&(idev)->refcnt) -#define in6_dev_hold(idev) atomic_inc(&(idev)->refcnt) +static inline void __in6_dev_put(struct inet6_dev *idev) +{ + atomic_dec(&idev->refcnt); +} +static inline void in6_dev_hold(struct inet6_dev *idev) +{ + atomic_inc(&idev->refcnt); +} extern void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); @@ -215,9 +232,15 @@ static inline void in6_ifa_put(struct inet6_ifaddr *ifp) inet6_ifa_finish_destroy(ifp); } -#define __in6_ifa_put(ifp) atomic_dec(&(ifp)->refcnt) -#define in6_ifa_hold(ifp) atomic_inc(&(ifp)->refcnt) +static inline void __in6_ifa_put(struct inet6_ifaddr *ifp) +{ + atomic_dec(&ifp->refcnt); +} +static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) +{ + atomic_inc(&ifp->refcnt); +} /* @@ -240,23 +263,23 @@ static inline int ipv6_addr_is_multicast(const struct in6_addr *addr) static inline int ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr) { - return (((addr->s6_addr32[0] ^ htonl(0xff020000)) | + return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | - (addr->s6_addr32[3] ^ htonl(0x00000001))) == 0); + (addr->s6_addr32[3] ^ htonl(0x00000001))) == 0; } static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) { - return (((addr->s6_addr32[0] ^ htonl(0xff020000)) | + return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | - (addr->s6_addr32[3] ^ htonl(0x00000002))) == 0); + (addr->s6_addr32[3] ^ htonl(0x00000002))) == 0; } extern int __ipv6_isatap_ifid(u8 *eui, __be32 addr); static inline int ipv6_addr_is_isatap(const struct in6_addr *addr) { - return ((addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE)); + return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); } #ifdef CONFIG_PROC_FS -- cgit v1.2.3-18-g5258 From 8b008faf92ac8f7eeb65e8cd36077601af7c46db Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 22 Sep 2010 08:36:59 +0200 Subject: netfilter: ctnetlink: allow to specify the expectation flags With this patch, you can specify the expectation flags for user-space created expectations. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_common.h | 4 ++++ include/linux/netfilter/nfnetlink_conntrack.h | 1 + include/net/netfilter/nf_conntrack_expect.h | 3 --- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 1afd18c855e..fdc50cae861 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -100,6 +100,10 @@ enum ip_conntrack_expect_events { IPEXP_NEW, /* new expectation */ }; +/* expectation flags */ +#define NF_CT_EXPECT_PERMANENT 0x1 +#define NF_CT_EXPECT_INACTIVE 0x2 + #ifdef __KERNEL__ struct ip_conntrack_stat { unsigned int searched; diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 9ed534c991b..455f0ce4f43 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -161,6 +161,7 @@ enum ctattr_expect { CTA_EXPECT_ID, CTA_EXPECT_HELP_NAME, CTA_EXPECT_ZONE, + CTA_EXPECT_FLAGS, __CTA_EXPECT_MAX }; #define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1) diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 11e815084fc..96bb42af5fa 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -67,9 +67,6 @@ struct nf_conntrack_expect_policy { #define NF_CT_EXPECT_CLASS_DEFAULT 0 -#define NF_CT_EXPECT_PERMANENT 0x1 -#define NF_CT_EXPECT_INACTIVE 0x2 - int nf_conntrack_expect_init(struct net *net); void nf_conntrack_expect_fini(struct net *net); -- cgit v1.2.3-18-g5258 From 676cb02dc32adef13d9efb5ea52079e4ede1e3ec Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 20 Jul 2009 23:33:49 +0200 Subject: softirqs: Make wakeup_softirqd static No users outside of kernel/softirq.c Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a0384a4d1e6..0a9141e6924 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -410,7 +410,6 @@ extern void softirq_init(void); #define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); -extern void wakeup_softirqd(void); /* This is the worklist that queues up per-cpu softirq work. * -- cgit v1.2.3-18-g5258 From bf5438fca2950b03c21ad868090cc1a8fcd49536 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 17 Sep 2010 11:09:00 -0400 Subject: jump label: Base patch for jump label base patch to implement 'jump labeling'. Based on a new 'asm goto' inline assembly gcc mechanism, we can now branch to labels from an 'asm goto' statment. This allows us to create a 'no-op' fastpath, which can subsequently be patched with a jump to the slowpath code. This is useful for code which might be rarely used, but which we'd like to be able to call, if needed. Tracepoints are the current usecase that these are being implemented for. Acked-by: David S. Miller Signed-off-by: Jason Baron LKML-Reference: [ cleaned up some formating ] Signed-off-by: Steven Rostedt --- include/asm-generic/vmlinux.lds.h | 10 +++++++ include/linux/jump_label.h | 58 +++++++++++++++++++++++++++++++++++++++ include/linux/module.h | 5 +++- 3 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 include/linux/jump_label.h (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8a92a170fb7..ef2af9948ea 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -220,6 +220,8 @@ \ BUG_TABLE \ \ + JUMP_TABLE \ + \ /* PCI quirks */ \ .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ @@ -563,6 +565,14 @@ #define BUG_TABLE #endif +#define JUMP_TABLE \ + . = ALIGN(8); \ + __jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start___jump_table) = .; \ + *(__jump_table) \ + VMLINUX_SYMBOL(__stop___jump_table) = .; \ + } + #ifdef CONFIG_PM_TRACE #define TRACEDATA \ . = ALIGN(4); \ diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h new file mode 100644 index 00000000000..de58656d28e --- /dev/null +++ b/include/linux/jump_label.h @@ -0,0 +1,58 @@ +#ifndef _LINUX_JUMP_LABEL_H +#define _LINUX_JUMP_LABEL_H + +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL) +# include +# define HAVE_JUMP_LABEL +#endif + +enum jump_label_type { + JUMP_LABEL_ENABLE, + JUMP_LABEL_DISABLE +}; + +struct module; + +#ifdef HAVE_JUMP_LABEL + +extern struct jump_entry __start___jump_table[]; +extern struct jump_entry __stop___jump_table[]; + +extern void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type); +extern void jump_label_update(unsigned long key, enum jump_label_type type); +extern void jump_label_apply_nops(struct module *mod); +extern void arch_jump_label_text_poke_early(jump_label_t addr); + +#define enable_jump_label(key) \ + jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); + +#define disable_jump_label(key) \ + jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE); + +#else + +#define JUMP_LABEL(key, label) \ +do { \ + if (unlikely(*key)) \ + goto label; \ +} while (0) + +#define enable_jump_label(cond_var) \ +do { \ + *(cond_var) = 1; \ +} while (0) + +#define disable_jump_label(cond_var) \ +do { \ + *(cond_var) = 0; \ +} while (0) + +static inline int jump_label_apply_nops(struct module *mod) +{ + return 0; +} + +#endif + +#endif diff --git a/include/linux/module.h b/include/linux/module.h index 8a6b9fdc7ff..403ac26023c 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -350,7 +350,10 @@ struct module struct tracepoint *tracepoints; unsigned int num_tracepoints; #endif - +#ifdef HAVE_JUMP_LABEL + struct jump_entry *jump_entries; + unsigned int num_jump_entries; +#endif #ifdef CONFIG_TRACING const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; -- cgit v1.2.3-18-g5258 From 4c3ef6d79328c0e23ade60cbfc8d496123a6855c Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 17 Sep 2010 11:09:08 -0400 Subject: jump label: Add jump_label_text_reserved() to reserve jump points Add a jump_label_text_reserved(void *start, void *end), so that other pieces of code that want to modify kernel text, can first verify that jump label has not reserved the instruction. Acked-by: Masami Hiramatsu Signed-off-by: Jason Baron LKML-Reference: <06236663a3a7b1c1f13576bb9eccb6d9c17b7bfe.1284733808.git.jbaron@redhat.com> Signed-off-by: Steven Rostedt --- include/linux/jump_label.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index de58656d28e..b72cd9f92c2 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -20,9 +20,10 @@ extern struct jump_entry __stop___jump_table[]; extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); +extern void arch_jump_label_text_poke_early(jump_label_t addr); extern void jump_label_update(unsigned long key, enum jump_label_type type); extern void jump_label_apply_nops(struct module *mod); -extern void arch_jump_label_text_poke_early(jump_label_t addr); +extern int jump_label_text_reserved(void *start, void *end); #define enable_jump_label(key) \ jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); @@ -53,6 +54,11 @@ static inline int jump_label_apply_nops(struct module *mod) return 0; } +static inline int jump_label_text_reserved(void *start, void *end) +{ + return 0; +} + #endif #endif -- cgit v1.2.3-18-g5258 From 8f7b50c514206211cc282a4247f7b12f18dee674 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 17 Sep 2010 11:09:13 -0400 Subject: jump label: Tracepoint support for jump labels Make use of the jump label infrastructure for tracepoints. Signed-off-by: Jason Baron LKML-Reference: Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 103d1b61aac..a4a90b6726c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -17,6 +17,7 @@ #include #include #include +#include struct module; struct tracepoint; @@ -145,7 +146,9 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - if (unlikely(__tracepoint_##name.state)) \ + JUMP_LABEL(&__tracepoint_##name.state, do_trace); \ + return; \ +do_trace: \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args)); \ -- cgit v1.2.3-18-g5258 From 52159d98be6f26c48f5e02c7ab3c9848a85979b5 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 17 Sep 2010 11:09:17 -0400 Subject: jump label: Convert dynamic debug to use jump labels Convert the 'dynamic debug' infrastructure to use jump labels. Signed-off-by: Jason Baron LKML-Reference: Signed-off-by: Steven Rostedt --- include/linux/dynamic_debug.h | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 52c0da4bdd1..bef3cda44c4 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -1,6 +1,8 @@ #ifndef _DYNAMIC_DEBUG_H #define _DYNAMIC_DEBUG_H +#include + /* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They * use independent hash functions, to reduce the chance of false positives. @@ -22,8 +24,6 @@ struct _ddebug { const char *function; const char *filename; const char *format; - char primary_hash; - char secondary_hash; unsigned int lineno:24; /* * The flags field controls the behaviour at the callsite. @@ -33,6 +33,7 @@ struct _ddebug { #define _DPRINTK_FLAGS_PRINT (1<<0) /* printk() a message using the format */ #define _DPRINTK_FLAGS_DEFAULT 0 unsigned int flags:8; + char enabled; } __attribute__((aligned(8))); @@ -42,33 +43,35 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, #if defined(CONFIG_DYNAMIC_DEBUG) extern int ddebug_remove_module(const char *mod_name); -#define __dynamic_dbg_enabled(dd) ({ \ - int __ret = 0; \ - if (unlikely((dynamic_debug_enabled & (1LL << DEBUG_HASH)) && \ - (dynamic_debug_enabled2 & (1LL << DEBUG_HASH2)))) \ - if (unlikely(dd.flags)) \ - __ret = 1; \ - __ret; }) - #define dynamic_pr_debug(fmt, ...) do { \ + __label__ do_printk; \ + __label__ out; \ static struct _ddebug descriptor \ __used \ __attribute__((section("__verbose"), aligned(8))) = \ - { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ - DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ - if (__dynamic_dbg_enabled(descriptor)) \ - printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ + { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ + _DPRINTK_FLAGS_DEFAULT }; \ + JUMP_LABEL(&descriptor.enabled, do_printk); \ + goto out; \ +do_printk: \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ +out: ; \ } while (0) #define dynamic_dev_dbg(dev, fmt, ...) do { \ + __label__ do_printk; \ + __label__ out; \ static struct _ddebug descriptor \ __used \ __attribute__((section("__verbose"), aligned(8))) = \ - { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ - DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ - if (__dynamic_dbg_enabled(descriptor)) \ - dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ + { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ + _DPRINTK_FLAGS_DEFAULT }; \ + JUMP_LABEL(&descriptor.enabled, do_printk); \ + goto out; \ +do_printk: \ + dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ +out: ; \ } while (0) #else -- cgit v1.2.3-18-g5258 From d3f3cf859db17cc5f8156c5bfcd032413e44483b Mon Sep 17 00:00:00 2001 From: Mathieu Lacage Date: Sat, 14 Aug 2010 15:02:44 +0200 Subject: missing inline keyword for static function in linux/dmaengine.h Add a missing inline keyword for static function in linux/dmaengine.h to avoid duplicate symbol definitions. Signed-off-by: Mathieu Lacage Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c61d4ca27bc..e2106495cc1 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -548,7 +548,7 @@ static inline bool dma_dev_has_pq_continue(struct dma_device *dma) return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; } -static unsigned short dma_dev_to_maxpq(struct dma_device *dma) +static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma) { return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; } -- cgit v1.2.3-18-g5258 From ed9f524ac79457f0c547c85746b19b92526be612 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 16 Sep 2010 01:30:19 +0900 Subject: ida: document IDA_BITMAP_LONGS calculation IDA_BITMAP_LONGS value is calculated take into account struct ida_bitmap not to waste memory space. Comment it. Signed-off-by: Namhyung Kim Acked-by: Tejun Heo Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina --- include/linux/idr.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index e968db71e33..88607523e2d 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -117,10 +117,13 @@ void idr_init(struct idr *idp); /* * IDA - IDR based id allocator, use when translation from id to * pointer isn't necessary. + * + * IDA_BITMAP_LONGS is calculated to be one less to accommodate + * ida_bitmap->nr_busy so that the whole struct fits in 128 bytes. */ #define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ -#define IDA_BITMAP_LONGS (128 / sizeof(long) - 1) -#define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8) +#define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long) - 1) +#define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8) struct ida_bitmap { long nr_busy; -- cgit v1.2.3-18-g5258 From b3a084b9b684622b149e8dcf03855bf0d5fb588b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Sep 2010 08:38:44 +0200 Subject: rcu: rcu_read_lock_bh_held(): disabling irqs also disables bh rcu_dereference_bh() doesnt know yet about hard irq being disabled, so lockdep can trigger in netpoll_rx() after commit f0f9deae9e7c4 (netpoll: Disable IRQ around RCU dereference in netpoll_rx) Reported-by: Miles Lane Signed-off-by: Eric Dumazet Tested-by: Miles Lane Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9fbc54a2585..83af1f8d8b7 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -454,7 +454,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * Makes rcu_dereference_check() do the dirty work. */ #define rcu_dereference_bh(p) \ - rcu_dereference_check(p, rcu_read_lock_bh_held()) + rcu_dereference_check(p, rcu_read_lock_bh_held() || irqs_disabled()) /** * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched -- cgit v1.2.3-18-g5258 From 53ecfba259f54b6967a35d19f4a564e3bc07997f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Sep 2010 17:24:21 -0700 Subject: rcu: only one evaluation of arg in rcu_dereference_check() unless sparse The current version of the __rcu_access_pointer(), __rcu_dereference_check(), and __rcu_dereference_protected() macros evaluate their "p" argument three times, not counting typeof()s. This is bad news if that argument contains a side effect. This commit therefore evaluates this argument only once in normal kernel builds. However, the straightforward approach defeats sparse's RCU-pointer checking, so when __CHECKER__ is defined, the additional pair of evaluations of the "p" argument are performed in order to permit sparse to detect misuse of RCU-protected pointers. Signed-off-by: Paul E. McKenney Cc: Arnd Bergmann --- include/linux/rcupdate.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 89414d67d96..03cda7bed98 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -310,24 +310,32 @@ extern int rcu_my_thread_group_empty(void); * (e.g., __rcu_bh, * __rcu_sched, and __srcu), should this make sense in * the future. */ + +#ifdef __CHECKER__ +#define rcu_dereference_sparse(p, space) \ + ((void)(((typeof(*p) space *)p) == p)) +#else /* #ifdef __CHECKER__ */ +#define rcu_dereference_sparse(p, space) +#endif /* #else #ifdef __CHECKER__ */ + #define __rcu_access_pointer(p, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ - (void) (((typeof (*p) space *)p) == p); \ + rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_check(p, c, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ rcu_lockdep_assert(c); \ - (void) (((typeof (*p) space *)p) == p); \ + rcu_dereference_sparse(p, space); \ smp_read_barrier_depends(); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ rcu_lockdep_assert(c); \ - (void) (((typeof (*p) space *)p) == p); \ + rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) -- cgit v1.2.3-18-g5258 From d1ea13c6e2cce0106531852daaa93dd97aec9580 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 23 Sep 2010 18:40:07 +0200 Subject: genirq: Cleanup irq_chip->typename leftovers 3 years transition phase is enough. Cleanup the last users and remove the cruft. Signed-off-by: Thomas Gleixner Cc: Leo Chen Cc: Hirokazu Takata Cc: Chris Metcalf Cc: Jeff Dike Cc: Chris Zankel --- include/linux/irq.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index c03243ad84b..06273a2a17e 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -106,7 +106,6 @@ struct msi_desc; * @bus_sync_unlock: function to sync and unlock slow bus (i2c) chips * * @release: release function solely used by UML - * @typename: obsoleted by name, kept as migration helper */ struct irq_chip { const char *name; @@ -135,11 +134,6 @@ struct irq_chip { #ifdef CONFIG_IRQ_RELEASE_METHOD void (*release)(unsigned int irq, void *dev_id); #endif - /* - * For compatibility, ->typename is copied into ->name. - * Will disappear. - */ - const char *typename; }; struct timer_rand_state; -- cgit v1.2.3-18-g5258 From a02cec2155fbea457eca8881870fd2de1a4c4c76 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Sep 2010 20:43:57 +0000 Subject: net: return operator cleanup Change "return (EXPR);" to "return EXPR;" return is not a function, parentheses are not required. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/atmdev.h | 2 +- include/linux/etherdevice.h | 4 ++-- include/linux/netdevice.h | 2 +- include/linux/skbuff.h | 6 +++--- include/net/bluetooth/hci_core.h | 2 +- include/net/bluetooth/l2cap.h | 2 +- include/net/inet_ecn.h | 2 +- include/net/ip.h | 4 ++-- include/net/ipv6.h | 35 +++++++++++++++++------------------ include/net/irda/irlap.h | 2 +- include/net/irda/irlmp.h | 2 +- include/net/irda/irttp.h | 2 +- include/net/sch_generic.h | 2 +- include/net/sctp/sctp.h | 12 ++++++------ include/net/sctp/sm.h | 10 +++++----- include/net/sctp/structs.h | 2 +- include/net/sctp/tsnmap.h | 2 +- include/net/tipc/tipc_msg.h | 10 +++++----- 18 files changed, 51 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index f6481daf6e5..a8e4e832cdb 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -449,7 +449,7 @@ void vcc_insert_socket(struct sock *sk); static inline int atm_guess_pdu2truesize(int size) { - return (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info)); + return SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info); } diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index fb6aa607092..f16a01081e1 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -71,7 +71,7 @@ static inline int is_zero_ether_addr(const u8 *addr) */ static inline int is_multicast_ether_addr(const u8 *addr) { - return (0x01 & addr[0]); + return 0x01 & addr[0]; } /** @@ -82,7 +82,7 @@ static inline int is_multicast_ether_addr(const u8 *addr) */ static inline int is_local_ether_addr(const u8 *addr) { - return (0x02 & addr[0]); + return 0x02 & addr[0]; } /** diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f7f1302138a..45dcda5bfda 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1676,7 +1676,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) */ static inline int netif_is_multiqueue(const struct net_device *dev) { - return (dev->num_tx_queues > 1); + return dev->num_tx_queues > 1; } extern void netif_set_real_num_tx_queues(struct net_device *dev, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9e8085a8958..b2c41d19735 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -601,7 +601,7 @@ static inline int skb_queue_empty(const struct sk_buff_head *list) static inline bool skb_queue_is_last(const struct sk_buff_head *list, const struct sk_buff *skb) { - return (skb->next == (struct sk_buff *) list); + return skb->next == (struct sk_buff *)list; } /** @@ -614,7 +614,7 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list, static inline bool skb_queue_is_first(const struct sk_buff_head *list, const struct sk_buff *skb) { - return (skb->prev == (struct sk_buff *) list); + return skb->prev == (struct sk_buff *)list; } /** @@ -2156,7 +2156,7 @@ static inline u16 skb_get_rx_queue(const struct sk_buff *skb) static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) { - return (skb->queue_mapping != 0); + return skb->queue_mapping != 0; } extern u16 skb_tx_hash(const struct net_device *dev, diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4568b938ca3..ebec8c9a929 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -233,7 +233,7 @@ static inline void inquiry_cache_init(struct hci_dev *hdev) static inline int inquiry_cache_empty(struct hci_dev *hdev) { struct inquiry_cache *c = &hdev->inq_cache; - return (c->list == NULL); + return c->list == NULL; } static inline long inquiry_cache_age(struct hci_dev *hdev) diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 6c241444f90..c819c8bf9b6 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -414,7 +414,7 @@ static inline int l2cap_tx_window_full(struct sock *sk) if (sub < 0) sub += 64; - return (sub == pi->remote_tx_win); + return sub == pi->remote_tx_win; } #define __get_txseq(ctrl) ((ctrl) & L2CAP_CTRL_TXSEQ) >> 1 diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 9b5d08f4f6e..88bdd010d65 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -27,7 +27,7 @@ static inline int INET_ECN_is_not_ect(__u8 dsfield) static inline int INET_ECN_is_capable(__u8 dsfield) { - return (dsfield & INET_ECN_ECT_0); + return dsfield & INET_ECN_ECT_0; } static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) diff --git a/include/net/ip.h b/include/net/ip.h index 7691aca133d..dbee3fe260e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -238,9 +238,9 @@ int ip_decrease_ttl(struct iphdr *iph) static inline int ip_dont_fragment(struct sock *sk, struct dst_entry *dst) { - return (inet_sk(sk)->pmtudisc == IP_PMTUDISC_DO || + return inet_sk(sk)->pmtudisc == IP_PMTUDISC_DO || (inet_sk(sk)->pmtudisc == IP_PMTUDISC_WANT && - !(dst_metric_locked(dst, RTAX_MTU)))); + !(dst_metric_locked(dst, RTAX_MTU))); } extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 1f841241099..4a3cd2cd2f5 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -262,7 +262,7 @@ static inline int ipv6_addr_scope(const struct in6_addr *addr) static inline int __ipv6_addr_src_scope(int type) { - return (type == IPV6_ADDR_ANY ? __IPV6_ADDR_SCOPE_INVALID : (type >> 16)); + return (type == IPV6_ADDR_ANY) ? __IPV6_ADDR_SCOPE_INVALID : (type >> 16); } static inline int ipv6_addr_src_scope(const struct in6_addr *addr) @@ -279,10 +279,10 @@ static inline int ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, const struct in6_addr *a2) { - return (!!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | - ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | - ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & m->s6_addr32[2]) | - ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3]))); + return !!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | + ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | + ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & m->s6_addr32[2]) | + ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3])); } static inline void ipv6_addr_copy(struct in6_addr *a1, const struct in6_addr *a2) @@ -317,10 +317,10 @@ static inline void ipv6_addr_set(struct in6_addr *addr, static inline int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) { - return (((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | - (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | - (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | - (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0); + return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | + (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | + (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | + (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; } static inline int __ipv6_prefix_equal(const __be32 *a1, const __be32 *a2, @@ -373,20 +373,20 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a); static inline int ipv6_addr_any(const struct in6_addr *a) { - return ((a->s6_addr32[0] | a->s6_addr32[1] | - a->s6_addr32[2] | a->s6_addr32[3] ) == 0); + return (a->s6_addr32[0] | a->s6_addr32[1] | + a->s6_addr32[2] | a->s6_addr32[3]) == 0; } static inline int ipv6_addr_loopback(const struct in6_addr *a) { - return ((a->s6_addr32[0] | a->s6_addr32[1] | - a->s6_addr32[2] | (a->s6_addr32[3] ^ htonl(1))) == 0); + return (a->s6_addr32[0] | a->s6_addr32[1] | + a->s6_addr32[2] | (a->s6_addr32[3] ^ htonl(1))) == 0; } static inline int ipv6_addr_v4mapped(const struct in6_addr *a) { - return ((a->s6_addr32[0] | a->s6_addr32[1] | - (a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0); + return (a->s6_addr32[0] | a->s6_addr32[1] | + (a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0; } /* @@ -395,8 +395,7 @@ static inline int ipv6_addr_v4mapped(const struct in6_addr *a) */ static inline int ipv6_addr_orchid(const struct in6_addr *a) { - return ((a->s6_addr32[0] & htonl(0xfffffff0)) - == htonl(0x20010010)); + return (a->s6_addr32[0] & htonl(0xfffffff0)) == htonl(0x20010010); } static inline void ipv6_addr_set_v4mapped(const __be32 addr, @@ -441,7 +440,7 @@ static inline int __ipv6_addr_diff(const void *token1, const void *token2, int a * if returned value is greater than prefix length. * --ANK (980803) */ - return (addrlen << 5); + return addrlen << 5; } static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_addr *a2) diff --git a/include/net/irda/irlap.h b/include/net/irda/irlap.h index 9d0c78ea92f..17fcd964f9d 100644 --- a/include/net/irda/irlap.h +++ b/include/net/irda/irlap.h @@ -282,7 +282,7 @@ static inline int irlap_is_primary(struct irlap_cb *self) default: ret = -1; } - return(ret); + return ret; } /* Clear a pending IrLAP disconnect. - Jean II */ diff --git a/include/net/irda/irlmp.h b/include/net/irda/irlmp.h index 3ffc1d0f93d..fff11b7fe8a 100644 --- a/include/net/irda/irlmp.h +++ b/include/net/irda/irlmp.h @@ -274,7 +274,7 @@ static inline int irlmp_lap_tx_queue_full(struct lsap_cb *self) if (self->lap->irlap == NULL) return 0; - return(IRLAP_GET_TX_QUEUE_LEN(self->lap->irlap) >= LAP_HIGH_THRESHOLD); + return IRLAP_GET_TX_QUEUE_LEN(self->lap->irlap) >= LAP_HIGH_THRESHOLD; } /* After doing a irlmp_dup(), this get one of the two socket back into diff --git a/include/net/irda/irttp.h b/include/net/irda/irttp.h index 11aee7a2972..af4b87721d1 100644 --- a/include/net/irda/irttp.h +++ b/include/net/irda/irttp.h @@ -204,7 +204,7 @@ static inline int irttp_is_primary(struct tsap_cb *self) (self->lsap->lap == NULL) || (self->lsap->lap->irlap == NULL)) return -2; - return(irlap_is_primary(self->lsap->lap->irlap)); + return irlap_is_primary(self->lsap->lap->irlap); } #endif /* IRTTP_H */ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 3c8728aaab4..eda8808fdac 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -601,7 +601,7 @@ static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen) slot = 0; slot >>= rtab->rate.cell_log; if (slot > 255) - return (rtab->data[255]*(slot >> 8) + rtab->data[slot & 0xFF]); + return rtab->data[255]*(slot >> 8) + rtab->data[slot & 0xFF]; return rtab->data[slot]; } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 2cb3980b161..505845ddb0b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -405,7 +405,7 @@ static inline void sctp_v6_del_protocol(void) { return; } /* Map an association to an assoc_id. */ static inline sctp_assoc_t sctp_assoc2id(const struct sctp_association *asoc) { - return (asoc?asoc->assoc_id:0); + return asoc ? asoc->assoc_id : 0; } /* Look up the association by its id. */ @@ -473,7 +473,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) /* Tests if the list has one and only one entry. */ static inline int sctp_list_single_entry(struct list_head *head) { - return ((head->next != head) && (head->next == head->prev)); + return (head->next != head) && (head->next == head->prev); } /* Generate a random jitter in the range of -50% ~ +50% of input RTO. */ @@ -631,13 +631,13 @@ static inline int sctp_sanity_check(void) /* This is the hash function for the SCTP port hash table. */ static inline int sctp_phashfn(__u16 lport) { - return (lport & (sctp_port_hashsize - 1)); + return lport & (sctp_port_hashsize - 1); } /* This is the hash function for the endpoint hash table. */ static inline int sctp_ep_hashfn(__u16 lport) { - return (lport & (sctp_ep_hashsize - 1)); + return lport & (sctp_ep_hashsize - 1); } /* This is the hash function for the association hash table. */ @@ -645,7 +645,7 @@ static inline int sctp_assoc_hashfn(__u16 lport, __u16 rport) { int h = (lport << 16) + rport; h ^= h>>8; - return (h & (sctp_assoc_hashsize - 1)); + return h & (sctp_assoc_hashsize - 1); } /* This is the hash function for the association hash table. This is @@ -656,7 +656,7 @@ static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag) { int h = (lport << 16) + rport; h ^= vtag; - return (h & (sctp_assoc_hashsize-1)); + return h & (sctp_assoc_hashsize - 1); } #define sctp_for_each_hentry(epb, node, head) \ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 4088c89a905..9352d12f02d 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -345,12 +345,12 @@ enum { static inline int TSN_lt(__u32 s, __u32 t) { - return (((s) - (t)) & TSN_SIGN_BIT); + return ((s) - (t)) & TSN_SIGN_BIT; } static inline int TSN_lte(__u32 s, __u32 t) { - return (((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT)); + return ((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT); } /* Compare two SSNs */ @@ -369,12 +369,12 @@ enum { static inline int SSN_lt(__u16 s, __u16 t) { - return (((s) - (t)) & SSN_SIGN_BIT); + return ((s) - (t)) & SSN_SIGN_BIT; } static inline int SSN_lte(__u16 s, __u16 t) { - return (((s) == (t)) || (((s) - (t)) & SSN_SIGN_BIT)); + return ((s) == (t)) || (((s) - (t)) & SSN_SIGN_BIT); } /* @@ -388,7 +388,7 @@ enum { static inline int ADDIP_SERIAL_gte(__u16 s, __u16 t) { - return (((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT)); + return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT); } /* Check VTAG of the packet matches the sender's own tag. */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f9e7473613b..69fef4fb79c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -847,7 +847,7 @@ void sctp_packet_free(struct sctp_packet *); static inline int sctp_packet_empty(struct sctp_packet *packet) { - return (packet->size == packet->overhead); + return packet->size == packet->overhead; } /* This represents a remote transport address. diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h index 4aabc5a96cf..e7728bc14cc 100644 --- a/include/net/sctp/tsnmap.h +++ b/include/net/sctp/tsnmap.h @@ -157,7 +157,7 @@ __u16 sctp_tsnmap_pending(struct sctp_tsnmap *map); /* Is there a gap in the TSN map? */ static inline int sctp_tsnmap_has_gap(const struct sctp_tsnmap *map) { - return (map->cumulative_tsn_ack_point != map->max_tsn_seen); + return map->cumulative_tsn_ack_point != map->max_tsn_seen; } /* Mark a duplicate TSN. Note: limit the storage of duplicate TSN diff --git a/include/net/tipc/tipc_msg.h b/include/net/tipc/tipc_msg.h index 2e159a812f8..ffe50b4e7b9 100644 --- a/include/net/tipc/tipc_msg.h +++ b/include/net/tipc/tipc_msg.h @@ -107,7 +107,7 @@ static inline u32 msg_hdr_sz(struct tipc_msg *m) static inline int msg_short(struct tipc_msg *m) { - return (msg_hdr_sz(m) == 24); + return msg_hdr_sz(m) == 24; } static inline u32 msg_size(struct tipc_msg *m) @@ -117,7 +117,7 @@ static inline u32 msg_size(struct tipc_msg *m) static inline u32 msg_data_sz(struct tipc_msg *m) { - return (msg_size(m) - msg_hdr_sz(m)); + return msg_size(m) - msg_hdr_sz(m); } static inline unchar *msg_data(struct tipc_msg *m) @@ -132,17 +132,17 @@ static inline u32 msg_type(struct tipc_msg *m) static inline u32 msg_named(struct tipc_msg *m) { - return (msg_type(m) == TIPC_NAMED_MSG); + return msg_type(m) == TIPC_NAMED_MSG; } static inline u32 msg_mcast(struct tipc_msg *m) { - return (msg_type(m) == TIPC_MCAST_MSG); + return msg_type(m) == TIPC_MCAST_MSG; } static inline u32 msg_connected(struct tipc_msg *m) { - return (msg_type(m) == TIPC_CONN_MSG); + return msg_type(m) == TIPC_CONN_MSG; } static inline u32 msg_errcode(struct tipc_msg *m) -- cgit v1.2.3-18-g5258 From 50bb6d8492ff0c3f204b263aff90d4a7ebf4dd90 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 23 Sep 2010 10:40:21 -0400 Subject: HID: usbhid: remove unused hiddev_driver Now that hiddev_driver isn't being used for anything, there's no reason to keep it around. This patch (as1419) gets rid of it entirely. Signed-off-by: Alan Stern Signed-off-by: Jiri Kosina --- include/linux/hiddev.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/hiddev.h b/include/linux/hiddev.h index bb6f58baf31..a3f481a3063 100644 --- a/include/linux/hiddev.h +++ b/include/linux/hiddev.h @@ -226,8 +226,6 @@ void hiddev_disconnect(struct hid_device *); void hiddev_hid_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value); void hiddev_report_event(struct hid_device *hid, struct hid_report *report); -int __init hiddev_init(void); -void hiddev_exit(void); #else static inline int hiddev_connect(struct hid_device *hid, unsigned int force) @@ -236,8 +234,6 @@ static inline void hiddev_disconnect(struct hid_device *hid) { } static inline void hiddev_hid_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) { } static inline void hiddev_report_event(struct hid_device *hid, struct hid_report *report) { } -static inline int hiddev_init(void) { return 0; } -static inline void hiddev_exit(void) { } #endif #endif -- cgit v1.2.3-18-g5258 From eb7d3066cf864342e8ae6a5c1126a1602c4d06c0 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Tue, 21 Sep 2010 21:36:18 +0200 Subject: mac80211: clear txflags for ps-filtered frames This patch fixes stale mac80211_tx_control_flags for filtered / retried frames. Because ieee80211_handle_filtered_frame feeds skbs back into the tx path, they have to be stripped of some tx flags so they won't confuse the stack, driver or device. Cc: Acked-by: Johannes Berg Signed-off-by: Christian Lamparter Signed-off-by: John W. Linville --- include/net/mac80211.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 12a49f0ba32..5d1187d7c5e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -321,6 +321,9 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_LDPC: tells the driver to use LDPC for this frame * @IEEE80211_TX_CTL_STBC: Enables Space-Time Block Coding (STBC) for this * frame and selects the maximum number of streams that it can use. + * + * Note: If you have to add new flags to the enumeration, then don't + * forget to update %IEEE80211_TX_TEMPORARY_FLAGS when necessary. */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -350,6 +353,19 @@ enum mac80211_tx_control_flags { #define IEEE80211_TX_CTL_STBC_SHIFT 23 +/* + * This definition is used as a mask to clear all temporary flags, which are + * set by the tx handlers for each transmission attempt by the mac80211 stack. + */ +#define IEEE80211_TX_TEMPORARY_FLAGS (IEEE80211_TX_CTL_NO_ACK | \ + IEEE80211_TX_CTL_CLEAR_PS_FILT | IEEE80211_TX_CTL_FIRST_FRAGMENT | \ + IEEE80211_TX_CTL_SEND_AFTER_DTIM | IEEE80211_TX_CTL_AMPDU | \ + IEEE80211_TX_STAT_TX_FILTERED | IEEE80211_TX_STAT_ACK | \ + IEEE80211_TX_STAT_AMPDU | IEEE80211_TX_STAT_AMPDU_NO_BACK | \ + IEEE80211_TX_CTL_RATE_CTRL_PROBE | IEEE80211_TX_CTL_PSPOLL_RESPONSE | \ + IEEE80211_TX_CTL_MORE_FRAMES | IEEE80211_TX_CTL_LDPC | \ + IEEE80211_TX_CTL_STBC) + /** * enum mac80211_rate_control_flags - per-rate flags set by the * Rate Control algorithm. -- cgit v1.2.3-18-g5258 From 543876c92837a8b208b5c99ec225c1f5a581900e Mon Sep 17 00:00:00 2001 From: Giuseppe Cavallaro Date: Fri, 24 Sep 2010 21:27:41 -0700 Subject: stmmac: review the wake-up support If the PM support is available this is passed through the platform instead to be hard-coded in the core files. WoL on Magic Frame can be enabled by using the ethtool support. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 1d8baf71921..d66c61774d9 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -37,6 +37,7 @@ struct plat_stmmacenet_data { int enh_desc; int tx_coe; int bugged_jumbo; + int pmt; void (*fix_mac_speed)(void *priv, unsigned int speed); void (*bus_setup)(void __iomem *ioaddr); #ifdef CONFIG_STM_DRIVERS -- cgit v1.2.3-18-g5258 From e4ecda1b60bfd2333c12bbe71b153d3b6bdc831a Mon Sep 17 00:00:00 2001 From: Mark Lord Date: Sat, 25 Sep 2010 11:17:22 +0200 Subject: Fix compile error in blk-exec.c for !CONFIG_DETECT_HUNG_TASK Ensure that 'sysctl_hung_task_timeout_secs' is defined even when CONFIG_DETECT_HUNG_TASK is not set. This way we can safely reference it without need for ifdefs in the code elsewhere. eg. in block/blk-exec.c Signed-off-by: Mark Lord Signed-off-by: Jens Axboe --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7d..dbafa9e34a2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -336,6 +336,9 @@ extern unsigned long sysctl_hung_task_warnings; extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +#else +/* Avoid need for ifdefs elsewhere in the code */ +enum { sysctl_hung_task_timeout_secs = 0 }; #endif /* Attach to any functions which should be ignored in wchan output. */ -- cgit v1.2.3-18-g5258 From f459ffbdfd04edb4a8ce6eea33170eb057a5e695 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 25 Sep 2010 17:45:50 +1000 Subject: drm/radeon: fix PCI ID 5657 to be an RV410 fixes https://bugzilla.kernel.org/show_bug.cgi?id=19012 cc: stable@kernel.org Signed-off-by: Dave Airlie --- include/drm/drm_pciids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 3a9940ef728..883c1d43989 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -85,7 +85,6 @@ {0x1002, 0x5460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ {0x1002, 0x5462, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ {0x1002, 0x5464, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ - {0x1002, 0x5657, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5548, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5549, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ {0x1002, 0x554A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ @@ -103,6 +102,7 @@ {0x1002, 0x564F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5652, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5653, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x5657, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5834, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP}, \ {0x1002, 0x5835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP|RADEON_IS_MOBILITY}, \ {0x1002, 0x5954, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS480|RADEON_IS_IGP|RADEON_IS_MOBILITY|RADEON_IS_IGPGART}, \ -- cgit v1.2.3-18-g5258 From cb4dfe562cac6fcb544df752e40c1d78000d0712 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Sep 2010 05:06:54 +0000 Subject: net: skb_frag_t can be smaller on small arches On 32bit arches, if PAGE_SIZE is smaller than 65536, we can use 16bit offset and size fields. This patch saves 72 bytes per skb on i386, or 128 bytes after rounding. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b2c41d19735..0b53c43ac92 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -129,8 +129,13 @@ typedef struct skb_frag_struct skb_frag_t; struct skb_frag_struct { struct page *page; +#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) __u32 page_offset; __u32 size; +#else + __u16 page_offset; + __u16 size; +#endif }; #define HAVE_HW_TIME_STAMP -- cgit v1.2.3-18-g5258 From 7a91b434e2bad554b709265db7603b1aa52dd92e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 26 Sep 2010 18:53:07 -0700 Subject: net: update SOCK_MIN_RCVBUF SOCK_MIN_RCVBUF current value is 256 bytes It doesnt permit to receive the smallest possible frame, considering socket sk_rmem_alloc/sk_rcvbuf account skb truesizes. On 64bit arches, sizeof(struct sk_buff) is 240 bytes. Add the typical 64 bytes of headroom, and we go over the limit. With old kernels and 32bit arches, we were under the limit, if netdriver was doing copybreak. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 8ae97c4970d..73a4f9702a6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1558,7 +1558,11 @@ static inline void sk_wake_async(struct sock *sk, int how, int band) } #define SOCK_MIN_SNDBUF 2048 -#define SOCK_MIN_RCVBUF 256 +/* + * Since sk_rmem_alloc sums skb->truesize, even a small frame might need + * sizeof(sk_buff) + MTU + padding, unless net driver perform copybreak + */ +#define SOCK_MIN_RCVBUF (2048 + sizeof(struct sk_buff)) static inline void sk_stream_moderate_sndbuf(struct sock *sk) { -- cgit v1.2.3-18-g5258 From a7855c78a24d6348e989bec616318e68c662e78b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Sep 2010 23:51:51 +0000 Subject: net: loopback driver cleanup loopback driver uses dev->ml_priv to store its percpu stats pointer. It uses ugly casts "(void __percpu __force *)" to shut up sparse complains. Define an union to better document we use ml_priv in loopback driver and define a lstats field with appropriate types. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 45dcda5bfda..01bd4c82d98 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1050,8 +1050,10 @@ struct net_device { #endif /* mid-layer private */ - void *ml_priv; - + union { + void *ml_priv; + struct pcpu_lstats __percpu *lstats; /* loopback stats */ + }; /* GARP */ struct garp_port *garp_port; -- cgit v1.2.3-18-g5258 From 686b9cb994f5f74be790df4cd12873dfdc8a6984 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 23 Sep 2010 09:44:36 -0700 Subject: mac80211/ath9k: Support AMPDU with multiple VIFs. The old ieee80211_find_sta_by_hw method didn't properly find VIFS when there was more than one per AP. This caused AMPDU logic in ath9k to get the wrong VIF when trying to account for transmitted SKBs. This patch changes ieee80211_find_sta_by_hw to take a localaddr argument to distinguish between VIFs with the same AP but different local addresses. The method name is changed to ieee80211_find_sta_by_ifaddr. Signed-off-by: Ben Greear Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5d1187d7c5e..73469d8b64b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2432,25 +2432,28 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, const u8 *addr); /** - * ieee80211_find_sta_by_hw - find a station on hardware + * ieee80211_find_sta_by_ifaddr - find a station on hardware * * @hw: pointer as obtained from ieee80211_alloc_hw() - * @addr: station's address + * @addr: remote station's address + * @localaddr: local address (vif->sdata->vif.addr). Use NULL for 'any'. * * This function must be called under RCU lock and the * resulting pointer is only valid under RCU lock as well. * - * NOTE: This function should not be used! When mac80211 is converted - * internally to properly keep track of stations on multiple - * virtual interfaces, it will not always know which station to - * return here since a single address might be used by multiple - * logical stations (e.g. consider a station connecting to another - * BSSID on the same AP hardware without disconnecting first). + * NOTE: You may pass NULL for localaddr, but then you will just get + * the first STA that matches the remote address 'addr'. + * We can have multiple STA associated with multiple + * logical stations (e.g. consider a station connecting to another + * BSSID on the same AP hardware without disconnecting first). + * In this case, the result of this method with localaddr NULL + * is not reliable. * - * DO NOT USE THIS FUNCTION. + * DO NOT USE THIS FUNCTION with localaddr NULL if at all possible. */ -struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, - const u8 *addr); +struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, + const u8 *addr, + const u8 *localaddr); /** * ieee80211_sta_block_awake - block station from waking up -- cgit v1.2.3-18-g5258 From 554891e63a29af35cc6bb403ef34e319518114d0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Sep 2010 12:38:25 +0200 Subject: mac80211: move packet flags into packet commit 8c0c709eea5cbab97fb464cd68b06f24acc58ee1 Author: Johannes Berg Date: Wed Nov 25 17:46:15 2009 +0100 mac80211: move cmntr flag out of rx flags moved the CMNTR flag into the skb RX flags for some aggregation cleanups, but this was wrong since the optimisation this flag tried to make requires that it is kept across the processing of multiple interfaces -- which isn't true for flags in the skb. The patch not only broke the optimisation, it also introduced a bug: under some (common!) circumstances the flag will be set on an already freed skb! However, investigating this in more detail, I found that most of the flags that we set should be per packet, _except_ for this one, due to a-MPDU processing. Additionally, the flags used for processing (currently just this one) need to be reset before processing a new packet. Since we haven't actually seen bugs reported as a result of the wrong flags handling (which is not too surprising -- the only real bug case I can come up with is an a-MSDU contained in an a-MPDU), I'll make a different fix for rc. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 73469d8b64b..fe8b9dae4de 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -581,9 +581,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_HT: HT MCS was used and rate_idx is MCS index * @RX_FLAG_40MHZ: HT40 (40 MHz) was used * @RX_FLAG_SHORT_GI: Short guard interval was used - * @RX_FLAG_INTERNAL_CMTR: set internally after frame was reported - * on cooked monitor to avoid double-reporting it for multiple - * virtual interfaces */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = 1<<0, @@ -597,7 +594,6 @@ enum mac80211_rx_flags { RX_FLAG_HT = 1<<9, RX_FLAG_40MHZ = 1<<10, RX_FLAG_SHORT_GI = 1<<11, - RX_FLAG_INTERNAL_CMTR = 1<<12, }; /** @@ -618,6 +614,7 @@ enum mac80211_rx_flags { * @rate_idx: index of data rate into band's supported rates or MCS index if * HT rates are use (RX_FLAG_HT) * @flag: %RX_FLAG_* + * @rx_flags: internal RX flags for mac80211 */ struct ieee80211_rx_status { u64 mactime; @@ -627,6 +624,7 @@ struct ieee80211_rx_status { int antenna; int rate_idx; int flag; + unsigned int rx_flags; }; /** -- cgit v1.2.3-18-g5258 From 31dfbc93923c0aaa0440b809f80ff2830c6a531a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Sep 2010 21:28:30 +0100 Subject: drm: Prune GEM vma entries Hook the GEM vm open/close ops into the generic drm vm open/close so that the private vma entries are created and destroy appropriately. Fixes the leak of the drm_vma_entries during the lifetime of the filp. Reported-by: Matt Mackall Cc: Jesse Barnes Signed-off-by: Chris Wilson Acked-by: Jesse Barnes Cc: stable@kernel.org Signed-off-by: Dave Airlie --- include/drm/drmP.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7809d230ade..774e1d49509 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1175,6 +1175,7 @@ extern int drm_release(struct inode *inode, struct file *filp); extern int drm_mmap(struct file *filp, struct vm_area_struct *vma); extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma); extern void drm_vm_open_locked(struct vm_area_struct *vma); +extern void drm_vm_close_locked(struct vm_area_struct *vma); extern resource_size_t drm_core_get_map_ofs(struct drm_local_map * map); extern resource_size_t drm_core_get_reg_ofs(struct drm_device *dev); extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait); -- cgit v1.2.3-18-g5258 From 2fc11536cf5c0b8eb4eb7e01a2a672a189e9280f Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 7 Sep 2010 06:10:45 -0300 Subject: V4L/DVB: videobuf-dma-sg: set correct size in last sg element This fixes a nasty memory corruption bug when using userptr I/O. The function videobuf_pages_to_sg() sets up the scatter-gather list for the DMA transfer to the userspace pages. The first transfer is setup correctly (the size is set to PAGE_SIZE - offset), but all other transfers have size PAGE_SIZE. This is wrong for the last transfer which may be less than PAGE_SIZE. Most, if not all, drivers will program the boards DMA engine correctly, i.e. even though the size in the last sg element is wrong, they will do their own size calculations and make sure the right amount is DMA-ed, and so seemingly prevent memory corruption. However, behind the scenes the dynamic DMA mapping support (in lib/swiotlb.c) may create bounce buffers if the memory pages are not in DMA-able memory. This happens for example on a 64-bit linux with a board that only supports 32-bit DMA. These bounce buffers DO use the information in the sg list to determine the size. So while the DMA engine transfers the correct amount of data, when the data is 'bounced' back too much is copied, causing buffer overwrites. The fix is simple: calculate and set the correct size for the last sg list element. Signed-off-by: Hans Verkuil Cc: stable@kernel.org Signed-off-by: Mauro Carvalho Chehab --- include/media/videobuf-dma-sg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/media/videobuf-dma-sg.h b/include/media/videobuf-dma-sg.h index 97e07f46a0f..aa4ebb42a56 100644 --- a/include/media/videobuf-dma-sg.h +++ b/include/media/videobuf-dma-sg.h @@ -48,6 +48,7 @@ struct videobuf_dmabuf { /* for userland buffer */ int offset; + size_t size; struct page **pages; /* for kernel buffers */ -- cgit v1.2.3-18-g5258 From 8d98efa84b790bdd62248eb0dfff17e9baf5c844 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Sun, 26 Sep 2010 19:07:59 +0000 Subject: Phonet: Implement Pipe Controller to support Nokia Slim Modems Phonet stack assumes the presence of Pipe Controller, either in Modem or on Application Processing Engine user-space for the Pipe data. Nokia Slim Modems like WG2.5 used in ST-Ericsson U8500 platform do not implement Pipe controller in them. This patch adds Pipe Controller implemenation to Phonet stack to support Pipe data over Phonet stack for Nokia Slim Modems. Signed-off-by: Kumar Sanghvi Acked-by: Linus Walleij Signed-off-by: David S. Miller --- include/linux/phonet.h | 5 +++++ include/net/phonet/pep.h | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 85e14a83283..96f5625d62f 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -36,6 +36,11 @@ /* Socket options for SOL_PNPIPE level */ #define PNPIPE_ENCAP 1 #define PNPIPE_IFINDEX 2 +#define PNPIPE_CREATE 3 +#define PNPIPE_ENABLE 4 +#define PNPIPE_DISABLE 5 +#define PNPIPE_DESTROY 6 +#define PNPIPE_INQ 7 #define PNADDR_ANY 0 #define PNADDR_BROADCAST 0xFC diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index 37f23dc05de..def6cfa3f45 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -45,6 +45,10 @@ struct pep_sock { u8 tx_fc; /* TX flow control */ u8 init_enable; /* auto-enable at creation */ u8 aligned; +#ifdef CONFIG_PHONET_PIPECTRLR + u16 remote_pep; + u8 pipe_state; +#endif }; static inline struct pep_sock *pep_sk(struct sock *sk) @@ -165,4 +169,21 @@ enum { PEP_IND_READY, }; +#ifdef CONFIG_PHONET_PIPECTRLR +#define PNS_PEP_CONNECT_UTID 0x02 +#define PNS_PIPE_CREATED_IND_UTID 0x04 +#define PNS_PIPE_ENABLE_UTID 0x0A +#define PNS_PIPE_ENABLED_IND_UTID 0x0C +#define PNS_PIPE_DISABLE_UTID 0x0F +#define PNS_PIPE_DISABLED_IND_UTID 0x11 +#define PNS_PEP_DISCONNECT_UTID 0x06 + +/* Used for tracking state of a pipe */ +enum { + PIPE_IDLE, + PIPE_DISABLED, + PIPE_ENABLED, +}; +#endif /* CONFIG_PHONET_PIPECTRLR */ + #endif -- cgit v1.2.3-18-g5258 From 290b895e0ba4552dfcfc4bd35759c192345b934a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Sep 2010 00:33:35 +0000 Subject: tunnels: prepare percpu accounting Tunnels are going to use percpu for their accounting. They are going to use a new tstats field in net_device. skb_tunnel_rx() is changed to be a wrapper around __skb_tunnel_rx() IPTUNNEL_XMIT() is changed to be a wrapper around __IPTUNNEL_XMIT() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + include/net/dst.h | 24 +++++++++++++++++++----- include/net/ipip.h | 12 +++++++----- 3 files changed, 27 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 01bd4c82d98..83de0eb7a07 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1053,6 +1053,7 @@ struct net_device { union { void *ml_priv; struct pcpu_lstats __percpu *lstats; /* loopback stats */ + struct pcpu_tstats __percpu *tstats; /* tunnel stats */ }; /* GARP */ struct garp_port *garp_port; diff --git a/include/net/dst.h b/include/net/dst.h index 02386505033..aa53fbc34b2 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -227,6 +227,23 @@ static inline void skb_dst_force(struct sk_buff *skb) } +/** + * __skb_tunnel_rx - prepare skb for rx reinsert + * @skb: buffer + * @dev: tunnel device + * + * After decapsulation, packet is going to re-enter (netif_rx()) our stack, + * so make some cleanups. (no accounting done) + */ +static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) +{ + skb->dev = dev; + skb->rxhash = 0; + skb_set_queue_mapping(skb, 0); + skb_dst_drop(skb); + nf_reset(skb); +} + /** * skb_tunnel_rx - prepare skb for rx reinsert * @skb: buffer @@ -234,17 +251,14 @@ static inline void skb_dst_force(struct sk_buff *skb) * * After decapsulation, packet is going to re-enter (netif_rx()) our stack, * so make some cleanups, and perform accounting. + * Note: this accounting is not SMP safe. */ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) { - skb->dev = dev; /* TODO : stats should be SMP safe */ dev->stats.rx_packets++; dev->stats.rx_bytes += skb->len; - skb->rxhash = 0; - skb_set_queue_mapping(skb, 0); - skb_dst_drop(skb); - nf_reset(skb); + __skb_tunnel_rx(skb, dev); } /* Children define the path of the packet through the diff --git a/include/net/ipip.h b/include/net/ipip.h index 65caea8b414..58abbf966b0 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -45,7 +45,7 @@ struct ip_tunnel_prl_entry { struct rcu_head rcu_head; }; -#define IPTUNNEL_XMIT() do { \ +#define __IPTUNNEL_XMIT(stats1, stats2) do { \ int err; \ int pkt_len = skb->len - skb_transport_offset(skb); \ \ @@ -54,12 +54,14 @@ struct ip_tunnel_prl_entry { \ err = ip_local_out(skb); \ if (likely(net_xmit_eval(err) == 0)) { \ - txq->tx_bytes += pkt_len; \ - txq->tx_packets++; \ + (stats1)->tx_bytes += pkt_len; \ + (stats1)->tx_packets++; \ } else { \ - stats->tx_errors++; \ - stats->tx_aborted_errors++; \ + (stats2)->tx_errors++; \ + (stats2)->tx_aborted_errors++; \ } \ } while (0) +#define IPTUNNEL_XMIT() __IPTUNNEL_XMIT(txq, stats) + #endif -- cgit v1.2.3-18-g5258 From 62fe0b40abb3484413800edaef9b087a20059acf Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 27 Sep 2010 08:24:33 +0000 Subject: net: Allow changing number of RX queues after device allocation For RPS, we create a kobject for each RX queue based on the number of queues passed to alloc_netdev_mq(). However, drivers generally do not determine the numbers of hardware queues to use until much later, so this usually represents the maximum number the driver may use and not the actual number in use. For TX queues, drivers can update the actual number using netif_set_real_num_tx_queues(). Add a corresponding function for RX queues, netif_set_real_num_rx_queues(). Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdevice.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 83de0eb7a07..b15732e22ee 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -976,8 +976,11 @@ struct net_device { struct netdev_rx_queue *_rx; - /* Number of RX queues allocated at alloc_netdev_mq() time */ + /* Number of RX queues allocated at register_netdev() time */ unsigned int num_rx_queues; + + /* Number of RX queues currently active in device */ + unsigned int real_num_rx_queues; #endif rx_handler_func_t *rx_handler; @@ -1685,6 +1688,17 @@ static inline int netif_is_multiqueue(const struct net_device *dev) extern void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq); +#ifdef CONFIG_RPS +extern int netif_set_real_num_rx_queues(struct net_device *dev, + unsigned int rxq); +#else +static inline int netif_set_real_num_rx_queues(struct net_device *dev, + unsigned int rxq) +{ + return 0; +} +#endif + /* Use this variant when it is known for sure that it * is executing from hardware interrupt context or with hardware interrupts * disabled. -- cgit v1.2.3-18-g5258 From 3171d026291d08c2a4cfe06302ce308b09605c4b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 27 Sep 2010 08:24:49 +0000 Subject: net: Add netif_copy_real_num_queues() for use by virtual net drivers This sets the active numbers of queues on a net device to match another. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b15732e22ee..c2bec990bd1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1699,6 +1699,18 @@ static inline int netif_set_real_num_rx_queues(struct net_device *dev, } #endif +static inline int netif_copy_real_num_queues(struct net_device *to_dev, + const struct net_device *from_dev) +{ + netif_set_real_num_tx_queues(to_dev, from_dev->real_num_tx_queues); +#ifdef CONFIG_RPS + return netif_set_real_num_rx_queues(to_dev, + from_dev->real_num_rx_queues); +#else + return 0; +#endif +} + /* Use this variant when it is known for sure that it * is executing from hardware interrupt context or with hardware interrupts * disabled. -- cgit v1.2.3-18-g5258 From bc01befdcf3e40979eb518085a075cbf0aacede0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Sep 2010 21:06:34 +0200 Subject: netfilter: ctnetlink: add support for user-space expectation helpers This patch adds the basic infrastructure to support user-space expectation helpers via ctnetlink and the netfilter queuing infrastructure NFQUEUE. Basically, this patch: * adds NF_CT_EXPECT_USERSPACE flag to identify user-space created expectations. I have also added a sanity check in __nf_ct_expect_check() to avoid that kernel-space helpers may create an expectation if the master conntrack has no helper assigned. * adds some branches to check if the master conntrack helper exists, otherwise we skip the code that refers to kernel-space helper such as the local expectation list and the expectation policy. * allows to set the timeout for user-space expectations with no helper assigned. * a list of expectations created from user-space that depends on ctnetlink (if this module is removed, they are deleted). * includes USERSPACE in the /proc output for expectations that have been created by a user-space helper. This patch also modifies ctnetlink to skip including the helper name in the Netlink messages if no kernel-space helper is set (since no user-space expectation has not kernel-space kernel assigned). You can access an example user-space FTP conntrack helper at: http://people.netfilter.org/pablo/userspace-conntrack-helpers/nf-ftp-helper-userspace-POC.tar.bz Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_common.h | 1 + include/net/netfilter/nf_conntrack_expect.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index fdc50cae861..23a1a08578a 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -103,6 +103,7 @@ enum ip_conntrack_expect_events { /* expectation flags */ #define NF_CT_EXPECT_PERMANENT 0x1 #define NF_CT_EXPECT_INACTIVE 0x2 +#define NF_CT_EXPECT_USERSPACE 0x4 #ifdef __KERNEL__ struct ip_conntrack_stat { diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 96bb42af5fa..416b8384448 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -85,6 +85,7 @@ nf_ct_find_expectation(struct net *net, u16 zone, void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); void nf_ct_remove_expectations(struct nf_conn *ct); void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); +void nf_ct_remove_userspace_expectations(void); /* Allocate space for an expectation: this is mandatory before calling nf_ct_expect_related. You will have to call put afterwards. */ -- cgit v1.2.3-18-g5258 From 58f87ed0d45141a90167f34c0959d607160a26df Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 7 Sep 2010 12:49:45 -0400 Subject: ACPI: Fix typos Signed-off-by: Len Brown --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index c0786d446a0..984cdc62e30 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -55,7 +55,7 @@ extern u8 acpi_gbl_permanent_mmap; /* - * Globals that are publically available, allowing for + * Globals that are publicly available, allowing for * run time configuration */ extern u32 acpi_dbg_level; -- cgit v1.2.3-18-g5258 From 4465b469008bc03b98a1b8df4e9ae501b6c69d4b Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 23 May 2010 19:54:12 +0000 Subject: ipv4: Allow configuring subnets as local addresses This patch allows a host to be configured to respond to any address in a specified range as if it were local, without actually needing to configure the address on an interface. This is done through routing table configuration. For instance, to configure a host to respond to any address in 10.1/16 received on eth0 as a local address we can do: ip rule add from all iif eth0 lookup 200 ip route add local 10.1/16 dev lo proto kernel scope host src 127.0.0.1 table 200 This host is now reachable by any 10.1/16 address (route lookup on input for packets received on eth0 can find the route). On output, the rule will not be matched so that this host can still send packets to 10.1/16 (not sent on loopback). Presumably, external routing can be configured to make sense out of this. To make this work, we needed to modify the logic in finding the interface which is assigned a given source address for output (dev_ip_find). We perform a normal fib_lookup instead of just a lookup on the local table, and in the lookup we ignore the input interface for matching. This patch is useful to implement IP-anycast for subnets of virtual addresses. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/flow.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index bb08692a20b..0ac3fb5e097 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -49,6 +49,7 @@ struct flowi { __u8 proto; __u8 flags; #define FLOWI_FLAG_ANYSRC 0x01 +#define FLOWI_FLAG_MATCH_ANY_IIF 0x02 union { struct { __be16 sport; -- cgit v1.2.3-18-g5258 From cdb138080b78146d1cdadba9f5dadbeb97445b91 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Wed, 28 Jul 2010 10:59:06 +0200 Subject: pcmcia: do not use win_req_t when calling pcmcia_request_window() Instead of win_req_t, drivers are now requested to fill out struct pcmcia_device *p_dev->resource[2,3,4,5] for up to four iomem ranges. After a call to pcmcia_request_window(), the windows found there are reserved and may be used until pcmcia_release_window() is called. CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-mtd@lists.infradead.org CC: Jiri Kosina CC: linux-scsi@vger.kernel.org Tested-by: Wolfram Sang Signed-off-by: Dominik Brodowski --- include/pcmcia/cs.h | 24 ------------------------ include/pcmcia/ds.h | 28 +++++++++++++++++++++------- 2 files changed, 21 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h index 68d8bde7e8d..63cb9bbe390 100644 --- a/include/pcmcia/cs.h +++ b/include/pcmcia/cs.h @@ -68,28 +68,4 @@ typedef struct config_req_t { #define PRESENT_IOBASE_3 0x100 #define PRESENT_IOSIZE 0x200 -/* For RequestWindow */ -typedef struct win_req_t { - u_int Attributes; - u_long Base; - u_int Size; - u_int AccessSpeed; -} win_req_t; - -/* Attributes for RequestWindow */ -#define WIN_MEMORY_TYPE_CM 0x00 /* default */ -#define WIN_MEMORY_TYPE_AM 0x20 /* MAP_ATTRIB */ -#define WIN_DATA_WIDTH_8 0x00 /* default */ -#define WIN_DATA_WIDTH_16 0x02 /* MAP_16BIT */ -#define WIN_ENABLE 0x01 /* MAP_ACTIVE */ -#define WIN_USE_WAIT 0x40 /* MAP_USE_WAIT */ - -#define WIN_FLAGS_MAP 0x63 /* MAP_ATTRIB | MAP_16BIT | MAP_ACTIVE | - MAP_USE_WAIT */ -#define WIN_FLAGS_REQ 0x1c /* mapping to socket->win[i]: - 0x04 -> 0 - 0x08 -> 1 - 0x0c -> 2 - 0x10 -> 3 */ - #endif /* _LINUX_CS_H */ diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 70c58ed2278..6f7cb38d885 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -36,8 +36,6 @@ struct pcmcia_device; struct config_t; struct net_device; -typedef struct resource *window_handle_t; - /* dynamic device IDs for PCMCIA device drivers. See * Documentation/pcmcia/driver.txt for details. */ @@ -92,7 +90,6 @@ struct pcmcia_device { /* deprecated, will be cleaned up soon */ config_req_t conf; - window_handle_t win; /* device setup */ unsigned int irq; @@ -209,10 +206,10 @@ int __must_check pcmcia_request_irq(struct pcmcia_device *p_dev, int pcmcia_request_configuration(struct pcmcia_device *p_dev, config_req_t *req); -int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, - window_handle_t *wh); -int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); -int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win, +int pcmcia_request_window(struct pcmcia_device *p_dev, struct resource *res, + unsigned int speed); +int pcmcia_release_window(struct pcmcia_device *p_dev, struct resource *res); +int pcmcia_map_mem_page(struct pcmcia_device *p_dev, struct resource *res, unsigned int offset); int pcmcia_modify_configuration(struct pcmcia_device *p_dev, modconf_t *mod); @@ -234,6 +231,23 @@ static inline int pcmcia_io_cfg_data_width(unsigned int flags) return IO_DATA_PATH_WIDTH_AUTO; } +/* IO memory */ +#define WIN_MEMORY_TYPE_CM 0x00 /* default */ +#define WIN_MEMORY_TYPE_AM 0x20 /* MAP_ATTRIB */ +#define WIN_DATA_WIDTH_8 0x00 /* default */ +#define WIN_DATA_WIDTH_16 0x02 /* MAP_16BIT */ +#define WIN_ENABLE 0x01 /* MAP_ACTIVE */ +#define WIN_USE_WAIT 0x40 /* MAP_USE_WAIT */ + +#define WIN_FLAGS_MAP 0x63 /* MAP_ATTRIB | MAP_16BIT | MAP_ACTIVE | + MAP_USE_WAIT */ +#define WIN_FLAGS_REQ 0x1c /* mapping to socket->win[i]: + 0x04 -> 0 + 0x08 -> 1 + 0x0c -> 2 + 0x10 -> 3 */ + + #endif /* __KERNEL__ */ #endif /* _LINUX_DS_H */ -- cgit v1.2.3-18-g5258 From fb49fa533f9d211994c33efb752ffa5b30033729 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 29 Jul 2010 14:06:42 +0200 Subject: pcmcia: split up modify_configuration() into two fixup functions pcmcia_modify_configuration() was only used by two drivers to fix up one issue each: setting the Vpp to a different value, and reducing the IO width to 8 bit. Introduce two explicitly named functions handling these things, and remove one further typedef. CC: netdev@vger.kernel.org CC: linux-mtd@lists.infradead.org Tested-by: Wolfram Sang Signed-off-by: Dominik Brodowski --- include/pcmcia/cs.h | 13 ------------- include/pcmcia/ds.h | 4 +++- 2 files changed, 3 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h index 63cb9bbe390..e13d0cd3f8f 100644 --- a/include/pcmcia/cs.h +++ b/include/pcmcia/cs.h @@ -19,19 +19,6 @@ #include #endif -/* ModifyConfiguration */ -typedef struct modconf_t { - u_int Attributes; - u_int Vcc, Vpp1, Vpp2; -} modconf_t; - -/* Attributes for ModifyConfiguration */ -#define CONF_IRQ_CHANGE_VALID 0x0100 -#define CONF_VCC_CHANGE_VALID 0x0200 -#define CONF_VPP1_CHANGE_VALID 0x0400 -#define CONF_VPP2_CHANGE_VALID 0x0800 -#define CONF_IO_CHANGE_WIDTH 0x1000 - /* For RequestConfiguration */ typedef struct config_req_t { u_int Attributes; diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 6f7cb38d885..8e307b93f47 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -212,7 +212,9 @@ int pcmcia_release_window(struct pcmcia_device *p_dev, struct resource *res); int pcmcia_map_mem_page(struct pcmcia_device *p_dev, struct resource *res, unsigned int offset); -int pcmcia_modify_configuration(struct pcmcia_device *p_dev, modconf_t *mod); +int pcmcia_fixup_vpp(struct pcmcia_device *p_dev, unsigned char new_vpp); +int pcmcia_fixup_iowidth(struct pcmcia_device *p_dev); + void pcmcia_disable_device(struct pcmcia_device *p_dev); /* IO ports */ -- cgit v1.2.3-18-g5258 From e8405f0f617856de0ceb7d04e65b663051451544 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 29 Jul 2010 15:50:55 +0200 Subject: pcmcia: move Vpp setup to struct pcmcia_device Some drivers prefer to explicitly set Vpp. Instead of passing the voltage inside config_req_t, store it in struct pcmcia_device. CC: linux-ide@vger.kernel.org CC: netdev@vger.kernel.org CC: linux-mtd@lists.infradead.org CC: linux-wireless@vger.kernel.org CC: linux-serial@vger.kernel.org CC: linux-usb@vger.kernel.org CC: linux-scsi@vger.kernel.org Acked-by: Gustavo F. Padovan (for drivers/bluetooth) Tested-by: Wolfram Sang Signed-off-by: Dominik Brodowski --- include/pcmcia/cs.h | 1 - include/pcmcia/ds.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h index e13d0cd3f8f..ccb8e6e0dd6 100644 --- a/include/pcmcia/cs.h +++ b/include/pcmcia/cs.h @@ -22,7 +22,6 @@ /* For RequestConfiguration */ typedef struct config_req_t { u_int Attributes; - u_int Vpp; /* both Vpp1 and Vpp2 */ u_int IntType; u_int ConfigBase; u_char Status, Pin, Copy, ExtStatus; diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 8e307b93f47..6137fbc34ab 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -94,6 +94,7 @@ struct pcmcia_device { /* device setup */ unsigned int irq; struct resource *resource[PCMCIA_NUM_RESOURCES]; + unsigned int vpp; unsigned int io_lines; /* number of I/O lines */ -- cgit v1.2.3-18-g5258 From 1a4a046030ade0f57b8f3b476d61c7c35d894b66 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 29 Jul 2010 15:54:54 +0200 Subject: pcmcia: remove Pin, Copy configuration register access The "Pin" and "Copy" configuration registers (CISREG_SCR, CISREG_PPR) do not seem to be utilized anywhere. If a device would request a write to these registers, "0" would be written. Continue to do so, but warn of unexpected behavior -- and remove the "Pin" and "Copy" entries from config_req_t. Tested-by: Wolfram Sang Signed-off-by: Dominik Brodowski --- include/pcmcia/cs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h index ccb8e6e0dd6..e656abee174 100644 --- a/include/pcmcia/cs.h +++ b/include/pcmcia/cs.h @@ -24,7 +24,7 @@ typedef struct config_req_t { u_int Attributes; u_int IntType; u_int ConfigBase; - u_char Status, Pin, Copy, ExtStatus; + u_char Status, ExtStatus; u_char ConfigIndex; u_int Present; } config_req_t; -- cgit v1.2.3-18-g5258 From fc301101034c06bf56a7f71bf682c48909e401a4 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 29 Jul 2010 16:19:39 +0200 Subject: pcmcia: simplify Status, ExtStatus register access The Status (CISREG_CCSR) and ExtStatus (CISREG_ESR) registers were only accessed to enable audio output for some drivers and IRQ for serial_cs.c. The former also required setting config_req_t.Attributes to CONF_ENABLE_SPKR; the latter can be simplified to setting this field to CONF_ENABLE_ESR. CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-serial@vger.kernel.org CC: linux-scsi@vger.kernel.org Tested-by: Wolfram Sang Signed-off-by: Dominik Brodowski --- include/pcmcia/cs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h index e656abee174..29d693f72ba 100644 --- a/include/pcmcia/cs.h +++ b/include/pcmcia/cs.h @@ -24,7 +24,6 @@ typedef struct config_req_t { u_int Attributes; u_int IntType; u_int ConfigBase; - u_char Status, ExtStatus; u_char ConfigIndex; u_int Present; } config_req_t; @@ -34,6 +33,7 @@ typedef struct config_req_t { #define CONF_ENABLE_DMA 0x02 #define CONF_ENABLE_SPKR 0x04 #define CONF_ENABLE_PULSE_IRQ 0x08 +#define CONF_ENABLE_ESR 0x10 #define CONF_VALID_CLIENT 0x100 /* IntType field */ -- cgit v1.2.3-18-g5258 From 37979e1546a790c44adbc7f27a85569944480ebc Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 29 Jul 2010 16:33:23 +0200 Subject: pcmcia: simplify IntType IntType was only set to INT_MEMORY (driver pcmciamtd) or INT_MEMORY_AND_IO (all other drivers). As this flags seems to relate to ioport access, make it conditional to the driver having requested IO port access. There are two drivers which do not request IO ports, but did set INT_MEMORY_AND_IO: ray_cs and b43. For those, we consistently only set INT_MEMORY in future. CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-ide@vger.kernel.org CC: linux-usb@vger.kernel.org CC: laforge@gnumonks.org CC: linux-mtd@lists.infradead.org CC: alsa-devel@alsa-project.org CC: linux-serial@vger.kernel.org CC: Jiri Kosina CC: linux-scsi@vger.kernel.org Acked-by: Gustavo F. Padovan (for drivers/bluetooth) Tested-by: Wolfram Sang Signed-off-by: Dominik Brodowski --- include/pcmcia/cs.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h index 29d693f72ba..674edbc9ebe 100644 --- a/include/pcmcia/cs.h +++ b/include/pcmcia/cs.h @@ -22,7 +22,6 @@ /* For RequestConfiguration */ typedef struct config_req_t { u_int Attributes; - u_int IntType; u_int ConfigBase; u_char ConfigIndex; u_int Present; @@ -36,12 +35,6 @@ typedef struct config_req_t { #define CONF_ENABLE_ESR 0x10 #define CONF_VALID_CLIENT 0x100 -/* IntType field */ -#define INT_MEMORY 0x01 -#define INT_MEMORY_AND_IO 0x02 -#define INT_CARDBUS 0x04 -#define INT_ZOOMED_VIDEO 0x08 - /* Configuration registers present */ #define PRESENT_OPTION 0x001 #define PRESENT_STATUS 0x002 -- cgit v1.2.3-18-g5258 From 7feabb6412ea23edd298c0fa90e5aa6733eb4a42 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 29 Jul 2010 18:35:47 +0200 Subject: pcmcia: move config_{base,index,regs} to struct pcmcia_device Several drivers prefer to explicitly set config_{base,index,regs}, formerly known as ConfigBase, ConfigIndex and Present. Instead of passing these values inside config_req_t, store it in struct pcmcia_device. CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-ide@vger.kernel.org CC: linux-usb@vger.kernel.org CC: laforge@gnumonks.org CC: linux-mtd@lists.infradead.org CC: alsa-devel@alsa-project.org CC: linux-serial@vger.kernel.org CC: Jiri Kosina CC: linux-scsi@vger.kernel.org Acked-by: Gustavo F. Padovan (for drivers/bluetooth) Tested-by: Wolfram Sang Signed-off-by: Dominik Brodowski --- include/pcmcia/cs.h | 15 --------------- include/pcmcia/ds.h | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h index 674edbc9ebe..47b6092c4ed 100644 --- a/include/pcmcia/cs.h +++ b/include/pcmcia/cs.h @@ -22,9 +22,6 @@ /* For RequestConfiguration */ typedef struct config_req_t { u_int Attributes; - u_int ConfigBase; - u_char ConfigIndex; - u_int Present; } config_req_t; /* Attributes for RequestConfiguration */ @@ -35,16 +32,4 @@ typedef struct config_req_t { #define CONF_ENABLE_ESR 0x10 #define CONF_VALID_CLIENT 0x100 -/* Configuration registers present */ -#define PRESENT_OPTION 0x001 -#define PRESENT_STATUS 0x002 -#define PRESENT_PIN_REPLACE 0x004 -#define PRESENT_COPY 0x008 -#define PRESENT_EXT_STATUS 0x010 -#define PRESENT_IOBASE_0 0x020 -#define PRESENT_IOBASE_1 0x040 -#define PRESENT_IOBASE_2 0x080 -#define PRESENT_IOBASE_3 0x100 -#define PRESENT_IOSIZE 0x200 - #endif /* _LINUX_CS_H */ diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 6137fbc34ab..bc28f96d0b5 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -97,6 +97,9 @@ struct pcmcia_device { unsigned int vpp; unsigned int io_lines; /* number of I/O lines */ + unsigned int config_base; + unsigned int config_index; + unsigned int config_regs; /* PRESENT_ flags below */ /* Is the device suspended? */ u16 suspended:1; @@ -250,6 +253,17 @@ static inline int pcmcia_io_cfg_data_width(unsigned int flags) 0x0c -> 2 0x10 -> 3 */ +/* config_reg{ister}s present for this PCMCIA device */ +#define PRESENT_OPTION 0x001 +#define PRESENT_STATUS 0x002 +#define PRESENT_PIN_REPLACE 0x004 +#define PRESENT_COPY 0x008 +#define PRESENT_EXT_STATUS 0x010 +#define PRESENT_IOBASE_0 0x020 +#define PRESENT_IOBASE_1 0x040 +#define PRESENT_IOBASE_2 0x080 +#define PRESENT_IOBASE_3 0x100 +#define PRESENT_IOSIZE 0x200 #endif /* __KERNEL__ */ -- cgit v1.2.3-18-g5258 From 1ac71e5a35eebee60cdcf15b3980bd94498f037b Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 29 Jul 2010 19:27:09 +0200 Subject: pcmcia: convert pcmcia_request_configuration to pcmcia_enable_device pcmcia_enable_device() now replaces pcmcia_request_configuration(). Instead of config_req_t, all necessary flags are either passed as a parameter to pcmcia_enable_device(), or (in rare circumstances) set in struct pcmcia_device -> flags. With the last remaining user of include/pcmcia/cs.h gone, remove all references. CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-ide@vger.kernel.org CC: linux-usb@vger.kernel.org CC: laforge@gnumonks.org CC: linux-mtd@lists.infradead.org CC: alsa-devel@alsa-project.org CC: linux-serial@vger.kernel.org CC: Jiri Kosina CC: linux-scsi@vger.kernel.org Acked-by: Gustavo F. Padovan (for drivers/bluetooth) Tested-by: Wolfram Sang Signed-off-by: Dominik Brodowski --- include/pcmcia/cs.h | 35 ----------------------------------- include/pcmcia/ds.h | 17 +++++++++++------ include/pcmcia/ss.h | 1 - 3 files changed, 11 insertions(+), 42 deletions(-) delete mode 100644 include/pcmcia/cs.h (limited to 'include') diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h deleted file mode 100644 index 47b6092c4ed..00000000000 --- a/include/pcmcia/cs.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * cs.h - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * The initial developer of the original code is David A. Hinds - * . Portions created by David A. Hinds - * are Copyright (C) 1999 David A. Hinds. All Rights Reserved. - * - * (C) 1999 David A. Hinds - */ - -#ifndef _LINUX_CS_H -#define _LINUX_CS_H - -#ifdef __KERNEL__ -#include -#endif - -/* For RequestConfiguration */ -typedef struct config_req_t { - u_int Attributes; -} config_req_t; - -/* Attributes for RequestConfiguration */ -#define CONF_ENABLE_IRQ 0x01 -#define CONF_ENABLE_DMA 0x02 -#define CONF_ENABLE_SPKR 0x04 -#define CONF_ENABLE_PULSE_IRQ 0x08 -#define CONF_ENABLE_ESR 0x10 -#define CONF_VALID_CLIENT 0x100 - -#endif /* _LINUX_CS_H */ diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index bc28f96d0b5..50b03fd67fd 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -24,9 +24,11 @@ #ifdef __KERNEL__ #include +#include #include #include + /* * PCMCIA device drivers (16-bit cards only; 32-bit cards require CardBus * a.k.a. PCI drivers @@ -88,18 +90,16 @@ struct pcmcia_device { struct list_head socket_device_list; - /* deprecated, will be cleaned up soon */ - config_req_t conf; - /* device setup */ unsigned int irq; struct resource *resource[PCMCIA_NUM_RESOURCES]; unsigned int vpp; - unsigned int io_lines; /* number of I/O lines */ + unsigned int config_flags; /* CONF_ENABLE_ flags below */ unsigned int config_base; unsigned int config_index; unsigned int config_regs; /* PRESENT_ flags below */ + unsigned int io_lines; /* number of I/O lines */ /* Is the device suspended? */ u16 suspended:1; @@ -207,8 +207,7 @@ pcmcia_request_exclusive_irq(struct pcmcia_device *p_dev, int __must_check pcmcia_request_irq(struct pcmcia_device *p_dev, irq_handler_t handler); -int pcmcia_request_configuration(struct pcmcia_device *p_dev, - config_req_t *req); +int pcmcia_enable_device(struct pcmcia_device *p_dev); int pcmcia_request_window(struct pcmcia_device *p_dev, struct resource *res, unsigned int speed); @@ -265,6 +264,12 @@ static inline int pcmcia_io_cfg_data_width(unsigned int flags) #define PRESENT_IOBASE_3 0x100 #define PRESENT_IOSIZE 0x200 +/* flags to be passed to pcmcia_enable_device() */ +#define CONF_ENABLE_IRQ 0x01 +#define CONF_ENABLE_SPKR 0x02 +#define CONF_ENABLE_PULSE_IRQ 0x04 +#define CONF_ENABLE_ESR 0x08 + #endif /* __KERNEL__ */ #endif /* _LINUX_DS_H */ diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index 626b63c33d9..731cde010f4 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -19,7 +19,6 @@ #include /* task_struct, completion */ #include -#include #ifdef CONFIG_CARDBUS #include #endif -- cgit v1.2.3-18-g5258 From 440eed43e2a95bb842488755683716814da10f2b Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 30 Jul 2010 09:51:52 +0200 Subject: pcmcia: introduce autoconfiguration feature Introduce an autoconfiguration feature to set certain values in pcmcia_loop_config(), instead of copying the same code over and over in each PCMCIA driver. At first, introduce the following options: CONF_AUTO_CHECK_VCC check or matching Vcc entry CONF_AUTO_SET_VPP set Vpp CONF_AUTO_AUDIO enable the speaker line CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-ide@vger.kernel.org CC: linux-usb@vger.kernel.org CC: laforge@gnumonks.org CC: linux-mtd@lists.infradead.org CC: alsa-devel@alsa-project.org CC: linux-serial@vger.kernel.org CC: Jiri Kosina CC: linux-scsi@vger.kernel.org Acked-by: Gustavo F. Padovan (for drivers/bluetooth) Tested-by: Wolfram Sang Signed-off-by: Dominik Brodowski --- include/pcmcia/ds.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 50b03fd67fd..0577e5f1030 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -177,7 +177,6 @@ int pcmcia_loop_config(struct pcmcia_device *p_dev, int (*conf_check) (struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cf, cistpl_cftable_entry_t *dflt, - unsigned int vcc, void *priv_data), void *priv_data); @@ -270,6 +269,12 @@ static inline int pcmcia_io_cfg_data_width(unsigned int flags) #define CONF_ENABLE_PULSE_IRQ 0x04 #define CONF_ENABLE_ESR 0x08 +/* flags used by pcmcia_loop_config() autoconfiguration */ +#define CONF_AUTO_CHECK_VCC 0x10 /* check for matching Vcc? */ +#define CONF_AUTO_SET_VPP 0x20 /* set Vpp? */ +#define CONF_AUTO_AUDIO 0x40 /* enable audio line? */ + + #endif /* __KERNEL__ */ #endif /* _LINUX_DS_H */ -- cgit v1.2.3-18-g5258 From 00990e7ce0b0e596fe41d9c64d6933ea70084003 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 30 Jul 2010 13:13:46 +0200 Subject: pcmcia: use autoconfiguration feature for ioports and iomem When CONF_AUTO_SET_IO or CONF_AUTO_SET_IOMEM are set, the corresponding fields in struct pcmcia_device *p_dev->resource[0,1,2] are set accordinly. Drivers wishing to override certain settings may do so in the callback function, but they no longer need to parse the CIS entries stored in cistpl_cftable_entry_t themselves. CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-ide@vger.kernel.org CC: linux-usb@vger.kernel.org CC: laforge@gnumonks.org CC: linux-mtd@lists.infradead.org CC: linux-bluetooth@vger.kernel.org CC: alsa-devel@alsa-project.org CC: linux-serial@vger.kernel.org CC: Jiri Kosina CC: linux-scsi@vger.kernel.org Tested-by: Wolfram Sang Signed-off-by: Dominik Brodowski --- include/pcmcia/ds.h | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 0577e5f1030..0b8c8d45df4 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -93,6 +93,7 @@ struct pcmcia_device { /* device setup */ unsigned int irq; struct resource *resource[PCMCIA_NUM_RESOURCES]; + resource_size_t card_addr; /* for the 1st IOMEM resource */ unsigned int vpp; unsigned int config_flags; /* CONF_ENABLE_ flags below */ @@ -175,8 +176,6 @@ int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse); /* loop CIS entries for valid configuration */ int pcmcia_loop_config(struct pcmcia_device *p_dev, int (*conf_check) (struct pcmcia_device *p_dev, - cistpl_cftable_entry_t *cf, - cistpl_cftable_entry_t *dflt, void *priv_data), void *priv_data); @@ -225,16 +224,6 @@ void pcmcia_disable_device(struct pcmcia_device *p_dev); #define IO_DATA_PATH_WIDTH_16 0x08 #define IO_DATA_PATH_WIDTH_AUTO 0x10 -/* convert flag found in cfgtable to data path width parameter */ -static inline int pcmcia_io_cfg_data_width(unsigned int flags) -{ - if (!(flags & CISTPL_IO_8BIT)) - return IO_DATA_PATH_WIDTH_16; - if (!(flags & CISTPL_IO_16BIT)) - return IO_DATA_PATH_WIDTH_8; - return IO_DATA_PATH_WIDTH_AUTO; -} - /* IO memory */ #define WIN_MEMORY_TYPE_CM 0x00 /* default */ #define WIN_MEMORY_TYPE_AM 0x20 /* MAP_ATTRIB */ @@ -264,16 +253,17 @@ static inline int pcmcia_io_cfg_data_width(unsigned int flags) #define PRESENT_IOSIZE 0x200 /* flags to be passed to pcmcia_enable_device() */ -#define CONF_ENABLE_IRQ 0x01 -#define CONF_ENABLE_SPKR 0x02 -#define CONF_ENABLE_PULSE_IRQ 0x04 -#define CONF_ENABLE_ESR 0x08 +#define CONF_ENABLE_IRQ 0x0001 +#define CONF_ENABLE_SPKR 0x0002 +#define CONF_ENABLE_PULSE_IRQ 0x0004 +#define CONF_ENABLE_ESR 0x0008 /* flags used by pcmcia_loop_config() autoconfiguration */ -#define CONF_AUTO_CHECK_VCC 0x10 /* check for matching Vcc? */ -#define CONF_AUTO_SET_VPP 0x20 /* set Vpp? */ -#define CONF_AUTO_AUDIO 0x40 /* enable audio line? */ - +#define CONF_AUTO_CHECK_VCC 0x0100 /* check for matching Vcc? */ +#define CONF_AUTO_SET_VPP 0x0200 /* set Vpp? */ +#define CONF_AUTO_AUDIO 0x0400 /* enable audio line? */ +#define CONF_AUTO_SET_IO 0x0800 /* set ->resource[0,1] */ +#define CONF_AUTO_SET_IOMEM 0x1000 /* set ->resource[2] */ #endif /* __KERNEL__ */ -- cgit v1.2.3-18-g5258 From 2e9b981a7c63ee8278df6823f8389d69dad1a499 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 8 Aug 2010 11:36:26 +0200 Subject: pcmcia: move driver name to struct pcmcia_driver Tested-by: Wolfram Sang Signed-off-by: Dominik Brodowski --- include/pcmcia/ds.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 0b8c8d45df4..d830c87ff0a 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -47,6 +47,8 @@ struct pcmcia_dynids { }; struct pcmcia_driver { + const char *name; + int (*probe) (struct pcmcia_device *dev); void (*remove) (struct pcmcia_device *dev); -- cgit v1.2.3-18-g5258 From 6d81f41c58c69ddde497e9e640ba5805aa26e78c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Sep 2010 20:50:33 +0000 Subject: dummy: percpu stats and lockless xmit Converts dummy network device driver to : - percpu stats - 64bit stats - lockless xmit (NETIF_F_LLTX) - performance features added (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c2bec990bd1..6f0845e0b88 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1057,6 +1057,7 @@ struct net_device { void *ml_priv; struct pcpu_lstats __percpu *lstats; /* loopback stats */ struct pcpu_tstats __percpu *tstats; /* tunnel stats */ + struct pcpu_dstats __percpu *dstats; /* dummy stats */ }; /* GARP */ struct garp_port *garp_port; -- cgit v1.2.3-18-g5258 From bfa5ae63b823f4ffd3483a05f60a93a4a7b7d680 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Sep 2010 05:58:37 +0000 Subject: net: rename netdev rx_queue to ingress_queue There is some confusion with rx_queue name after RPS, and net drivers private rx_queue fields. I suggest to rename "struct net_device"->rx_queue to ingress_queue. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6f0845e0b88..ceed3474014 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -986,7 +986,7 @@ struct net_device { rx_handler_func_t *rx_handler; void *rx_handler_data; - struct netdev_queue rx_queue; /* use two cache lines */ + struct netdev_queue ingress_queue; /* use two cache lines */ /* * Cache lines mostly used on transmit path -- cgit v1.2.3-18-g5258 From a64de47c091e4a337fa9763315cb6f2fbf0c583b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 28 Sep 2010 17:08:02 +0000 Subject: arp: remove unnecessary export of arp_broken_ops arp_broken_ops is only used in arp.c Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/arp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/arp.h b/include/net/arp.h index 716f43c5c98..f4cf6ce6658 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -26,6 +26,4 @@ extern struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, const unsigned char *target_hw); extern void arp_xmit(struct sk_buff *skb); -extern const struct neigh_ops arp_broken_ops; - #endif /* _ARP_H */ -- cgit v1.2.3-18-g5258 From 1b9f409293529da4630bfc5d6d8e7d7451a6ccb5 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 28 Sep 2010 19:30:14 +0000 Subject: tcp: tcp_enter_quickack_mode can be static Function only used in tcp_input.c Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/tcp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 914a60c7ad6..4fee0424af7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -346,8 +346,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, } } -extern void tcp_enter_quickack_mode(struct sock *sk); - #define TCP_ECN_OK 1 #define TCP_ECN_QUEUE_CWR 2 #define TCP_ECN_DEMAND_CWR 4 -- cgit v1.2.3-18-g5258 From 65836112fc24bdf009554481b36b6ba0a690b855 Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Tue, 28 Sep 2010 20:20:28 +0200 Subject: wl12xx: fix non-wl12xx build scenarios Support building wl1271-equipped boards without building the wl1271 driver itself, e.g.: CONFIG_MACH_OMAP_ZOOM3=y CONFIG_WL12XX is not set Reported-by: John W. Linville Signed-off-by: Ohad Ben-Cohen Signed-off-by: Luciano Coelho --- include/linux/wl12xx.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index 95deae3968f..4f902e1908a 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -32,7 +32,20 @@ struct wl12xx_platform_data { int board_ref_clock; }; +#ifdef CONFIG_WL12XX_PLATFORM_DATA + int wl12xx_set_platform_data(const struct wl12xx_platform_data *data); + +#else + +static inline +int wl12xx_set_platform_data(const struct wl12xx_platform_data *data) +{ + return -ENOSYS; +} + +#endif + const struct wl12xx_platform_data *wl12xx_get_platform_data(void); #endif -- cgit v1.2.3-18-g5258 From e454c844644683571617896ab2a4ce0109c1943e Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 21 Sep 2010 16:31:11 -0300 Subject: Bluetooth: Fix deadlock in the ERTM logic The Enhanced Retransmission Mode(ERTM) is a realiable mode of operation of the Bluetooth L2CAP layer. Think on it like a simplified version of TCP. The problem we were facing here was a deadlock. ERTM uses a backlog queue to queue incomimg packets while the user is helding the lock. At some moment the sk_sndbuf can be exceeded and we can't alloc new skbs then the code sleep with the lock to wait for memory, that stalls the ERTM connection once we can't read the acknowledgements packets in the backlog queue to free memory and make the allocation of outcoming skb successful. This patch actually affect all users of bt_skb_send_alloc(), i.e., all L2CAP modes and SCO. We are safe against socket states changes or channels deletion while the we are sleeping wait memory. Checking for the sk->sk_err and sk->sk_shutdown make the code safe, since any action that can leave the socket or the channel in a not usable state set one of the struct members at least. Then we can check both of them when getting the lock again and return with the proper error if something unexpected happens. Signed-off-by: Gustavo F. Padovan Signed-off-by: Ulisses Furquim --- include/net/bluetooth/bluetooth.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 27a902d9b3a..30fce0128dd 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -161,12 +161,30 @@ static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, unsigned long l { struct sk_buff *skb; + release_sock(sk); if ((skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err))) { skb_reserve(skb, BT_SKB_RESERVE); bt_cb(skb)->incoming = 0; } + lock_sock(sk); + + if (!skb && *err) + return NULL; + + *err = sock_error(sk); + if (*err) + goto out; + + if (sk->sk_shutdown) { + *err = -ECONNRESET; + goto out; + } return skb; + +out: + kfree_skb(skb); + return NULL; } int bt_err(__u16 code); -- cgit v1.2.3-18-g5258 From 29d08b3efddca628b0360411ab2b85f7b1723f48 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 27 Sep 2010 16:17:17 +1000 Subject: drm/gem: handlecount isn't really a kref so don't make it one. There were lots of places being inconsistent since handle count looked like a kref but it really wasn't. Fix this my just making handle count an atomic on the object, and have it increase the normal object kref. Now i915/radeon/nouveau drivers can drop the normal reference on userspace object creation, and have the handle hold it. This patch fixes a memory leak or corruption on unload, because the driver had no way of knowing if a handle had been actually added for this object, and the fbcon object needed to know this to clean itself up properly. Reviewed-by: Chris Wilson Signed-off-by: Dave Airlie --- include/drm/drmP.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 774e1d49509..07e4726a4ee 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -612,7 +612,7 @@ struct drm_gem_object { struct kref refcount; /** Handle count of this object. Each handle also holds a reference */ - struct kref handlecount; + atomic_t handle_count; /* number of handles on this object */ /** Related drm device */ struct drm_device *dev; @@ -1461,7 +1461,7 @@ struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev, size_t size); int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); -void drm_gem_object_handle_free(struct kref *kref); +void drm_gem_object_handle_free(struct drm_gem_object *obj); void drm_gem_vm_open(struct vm_area_struct *vma); void drm_gem_vm_close(struct vm_area_struct *vma); int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); @@ -1496,7 +1496,7 @@ static inline void drm_gem_object_handle_reference(struct drm_gem_object *obj) { drm_gem_object_reference(obj); - kref_get(&obj->handlecount); + atomic_inc(&obj->handle_count); } static inline void @@ -1505,12 +1505,15 @@ drm_gem_object_handle_unreference(struct drm_gem_object *obj) if (obj == NULL) return; + if (atomic_read(&obj->handle_count) == 0) + return; /* * Must bump handle count first as this may be the last * ref, in which case the object would disappear before we * checked for a name */ - kref_put(&obj->handlecount, drm_gem_object_handle_free); + if (atomic_dec_and_test(&obj->handle_count)) + drm_gem_object_handle_free(obj); drm_gem_object_unreference(obj); } @@ -1520,12 +1523,17 @@ drm_gem_object_handle_unreference_unlocked(struct drm_gem_object *obj) if (obj == NULL) return; + if (atomic_read(&obj->handle_count) == 0) + return; + /* * Must bump handle count first as this may be the last * ref, in which case the object would disappear before we * checked for a name */ - kref_put(&obj->handlecount, drm_gem_object_handle_free); + + if (atomic_dec_and_test(&obj->handle_count)) + drm_gem_object_handle_free(obj); drm_gem_object_unreference_unlocked(obj); } -- cgit v1.2.3-18-g5258 From 6110a1f43c27b516e16d5ce8860fca50748c2a87 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 30 Sep 2010 21:19:07 -0400 Subject: intel_idle: Voluntary leave_mm before entering deeper Avoid TLB flush IPIs for the cores in deeper c-states by voluntary leave_mm() before entering into that state. CPUs tend to flush TLB in those c-states anyways. acpi_idle does this with C3-type states, but it was not caried over when intel_idle was introduced. intel_idle can apply it to C-states in addition to those that ACPI might export as C3... Signed-off-by: Suresh Siddha Signed-off-by: Len Brown --- include/linux/cpuidle.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 36ca9721a0c..1be416bbbb8 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -53,6 +53,7 @@ struct cpuidle_state { #define CPUIDLE_FLAG_BALANCED (0x40) /* medium latency, moderate savings */ #define CPUIDLE_FLAG_DEEP (0x80) /* high latency, large savings */ #define CPUIDLE_FLAG_IGNORE (0x100) /* ignore during this idle period */ +#define CPUIDLE_FLAG_TLB_FLUSHED (0x200) /* tlb will be flushed */ #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) -- cgit v1.2.3-18-g5258 From 82efee1499a27c06f5afb11b07db384fdb3f7004 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Sep 2010 03:31:56 +0000 Subject: ipv4: introduce __ip_dev_find() ip_dev_find(net, addr) finds a device given an IPv4 source address and takes a reference on it. Introduce __ip_dev_find(), taking a third argument, to optionally take the device reference. Callers not asking the reference to be taken should be in an rcu_read_lock() protected section. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 1ec09bb4a3a..ccd5b07d678 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -159,7 +159,12 @@ struct in_ifaddr { extern int register_inetaddr_notifier(struct notifier_block *nb); extern int unregister_inetaddr_notifier(struct notifier_block *nb); -extern struct net_device *ip_dev_find(struct net *net, __be32 addr); +extern struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref); +static inline struct net_device *ip_dev_find(struct net *net, __be32 addr) +{ + return __ip_dev_find(net, addr, true); +} + extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); extern int devinet_ioctl(struct net *net, unsigned int cmd, void __user *); extern void devinet_init(void); -- cgit v1.2.3-18-g5258 From 367e5e376922dcf52f92e1db436010fb828d3bfa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Sep 2010 05:36:29 +0000 Subject: neigh: reorder fields in struct neighbour On 64bit arches, there are two 32bit holes that we can remove. sizeof(struct neighbour) shrinks from 0xf8 to 0xf0 bytes Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 242879b6c4d..7d08fd1062f 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -94,7 +94,7 @@ struct neighbour { struct neighbour *next; struct neigh_table *tbl; struct neigh_parms *parms; - struct net_device *dev; + struct net_device *dev; unsigned long used; unsigned long confirmed; unsigned long updated; @@ -102,11 +102,11 @@ struct neighbour { __u8 nud_state; __u8 type; __u8 dead; + atomic_t refcnt; atomic_t probes; rwlock_t lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; struct hh_cache *hh; - atomic_t refcnt; int (*output)(struct sk_buff *skb); struct sk_buff_head arp_queue; struct timer_list timer; @@ -163,7 +163,7 @@ struct neigh_table { atomic_t entries; rwlock_t lock; unsigned long last_rand; - struct kmem_cache *kmem_cachep; + struct kmem_cache *kmem_cachep; struct neigh_statistics __percpu *stats; struct neighbour **hash_buckets; unsigned int hash_mask; -- cgit v1.2.3-18-g5258 From 39b4d07aa3583ceefe73622841303a0a3e942ca1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Sep 2010 09:10:26 +0100 Subject: drm: Hold the mutex when dropping the last GEM reference (v2) In order to be fully threadsafe we need to check that the drm_gem_object refcount is still 0 after acquiring the mutex in order to call the free function. Otherwise, we may encounter scenarios like: Thread A: Thread B: drm_gem_close unreference_unlocked kref_put mutex_lock ... i915_gem_evict ... kref_get -> BUG ... i915_gem_unbind ... kref_put ... i915_gem_object_free ... mutex_unlock mutex_lock i915_gem_object_free -> BUG i915_gem_object_unbind kfree mutex_unlock Note that no driver is currently using the free_unlocked vfunc and it is scheduled for removal, hasten that process. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=30454 Reported-and-Tested-by: Magnus Kessler Signed-off-by: Chris Wilson Cc: stable@kernel.org Signed-off-by: Dave Airlie --- include/drm/drmP.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 07e4726a4ee..4c9461a4f9e 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -808,7 +808,6 @@ struct drm_driver { */ int (*gem_init_object) (struct drm_gem_object *obj); void (*gem_free_object) (struct drm_gem_object *obj); - void (*gem_free_object_unlocked) (struct drm_gem_object *obj); /* vga arb irq handler */ void (*vgaarb_irq)(struct drm_device *dev, bool state); @@ -1456,7 +1455,6 @@ int drm_gem_init(struct drm_device *dev); void drm_gem_destroy(struct drm_device *dev); void drm_gem_object_release(struct drm_gem_object *obj); void drm_gem_object_free(struct kref *kref); -void drm_gem_object_free_unlocked(struct kref *kref); struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev, size_t size); int drm_gem_object_init(struct drm_device *dev, @@ -1484,8 +1482,12 @@ drm_gem_object_unreference(struct drm_gem_object *obj) static inline void drm_gem_object_unreference_unlocked(struct drm_gem_object *obj) { - if (obj != NULL) - kref_put(&obj->refcount, drm_gem_object_free_unlocked); + if (obj != NULL) { + struct drm_device *dev = obj->dev; + mutex_lock(&dev->struct_mutex); + kref_put(&obj->refcount, drm_gem_object_free); + mutex_unlock(&dev->struct_mutex); + } } int drm_gem_handle_create(struct drm_file *file_priv, -- cgit v1.2.3-18-g5258 From 5c80cc78de46aef6cd5e714208da05c3f7f548f8 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 30 Sep 2010 14:43:16 +0200 Subject: x86, amd_nb: Enable GART support for AMD family 0x15 CPUs AMD CPU family 0x15 still supports GART for compatibility reasons. Signed-off-by: Andreas Herrmann LKML-Reference: <20100930124316.GG20545@loge.amd.com> Signed-off-by: H. Peter Anvin --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 10d33309e9a..edc0279be72 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -514,6 +514,7 @@ #define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302 #define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 #define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 +#define PCI_DEVICE_ID_AMD_15H_NB_MISC 0x1603 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 #define PCI_DEVICE_ID_AMD_SCSI 0x2020 -- cgit v1.2.3-18-g5258 From 51df1142816e469173889fb6d6dc810be9b9e022 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 20 Aug 2010 12:37:15 -0500 Subject: slub: Dynamically size kmalloc cache allocations kmalloc caches are statically defined and may take up a lot of space just because the sizes of the node array has to be dimensioned for the largest node count supported. This patch makes the size of the kmem_cache structure dynamic throughout by creating a kmem_cache slab cache for the kmem_cache objects. The bootstrap occurs by allocating the initial one or two kmem_cache objects from the page allocator. C2->C3 - Fix various issues indicated by David - Make create kmalloc_cache return a kmem_cache * pointer. Acked-by: David Rientjes Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 9f63538928c..a6c43ec6a4a 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -139,19 +139,16 @@ struct kmem_cache { #ifdef CONFIG_ZONE_DMA #define SLUB_DMA __GFP_DMA -/* Reserve extra caches for potential DMA use */ -#define KMALLOC_CACHES (2 * SLUB_PAGE_SHIFT) #else /* Disable DMA functionality */ #define SLUB_DMA (__force gfp_t)0 -#define KMALLOC_CACHES SLUB_PAGE_SHIFT #endif /* * We keep the general caches in an array of slab caches that are used for * 2^x bytes of allocations. */ -extern struct kmem_cache kmalloc_caches[KMALLOC_CACHES]; +extern struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; /* * Sorry that the following has to be that ugly but some versions of GCC @@ -216,7 +213,7 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size) if (index == 0) return NULL; - return &kmalloc_caches[index]; + return kmalloc_caches[index]; } void *kmem_cache_alloc(struct kmem_cache *, gfp_t); -- cgit v1.2.3-18-g5258 From 0bc14062414d35c269b7c7dc3243a890886e7b38 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:47 +0200 Subject: vmalloc: pcpu_get/free_vm_areas() aren't needed on UP These functions are used only by percpu memory allocator on SMP. Don't build them on UP. Signed-off-by: Tejun Heo Cc: Nick Piggin --- include/linux/vmalloc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 01c2145118d..63a4fe6d51b 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -117,10 +117,12 @@ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); +#ifdef CONFIG_SMP struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align, gfp_t gfp_mask); void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); +#endif #endif /* _LINUX_VMALLOC_H */ -- cgit v1.2.3-18-g5258 From a7b6b77b8917488d2d6b99d82673845e508144a3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:47 +0200 Subject: percpu: reduce PCPU_MIN_UNIT_SIZE to 32k In preparation of enabling percpu allocator for UP, reduce PCPU_MIN_UNIT_SIZE to 32k. On UP, the first chunk doesn't have to include static percpu variables and chunk size can be smaller which is important as UP percpu allocator will use contiguous kernel memory to populate chunks. PCPU_MIN_UNIT_SIZE also determines the maximum supported allocation size but 32k should still be enough. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 49466b13c5c..fc8130a7cac 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -42,7 +42,7 @@ #ifdef CONFIG_SMP /* minimum unit size, also is the maximum supported allocation size */ -#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) +#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) /* * Percpu allocator can serve percpu allocations before slab is -- cgit v1.2.3-18-g5258 From 9b8327bb2483ded5e04df6c33cf339ce7c02f6e9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:48 +0200 Subject: percpu: use percpu allocator on UP too On UP, percpu allocations were redirected to kmalloc. This has the following problems. * For certain amount of allocations (determined by PERCPU_DYNAMIC_EARLY_SLOTS and PERCPU_DYNAMIC_EARLY_SIZE), percpu allocator can be used before the usual kernel memory allocator is brought online. On SMP, this is used to initialize the kernel memory allocator. * percpu allocator honors alignment upto PAGE_SIZE but kmalloc() doesn't. For example, workqueue makes use of larger alignments for cpu_workqueues. Currently, users of percpu allocators need to handle UP differently, which is somewhat fragile and ugly. Other than small amount of memory, there isn't much to lose by enabling percpu allocator on UP. It can simply use kernel memory based chunk allocation which was added for SMP archs w/o MMUs. This patch removes mm/percpu_up.c, builds mm/percpu.c on UP too and makes UP build use percpu-km. As percpu addresses and kernel addresses are always identity mapped and static percpu variables don't need any special treatment, nothing is arch dependent and mm/percpu.c implements generic setup_per_cpu_areas() for UP. Signed-off-by: Tejun Heo Cc: Christoph Lameter Cc: Pekka Enberg --- include/linux/percpu.h | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index fc8130a7cac..aeeeef1093c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -39,8 +39,6 @@ preempt_enable(); \ } while (0) -#ifdef CONFIG_SMP - /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) @@ -137,37 +135,20 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, * dynamically allocated. Non-atomic access to the current CPU's * version should probably be combined with get_cpu()/put_cpu(). */ +#ifdef CONFIG_SMP #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +#else +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) +#endif extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); extern bool is_kernel_percpu_address(unsigned long addr); -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +#if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) extern void __init setup_per_cpu_areas(void); #endif extern void __init percpu_init_late(void); -#else /* CONFIG_SMP */ - -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) - -/* can't distinguish from other static vars, always false */ -static inline bool is_kernel_percpu_address(unsigned long addr) -{ - return false; -} - -static inline void __init setup_per_cpu_areas(void) { } - -static inline void __init percpu_init_late(void) { } - -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} - -#endif /* CONFIG_SMP */ - extern void __percpu *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void __percpu *__pdata); extern phys_addr_t per_cpu_ptr_to_phys(void *addr); -- cgit v1.2.3-18-g5258 From 7340cc84141d5236c5dd003359ee921513cd9b84 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 28 Sep 2010 08:10:26 -0500 Subject: slub: reduce differences between SMP and NUMA Reduce the #ifdefs and simplify bootstrap by making SMP and NUMA as much alike as possible. This means that there will be an additional indirection to get to the kmem_cache_node field under SMP. Acked-by: David Rientjes Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index a6c43ec6a4a..b33c0f2e61d 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -96,11 +96,8 @@ struct kmem_cache { * Defragmentation by allocating from a remote node. */ int remote_node_defrag_ratio; - struct kmem_cache_node *node[MAX_NUMNODES]; -#else - /* Avoid an extra cache line for UP */ - struct kmem_cache_node local_node; #endif + struct kmem_cache_node *node[MAX_NUMNODES]; }; /* -- cgit v1.2.3-18-g5258 From 4bacd796ccd6976b03dd490708a1abc291d5521e Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Sat, 2 Oct 2010 22:02:07 +0900 Subject: sh: Support early IRQ vector map reservation for delayed controllers. Some controllers will need to be initialized lazily due to pinmux constraints, while others may simply have no need to be brought online if there are no backing devices for them attached. In this case it's still necessary to be able to reserve their hardware vector map before dynamic IRQs get a hold of them. Signed-off-by: Paul Mundt --- include/linux/sh_intc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index 0d6cd38e673..bff2f286ca6 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -106,6 +106,7 @@ struct intc_desc symbol __initdata = { \ } int __init register_intc_controller(struct intc_desc *desc); +void reserve_intc_vectors(struct intc_vect *vectors, unsigned int nr_vecs); int intc_set_priority(unsigned int irq, unsigned int prio); #ifdef CONFIG_INTC_USERIMASK -- cgit v1.2.3-18-g5258 From b72421d8aa39724474ec2bfb91e182001f1f25a7 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 4 Oct 2010 03:54:56 +0900 Subject: sh: pfc: support pinmux deregistration. Presently the pinmux code is a one-way thing, but there's nothing preventing an unregistration if no one has grabbed any of the pins. This will permit us to save a bit of memory on systems that require pin demux for certain peripherals in the case where registration of those peripherals fails, or they are otherwise not attached to the system. Signed-off-by: Paul Mundt --- include/linux/sh_pfc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h index 07c08af9f8f..30cae70874f 100644 --- a/include/linux/sh_pfc.h +++ b/include/linux/sh_pfc.h @@ -92,5 +92,6 @@ struct pinmux_info { }; int register_pinmux(struct pinmux_info *pip); +int unregister_pinmux(struct pinmux_info *pip); #endif /* __SH_PFC_H */ -- cgit v1.2.3-18-g5258 From a8c9486b816f74d4645144db9e8fa2f711c1fc4b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Oct 2010 16:15:08 +0000 Subject: ipmr: RCU protection for mfc_cache_array Use RCU & RTNL protection for mfc_cache_array[] ipmr_cache_find() is called under rcu_read_lock(); Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/mroute.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index fa04b246c9a..0fa7a3a874c 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -213,6 +213,7 @@ struct mfc_cache { unsigned char ttls[MAXVIFS]; /* TTL thresholds */ } res; } mfc_un; + struct rcu_head rcu; }; #define MFC_STATIC 1 -- cgit v1.2.3-18-g5258 From c7d4426a98a5f6654cd0b4b33d9dab2e77192c18 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 3 Oct 2010 22:17:54 -0700 Subject: net: introduce DST_NOCACHE flag While doing stress tests with IP route cache disabled, and multi queue devices, I noticed a very high contention on one rwlock used in neighbour code. When many cpus are trying to send frames (possibly using a high performance multiqueue device) to the same neighbour, they fight for the neigh->lock rwlock in order to call neigh_hh_init(), and fight on hh->hh_refcnt (a pair of atomic_inc/atomic_dec_and_test()) But we dont need to call neigh_hh_init() for dst that are used only once. It costs four atomic operations at least, on two contended cache lines, plus the high contention on neigh->lock rwlock. Introduce a new dst flag, DST_NOCACHE, that is set when dst was not inserted in route cache. With the stress test bench, sending 160000000 frames on one neighbour, results are : Before patch: real 2m28.406s user 0m11.781s sys 36m17.964s After patch: real 1m26.532s user 0m12.185s sys 20m3.903s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index aa53fbc34b2..a217c838ec0 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -43,10 +43,11 @@ struct dst_entry { short error; short obsolete; int flags; -#define DST_HOST 1 -#define DST_NOXFRM 2 -#define DST_NOPOLICY 4 -#define DST_NOHASH 8 +#define DST_HOST 0x0001 +#define DST_NOXFRM 0x0002 +#define DST_NOPOLICY 0x0004 +#define DST_NOHASH 0x0008 +#define DST_NOCACHE 0x0010 unsigned long expires; unsigned short header_len; /* more space at head required */ -- cgit v1.2.3-18-g5258 From ff7dcd44dd446db2c3e13bdedf2d52b8e0127f16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 27 Sep 2010 12:44:25 +0000 Subject: genirq: Create irq_data Low level chip functions need access to irq_desc->handler_data, irq_desc->chip_data and irq_desc->msi_desc. We hand down the irq number to the low level functions, so they need to lookup irq_desc. With sparse irq this means a radix tree lookup. We could hand down irq_desc itself, but low level chip functions have no need to fiddle with it directly and we want to restrict access to irq_desc further. Preparatory patch for new chip functions. Note, that the ugly anon union/struct is there to avoid a full tree wide clean up for now. This is not going to last 3 years like __do_IRQ() Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra LKML-Reference: <20100927121841.645542300@linutronix.de> Reviewed-by: H. Peter Anvin Reviewed-by: Ingo Molnar --- include/linux/irq.h | 90 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 06273a2a17e..363c76ff82c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -83,6 +83,37 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, struct proc_dir_entry; struct msi_desc; +/** + * struct irq_data - per irq and irq chip data passed down to chip functions + * @irq: interrupt number + * @node: node index useful for balancing + * @chip: low level interrupt hardware access + * @handler_data: per-IRQ data for the irq_chip methods + * @chip_data: platform-specific per-chip private data for the chip + * methods, to allow shared chip implementations + * @msi_desc: MSI descriptor + * @affinity: IRQ affinity on SMP + * @irq_2_iommu: iommu with this irq + * + * The fields here need to overlay the ones in irq_desc until we + * cleaned up the direct references and switched everything over to + * irq_data. + */ +struct irq_data { + unsigned int irq; + unsigned int node; + struct irq_chip *chip; + void *handler_data; + void *chip_data; + struct msi_desc *msi_desc; +#ifdef CONFIG_SMP + cpumask_var_t affinity; +#endif +#ifdef CONFIG_INTR_REMAP + struct irq_2_iommu *irq_2_iommu; +#endif +}; + /** * struct irq_chip - hardware interrupt chip descriptor * @@ -140,16 +171,10 @@ struct timer_rand_state; struct irq_2_iommu; /** * struct irq_desc - interrupt descriptor - * @irq: interrupt number for this descriptor + * @irq_data: per irq and chip data passed down to chip functions * @timer_rand_state: pointer to timer rand state struct * @kstat_irqs: irq stats per cpu - * @irq_2_iommu: iommu with this irq * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] - * @chip: low level interrupt hardware access - * @msi_desc: MSI descriptor - * @handler_data: per-IRQ data for the irq_chip methods - * @chip_data: platform-specific per-chip private data for the chip - * methods, to allow shared chip implementations * @action: the irq action chain * @status: status information * @depth: disable-depth, for nested irq_disable() calls @@ -158,8 +183,6 @@ struct irq_2_iommu; * @last_unhandled: aging timer for unhandled count * @irqs_unhandled: stats field for spurious unhandled interrupts * @lock: locking for SMP - * @affinity: IRQ affinity on SMP - * @node: node index useful for balancing * @pending_mask: pending rebalanced interrupts * @threads_active: number of irqaction threads currently running * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers @@ -167,17 +190,32 @@ struct irq_2_iommu; * @name: flow handler name for /proc/interrupts output */ struct irq_desc { - unsigned int irq; - struct timer_rand_state *timer_rand_state; - unsigned int *kstat_irqs; + + /* + * This union will go away, once we fixed the direct access to + * irq_desc all over the place. The direct fields are a 1:1 + * overlay of irq_data. + */ + union { + struct irq_data irq_data; + struct { + unsigned int irq; + unsigned int node; + struct irq_chip *chip; + void *handler_data; + void *chip_data; + struct msi_desc *msi_desc; +#ifdef CONFIG_SMP + cpumask_var_t affinity; +#endif #ifdef CONFIG_INTR_REMAP - struct irq_2_iommu *irq_2_iommu; + struct irq_2_iommu *irq_2_iommu; #endif + }; + }; + struct timer_rand_state *timer_rand_state; + unsigned int *kstat_irqs; irq_flow_handler_t handle_irq; - struct irq_chip *chip; - struct msi_desc *msi_desc; - void *handler_data; - void *chip_data; struct irqaction *action; /* IRQ action list */ unsigned int status; /* IRQ status */ @@ -188,9 +226,7 @@ struct irq_desc { unsigned int irqs_unhandled; raw_spinlock_t lock; #ifdef CONFIG_SMP - cpumask_var_t affinity; const struct cpumask *affinity_hint; - unsigned int node; #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_var_t pending_mask; #endif @@ -406,15 +442,15 @@ extern int set_irq_chip_data(unsigned int irq, void *data); extern int set_irq_type(unsigned int irq, unsigned int type); extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); -#define get_irq_chip(irq) (irq_to_desc(irq)->chip) -#define get_irq_chip_data(irq) (irq_to_desc(irq)->chip_data) -#define get_irq_data(irq) (irq_to_desc(irq)->handler_data) -#define get_irq_msi(irq) (irq_to_desc(irq)->msi_desc) +#define get_irq_chip(irq) (irq_to_desc(irq)->irq_data.chip) +#define get_irq_chip_data(irq) (irq_to_desc(irq)->irq_data.chip_data) +#define get_irq_data(irq) (irq_to_desc(irq)->irq_data.handler_data) +#define get_irq_msi(irq) (irq_to_desc(irq)->irq_data.msi_desc) -#define get_irq_desc_chip(desc) ((desc)->chip) -#define get_irq_desc_chip_data(desc) ((desc)->chip_data) -#define get_irq_desc_data(desc) ((desc)->handler_data) -#define get_irq_desc_msi(desc) ((desc)->msi_desc) +#define get_irq_desc_chip(desc) ((desc)->irq_data.chip) +#define get_irq_desc_chip_data(desc) ((desc)->irq_data.chip_data) +#define get_irq_desc_data(desc) ((desc)->irq_data.handler_data) +#define get_irq_desc_msi(desc) ((desc)->irq_data.msi_desc) #endif /* CONFIG_GENERIC_HARDIRQS */ -- cgit v1.2.3-18-g5258 From 6b8ff3120c758340505dddf08ad685ebb841d5d5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 1 Oct 2010 12:58:38 +0200 Subject: genirq: Convert core code to irq_data Convert all references in the core code to orq, chip, handler_data, chip_data, msi_desc, affinity to irq_data.* Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- include/linux/irq.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 363c76ff82c..002351d83c3 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -475,12 +475,12 @@ static inline bool alloc_desc_masks(struct irq_desc *desc, int node, gfp = GFP_NOWAIT; #ifdef CONFIG_CPUMASK_OFFSTACK - if (!alloc_cpumask_var_node(&desc->affinity, gfp, node)) + if (!alloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node)) return false; #ifdef CONFIG_GENERIC_PENDING_IRQ if (!alloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { - free_cpumask_var(desc->affinity); + free_cpumask_var(desc->irq_data.affinity); return false; } #endif @@ -490,7 +490,7 @@ static inline bool alloc_desc_masks(struct irq_desc *desc, int node, static inline void init_desc_masks(struct irq_desc *desc) { - cpumask_setall(desc->affinity); + cpumask_setall(desc->irq_data.affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_clear(desc->pending_mask); #endif @@ -510,7 +510,7 @@ static inline void init_copy_desc_masks(struct irq_desc *old_desc, struct irq_desc *new_desc) { #ifdef CONFIG_CPUMASK_OFFSTACK - cpumask_copy(new_desc->affinity, old_desc->affinity); + cpumask_copy(new_desc->irq_data.affinity, old_desc->irq_data.affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_copy(new_desc->pending_mask, old_desc->pending_mask); @@ -521,7 +521,7 @@ static inline void init_copy_desc_masks(struct irq_desc *old_desc, static inline void free_desc_masks(struct irq_desc *old_desc, struct irq_desc *new_desc) { - free_cpumask_var(old_desc->affinity); + free_cpumask_var(old_desc->irq_data.affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ free_cpumask_var(old_desc->pending_mask); -- cgit v1.2.3-18-g5258 From f8822657e799b02c55556c99a601261e207a299d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 27 Sep 2010 12:44:32 +0000 Subject: genirq: Provide advanced irq chip functions The low level irq chip functions want access to irq_desc->irq_data. Provide new functions which hand down irq_data instead of the irq number so these functions avoid to call irq_to_desc() which is a radix tree lookup in case of sparse irq. This provides all the old functions except one: end(). end() is a relict of __do_IRQ() and will just go away with the __do_IRQ() code. The replacement for set_affinity() has an extra argument "bool force". The reason for this is to notify the low level code, that the move has to be done right away and cannot be delayed until the next interrupt happens. That's necessary to handle the irq fixup on cpu unplug in the generic code. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra LKML-Reference: <20100927121841.742126604@linutronix.de> Reviewed-by: H. Peter Anvin Reviewed-by: Ingo Molnar --- include/linux/irq.h | 66 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 50 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 002351d83c3..0c83cbd2df4 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -118,23 +118,38 @@ struct irq_data { * struct irq_chip - hardware interrupt chip descriptor * * @name: name for /proc/interrupts - * @startup: start up the interrupt (defaults to ->enable if NULL) - * @shutdown: shut down the interrupt (defaults to ->disable if NULL) - * @enable: enable the interrupt (defaults to chip->unmask if NULL) - * @disable: disable the interrupt - * @ack: start of a new interrupt - * @mask: mask an interrupt source - * @mask_ack: ack and mask an interrupt source - * @unmask: unmask an interrupt source - * @eoi: end of interrupt - chip level - * @end: end of interrupt - flow level - * @set_affinity: set the CPU affinity on SMP machines - * @retrigger: resend an IRQ to the CPU - * @set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ - * @set_wake: enable/disable power-management wake-on of an IRQ + * @startup: deprecated, replaced by irq_startup + * @shutdown: deprecated, replaced by irq_shutdown + * @enable: deprecated, replaced by irq_enable + * @disable: deprecated, replaced by irq_disable + * @ack: deprecated, replaced by irq_ack + * @mask: deprecated, replaced by irq_mask + * @mask_ack: deprecated, replaced by irq_mask_ack + * @unmask: deprecated, replaced by irq_unmask + * @eoi: deprecated, replaced by irq_eoi + * @end: deprecated, will go away with __do_IRQ() + * @set_affinity: deprecated, replaced by irq_set_affinity + * @retrigger: deprecated, replaced by irq_retrigger + * @set_type: deprecated, replaced by irq_set_type + * @set_wake: deprecated, replaced by irq_wake + * @bus_lock: deprecated, replaced by irq_bus_lock + * @bus_sync_unlock: deprecated, replaced by irq_bus_sync_unlock * - * @bus_lock: function to lock access to slow bus (i2c) chips - * @bus_sync_unlock: function to sync and unlock slow bus (i2c) chips + * @irq_startup: start up the interrupt (defaults to ->enable if NULL) + * @irq_shutdown: shut down the interrupt (defaults to ->disable if NULL) + * @irq_enable: enable the interrupt (defaults to chip->unmask if NULL) + * @irq_disable: disable the interrupt + * @irq_ack: start of a new interrupt + * @irq_mask: mask an interrupt source + * @irq_mask_ack: ack and mask an interrupt source + * @irq_unmask: unmask an interrupt source + * @irq_eoi: end of interrupt + * @irq_set_affinity: set the CPU affinity on SMP machines + * @irq_retrigger: resend an IRQ to the CPU + * @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ + * @irq_set_wake: enable/disable power-management wake-on of an IRQ + * @irq_bus_lock: function to lock access to slow bus (i2c) chips + * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips * * @release: release function solely used by UML */ @@ -161,6 +176,25 @@ struct irq_chip { void (*bus_lock)(unsigned int irq); void (*bus_sync_unlock)(unsigned int irq); + unsigned int (*irq_startup)(struct irq_data *data); + void (*irq_shutdown)(struct irq_data *data); + void (*irq_enable)(struct irq_data *data); + void (*irq_disable)(struct irq_data *data); + + void (*irq_ack)(struct irq_data *data); + void (*irq_mask)(struct irq_data *data); + void (*irq_mask_ack)(struct irq_data *data); + void (*irq_unmask)(struct irq_data *data); + void (*irq_eoi)(struct irq_data *data); + + int (*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force); + int (*irq_retrigger)(struct irq_data *data); + int (*irq_set_type)(struct irq_data *data, unsigned int flow_type); + int (*irq_set_wake)(struct irq_data *data, unsigned int on); + + void (*irq_bus_lock)(struct irq_data *data); + void (*irq_bus_sync_unlock)(struct irq_data *data); + /* Currently used only by UML, might disappear one day.*/ #ifdef CONFIG_IRQ_RELEASE_METHOD void (*release)(unsigned int irq, void *dev_id); -- cgit v1.2.3-18-g5258 From bd151412263a67b5321e9dd1d5b4bf6d96fdebf3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 1 Oct 2010 15:17:14 +0200 Subject: genirq: Provide config option to disable deprecated code This option covers now the old chip functions and the irq_desc data fields which are moving to struct irq_data. More stuff will follow. Pretty handy for testing a conversion, whether something broke or not. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- include/linux/irq.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 0c83cbd2df4..82ed8231394 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -155,6 +155,7 @@ struct irq_data { */ struct irq_chip { const char *name; +#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED unsigned int (*startup)(unsigned int irq); void (*shutdown)(unsigned int irq); void (*enable)(unsigned int irq); @@ -175,7 +176,7 @@ struct irq_chip { void (*bus_lock)(unsigned int irq); void (*bus_sync_unlock)(unsigned int irq); - +#endif unsigned int (*irq_startup)(struct irq_data *data); void (*irq_shutdown)(struct irq_data *data); void (*irq_enable)(struct irq_data *data); @@ -225,6 +226,9 @@ struct irq_2_iommu; */ struct irq_desc { +#ifdef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED + struct irq_data irq_data; +#else /* * This union will go away, once we fixed the direct access to * irq_desc all over the place. The direct fields are a 1:1 @@ -247,6 +251,8 @@ struct irq_desc { #endif }; }; +#endif + struct timer_rand_state *timer_rand_state; unsigned int *kstat_irqs; irq_flow_handler_t handle_irq; -- cgit v1.2.3-18-g5258 From 001985b2c0cfad48e1dec8e30f4d432eac240dd2 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:51 +0900 Subject: netfilter: nf_conntrack_sip: Add callid parser Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/linux/netfilter/nf_conntrack_sip.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index ff8cfbcf3b8..0ce91d56a5f 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -89,6 +89,7 @@ enum sip_header_types { SIP_HDR_VIA_TCP, SIP_HDR_EXPIRES, SIP_HDR_CONTENT_LENGTH, + SIP_HDR_CALL_ID, }; enum sdp_header_types { -- cgit v1.2.3-18-g5258 From f11017ec2d1859c661f4e2b12c4a8d250e1f47cf Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:52 +0900 Subject: IPVS: Add struct ip_vs_conn_param Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/net/ip_vs.h | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3915a4f4cd3..d4da774eca7 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -357,6 +357,15 @@ struct ip_vs_protocol { extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto); +struct ip_vs_conn_param { + const union nf_inet_addr *caddr; + const union nf_inet_addr *vaddr; + __be16 cport; + __be16 vport; + __u16 protocol; + u16 af; +}; + /* * IP_VS structure allocated for each dynamically scheduled connection */ @@ -626,13 +635,23 @@ enum { IP_VS_DIR_LAST, }; -extern struct ip_vs_conn *ip_vs_conn_in_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port); +static inline void ip_vs_conn_fill_param(int af, int protocol, + const union nf_inet_addr *caddr, + __be16 cport, + const union nf_inet_addr *vaddr, + __be16 vport, + struct ip_vs_conn_param *p) +{ + p->af = af; + p->protocol = protocol; + p->caddr = caddr; + p->cport = cport; + p->vaddr = vaddr; + p->vport = vport; +} -extern struct ip_vs_conn *ip_vs_ct_in_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port); +struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p); +struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, @@ -640,9 +659,7 @@ struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, unsigned int proto_off, int inverse); -extern struct ip_vs_conn *ip_vs_conn_out_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port); +struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, @@ -658,11 +675,10 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) extern void ip_vs_conn_put(struct ip_vs_conn *cp); extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); -extern struct ip_vs_conn * -ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, - const union nf_inet_addr *vaddr, __be16 vport, - const union nf_inet_addr *daddr, __be16 dport, unsigned flags, - struct ip_vs_dest *dest); +struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, + const union nf_inet_addr *daddr, + __be16 dport, unsigned flags, + struct ip_vs_dest *dest); extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); extern const char * ip_vs_state_name(__u16 proto, int state); -- cgit v1.2.3-18-g5258 From 85999283a21ab2dd37427fdd8c8e8af57223977c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:53 +0900 Subject: IPVS: Add struct ip_vs_pe Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/linux/ip_vs.h | 2 ++ include/net/ip_vs.h | 28 +++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index df772861372..0a9c44d6429 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -99,8 +99,10 @@ 0) #define IP_VS_SCHEDNAME_MAXLEN 16 +#define IP_VS_PENAME_MAXLEN 16 #define IP_VS_IFNAME_MAXLEN 16 +#define IP_VS_PEDATA_MAXLEN 255 /* * The struct ip_vs_service_user and struct ip_vs_dest_user are diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d4da774eca7..b6b309d05e4 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -364,6 +364,10 @@ struct ip_vs_conn_param { __be16 vport; __u16 protocol; u16 af; + + const struct ip_vs_pe *pe; + char *pe_data; + __u8 pe_data_len; }; /* @@ -416,6 +420,9 @@ struct ip_vs_conn { void *app_data; /* Application private data */ struct ip_vs_seq in_seq; /* incoming seq. struct */ struct ip_vs_seq out_seq; /* outgoing seq. struct */ + + char *pe_data; + __u8 pe_data_len; }; @@ -486,6 +493,9 @@ struct ip_vs_service { struct ip_vs_scheduler *scheduler; /* bound scheduler object */ rwlock_t sched_lock; /* lock sched_data */ void *sched_data; /* scheduler application data */ + + /* alternate persistence engine */ + struct ip_vs_pe *pe; }; @@ -549,6 +559,20 @@ struct ip_vs_scheduler { const struct sk_buff *skb); }; +/* The persistence engine object */ +struct ip_vs_pe { + struct list_head n_list; /* d-linked list head */ + char *name; /* scheduler name */ + atomic_t refcnt; /* reference counter */ + struct module *module; /* THIS_MODULE/NULL */ + + /* get the connection template, if any */ + int (*fill_param)(struct ip_vs_conn_param *p, struct sk_buff *skb); + bool (*ct_match)(const struct ip_vs_conn_param *p, + struct ip_vs_conn *ct); + u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval, + bool inverse); +}; /* * The application module object (a.k.a. app incarnation) @@ -648,6 +672,8 @@ static inline void ip_vs_conn_fill_param(int af, int protocol, p->cport = cport; p->vaddr = vaddr; p->vport = vport; + p->pe = NULL; + p->pe_data = NULL; } struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p); @@ -803,7 +829,7 @@ extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * -ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb); +ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb); extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp); -- cgit v1.2.3-18-g5258 From a3c918acd29a96aba3b46bf50136e7953a480d17 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:53 +0900 Subject: IPVS: Add persistence engine data to /proc/net/ip_vs_conn This shouldn't break compatibility with userspace as the new data is at the end of the line. I have confirmed that this doesn't break ipvsadm, the main (only?) user-space user of this data. Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/net/ip_vs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index b6b309d05e4..974daf52ba7 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -572,6 +572,7 @@ struct ip_vs_pe { struct ip_vs_conn *ct); u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval, bool inverse); + int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf); }; /* -- cgit v1.2.3-18-g5258 From 8be67a6617b3403551fccb67b1c624c659419515 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:54 +0900 Subject: IPVS: management of persistence engine modules This is based heavily on the scheduler management code Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/net/ip_vs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 974daf52ba7..a6b93a26d4e 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -796,6 +796,12 @@ extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); extern int ip_vs_app_init(void); extern void ip_vs_app_cleanup(void); +void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe); +void ip_vs_unbind_pe(struct ip_vs_service *svc); +int register_ip_vs_pe(struct ip_vs_pe *pe); +int unregister_ip_vs_pe(struct ip_vs_pe *pe); +extern struct ip_vs_pe *ip_vs_pe_get(const char *name); +extern void ip_vs_pe_put(struct ip_vs_pe *pe); /* * IPVS protocol functions (from ip_vs_proto.c) -- cgit v1.2.3-18-g5258 From 0d1e71b04a04b6912e50926b9987c1e72facb1f3 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:54 +0900 Subject: IPVS: Allow configuration of persistence engines Allow the persistence engine of a virtual service to be set, edited and unset. This feature only works with the netlink user-space interface. Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/linux/ip_vs.h | 3 +++ include/net/ip_vs.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index 0a9c44d6429..5f43a3b2e3a 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -336,6 +336,9 @@ enum { IPVS_SVC_ATTR_NETMASK, /* persistent netmask */ IPVS_SVC_ATTR_STATS, /* nested attribute for service stats */ + + IPVS_SVC_ATTR_PE_NAME, /* name of ct retriever */ + __IPVS_SVC_ATTR_MAX, }; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a6b93a26d4e..52fbe2308c3 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -444,6 +444,7 @@ struct ip_vs_service_user_kern { /* virtual service options */ char *sched_name; + char *pe_name; unsigned flags; /* virtual service flags */ unsigned timeout; /* persistent timeout in sec */ u32 netmask; /* persistent netmask */ -- cgit v1.2.3-18-g5258 From 44629f57accccbb8e6d443246fe6f51b42f7f781 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 23 Sep 2010 20:09:38 +0900 Subject: sh: intc: Implement reverse mapping for IRQs to per-controller IDs. This implements a scheme roughly analogous to the PowerPC virtual to hardware IRQ mapping, which we use for IRQ to per-controller ID mapping. This makes it possible for drivers to use the IDs directly for lookup instead of hardcoding the vector. The main motivation for this work is as a building block for dynamically allocating virtual IRQs for demuxing INTC events sharing a single INTEVT in addition to a common masking source. Signed-off-by: Paul Mundt --- include/linux/sh_intc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index bff2f286ca6..d40fd77fa75 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -108,6 +108,7 @@ struct intc_desc symbol __initdata = { \ int __init register_intc_controller(struct intc_desc *desc); void reserve_intc_vectors(struct intc_vect *vectors, unsigned int nr_vecs); int intc_set_priority(unsigned int irq, unsigned int prio); +unsigned int intc_irq_lookup(const char *chipname, intc_enum enum_id); #ifdef CONFIG_INTC_USERIMASK int register_intc_userimask(unsigned long addr); -- cgit v1.2.3-18-g5258 From 0c200d935346fe0ebde9b6dffbb683dddd166fb9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 4 Oct 2010 20:53:18 +0200 Subject: netfilter: nf_nat: make find/put static The functions nf_nat_proto_find_get and nf_nat_proto_put are only used internally in nf_nat_core. This might break some out of tree NAT module. Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_nat_protocol.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h index df17bac46bf..93cc90d28e6 100644 --- a/include/net/netfilter/nf_nat_protocol.h +++ b/include/net/netfilter/nf_nat_protocol.h @@ -45,9 +45,6 @@ struct nf_nat_protocol { extern int nf_nat_protocol_register(const struct nf_nat_protocol *proto); extern void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto); -extern const struct nf_nat_protocol *nf_nat_proto_find_get(u_int8_t protocol); -extern void nf_nat_proto_put(const struct nf_nat_protocol *proto); - /* Built-in protocols. */ extern const struct nf_nat_protocol nf_nat_protocol_tcp; extern const struct nf_nat_protocol nf_nat_protocol_udp; -- cgit v1.2.3-18-g5258 From 2e54eb96e2c801f33d95b5dade15212ac4d6c4a5 Mon Sep 17 00:00:00 2001 From: Petr Vandrovec Date: Mon, 27 Sep 2010 01:47:33 +0200 Subject: BKL: Remove BKL from ncpfs Dozen of changes in ncpfs to provide some locking other than BKL. In readdir cache unlock and mark complete first page as last operation, so it can be used for synchronization, as code intended. When updating dentry name on case insensitive filesystems do at least some basic locking... Hold i_mutex when updating inode fields. Push some ncp_conn_is_valid down to ncp_request. Connection can become invalid at any moment, and fewer error code paths to test the better. Use i_size_{read,write} to modify file size. Set inode's backing_dev_info as ncpfs has its own special bdi. In ioctl unbreak ioctls invoked on filesystem mounted 'ro' - tests are for inode writeable or owner match, but were turned to filesystem writeable and inode writeable or owner match. Also collect all permission checks in single place. Add some locking, and remove comments saying that it would be cool to add some locks to the code. Constify some pointers. Signed-off-by: Petr Vandrovec Signed-off-by: Arnd Bergmann --- include/linux/ncp_fs.h | 28 ---------------------------- include/linux/ncp_fs_sb.h | 4 +++- 2 files changed, 3 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index 4522aed0090..ef663061d5a 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -241,34 +241,6 @@ int ncp_mmap(struct file *, struct vm_area_struct *); /* linux/fs/ncpfs/ncplib_kernel.c */ int ncp_make_closed(struct inode *); -#define ncp_namespace(i) (NCP_SERVER(i)->name_space[NCP_FINFO(i)->volNumber]) - -static inline int ncp_preserve_entry_case(struct inode *i, __u32 nscreator) -{ -#ifdef CONFIG_NCPFS_SMALLDOS - int ns = ncp_namespace(i); - - if ((ns == NW_NS_DOS) -#ifdef CONFIG_NCPFS_OS2_NS - || ((ns == NW_NS_OS2) && (nscreator == NW_NS_DOS)) -#endif /* CONFIG_NCPFS_OS2_NS */ - ) - return 0; -#endif /* CONFIG_NCPFS_SMALLDOS */ - return 1; -} - -#define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS) - -static inline int ncp_case_sensitive(struct inode *i) -{ -#ifdef CONFIG_NCPFS_NFS_NS - return ncp_namespace(i) == NW_NS_NFS; -#else - return 0; -#endif /* CONFIG_NCPFS_NFS_NS */ -} - #endif /* __KERNEL__ */ #endif /* _LINUX_NCP_FS_H */ diff --git a/include/linux/ncp_fs_sb.h b/include/linux/ncp_fs_sb.h index 8da05bc098c..d64b0e89433 100644 --- a/include/linux/ncp_fs_sb.h +++ b/include/linux/ncp_fs_sb.h @@ -62,6 +62,7 @@ struct ncp_server { int ncp_reply_size; int root_setuped; + struct mutex root_setup_lock; /* info for packet signing */ int sign_wanted; /* 1=Server needs signed packets */ @@ -81,13 +82,14 @@ struct ncp_server { size_t len; void* data; } priv; + struct rw_semaphore auth_rwsem; /* nls info: codepage for volume and charset for I/O */ struct nls_table *nls_vol; struct nls_table *nls_io; /* maximum age in jiffies */ - int dentry_ttl; + atomic_t dentry_ttl; /* miscellaneous */ unsigned int flags; -- cgit v1.2.3-18-g5258 From c1e30ad98fe210688edca872686db4a715c2fb23 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 5 Oct 2010 04:47:03 +0900 Subject: sh: intc: Support virtual mappings for IRQ subgroups. Many interrupts that share a single mask source but are on different hardware vectors will have an associated register tied to an INTEVT that denotes the precise cause for the interrupt exception being triggered. This introduces the concept of IRQ subgroups in the intc core, where a virtual IRQ map is constructed for each of the pre-defined cause bits, and a higher level chained handler takes control of the parent INTEVT. This enables CPUs with heavily muxed IRQ vectors (especially across disjoint blocks) to break things out in to a series of managed chained handlers while being able to dynamically lookup and adopt the IRQs created for them. This is largely an opt-in interface, requiring CPUs to manually submit IRQs for subgroup splitting, in addition to providing identifiers in their enum maps that can be used for lazy lookup via the radix tree. Signed-off-by: Paul Mundt --- include/linux/sh_intc.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index d40fd77fa75..04134a6c7b5 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -20,6 +20,12 @@ struct intc_group { #define INTC_GROUP(enum_id, ids...) { enum_id, { ids } } +struct intc_subgroup { + unsigned long reg, reg_width; + intc_enum parent_id; + intc_enum enum_ids[32]; +}; + struct intc_mask_reg { unsigned long set_reg, clr_reg, reg_width; intc_enum enum_ids[32]; @@ -69,9 +75,12 @@ struct intc_hw_desc { unsigned int nr_sense_regs; struct intc_mask_reg *ack_regs; unsigned int nr_ack_regs; + struct intc_subgroup *subgroups; + unsigned int nr_subgroups; }; -#define _INTC_ARRAY(a) a, sizeof(a)/sizeof(*a) +#define _INTC_ARRAY(a) a, a == NULL ? 0 : sizeof(a)/sizeof(*a) + #define INTC_HW_DESC(vectors, groups, mask_regs, \ prio_regs, sense_regs, ack_regs) \ { \ @@ -109,6 +118,7 @@ int __init register_intc_controller(struct intc_desc *desc); void reserve_intc_vectors(struct intc_vect *vectors, unsigned int nr_vecs); int intc_set_priority(unsigned int irq, unsigned int prio); unsigned int intc_irq_lookup(const char *chipname, intc_enum enum_id); +void intc_finalize(void); #ifdef CONFIG_INTC_USERIMASK int register_intc_userimask(unsigned long addr); -- cgit v1.2.3-18-g5258 From eecc545856c8a0f27783a440d25f4ceaa1f95ce8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 4 Oct 2010 23:24:21 +0200 Subject: netfilter: add missing xt_log.h file Forgot to add xt_log.h in commit a8defca0 (netfilter: ipt_LOG: add bufferisation to call printk() once) Signed-off-by: Patrick McHardy --- include/net/netfilter/xt_log.h | 54 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 include/net/netfilter/xt_log.h (limited to 'include') diff --git a/include/net/netfilter/xt_log.h b/include/net/netfilter/xt_log.h new file mode 100644 index 00000000000..0dfb34a5b53 --- /dev/null +++ b/include/net/netfilter/xt_log.h @@ -0,0 +1,54 @@ +#define S_SIZE (1024 - (sizeof(unsigned int) + 1)) + +struct sbuff { + unsigned int count; + char buf[S_SIZE + 1]; +}; +static struct sbuff emergency, *emergency_ptr = &emergency; + +static int sb_add(struct sbuff *m, const char *f, ...) +{ + va_list args; + int len; + + if (likely(m->count < S_SIZE)) { + va_start(args, f); + len = vsnprintf(m->buf + m->count, S_SIZE - m->count, f, args); + va_end(args); + if (likely(m->count + len < S_SIZE)) { + m->count += len; + return 0; + } + } + m->count = S_SIZE; + printk_once(KERN_ERR KBUILD_MODNAME " please increase S_SIZE\n"); + return -1; +} + +static struct sbuff *sb_open(void) +{ + struct sbuff *m = kmalloc(sizeof(*m), GFP_ATOMIC); + + if (unlikely(!m)) { + local_bh_disable(); + do { + m = xchg(&emergency_ptr, NULL); + } while (!m); + } + m->count = 0; + return m; +} + +static void sb_close(struct sbuff *m) +{ + m->buf[m->count] = 0; + printk("%s\n", m->buf); + + if (likely(m != &emergency)) + kfree(m); + else { + xchg(&emergency_ptr, m); + local_bh_enable(); + } +} + -- cgit v1.2.3-18-g5258 From 24824a09e35402b8d58dcc5be803a5ad3937bdba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 2 Oct 2010 06:11:55 +0000 Subject: net: dynamic ingress_queue allocation ingress being not used very much, and net_device->ingress_queue being quite a big object (128 or 256 bytes), use a dynamic allocation if needed (tc qdisc add dev eth0 ingress ...) dev_ingress_queue(dev) helper should be used only with RTNL taken. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- include/linux/rtnetlink.h | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ceed3474014..92d81edd580 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -986,7 +986,7 @@ struct net_device { rx_handler_func_t *rx_handler; void *rx_handler_data; - struct netdev_queue ingress_queue; /* use two cache lines */ + struct netdev_queue __rcu *ingress_queue; /* * Cache lines mostly used on transmit path diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 68c436bddc8..0bb7b48632b 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -6,6 +6,7 @@ #include #include #include +#include /* rtnetlink families. Values up to 127 are reserved for real address * families, values above 128 may be used arbitrarily. @@ -769,6 +770,13 @@ extern int lockdep_rtnl_is_held(void); #define rtnl_dereference(p) \ rcu_dereference_check(p, lockdep_rtnl_is_held()) +static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) +{ + return rtnl_dereference(dev->ingress_queue); +} + +extern struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); + extern void rtnetlink_init(void); extern void __rtnl_unlock(void); -- cgit v1.2.3-18-g5258 From 29fa060eab3f524d338566d34c1d9e704579ae5e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 5 Oct 2010 00:29:48 -0700 Subject: net: relax rtnl_dereference() rtnl_dereference() is used in contexts where RTNL is held, to fetch an RCU protected pointer. Updates to this pointer are prevented by RTNL, so we dont need smp_read_barrier_depends() and the ACCESS_ONCE() provided in rcu_dereference_check(). rtnl_dereference() is mainly a macro to document the locking invariant. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 0bb7b48632b..d42f274418b 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -755,20 +755,22 @@ extern int lockdep_rtnl_is_held(void); * @p: The pointer to read, prior to dereferencing * * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() - * or RTNL + * or RTNL. Note : Please prefer rtnl_dereference() or rcu_dereference() */ #define rcu_dereference_rtnl(p) \ rcu_dereference_check(p, rcu_read_lock_held() || \ lockdep_rtnl_is_held()) /** - * rtnl_dereference - rcu_dereference with debug checking + * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL * @p: The pointer to read, prior to dereferencing * - * Do an rcu_dereference(p), but check caller holds RTNL + * Return the value of the specified RCU-protected pointer, but omit + * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because + * caller holds RTNL. */ #define rtnl_dereference(p) \ - rcu_dereference_check(p, lockdep_rtnl_is_held()) + rcu_dereference_protected(p, lockdep_rtnl_is_held()) static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) { -- cgit v1.2.3-18-g5258 From 1df9916e46451533463f227e6be57cc2cfca4c5f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 4 Oct 2010 20:14:17 +0000 Subject: fib: fib_rules_cleanup can be static fib_rules_cleanup_ups is only defined and used in one place. Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/fib_rules.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index e8923bc20f9..ac2fd002812 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -106,7 +106,6 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) extern struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *, struct net *); extern void fib_rules_unregister(struct fib_rules_ops *); -extern void fib_rules_cleanup_ops(struct fib_rules_ops *); extern int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, -- cgit v1.2.3-18-g5258 From c61393ea83573ff422af505b6fd49ef9ec9b91ca Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 4 Oct 2010 20:17:53 +0000 Subject: ipv6: make __ipv6_isatap_ifid static Another exported symbol only used in one file Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/addrconf.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 958d2749b7a..a9441249306 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -276,8 +276,6 @@ static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) (addr->s6_addr32[3] ^ htonl(0x00000002))) == 0; } -extern int __ipv6_isatap_ifid(u8 *eui, __be32 addr); - static inline int ipv6_addr_is_isatap(const struct in6_addr *addr) { return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); -- cgit v1.2.3-18-g5258 From 97bd234701b2b39a0e749c1fe0e44f1d14c94292 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 5 Oct 2010 10:41:14 +0200 Subject: workqueue: prepare for more tracepoints Define workqueue_work event class and use it for workqueue_execute_end trace point. Also, move trace/events/workqueue.h include downwards such that all struct definitions are visible to it. This is to prepare for more tracepoints and doesn't cause any functional change. Signed-off-by: Tejun Heo Cc: Frederic Weisbecker --- include/trace/events/workqueue.h | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index 49682d7e9d6..ec9d7244eb9 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -7,13 +7,7 @@ #include #include -/** - * workqueue_execute_start - called immediately before the workqueue callback - * @work: pointer to struct work_struct - * - * Allows to track workqueue execution. - */ -TRACE_EVENT(workqueue_execute_start, +DECLARE_EVENT_CLASS(workqueue_work, TP_PROTO(struct work_struct *work), @@ -21,24 +15,22 @@ TRACE_EVENT(workqueue_execute_start, TP_STRUCT__entry( __field( void *, work ) - __field( void *, function) ), TP_fast_assign( __entry->work = work; - __entry->function = work->func; ), - TP_printk("work struct %p: function %pf", __entry->work, __entry->function) + TP_printk("work struct %p", __entry->work) ); /** - * workqueue_execute_end - called immediately before the workqueue callback + * workqueue_execute_start - called immediately before the workqueue callback * @work: pointer to struct work_struct * * Allows to track workqueue execution. */ -TRACE_EVENT(workqueue_execute_end, +TRACE_EVENT(workqueue_execute_start, TP_PROTO(struct work_struct *work), @@ -46,15 +38,29 @@ TRACE_EVENT(workqueue_execute_end, TP_STRUCT__entry( __field( void *, work ) + __field( void *, function) ), TP_fast_assign( __entry->work = work; + __entry->function = work->func; ), - TP_printk("work struct %p", __entry->work) + TP_printk("work struct %p: function %pf", __entry->work, __entry->function) ); +/** + * workqueue_execute_end - called immediately before the workqueue callback + * @work: pointer to struct work_struct + * + * Allows to track workqueue execution. + */ +DEFINE_EVENT(workqueue_work, workqueue_execute_end, + + TP_PROTO(struct work_struct *work), + + TP_ARGS(work) +); #endif /* _TRACE_WORKQUEUE_H */ -- cgit v1.2.3-18-g5258 From cdadf0097cdca06c497ffaeb5982e028c6e4ed38 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 5 Oct 2010 10:49:55 +0200 Subject: workqueue: add queue_work and activate_work trace points These two tracepoints allow tracking when and how a work is queued and activated. This patch is based on Frederic's patch to add queue_work trace point. Signed-off-by: Tejun Heo Cc: Frederic Weisbecker --- include/trace/events/workqueue.h | 53 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include') diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index ec9d7244eb9..7d497291c85 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -24,6 +24,59 @@ DECLARE_EVENT_CLASS(workqueue_work, TP_printk("work struct %p", __entry->work) ); +/** + * workqueue_queue_work - called when a work gets queued + * @req_cpu: the requested cpu + * @cwq: pointer to struct cpu_workqueue_struct + * @work: pointer to struct work_struct + * + * This event occurs when a work is queued immediately or once a + * delayed work is actually queued on a workqueue (ie: once the delay + * has been reached). + */ +TRACE_EVENT(workqueue_queue_work, + + TP_PROTO(unsigned int req_cpu, struct cpu_workqueue_struct *cwq, + struct work_struct *work), + + TP_ARGS(req_cpu, cwq, work), + + TP_STRUCT__entry( + __field( void *, work ) + __field( void *, function) + __field( void *, workqueue) + __field( unsigned int, req_cpu ) + __field( unsigned int, cpu ) + ), + + TP_fast_assign( + __entry->work = work; + __entry->function = work->func; + __entry->workqueue = cwq->wq; + __entry->req_cpu = req_cpu; + __entry->cpu = cwq->gcwq->cpu; + ), + + TP_printk("work struct=%p function=%pf workqueue=%p req_cpu=%u cpu=%u", + __entry->work, __entry->function, __entry->workqueue, + __entry->req_cpu, __entry->cpu) +); + +/** + * workqueue_activate_work - called when a work gets activated + * @work: pointer to struct work_struct + * + * This event occurs when a queued work is put on the active queue, + * which happens immediately after queueing unless @max_active limit + * is reached. + */ +DEFINE_EVENT(workqueue_work, workqueue_activate_work, + + TP_PROTO(struct work_struct *work), + + TP_ARGS(work) +); + /** * workqueue_execute_start - called immediately before the workqueue callback * @work: pointer to struct work_struct -- cgit v1.2.3-18-g5258 From b89f432133851a01c0d28822f11cbdcc15781a75 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 18 Sep 2010 15:09:31 +0200 Subject: fs/locks.c: prepare for BKL removal This prepares the removal of the big kernel lock from the file locking code. We still use the BKL as long as fs/lockd uses it and ceph might sleep, but we can flip the definition to a private spinlock as soon as that's done. All users outside of fs/lockd get converted to use lock_flocks() instead of lock_kernel() where appropriate. Based on an earlier patch to use a spinlock from Matthew Wilcox, who has attempted this a few times before, the earliest patch from over 10 years ago turned it into a semaphore, which ended up being slower than the BKL and was subsequently reverted. Someone should do some serious performance testing when this becomes a spinlock, since this has caused problems before. Using a spinlock should be at least as good as the BKL in theory, but who knows... Signed-off-by: Arnd Bergmann Acked-by: Matthew Wilcox Cc: Christoph Hellwig Cc: Trond Myklebust Cc: "J. Bruce Fields" Cc: Andrew Morton Cc: Miklos Szeredi Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: John Kacur Cc: Sage Weil Cc: linux-kernel@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org --- include/linux/fs.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 63d069bd80b..18032526823 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1093,10 +1093,6 @@ struct file_lock { #include -/* temporary stubs for BKL removal */ -#define lock_flocks() lock_kernel() -#define unlock_flocks() unlock_kernel() - extern void send_sigio(struct fown_struct *fown, int fd, int band); #ifdef CONFIG_FILE_LOCKING @@ -1135,6 +1131,8 @@ extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); +extern void lock_flocks(void); +extern void unlock_flocks(void); #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, struct flock __user *user) { @@ -1277,6 +1275,14 @@ static inline int lock_may_write(struct inode *inode, loff_t start, return 1; } +static inline void lock_flocks(void) +{ +} + +static inline void unlock_flocks(void) +{ +} + #endif /* !CONFIG_FILE_LOCKING */ -- cgit v1.2.3-18-g5258 From d74310d3b18aabbb7d0549ea9e3fd3259c1dce00 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 5 Oct 2010 18:13:23 +0900 Subject: sh: intc: Handle early lookups of subgroup IRQs. If lookups happen while the radix node still points to a subgroup mapping, an IRQ hasn't yet been made available for the specified id, so error out accordingly. Once the slot is replaced with an IRQ mapping and the tag is discarded, lookup can commence as normal. Signed-off-by: Paul Mundt --- include/linux/sh_intc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index 04134a6c7b5..1fc69701e0f 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -117,7 +117,7 @@ struct intc_desc symbol __initdata = { \ int __init register_intc_controller(struct intc_desc *desc); void reserve_intc_vectors(struct intc_vect *vectors, unsigned int nr_vecs); int intc_set_priority(unsigned int irq, unsigned int prio); -unsigned int intc_irq_lookup(const char *chipname, intc_enum enum_id); +int intc_irq_lookup(const char *chipname, intc_enum enum_id); void intc_finalize(void); #ifdef CONFIG_INTC_USERIMASK -- cgit v1.2.3-18-g5258 From 2be6bb0c79c7fbda3425b65ee51c558bbaf4cf91 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 5 Oct 2010 22:10:30 +0900 Subject: sh: intc: Split up the INTC code. This splits up the sh intc core in to something more vaguely resembling a subsystem. Most of the functionality was alread fairly well compartmentalized, and there were only a handful of interdependencies that needed to be resolved in the process. This also serves as future-proofing for the genirq and sparseirq rework, which will make some of the split out functionality wholly generic, allowing things to be killed off in place with minimal migration pain. Signed-off-by: Paul Mundt --- include/linux/sh_intc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index 1fc69701e0f..b4f183a31f1 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -114,7 +114,7 @@ struct intc_desc symbol __initdata = { \ prio_regs, sense_regs, ack_regs), \ } -int __init register_intc_controller(struct intc_desc *desc); +int register_intc_controller(struct intc_desc *desc); void reserve_intc_vectors(struct intc_vect *vectors, unsigned int nr_vecs); int intc_set_priority(unsigned int irq, unsigned int prio); int intc_irq_lookup(const char *chipname, intc_enum enum_id); -- cgit v1.2.3-18-g5258 From 17e5a8082894a4b66cb69e7ec16074f0f01281e1 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 29 Sep 2010 17:15:30 +0200 Subject: nl80211: allow drivers to indicate whether the survey data channel is in use Some user space applications only want to display survey data for the operating channel, however there is no API to get that yet. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/linux/nl80211.h | 2 ++ include/net/cfg80211.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index f0518b0278a..edd21ae6acf 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1400,6 +1400,7 @@ enum nl80211_reg_rule_flags { * @__NL80211_SURVEY_INFO_INVALID: attribute number 0 is reserved * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) + * @NL80211_SURVEY_INFO_IN_USE: channel is currently being used * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number * currently defined * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use @@ -1408,6 +1409,7 @@ enum nl80211_survey_info { __NL80211_SURVEY_INFO_INVALID, NL80211_SURVEY_INFO_FREQUENCY, NL80211_SURVEY_INFO_NOISE, + NL80211_SURVEY_INFO_IN_USE, /* keep last */ __NL80211_SURVEY_INFO_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a0613ff62c9..ecc0403b918 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -293,12 +293,14 @@ struct key_params { * enum survey_info_flags - survey information flags * * @SURVEY_INFO_NOISE_DBM: noise (in dBm) was filled in + * @SURVEY_INFO_IN_USE: channel is currently being used * * Used by the driver to indicate which info in &struct survey_info * it has filled in during the get_survey(). */ enum survey_info_flags { SURVEY_INFO_NOISE_DBM = 1<<0, + SURVEY_INFO_IN_USE = 1<<1, }; /** -- cgit v1.2.3-18-g5258 From ea229e682633a18c1fa2c408400a6923cfc47910 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Sep 2010 21:21:25 +0200 Subject: cfg80211: remove spurious __KERNEL__ ifdef The net/cfg80211.h header file isn't exported to userspace, so there's no need for any kind of __KERNEL__ protection in it. If it was exported, everything else in it would need protection as well, not just the logging stuff ... Cc:Joe Perches Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ecc0403b918..5f4d8acf7ab 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2553,8 +2553,6 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); -#ifdef __KERNEL__ - /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ @@ -2601,6 +2599,4 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, #define wiphy_WARN(wiphy, format, args...) \ WARN(1, "wiphy: %s\n" format, wiphy_name(wiphy), ##args); -#endif - #endif /* __NET_CFG80211_H */ -- cgit v1.2.3-18-g5258 From e8347ebad2f1b15bddb6ed3ed5f767531eb52dc3 Mon Sep 17 00:00:00 2001 From: Bill Jordan Date: Fri, 1 Oct 2010 13:54:28 -0400 Subject: cfg80211: patches to allow setting the WDS peer Added a nl interface to set the peer bssid of a WDS interface. Signed-off-by: Bill Jordan Signed-off-by: John W. Linville --- include/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index edd21ae6acf..73d9390d4dd 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -387,6 +387,8 @@ * of any other interfaces, and other interfaces will again take * precedence when they are used. * + * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -489,6 +491,7 @@ enum nl80211_commands { NL80211_CMD_NOTIFY_CQM, NL80211_CMD_SET_CHANNEL, + NL80211_CMD_SET_WDS_PEER, /* add new commands above here */ -- cgit v1.2.3-18-g5258 From 78be49ec2a0df34de9441930fdced20311fd709f Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Sat, 2 Oct 2010 11:31:55 +0200 Subject: mac80211: distinct between max rates and the number of rates the hw can report Some drivers cannot handle multiple retry rates specified by the rc algorithm but instead use their own retry table (for example rt2800). However, if such a device registers itself with a max_rates value of 1 the rc algorithm cannot make use of the extended information the device can provide about retried rates. On the other hand, if a device registers itself with a max_rates value > 1 the rc algorithm assumes that the device can handle multi rate retries. Fix this issue by introducing another hw parameter max_report_rates that can be set to a different value then max_rates to indicate if a device is capable of reporting more rates then specified in max_rates. Signed-off-by: Helmut Schaa Signed-off-by: Ivo van Doorn Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fe8b9dae4de..47316a653ae 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1109,7 +1109,10 @@ enum ieee80211_hw_flags { * @sta_data_size: size (in bytes) of the drv_priv data area * within &struct ieee80211_sta. * - * @max_rates: maximum number of alternate rate retry stages + * @max_rates: maximum number of alternate rate retry stages the hw + * can handle. + * @max_report_rates: maximum number of alternate rate retry stages + * the hw can report back. * @max_rate_tries: maximum number of tries for each stage * * @napi_weight: weight used for NAPI polling. You must specify an @@ -1131,6 +1134,7 @@ struct ieee80211_hw { u16 max_listen_interval; s8 max_signal; u8 max_rates; + u8 max_report_rates; u8 max_rate_tries; }; -- cgit v1.2.3-18-g5258 From ff4c92d85c6f2777d2067f8552e7fefb4d1754ae Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 4 Oct 2010 21:14:03 +0200 Subject: genetlink: introduce pre_doit/post_doit hooks Each family may have some amount of boilerplate locking code that applies to most, or even all, commands. This allows a family to handle such things in a more generic way, by allowing it to a) include private flags in each operation b) specify a pre_doit hook that is called, before an operation's doit() callback and may return an error directly, c) specify a post_doit hook that can undo locking or similar things done by pre_doit, and finally d) include two private pointers in each info struct passed between all these operations including doit(). (It's two because I'll need two in nl80211 -- can be extended.) Signed-off-by: Johannes Berg Acked-by: David S. Miller Signed-off-by: John W. Linville --- include/net/genetlink.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index f7dcd2c7041..8a64b811a39 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -20,6 +20,9 @@ struct genl_multicast_group { u32 id; }; +struct genl_ops; +struct genl_info; + /** * struct genl_family - generic netlink family * @id: protocol family idenfitier @@ -29,6 +32,10 @@ struct genl_multicast_group { * @maxattr: maximum number of attributes supported * @netnsok: set to true if the family can handle network * namespaces and should be presented in all of them + * @pre_doit: called before an operation's doit callback, it may + * do additional, common, filtering and return an error + * @post_doit: called after an operation's doit callback, it may + * undo operations done by pre_doit, for example release locks * @attrbuf: buffer to store parsed attributes * @ops_list: list of all assigned operations * @family_list: family list @@ -41,6 +48,12 @@ struct genl_family { unsigned int version; unsigned int maxattr; bool netnsok; + int (*pre_doit)(struct genl_ops *ops, + struct sk_buff *skb, + struct genl_info *info); + void (*post_doit)(struct genl_ops *ops, + struct sk_buff *skb, + struct genl_info *info); struct nlattr ** attrbuf; /* private */ struct list_head ops_list; /* private */ struct list_head family_list; /* private */ @@ -55,6 +68,8 @@ struct genl_family { * @genlhdr: generic netlink message header * @userhdr: user specific header * @attrs: netlink attributes + * @_net: network namespace + * @user_ptr: user pointers */ struct genl_info { u32 snd_seq; @@ -66,6 +81,7 @@ struct genl_info { #ifdef CONFIG_NET_NS struct net * _net; #endif + void * user_ptr[2]; }; static inline struct net *genl_info_net(struct genl_info *info) @@ -81,6 +97,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) /** * struct genl_ops - generic netlink operations * @cmd: command identifier + * @internal_flags: flags used by the family * @flags: flags * @policy: attribute validation policy * @doit: standard command callback @@ -90,6 +107,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) */ struct genl_ops { u8 cmd; + u8 internal_flags; unsigned int flags; const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, -- cgit v1.2.3-18-g5258 From 691895e7e2204be9a717809fb78d6ff7c10b470a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Oct 2010 16:19:42 +0200 Subject: nl80211: fix remain-on-channel documentation The documentation for NL80211_CMD_REMAIN_ON_CHANNEL isn't accurate, an interface index is required by the command. Update it accordingly. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 73d9390d4dd..c4efdfa24ed 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -315,8 +315,8 @@ * channel for the specified amount of time. This can be used to do * off-channel operations like transmit a Public Action frame and wait for * a response while being associated to an AP on another channel. - * %NL80211_ATTR_WIPHY or %NL80211_ATTR_IFINDEX is used to specify which - * radio is used. %NL80211_ATTR_WIPHY_FREQ is used to specify the + * %NL80211_ATTR_IFINDEX is used to specify which interface (and thus + * radio) is used. %NL80211_ATTR_WIPHY_FREQ is used to specify the * frequency for the operation and %NL80211_ATTR_WIPHY_CHANNEL_TYPE may be * optionally used to specify additional channel parameters. * %NL80211_ATTR_DURATION is used to specify the duration in milliseconds -- cgit v1.2.3-18-g5258 From 5336377d6225959624146629ce3fc88ee8ecda3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 5 Oct 2010 11:29:27 -0700 Subject: modules: Fix module_bug_list list corruption race With all the recent module loading cleanups, we've minimized the code that sits under module_mutex, fixing various deadlocks and making it possible to do most of the module loading in parallel. However, that whole conversion totally missed the rather obscure code that adds a new module to the list for BUG() handling. That code was doubly obscure because (a) the code itself lives in lib/bugs.c (for dubious reasons) and (b) it gets called from the architecture-specific "module_finalize()" rather than from generic code. Calling it from arch-specific code makes no sense what-so-ever to begin with, and is now actively wrong since that code isn't protected by the module loading lock any more. So this commit moves the "module_bug_{finalize,cleanup}()" calls away from the arch-specific code, and into the generic code - and in the process protects it with the module_mutex so that the list operations are now safe. Future fixups: - move the module list handling code into kernel/module.c where it belongs. - get rid of 'module_bug_list' and just use the regular list of modules (called 'modules' - imagine that) that we already create and maintain for other reasons. Reported-and-tested-by: Thomas Gleixner Cc: Rusty Russell Cc: Adrian Bunk Cc: Andrew Morton Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/module.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 8a6b9fdc7ff..aace066bad8 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -686,17 +686,16 @@ extern int module_sysfs_initialized; #ifdef CONFIG_GENERIC_BUG -int module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, +void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, struct module *); void module_bug_cleanup(struct module *); #else /* !CONFIG_GENERIC_BUG */ -static inline int module_bug_finalize(const Elf_Ehdr *hdr, +static inline void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { - return 0; } static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ -- cgit v1.2.3-18-g5258 From 231d0aefd88e94129cb8fb84794f9bb788c6366e Mon Sep 17 00:00:00 2001 From: Evgeny Kuznetsov Date: Tue, 5 Oct 2010 12:47:57 +0400 Subject: wait: using uninitialized member of wait queue The "flags" member of "struct wait_queue_t" is used in several places in the kernel code without beeing initialized by init_wait(). "flags" is used in bitwise operations. If "flags" not initialized then unexpected behaviour may take place. Incorrect flags might used later in code. Added initialization of "wait_queue_t.flags" with zero value into "init_wait". Signed-off-by: Evgeny Kuznetsov [ The bit we care about does end up being initialized by both prepare_to_wait() and add_to_wait_queue(), so this doesn't seem to cause actual bugs, but is definitely the right thing to do -Linus ] Signed-off-by: Linus Torvalds --- include/linux/wait.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 0836ccc5712..3efc9f3f43a 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -614,6 +614,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); (wait)->private = current; \ (wait)->func = autoremove_wake_function; \ INIT_LIST_HEAD(&(wait)->task_list); \ + (wait)->flags = 0; \ } while (0) /** -- cgit v1.2.3-18-g5258 From 773e3f93577ffb493fb7c39b1a6ecf39b5748e87 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 5 Oct 2010 14:03:02 -0700 Subject: rcu: move check from rcu_dereference_bh to rcu_read_lock_bh_held As suggested by Linus, push the irqs_disabled() down to the rcu_read_lock_bh_held() level so that all callers get the benefit of the correct check. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 83af1f8d8b7..9fbc54a2585 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -454,7 +454,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * Makes rcu_dereference_check() do the dirty work. */ #define rcu_dereference_bh(p) \ - rcu_dereference_check(p, rcu_read_lock_bh_held() || irqs_disabled()) + rcu_dereference_check(p, rcu_read_lock_bh_held()) /** * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched -- cgit v1.2.3-18-g5258 From caf586e5f23cebb2a68cbaf288d59dbbf2d74052 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Sep 2010 21:06:55 +0000 Subject: net: add a core netdev->rx_dropped counter In various situations, a device provides a packet to our stack and we drop it before it enters protocol stack : - softnet backlog full (accounted in /proc/net/softnet_stat) - bad vlan tag (not accounted) - unknown/unregistered protocol (not accounted) We can handle a per-device counter of such dropped frames at core level, and automatically adds it to the device provided stats (rx_dropped), so that standard tools can be used (ifconfig, ip link, cat /proc/net/dev) This is a generalization of commit 8990f468a (net: rx_dropped accounting), thus reverting it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 92d81edd580..6abcef67b17 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -884,6 +884,9 @@ struct net_device { int iflink; struct net_device_stats stats; + atomic_long_t rx_dropped; /* dropped packets by core network + * Do not use this in drivers. + */ #ifdef CONFIG_WIRELESS_EXT /* List of functions to handle Wireless Extensions (instead of ioctl). -- cgit v1.2.3-18-g5258 From d6bf781712a1d25cc8987036b3a48535b331eb91 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Oct 2010 06:15:44 +0000 Subject: net neigh: RCU conversion of neigh hash table David This is the first step for RCU conversion of neigh code. Next patches will convert hash_buckets[] and "struct neighbour" to RCU protected objects. Thanks [PATCH net-next] net neigh: RCU conversion of neigh hash table Instead of storing hash_buckets, hash_mask and hash_rnd in "struct neigh_table", a new structure is defined : struct neigh_hash_table { struct neighbour **hash_buckets; unsigned int hash_mask; __u32 hash_rnd; struct rcu_head rcu; }; And "struct neigh_table" has an RCU protected pointer to such a neigh_hash_table. This means the signature of (*hash)() function changed: We need to add a third parameter with the actual hash_rnd value, since this is not anymore a neigh_table field. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 7d08fd1062f..37845dae648 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -138,13 +138,22 @@ struct pneigh_entry { * neighbour table manipulation */ +struct neigh_hash_table { + struct neighbour **hash_buckets; + unsigned int hash_mask; + __u32 hash_rnd; + struct rcu_head rcu; +}; + struct neigh_table { struct neigh_table *next; int family; int entry_size; int key_len; - __u32 (*hash)(const void *pkey, const struct net_device *); + __u32 (*hash)(const void *pkey, + const struct net_device *dev, + __u32 hash_rnd); int (*constructor)(struct neighbour *); int (*pconstructor)(struct pneigh_entry *); void (*pdestructor)(struct pneigh_entry *); @@ -165,9 +174,7 @@ struct neigh_table { unsigned long last_rand; struct kmem_cache *kmem_cachep; struct neigh_statistics __percpu *stats; - struct neighbour **hash_buckets; - unsigned int hash_mask; - __u32 hash_rnd; + struct neigh_hash_table __rcu *nht; struct pneigh_entry **phash_buckets; }; @@ -237,6 +244,7 @@ extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_en struct neigh_seq_state { struct seq_net_private p; struct neigh_table *tbl; + struct neigh_hash_table *nht; void *(*neigh_sub_iter)(struct neigh_seq_state *state, struct neighbour *n, loff_t *pos); unsigned int bucket; -- cgit v1.2.3-18-g5258 From 1df6a2ebd75067aefbdf07482bf8e3d0584e04ee Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 30 Sep 2010 12:36:45 +0200 Subject: drm/ttm: Fix two race conditions + fix busy codepaths This fixes a race pointed out by Dave Airlie where we don't take a buffer object about to be destroyed off the LRU lists properly. It also fixes a rare case where a buffer object could be destroyed in the middle of an accelerated eviction. The patch also adds a utility function that can be used to prematurely release GPU memory space usage of an object waiting to be destroyed. For example during eviction or swapout. The above mentioned commit didn't queue the buffer on the delayed destroy list under some rare circumstances. It also didn't completely honor the remove_all parameter. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=615505 http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=591061 Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- include/drm/ttm/ttm_bo_api.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 267a86c74e2..2040e6c4f17 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -246,9 +246,11 @@ struct ttm_buffer_object { atomic_t reserved; - /** * Members protected by the bo::lock + * In addition, setting sync_obj to anything else + * than NULL requires bo::reserved to be held. This allows for + * checking NULL while reserved but not holding bo::lock. */ void *sync_obj_arg; -- cgit v1.2.3-18-g5258 From c2952c314b4fe61820ba8fd6c949eed636140d52 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Tue, 5 Oct 2010 14:23:59 +0000 Subject: bonding: add retransmit membership reports tunable Allow sysadmins to configure the number of multicast membership report sent on a link failure event. Signed-off-by: Flavio Leitner Signed-off-by: David S. Miller --- include/linux/if_bonding.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h index 2c7994372bd..a17edda8a78 100644 --- a/include/linux/if_bonding.h +++ b/include/linux/if_bonding.h @@ -84,6 +84,9 @@ #define BOND_DEFAULT_MAX_BONDS 1 /* Default maximum number of devices to support */ #define BOND_DEFAULT_TX_QUEUES 16 /* Default number of tx queues per device */ + +#define BOND_DEFAULT_RESEND_IGMP 1 /* Default number of IGMP membership reports */ + /* hashing types */ #define BOND_XMIT_POLICY_LAYER2 0 /* layer 2 (MAC only), default */ #define BOND_XMIT_POLICY_LAYER34 1 /* layer 3+4 (IP ^ (TCP || UDP)) */ -- cgit v1.2.3-18-g5258 From ebc0ffae5dfb4447e0a431ffe7fe1d467c48bbb9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Oct 2010 10:41:36 +0000 Subject: fib: RCU conversion of fib_lookup() fib_lookup() converted to be called in RCU protected context, no reference taken and released on a contended cache line (fib_clntref) fib_table_lookup() and fib_semantic_match() get an additional parameter. struct fib_info gets an rcu_head field, and is freed after an rcu grace period. Stress test : (Sending 160.000.000 UDP frames on same neighbour, IP route cache disabled, dual E5540 @2.53GHz, 32bit kernel, FIB_HASH) (about same results for FIB_TRIE) Before patch : real 1m31.199s user 0m13.761s sys 23m24.780s After patch: real 1m5.375s user 0m14.997s sys 15m50.115s Before patch Profile : 13044.00 15.4% __ip_route_output_key vmlinux 8438.00 10.0% dst_destroy vmlinux 5983.00 7.1% fib_semantic_match vmlinux 5410.00 6.4% fib_rules_lookup vmlinux 4803.00 5.7% neigh_lookup vmlinux 4420.00 5.2% _raw_spin_lock vmlinux 3883.00 4.6% rt_set_nexthop vmlinux 3261.00 3.9% _raw_read_lock vmlinux 2794.00 3.3% fib_table_lookup vmlinux 2374.00 2.8% neigh_resolve_output vmlinux 2153.00 2.5% dst_alloc vmlinux 1502.00 1.8% _raw_read_lock_bh vmlinux 1484.00 1.8% kmem_cache_alloc vmlinux 1407.00 1.7% eth_header vmlinux 1406.00 1.7% ipv4_dst_destroy vmlinux 1298.00 1.5% __copy_from_user_ll vmlinux 1174.00 1.4% dev_queue_xmit vmlinux 1000.00 1.2% ip_output vmlinux After patch Profile : 13712.00 15.8% dst_destroy vmlinux 8548.00 9.9% __ip_route_output_key vmlinux 7017.00 8.1% neigh_lookup vmlinux 4554.00 5.3% fib_semantic_match vmlinux 4067.00 4.7% _raw_read_lock vmlinux 3491.00 4.0% dst_alloc vmlinux 3186.00 3.7% neigh_resolve_output vmlinux 3103.00 3.6% fib_table_lookup vmlinux 2098.00 2.4% _raw_read_lock_bh vmlinux 2081.00 2.4% kmem_cache_alloc vmlinux 2013.00 2.3% _raw_spin_lock vmlinux 1763.00 2.0% __copy_from_user_ll vmlinux 1763.00 2.0% ip_output vmlinux 1761.00 2.0% ipv4_dst_destroy vmlinux 1631.00 1.9% eth_header vmlinux 1440.00 1.7% _raw_read_unlock_bh vmlinux Reference results, if IP route cache is enabled : real 0m29.718s user 0m10.845s sys 7m37.341s 25213.00 29.5% __ip_route_output_key vmlinux 9011.00 10.5% dst_release vmlinux 4817.00 5.6% ip_push_pending_frames vmlinux 4232.00 5.0% ip_finish_output vmlinux 3940.00 4.6% udp_sendmsg vmlinux 3730.00 4.4% __copy_from_user_ll vmlinux 3716.00 4.4% ip_route_output_flow vmlinux 2451.00 2.9% __xfrm_lookup vmlinux 2221.00 2.6% ip_append_data vmlinux 1718.00 2.0% _raw_spin_lock_bh vmlinux 1655.00 1.9% __alloc_skb vmlinux 1572.00 1.8% sock_wfree vmlinux 1345.00 1.6% kfree vmlinux Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/fib_rules.h | 2 ++ include/net/ip_fib.h | 17 ++++------------- 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index ac2fd002812..106f3097d38 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -31,6 +31,8 @@ struct fib_lookup_arg { void *lookup_ptr; void *result; struct fib_rule *rule; + int flags; +#define FIB_LOOKUP_NOREF 1 }; struct fib_rules_ops { diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c93f94edc61..ba3666d3176 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -86,6 +86,7 @@ struct fib_info { #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_power; #endif + struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev }; @@ -148,7 +149,7 @@ struct fib_table { }; extern int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, - struct fib_result *res); + struct fib_result *res, int fib_flags); extern int fib_table_insert(struct fib_table *, struct fib_config *); extern int fib_table_delete(struct fib_table *, struct fib_config *); extern int fib_table_dump(struct fib_table *table, struct sk_buff *skb, @@ -185,11 +186,11 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp, struct fib_table *table; table = fib_get_table(net, RT_TABLE_LOCAL); - if (!fib_table_lookup(table, flp, res)) + if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) return 0; table = fib_get_table(net, RT_TABLE_MAIN); - if (!fib_table_lookup(table, flp, res)) + if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) return 0; return -ENETUNREACH; } @@ -254,16 +255,6 @@ static inline void fib_info_put(struct fib_info *fi) free_fib_info(fi); } -static inline void fib_res_put(struct fib_result *res) -{ - if (res->fi) - fib_info_put(res->fi); -#ifdef CONFIG_IP_MULTIPLE_TABLES - if (res->r) - fib_rule_put(res->r); -#endif -} - #ifdef CONFIG_PROC_FS extern int __net_init fib_proc_init(struct net *net); extern void __net_exit fib_proc_exit(struct net *net); -- cgit v1.2.3-18-g5258 From ab4d5ed5eeda4f57c50d14131ce1b1da75d0c938 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Oct 2010 13:57:26 -0500 Subject: slub: Enable sysfs support for !CONFIG_SLUB_DEBUG Currently disabling CONFIG_SLUB_DEBUG also disabled SYSFS support meaning that the slabs cannot be tuned without DEBUG. Make SYSFS support independent of CONFIG_SLUB_DEBUG Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index b33c0f2e61d..e4f5ed180b9 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -87,7 +87,7 @@ struct kmem_cache { unsigned long min_partial; const char *name; /* Name (only for display!) */ struct list_head list; /* List of slab caches */ -#ifdef CONFIG_SLUB_DEBUG +#ifdef CONFIG_SYSFS struct kobject kobj; /* For sysfs */ #endif -- cgit v1.2.3-18-g5258 From e31b82136d1adc7a599b6e99d3321e5831841f5a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Oct 2010 19:39:30 +0200 Subject: cfg80211/mac80211: allow per-station GTKs This adds API to allow adding per-station GTKs, updates mac80211 to support it, and also allows drivers to remove a key from hwaccel again when this may be necessary due to multiple GTKs. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 12 ++++++++++++ include/net/cfg80211.h | 9 ++++++--- include/net/mac80211.h | 24 ++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index c4efdfa24ed..e451f176e66 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -801,6 +801,9 @@ enum nl80211_commands { * This is used in association with @NL80211_ATTR_WIPHY_TX_POWER_SETTING * for non-automatic settings. * + * @NL80211_ATTR_SUPPORT_IBSS_RSN: The device supports IBSS RSN, which mostly + * means support for per-station GTKs. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -968,6 +971,8 @@ enum nl80211_attrs { NL80211_ATTR_CONTROL_PORT_ETHERTYPE, NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, + NL80211_ATTR_SUPPORT_IBSS_RSN, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1659,11 +1664,14 @@ enum nl80211_auth_type { * @NL80211_KEYTYPE_GROUP: Group (broadcast/multicast) key * @NL80211_KEYTYPE_PAIRWISE: Pairwise (unicast/individual) key * @NL80211_KEYTYPE_PEERKEY: PeerKey (DLS) + * @NUM_NL80211_KEYTYPES: number of defined key types */ enum nl80211_key_type { NL80211_KEYTYPE_GROUP, NL80211_KEYTYPE_PAIRWISE, NL80211_KEYTYPE_PEERKEY, + + NUM_NL80211_KEYTYPES }; /** @@ -1694,6 +1702,9 @@ enum nl80211_wpa_versions { * CCMP keys, each six bytes in little endian * @NL80211_KEY_DEFAULT: flag indicating default key * @NL80211_KEY_DEFAULT_MGMT: flag indicating default management key + * @NL80211_KEY_TYPE: the key type from enum nl80211_key_type, if not + * specified the default depends on whether a MAC address was + * given with the command using the key or not (u32) * @__NL80211_KEY_AFTER_LAST: internal * @NL80211_KEY_MAX: highest key attribute */ @@ -1705,6 +1716,7 @@ enum nl80211_key_attributes { NL80211_KEY_SEQ, NL80211_KEY_DEFAULT, NL80211_KEY_DEFAULT_MGMT, + NL80211_KEY_TYPE, /* keep last */ __NL80211_KEY_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5f4d8acf7ab..0f77515266b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1130,13 +1130,14 @@ struct cfg80211_ops { struct vif_params *params); int (*add_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr, + u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params); int (*get_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr, void *cookie, + u8 key_index, bool pairwise, const u8 *mac_addr, + void *cookie, void (*callback)(void *cookie, struct key_params*)); int (*del_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr); + u8 key_index, bool pairwise, const u8 *mac_addr); int (*set_default_key)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index); @@ -1304,6 +1305,7 @@ struct cfg80211_ops { * @WIPHY_FLAG_CONTROL_PORT_PROTOCOL: This device supports setting the * control port protocol ethertype. The device also honours the * control_port_no_encrypt flag. + * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. */ enum wiphy_flags { WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), @@ -1314,6 +1316,7 @@ enum wiphy_flags { WIPHY_FLAG_4ADDR_AP = BIT(5), WIPHY_FLAG_4ADDR_STATION = BIT(6), WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), + WIPHY_FLAG_IBSS_RSN = BIT(7), }; struct mac_address { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 47316a653ae..33aa2e39147 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1041,6 +1041,13 @@ enum ieee80211_tkip_key_type { * @IEEE80211_HW_NEED_DTIM_PERIOD: * This device needs to know the DTIM period for the BSS before * associating. + * + * @IEEE80211_HW_SUPPORTS_PER_STA_GTK: The device's crypto engine supports + * per-station GTKs as used by IBSS RSN or during fast transition. If + * the device doesn't support per-station GTKs, but can be asked not + * to decrypt group addressed frames, then IBSS RSN support is still + * possible but software crypto will be used. Advertise the wiphy flag + * only in that case. */ enum ieee80211_hw_flags { IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, @@ -1064,6 +1071,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<18, IEEE80211_HW_CONNECTION_MONITOR = 1<<19, IEEE80211_HW_SUPPORTS_CQM_RSSI = 1<<20, + IEEE80211_HW_SUPPORTS_PER_STA_GTK = 1<<21, }; /** @@ -2582,6 +2590,22 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); void ieee80211_request_smps(struct ieee80211_vif *vif, enum ieee80211_smps_mode smps_mode); +/** + * ieee80211_key_removed - disable hw acceleration for key + * @key_conf: The key hw acceleration should be disabled for + * + * This allows drivers to indicate that the given key has been + * removed from hardware acceleration, due to a new key that + * was added. Don't use this if the key can continue to be used + * for TX, if the key restriction is on RX only it is permitted + * to keep the key for TX only and not call this function. + * + * Due to locking constraints, it may only be called during + * @set_key. This function must be allowed to sleep, and the + * key it tries to disable may still be used until it returns. + */ +void ieee80211_key_removed(struct ieee80211_key_conf *key_conf); + /* Rate control API */ /** -- cgit v1.2.3-18-g5258 From b206b4ef062d83c0875a085672ed50e8c8b01521 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Wed, 6 Oct 2010 18:34:12 +0900 Subject: nl80211/mac80211: Add retry and failed transmission count to station info This information is already available in mac80211, we just need to export it via cfg80211 and nl80211. Signed-off-by: Bruno Randolf Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++++ include/net/cfg80211.h | 8 ++++++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e451f176e66..c08709fe36f 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1137,6 +1137,8 @@ enum nl80211_rate_info { * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station) * @NL80211_STA_INFO_TX_PACKETS: total transmitted packets (u32, to this * station) + * @NL80211_STA_INFO_TX_RETRIES: total retries (u32, to this station) + * @NL80211_STA_INFO_TX_FAILED: total failed packets (u32, to this station) */ enum nl80211_sta_info { __NL80211_STA_INFO_INVALID, @@ -1150,6 +1152,8 @@ enum nl80211_sta_info { NL80211_STA_INFO_TX_BITRATE, NL80211_STA_INFO_RX_PACKETS, NL80211_STA_INFO_TX_PACKETS, + NL80211_STA_INFO_TX_RETRIES, + NL80211_STA_INFO_TX_FAILED, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0f77515266b..e76daaa7dc2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -401,6 +401,8 @@ struct station_parameters { * (tx_bitrate, tx_bitrate_flags and tx_bitrate_mcs) * @STATION_INFO_RX_PACKETS: @rx_packets filled * @STATION_INFO_TX_PACKETS: @tx_packets filled + * @STATION_INFO_TX_RETRIES: @tx_retries filled + * @STATION_INFO_TX_FAILED: @tx_failed filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -413,6 +415,8 @@ enum station_info_flags { STATION_INFO_TX_BITRATE = 1<<7, STATION_INFO_RX_PACKETS = 1<<8, STATION_INFO_TX_PACKETS = 1<<9, + STATION_INFO_TX_RETRIES = 1<<10, + STATION_INFO_TX_FAILED = 1<<11, }; /** @@ -462,6 +466,8 @@ struct rate_info { * @txrate: current unicast bitrate to this station * @rx_packets: packets received from this station * @tx_packets: packets transmitted to this station + * @tx_retries: cumulative retry counts + * @tx_failed: number of failed transmissions (retries exceeded, no ACK) * @generation: generation number for nl80211 dumps. * This number should increase every time the list of stations * changes, i.e. when a station is added or removed, so that @@ -479,6 +485,8 @@ struct station_info { struct rate_info txrate; u32 rx_packets; u32 tx_packets; + u32 tx_retries; + u32 tx_failed; int generation; }; -- cgit v1.2.3-18-g5258 From 767e97e1e0db0d0f3152cd2f3bd3403596aedbad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Oct 2010 17:49:21 -0700 Subject: neigh: RCU conversion of struct neighbour This is the second step for neighbour RCU conversion. (first was commit d6bf7817 : RCU conversion of neigh hash table) neigh_lookup() becomes lockless, but still take a reference on found neighbour. (no more read_lock()/read_unlock() on tbl->lock) struct neighbour gets an additional rcu_head field and is freed after an RCU grace period. Future work would need to eventually not take a reference on neighbour for temporary dst (DST_NOCACHE), but this would need dst->_neighbour to use a noref bit like we did for skb->_dst. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 37845dae648..a4538d55370 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -91,7 +91,7 @@ struct neigh_statistics { #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) struct neighbour { - struct neighbour *next; + struct neighbour __rcu *next; struct neigh_table *tbl; struct neigh_parms *parms; struct net_device *dev; @@ -111,6 +111,7 @@ struct neighbour { struct sk_buff_head arp_queue; struct timer_list timer; const struct neigh_ops *ops; + struct rcu_head rcu; u8 primary_key[0]; }; @@ -139,7 +140,7 @@ struct pneigh_entry { */ struct neigh_hash_table { - struct neighbour **hash_buckets; + struct neighbour __rcu **hash_buckets; unsigned int hash_mask; __u32 hash_rnd; struct rcu_head rcu; -- cgit v1.2.3-18-g5258 From 430c62fb2948d964cf8dc7f3e2f69623c04ef62f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Oct 2010 09:35:16 +0200 Subject: elevator: fix oops on early call to elevator_change() 2.6.36 introduces an API for drivers to switch the IO scheduler instead of manually calling the elevator exit and init functions. This API was added since q->elevator must be cleared in between those two calls. And since we already have this functionality directly from use by the sysfs interface to switch schedulers online, it was prudent to reuse it internally too. But this API needs the queue to be in a fully initialized state before it is called, or it will attempt to unregister elevator kobjects before they have been added. This results in an oops like this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000051 IP: [] sysfs_create_dir+0x2e/0xc0 PGD 47ddfc067 PUD 47c6a1067 PMD 0 Oops: 0000 [#1] PREEMPT SMP last sysfs file: /sys/devices/pci0000:00/0000:00:02.0/0000:04:00.1/irq CPU 2 Modules linked in: t(+) loop hid_apple usbhid ahci ehci_hcd uhci_hcd libahci usbcore nls_base igb Pid: 7319, comm: modprobe Not tainted 2.6.36-rc6+ #132 QSSC-S4R/QSSC-S4R RIP: 0010:[] [] sysfs_create_dir+0x2e/0xc0 RSP: 0018:ffff88027da25d08 EFLAGS: 00010246 RAX: ffff88047c68c528 RBX: 00000000fffffffe RCX: 0000000000000000 RDX: 000000000000002f RSI: 000000000000002f RDI: ffff88047e196c88 RBP: ffff88027da25d38 R08: 0000000000000000 R09: d84156c5635688c0 R10: d84156c5635688c0 R11: 0000000000000000 R12: ffff88047e196c88 R13: 0000000000000000 R14: 0000000000000000 R15: ffff88047c68c528 FS: 00007fcb0b26f6e0(0000) GS:ffff880287400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000051 CR3: 000000047e76e000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process modprobe (pid: 7319, threadinfo ffff88027da24000, task ffff88027d377090) Stack: ffff88027da25d58 ffff88047c68c528 00000000fffffffe ffff88047e196c88 <0> ffff88047c68c528 ffff88047e05bd90 ffff88027da25d78 ffffffff8123fb77 <0> ffff88047e05bd90 0000000000000000 ffff88047e196c88 ffff88047c68c528 Call Trace: [] kobject_add_internal+0xe7/0x1f0 [] kobject_add_varg+0x38/0x60 [] kobject_add+0x69/0x90 [] ? sysfs_remove_dir+0x20/0xa0 [] ? sub_preempt_count+0x9d/0xe0 [] ? _raw_spin_unlock+0x30/0x50 [] ? sysfs_remove_dir+0x20/0xa0 [] ? sysfs_remove_dir+0x34/0xa0 [] elv_register_queue+0x34/0xa0 [] elevator_change+0xfd/0x250 [] ? t_init+0x0/0x361 [t] [] ? t_init+0x0/0x361 [t] [] t_init+0xa8/0x361 [t] [] do_one_initcall+0x3e/0x170 [] sys_init_module+0xbd/0x220 [] system_call_fastpath+0x16/0x1b Code: e5 41 56 41 55 41 54 49 89 fc 53 48 83 ec 10 48 85 ff 74 52 48 8b 47 18 49 c7 c5 00 46 61 81 48 85 c0 74 04 4c 8b 68 30 45 31 f6 <41> 80 7d 51 00 74 0e 49 8b 44 24 28 4c 89 e7 ff 50 20 49 89 c6 RIP [] sysfs_create_dir+0x2e/0xc0 RSP CR2: 0000000000000051 ---[ end trace a6541d3bf07945df ]--- Fix this by adding a registered bit to the elevator queue, which is set when the sysfs kobjects have been registered. Signed-off-by: Jens Axboe --- include/linux/elevator.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 926b50322a4..4fd978e7eb8 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -93,6 +93,7 @@ struct elevator_queue struct elevator_type *elevator_type; struct mutex sysfs_lock; struct hlist_head *hash; + unsigned int registered:1; }; /* -- cgit v1.2.3-18-g5258 From bcdb714c8856c76383ca455294f0074168705eab Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 7 Oct 2010 14:08:53 +0100 Subject: Drop a couple of unnecessary asm/system.h inclusions Drop inclusions of asm/system.h from linux/hardirq.h and linux/list.h as they're no longer required and prevent the M68K arch's IRQ flag handling macros from being made into inlined functions due to circular dependencies. Signed-off-by: David Howells Acked-by: Greg Ungerer Acked-by: Geert Uytterhoeven --- include/linux/hardirq.h | 1 - include/linux/list.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index d5b387669da..7dfdc06c7e1 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -8,7 +8,6 @@ #include #include #include -#include /* * We put the hardirq and softirq counter into the preemption diff --git a/include/linux/list.h b/include/linux/list.h index d167b5d7c0a..88a000617d7 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -5,7 +5,6 @@ #include #include #include -#include /* * Simple doubly linked list implementation. -- cgit v1.2.3-18-g5258 From df9ee29270c11dba7d0fe0b83ce47a4d8e8d2101 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 7 Oct 2010 14:08:55 +0100 Subject: Fix IRQ flag handling naming Fix the IRQ flag handling naming. In linux/irqflags.h under one configuration, it maps: local_irq_enable() -> raw_local_irq_enable() local_irq_disable() -> raw_local_irq_disable() local_irq_save() -> raw_local_irq_save() ... and under the other configuration, it maps: raw_local_irq_enable() -> local_irq_enable() raw_local_irq_disable() -> local_irq_disable() raw_local_irq_save() -> local_irq_save() ... This is quite confusing. There should be one set of names expected of the arch, and this should be wrapped to give another set of names that are expected by users of this facility. Change this to have the arch provide: flags = arch_local_save_flags() flags = arch_local_irq_save() arch_local_irq_restore(flags) arch_local_irq_disable() arch_local_irq_enable() arch_irqs_disabled_flags(flags) arch_irqs_disabled() arch_safe_halt() Then linux/irqflags.h wraps these to provide: raw_local_save_flags(flags) raw_local_irq_save(flags) raw_local_irq_restore(flags) raw_local_irq_disable() raw_local_irq_enable() raw_irqs_disabled_flags(flags) raw_irqs_disabled() raw_safe_halt() with type checking on the flags 'arguments', and then wraps those to provide: local_save_flags(flags) local_irq_save(flags) local_irq_restore(flags) local_irq_disable() local_irq_enable() irqs_disabled_flags(flags) irqs_disabled() safe_halt() with tracing included if enabled. The arch functions can now all be inline functions rather than some of them having to be macros. Signed-off-by: David Howells [X86, FRV, MN10300] Signed-off-by: Chris Metcalf [Tile] Signed-off-by: Michal Simek [Microblaze] Tested-by: Catalin Marinas [ARM] Acked-by: Thomas Gleixner Acked-by: Haavard Skinnemoen [AVR] Acked-by: Tony Luck [IA-64] Acked-by: Hirokazu Takata [M32R] Acked-by: Greg Ungerer [M68K/M68KNOMMU] Acked-by: Ralf Baechle [MIPS] Acked-by: Kyle McMartin [PA-RISC] Acked-by: Paul Mackerras [PowerPC] Acked-by: Martin Schwidefsky [S390] Acked-by: Chen Liqin [Score] Acked-by: Matt Fleming [SH] Acked-by: David S. Miller [Sparc] Acked-by: Chris Zankel [Xtensa] Reviewed-by: Richard Henderson [Alpha] Reviewed-by: Yoshinori Sato [H8300] Cc: starvik@axis.com [CRIS] Cc: jesper.nilsson@axis.com [CRIS] Cc: linux-cris-kernel@axis.com --- include/asm-generic/atomic.h | 5 +- include/asm-generic/cmpxchg-local.h | 1 + include/asm-generic/hardirq.h | 1 - include/asm-generic/irqflags.h | 52 ++++++++---------- include/linux/irqflags.h | 107 +++++++++++++++++++++--------------- include/linux/spinlock.h | 1 + 6 files changed, 92 insertions(+), 75 deletions(-) (limited to 'include') diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index e53347fbf1d..fd57b8477fa 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -43,6 +43,7 @@ */ #define atomic_set(v, i) (((v)->counter) = (i)) +#include #include /** @@ -57,7 +58,7 @@ static inline int atomic_add_return(int i, atomic_t *v) unsigned long flags; int temp; - raw_local_irq_save(flags); /* Don't trace it in a irqsoff handler */ + raw_local_irq_save(flags); /* Don't trace it in an irqsoff handler */ temp = v->counter; temp += i; v->counter = temp; @@ -78,7 +79,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) unsigned long flags; int temp; - raw_local_irq_save(flags); /* Don't trace it in a irqsoff handler */ + raw_local_irq_save(flags); /* Don't trace it in an irqsoff handler */ temp = v->counter; temp -= i; v->counter = temp; diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h index b2ba2fc8829..2533fddd34a 100644 --- a/include/asm-generic/cmpxchg-local.h +++ b/include/asm-generic/cmpxchg-local.h @@ -2,6 +2,7 @@ #define __ASM_GENERIC_CMPXCHG_LOCAL_H #include +#include extern unsigned long wrong_size_cmpxchg(volatile void *ptr); diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h index 62f59080e5c..c0771aa248c 100644 --- a/include/asm-generic/hardirq.h +++ b/include/asm-generic/hardirq.h @@ -3,7 +3,6 @@ #include #include -#include typedef struct { unsigned int __softirq_pending; diff --git a/include/asm-generic/irqflags.h b/include/asm-generic/irqflags.h index 9aebf618275..1f40d0024cf 100644 --- a/include/asm-generic/irqflags.h +++ b/include/asm-generic/irqflags.h @@ -5,68 +5,62 @@ * All architectures should implement at least the first two functions, * usually inline assembly will be the best way. */ -#ifndef RAW_IRQ_DISABLED -#define RAW_IRQ_DISABLED 0 -#define RAW_IRQ_ENABLED 1 +#ifndef ARCH_IRQ_DISABLED +#define ARCH_IRQ_DISABLED 0 +#define ARCH_IRQ_ENABLED 1 #endif /* read interrupt enabled status */ -#ifndef __raw_local_save_flags -unsigned long __raw_local_save_flags(void); +#ifndef arch_local_save_flags +unsigned long arch_local_save_flags(void); #endif /* set interrupt enabled status */ -#ifndef raw_local_irq_restore -void raw_local_irq_restore(unsigned long flags); +#ifndef arch_local_irq_restore +void arch_local_irq_restore(unsigned long flags); #endif /* get status and disable interrupts */ -#ifndef __raw_local_irq_save -static inline unsigned long __raw_local_irq_save(void) +#ifndef arch_local_irq_save +static inline unsigned long arch_local_irq_save(void) { unsigned long flags; - flags = __raw_local_save_flags(); - raw_local_irq_restore(RAW_IRQ_DISABLED); + flags = arch_local_save_flags(); + arch_local_irq_restore(ARCH_IRQ_DISABLED); return flags; } #endif /* test flags */ -#ifndef raw_irqs_disabled_flags -static inline int raw_irqs_disabled_flags(unsigned long flags) +#ifndef arch_irqs_disabled_flags +static inline int arch_irqs_disabled_flags(unsigned long flags) { - return flags == RAW_IRQ_DISABLED; + return flags == ARCH_IRQ_DISABLED; } #endif /* unconditionally enable interrupts */ -#ifndef raw_local_irq_enable -static inline void raw_local_irq_enable(void) +#ifndef arch_local_irq_enable +static inline void arch_local_irq_enable(void) { - raw_local_irq_restore(RAW_IRQ_ENABLED); + arch_local_irq_restore(ARCH_IRQ_ENABLED); } #endif /* unconditionally disable interrupts */ -#ifndef raw_local_irq_disable -static inline void raw_local_irq_disable(void) +#ifndef arch_local_irq_disable +static inline void arch_local_irq_disable(void) { - raw_local_irq_restore(RAW_IRQ_DISABLED); + arch_local_irq_restore(ARCH_IRQ_DISABLED); } #endif /* test hardware interrupt enable bit */ -#ifndef raw_irqs_disabled -static inline int raw_irqs_disabled(void) +#ifndef arch_irqs_disabled +static inline int arch_irqs_disabled(void) { - return raw_irqs_disabled_flags(__raw_local_save_flags()); + return arch_irqs_disabled_flags(arch_local_save_flags()); } #endif -#define raw_local_save_flags(flags) \ - do { (flags) = __raw_local_save_flags(); } while (0) - -#define raw_local_irq_save(flags) \ - do { (flags) = __raw_local_irq_save(); } while (0) - #endif /* __ASM_GENERIC_IRQFLAGS_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 006bf45eae3..d176d658fe2 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -12,6 +12,7 @@ #define _LINUX_TRACE_IRQFLAGS_H #include +#include #ifdef CONFIG_TRACE_IRQFLAGS extern void trace_softirqs_on(unsigned long ip); @@ -52,17 +53,45 @@ # define start_critical_timings() do { } while (0) #endif -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - -#include +/* + * Wrap the arch provided IRQ routines to provide appropriate checks. + */ +#define raw_local_irq_disable() arch_local_irq_disable() +#define raw_local_irq_enable() arch_local_irq_enable() +#define raw_local_irq_save(flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = arch_local_irq_save(); \ + } while (0) +#define raw_local_irq_restore(flags) \ + do { \ + typecheck(unsigned long, flags); \ + arch_local_irq_restore(flags); \ + } while (0) +#define raw_local_save_flags(flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = arch_local_save_flags(); \ + } while (0) +#define raw_irqs_disabled_flags(flags) \ + ({ \ + typecheck(unsigned long, flags); \ + arch_irqs_disabled_flags(flags); \ + }) +#define raw_irqs_disabled() (arch_irqs_disabled()) +#define raw_safe_halt() arch_safe_halt() +/* + * The local_irq_*() APIs are equal to the raw_local_irq*() + * if !TRACE_IRQFLAGS. + */ +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT #define local_irq_enable() \ do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) #define local_irq_disable() \ do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0) #define local_irq_save(flags) \ do { \ - typecheck(unsigned long, flags); \ raw_local_irq_save(flags); \ trace_hardirqs_off(); \ } while (0) @@ -70,7 +99,6 @@ #define local_irq_restore(flags) \ do { \ - typecheck(unsigned long, flags); \ if (raw_irqs_disabled_flags(flags)) { \ raw_local_irq_restore(flags); \ trace_hardirqs_off(); \ @@ -79,51 +107,44 @@ raw_local_irq_restore(flags); \ } \ } while (0) -#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */ -/* - * The local_irq_*() APIs are equal to the raw_local_irq*() - * if !TRACE_IRQFLAGS. - */ -# define raw_local_irq_disable() local_irq_disable() -# define raw_local_irq_enable() local_irq_enable() -# define raw_local_irq_save(flags) \ - do { \ - typecheck(unsigned long, flags); \ - local_irq_save(flags); \ - } while (0) -# define raw_local_irq_restore(flags) \ +#define local_save_flags(flags) \ do { \ - typecheck(unsigned long, flags); \ - local_irq_restore(flags); \ + raw_local_save_flags(flags); \ } while (0) -#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT -#define safe_halt() \ - do { \ - trace_hardirqs_on(); \ - raw_safe_halt(); \ - } while (0) +#define irqs_disabled_flags(flags) \ + ({ \ + raw_irqs_disabled_flags(flags); \ + }) -#define local_save_flags(flags) \ - do { \ - typecheck(unsigned long, flags); \ - raw_local_save_flags(flags); \ +#define irqs_disabled() \ + ({ \ + unsigned long _flags; \ + raw_local_save_flags(_flags); \ + raw_irqs_disabled_flags(_flags); \ + }) + +#define safe_halt() \ + do { \ + trace_hardirqs_on(); \ + raw_safe_halt(); \ } while (0) -#define irqs_disabled() \ -({ \ - unsigned long _flags; \ - \ - raw_local_save_flags(_flags); \ - raw_irqs_disabled_flags(_flags); \ -}) -#define irqs_disabled_flags(flags) \ -({ \ - typecheck(unsigned long, flags); \ - raw_irqs_disabled_flags(flags); \ -}) +#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */ + +#define local_irq_enable() do { raw_local_irq_enable(); } while (0) +#define local_irq_disable() do { raw_local_irq_disable(); } while (0) +#define local_irq_save(flags) \ + do { \ + raw_local_irq_save(flags); \ + } while (0) +#define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0) +#define local_save_flags(flags) do { raw_local_save_flags(flags); } while (0) +#define irqs_disabled() (raw_irqs_disabled()) +#define irqs_disabled_flags(flags) (raw_irqs_disabled_flags(flags)) +#define safe_halt() do { raw_safe_halt(); } while (0) + #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ #endif diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index f8854655860..80e535897de 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-18-g5258 From 388ac775be95e510c2095ed6cd59422a5183a9fb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 7 Oct 2010 13:11:09 +0200 Subject: cfg80211: constify WDS address There's no need for the WDS peer address to not be const, so make it const. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e76daaa7dc2..0778d04b3bb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1229,7 +1229,7 @@ struct cfg80211_ops { int (*get_tx_power)(struct wiphy *wiphy, int *dbm); int (*set_wds_peer)(struct wiphy *wiphy, struct net_device *dev, - u8 *addr); + const u8 *addr); void (*rfkill_poll)(struct wiphy *wiphy); -- cgit v1.2.3-18-g5258 From 43ca910a9c90566308f39f51ac03a55f94a5f83c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 15 Sep 2010 16:52:32 -0500 Subject: [SCSI] fc class: add fc host dev loss sysfs file This adds a fc host dev loss sysfs file. Instead of calling into the driver using the get_host_def_dev_loss_tmo callback, we allow drivers to init the dev loss like is done for other fc host params, and then the fc class will handle updating the value if the user writes to the new sysfs file. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- include/scsi/scsi_transport_fc.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 9f98fca9b76..59816fe31e6 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -496,7 +496,7 @@ struct fc_host_attrs { u64 fabric_name; char symbolic_name[FC_SYMBOLIC_NAME_SIZE]; char system_hostname[FC_SYMBOLIC_NAME_SIZE]; - u32 def_dev_loss_tmo; + u32 dev_loss_tmo; /* Private (Transport-managed) Attributes */ enum fc_tgtid_binding_type tgtid_bind_type; @@ -581,8 +581,8 @@ struct fc_host_attrs { (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q_name) #define fc_host_devloss_work_q(x) \ (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q) -#define fc_host_def_dev_loss_tmo(x) \ - (((struct fc_host_attrs *)(x)->shost_data)->def_dev_loss_tmo) +#define fc_host_dev_loss_tmo(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->dev_loss_tmo) struct fc_bsg_buffer { @@ -643,7 +643,6 @@ struct fc_function_template { void (*get_host_fabric_name)(struct Scsi_Host *); void (*get_host_symbolic_name)(struct Scsi_Host *); void (*set_host_system_hostname)(struct Scsi_Host *); - void (*get_host_def_dev_loss_tmo)(struct Scsi_Host *); struct fc_host_statistics * (*get_fc_host_stats)(struct Scsi_Host *); void (*reset_fc_host_stats)(struct Scsi_Host *); -- cgit v1.2.3-18-g5258 From 01723a9566f9e9ce4c75e5c4c9f6dc20600871a7 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 7 Sep 2010 22:43:19 +0100 Subject: ARM: 6368/1: move the PrimeCell IDs to use macros This make four macros for the PrimeCell ID register available to drivers that use them witout using the PrimeCell/AMBA bus abstraction and struct amba_device. It also moves the magic PrimeCell CID "B105F00D" to the bus.h header file. Signed-off-by: Linus Walleij Signed-off-by: Russell King --- include/linux/amba/bus.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index b0c17401243..c6454cca044 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -20,6 +20,7 @@ #include #define AMBA_NR_IRQS 2 +#define AMBA_CID 0xb105f00d struct clk; @@ -70,9 +71,15 @@ void amba_release_regions(struct amba_device *); #define amba_pclk_disable(d) \ do { if (!IS_ERR((d)->pclk)) clk_disable((d)->pclk); } while (0) -#define amba_config(d) (((d)->periphid >> 24) & 0xff) -#define amba_rev(d) (((d)->periphid >> 20) & 0x0f) -#define amba_manf(d) (((d)->periphid >> 12) & 0xff) -#define amba_part(d) ((d)->periphid & 0xfff) +/* Some drivers don't use the struct amba_device */ +#define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff) +#define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f) +#define AMBA_MANF_BITS(a) (((a) >> 12) & 0xff) +#define AMBA_PART_BITS(a) ((a) & 0xfff) + +#define amba_config(d) AMBA_CONFIG_BITS((d)->periphid) +#define amba_rev(d) AMBA_REV_BITS((d)->periphid) +#define amba_manf(d) AMBA_MANF_BITS((d)->periphid) +#define amba_part(d) AMBA_PART_BITS((d)->periphid) #endif -- cgit v1.2.3-18-g5258 From 56dd2c0691a5a387b7b05835fe547dc6fade9407 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 1 Oct 2010 13:55:47 -0700 Subject: [SCSI] libsas: Don't issue commands to devices that have been hot-removed sd will get hung up issuing commands to flush write cache if a SAS device behind the expander is unplugged without warning. Change libsas to reject commands to domain devices that have already gone away. [maciej.trela@intel.com: removed setting ->gone in sas_deform_port() to permit sync cache commands at module removal] Signed-off-by: Darrick J. Wong Tested-by: Haipao Fan Signed-off-by: Maciej Trela Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- include/scsi/libsas.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index d06e13be717..3dec1949f69 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -205,6 +205,7 @@ struct domain_device { }; void *lldd_dev; + int gone; }; struct sas_discovery_event { -- cgit v1.2.3-18-g5258 From 03789f26722a15ccfe6f191e9fb3d356f2f18a1e Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Fri, 8 Oct 2010 04:02:02 +0000 Subject: Phonet: cleanup pipe enable socket option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current code works like this: int garbage, status; socklen_t len = sizeof(status); /* enable pipe */ setsockopt(fd, SOL_PNPIPE, PNPIPE_ENABLE, &garbage, sizeof(garbage)); /* disable pipe */ setsockopt(fd, SOL_PNPIPE, PNPIPE_DISABLE, &garbage, sizeof(garbage)); /* get status */ getsockopt(fd, SOL_PNPIPE, PNPIPE_INQ, &status, &len); ...which does not follow the usual socket option pattern. This patch merges all three "options" into a single gettable&settable option, before Linux 2.6.37 gets out: int status; socklen_t len = sizeof(status); /* enable pipe */ status = 1; setsockopt(fd, SOL_PNPIPE, PNPIPE_ENABLE, &status, sizeof(status)); /* disable pipe */ status = 0; setsockopt(fd, SOL_PNPIPE, PNPIPE_ENABLE, &status, sizeof(status)); /* get status */ getsockopt(fd, SOL_PNPIPE, PNPIPE_ENABLE, &status, &len); This also fixes the error code from EFAULT to ENOTCONN. Signed-off-by: Rémi Denis-Courmont Cc: Kumar Sanghvi Signed-off-by: David S. Miller --- include/linux/phonet.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 96f5625d62f..e27cbf93174 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -38,9 +38,8 @@ #define PNPIPE_IFINDEX 2 #define PNPIPE_CREATE 3 #define PNPIPE_ENABLE 4 -#define PNPIPE_DISABLE 5 +/* unused slot */ #define PNPIPE_DESTROY 6 -#define PNPIPE_INQ 7 #define PNADDR_ANY 0 #define PNADDR_BROADCAST 0xFC -- cgit v1.2.3-18-g5258 From 3e51d3c924aea8a1f1372e6c615b0a37b528121d Mon Sep 17 00:00:00 2001 From: Kai Makisara Date: Sat, 9 Oct 2010 00:17:56 +0300 Subject: [SCSI] st: add MTWEOFI to write filemarks without flushing drive buffer This patch adds a new MTIOCTOP operation MTWEOFI that writes filemarks with immediate bit set. This means that the drive does not flush its buffer and the next file can be started immediately. This speeds up writing in applications that have to write multiple small files. Signed-off-by: Kai Makisara Signed-off-by: James Bottomley --- include/linux/mtio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mtio.h b/include/linux/mtio.h index ef01d6aa593..8f825756c45 100644 --- a/include/linux/mtio.h +++ b/include/linux/mtio.h @@ -63,6 +63,7 @@ struct mtop { #define MTCOMPRESSION 32/* control compression with SCSI mode page 15 */ #define MTSETPART 33 /* Change the active tape partition */ #define MTMKPART 34 /* Format the tape with one or two partitions */ +#define MTWEOFI 35 /* write an end-of-file record (mark) in immediate mode */ /* structure for MTIOCGET - mag tape get status command */ -- cgit v1.2.3-18-g5258 From 8b9d40691e8f5e7e0c8fb839c2bad29c5e0888ce Mon Sep 17 00:00:00 2001 From: Mathieu Lacage Date: Sun, 27 Jun 2010 12:26:06 +0200 Subject: asm-generic: make atomic_add_unless a function atomic_add_unless is a macro so, bad things happen if the caller defines a local variable named c, just like like the local variable c defined by the macro. Thus, convert atomic_add_unless to a function. (bug triggered by net/ipv4/netfilter/ipt_CLUSTERIP.c: clusterip_config_find_get calls atomic_inc_not_zero) Signed-off-by: Mathieu Lacage Signed-off-by: Arnd Bergmann --- include/asm-generic/atomic.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index e53347fbf1d..a6cc019a41e 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -119,14 +119,23 @@ static inline void atomic_dec(atomic_t *v) #define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) #define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0) -#define atomic_add_unless(v, a, u) \ -({ \ - int c, old; \ - c = atomic_read(v); \ - while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ - c = old; \ - c != (u); \ -}) +#define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) +#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) + +#define cmpxchg_local(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\ + (unsigned long)(n), sizeof(*(ptr)))) + +#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) + +static inline int atomic_add_unless(atomic_t *v, int a, int u) +{ + int c, old; + c = atomic_read(v); + while (c != u && (old = atomic_cmpxchg(v, c, c + a)) != c) + c = old; + return c != u; +} #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) @@ -140,15 +149,6 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) raw_local_irq_restore(flags); } -#define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) -#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) - -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\ - (unsigned long)(n), sizeof(*(ptr)))) - -#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) - /* Assume that atomic operations are already serializing */ #define smp_mb__before_atomic_dec() barrier() #define smp_mb__after_atomic_dec() barrier() -- cgit v1.2.3-18-g5258 From c6691126636769bd22bfd7b55829f0373a93c1ce Mon Sep 17 00:00:00 2001 From: Mathieu Lacage Date: Tue, 22 Jun 2010 10:30:15 +0200 Subject: asm-generic: cmpxchg does not handle non-long arguments The version of cmpxchg defined in asm-generic/system.h does not handle correctly non-long arguments. Use the version defined in cmpxchg.h instead. Signed-off-by: Mathieu Lacage Signed-off-by: Arnd Bergmann --- include/asm-generic/system.h | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'include') diff --git a/include/asm-generic/system.h b/include/asm-generic/system.h index efa403b5e12..4b0b9cbbfae 100644 --- a/include/asm-generic/system.h +++ b/include/asm-generic/system.h @@ -21,6 +21,7 @@ #include #include +#include struct task_struct; @@ -136,25 +137,6 @@ unsigned long __xchg(unsigned long x, volatile void *ptr, int size) #define xchg(ptr, x) \ ((__typeof__(*(ptr))) __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) -static inline unsigned long __cmpxchg(volatile unsigned long *m, - unsigned long old, unsigned long new) -{ - unsigned long retval; - unsigned long flags; - - local_irq_save(flags); - retval = *m; - if (retval == old) - *m = new; - local_irq_restore(flags); - return retval; -} - -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr))) __cmpxchg((unsigned long *)(ptr), \ - (unsigned long)(o), \ - (unsigned long)(n))) - #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ -- cgit v1.2.3-18-g5258 From 269b8fd5d058f2c0da01a42b20315ffc2640d99b Mon Sep 17 00:00:00 2001 From: Lucian Adrian Grijincu Date: Wed, 6 Oct 2010 15:03:47 -0700 Subject: asm-generic: fcntl: make exported headers use strict posix types All 'pid_t' were changed to '__kernel_pid_t' in a previous commit: make exported headers use strict posix types A number of standard posix types are used in exported headers, which is not allowed if __STRICT_KERNEL_NAMES is defined. In order to get rid of the non-__STRICT_KERNEL_NAMES part and to make sane headers the default, we have to change them all to safe types. but a later change introduced 'pid_t' again: fcntl: add F_[SG]ETOWN_EX This makes asm-generic/fcntl.h d use strict posix types again. Signed-off-by: Lucian Adrian Grijincu Signed-off-by: Andrew Morton Signed-off-by: Arnd Bergmann --- include/asm-generic/fcntl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h index a70b2d2bfc1..0fc16e3f0bf 100644 --- a/include/asm-generic/fcntl.h +++ b/include/asm-generic/fcntl.h @@ -122,7 +122,7 @@ struct f_owner_ex { int type; - pid_t pid; + __kernel_pid_t pid; }; /* for F_[GET|SET]FL */ -- cgit v1.2.3-18-g5258 From c24cef0b68a719324c344c1563ef3d750ac6bf0e Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sat, 27 Feb 2010 17:51:35 +0100 Subject: asm-generic: kdebug.h: Checkpatch cleanup include/asm-generic/kdebug.h:6: ERROR: spaces required around that '=' (ctx:VxV) Signed-off-by: Andrea Gelmini Signed-off-by: Arnd Bergmann --- include/asm-generic/kdebug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/kdebug.h b/include/asm-generic/kdebug.h index 11e57b6a85f..d1814497bcd 100644 --- a/include/asm-generic/kdebug.h +++ b/include/asm-generic/kdebug.h @@ -3,7 +3,7 @@ enum die_val { DIE_UNUSED, - DIE_OOPS=1 + DIE_OOPS = 1, }; #endif /* _ASM_GENERIC_KDEBUG_H */ -- cgit v1.2.3-18-g5258 From 708ff2a0097b02d32d375b66996661f36cd4d6d1 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 29 Sep 2010 18:08:50 +0900 Subject: bitops: make asm-generic/bitops/find.h more generic asm-generic/bitops/find.h has the extern declarations of find_next_bit() and find_next_zero_bit() and the macro definitions of find_first_bit() and find_first_zero_bit(). It is only usable by the architectures which enables CONFIG_GENERIC_FIND_NEXT_BIT and disables CONFIG_GENERIC_FIND_FIRST_BIT. x86 and tile enable both CONFIG_GENERIC_FIND_NEXT_BIT and CONFIG_GENERIC_FIND_FIRST_BIT. These architectures cannot include asm-generic/bitops/find.h in their asm/bitops.h. So ifdefed extern declarations of find_first_bit and find_first_zero_bit() are put in linux/bitops.h. This makes asm-generic/bitops/find.h usable by these architectures and use it. Also this change is needed for the forthcoming duplicated extern declarations cleanup. Signed-off-by: Akinobu Mita Signed-off-by: Arnd Bergmann Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: Chris Metcalf --- include/asm-generic/bitops/find.h | 25 +++++++++++++++++++++++++ include/linux/bitops.h | 22 ---------------------- 2 files changed, 25 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 1914e974251..30afec0db7d 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -9,7 +9,32 @@ extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset); #endif +#ifdef CONFIG_GENERIC_FIND_FIRST_BIT + +/** + * find_first_bit - find the first set bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit number of the first set bit. + */ +extern unsigned long find_first_bit(const unsigned long *addr, + unsigned long size); + +/** + * find_first_zero_bit - find the first cleared bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit number of the first cleared bit. + */ +extern unsigned long find_first_zero_bit(const unsigned long *addr, + unsigned long size); +#else /* CONFIG_GENERIC_FIND_FIRST_BIT */ + #define find_first_bit(addr, size) find_next_bit((addr), (size), 0) #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) +#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ + #endif /*_ASM_GENERIC_BITOPS_FIND_H_ */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index fc68053378c..adb0f113f57 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -136,28 +136,6 @@ static inline unsigned long __ffs64(u64 word) } #ifdef __KERNEL__ -#ifdef CONFIG_GENERIC_FIND_FIRST_BIT - -/** - * find_first_bit - find the first set bit in a memory region - * @addr: The address to start the search at - * @size: The maximum size to search - * - * Returns the bit number of the first set bit. - */ -extern unsigned long find_first_bit(const unsigned long *addr, - unsigned long size); - -/** - * find_first_zero_bit - find the first cleared bit in a memory region - * @addr: The address to start the search at - * @size: The maximum size to search - * - * Returns the bit number of the first cleared bit. - */ -extern unsigned long find_first_zero_bit(const unsigned long *addr, - unsigned long size); -#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ #ifdef CONFIG_GENERIC_FIND_LAST_BIT /** -- cgit v1.2.3-18-g5258 From d852a6afd91fc928128f59ebff381838c365e358 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 29 Sep 2010 18:08:51 +0900 Subject: bitops: remove duplicated extern declarations If CONFIG_GENERIC_FIND_NEXT_BIT is enabled, find_next_bit() and find_next_zero_bit() are doubly declared in asm-generic/bitops/find.h and linux/bitops.h. asm/bitops.h includes asm-generic/bitops/find.h if and only if the architecture enables CONFIG_GENERIC_FIND_NEXT_BIT. And asm/bitops.h is included by linux/bitops.h So we can just remove the extern declarations of find_next_bit() and find_next_zero_bit() in linux/bitops.h. Also we can remove unneeded #ifndef CONFIG_GENERIC_FIND_NEXT_BIT in asm-generic/bitops/find.h. Signed-off-by: Akinobu Mita Signed-off-by: Arnd Bergmann --- include/asm-generic/bitops/find.h | 14 ++++++++++++-- include/linux/bitops.h | 23 ----------------------- 2 files changed, 12 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 30afec0db7d..110fa700f85 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -1,13 +1,23 @@ #ifndef _ASM_GENERIC_BITOPS_FIND_H_ #define _ASM_GENERIC_BITOPS_FIND_H_ -#ifndef CONFIG_GENERIC_FIND_NEXT_BIT +/** + * find_next_bit - find the next set bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + */ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset); +/** + * find_next_zero_bit - find the next cleared bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + */ extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset); -#endif #ifdef CONFIG_GENERIC_FIND_FIRST_BIT diff --git a/include/linux/bitops.h b/include/linux/bitops.h index adb0f113f57..827cc95711e 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -149,28 +149,5 @@ extern unsigned long find_last_bit(const unsigned long *addr, unsigned long size); #endif /* CONFIG_GENERIC_FIND_LAST_BIT */ -#ifdef CONFIG_GENERIC_FIND_NEXT_BIT - -/** - * find_next_bit - find the next set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The bitmap size in bits - */ -extern unsigned long find_next_bit(const unsigned long *addr, - unsigned long size, unsigned long offset); - -/** - * find_next_zero_bit - find the next cleared bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The bitmap size in bits - */ - -extern unsigned long find_next_zero_bit(const unsigned long *addr, - unsigned long size, - unsigned long offset); - -#endif /* CONFIG_GENERIC_FIND_NEXT_BIT */ #endif /* __KERNEL__ */ #endif -- cgit v1.2.3-18-g5258 From 3cfc535c5df8122af1258ae05aaf2770c033425d Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Sun, 10 Oct 2010 21:42:33 -0600 Subject: of/promtree: make drivers/of/pdt.c no longer sparc-only Clean up pdt.c: - make build dependent upon config OF_PROMTREE - #ifdef out the sparc-specific stuff - create pdt-specific header Signed-off-by: Andres Salomon Acked-by: David S. Miller Signed-off-by: Grant Likely --- include/linux/of_pdt.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 include/linux/of_pdt.h (limited to 'include') diff --git a/include/linux/of_pdt.h b/include/linux/of_pdt.h new file mode 100644 index 00000000000..c0a8774e45d --- /dev/null +++ b/include/linux/of_pdt.h @@ -0,0 +1,24 @@ +/* + * Definitions for building a device tree by calling into the + * Open Firmware PROM. + * + * Copyright (C) 2010 Andres Salomon + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _LINUX_OF_PDT_H +#define _LINUX_OF_PDT_H + +extern void *prom_early_alloc(unsigned long size); + +/* for building the device tree */ +extern void of_pdt_build_devicetree(phandle root_node); + +extern void (*prom_build_more)(struct device_node *dp, + struct device_node ***nextp); + +#endif /* _LINUX_OF_PDT_H */ -- cgit v1.2.3-18-g5258 From 3bf101ba42a1c89b5afbc7492e7647dae5e18735 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 27 Sep 2010 20:22:24 +0100 Subject: perf: Add helper function to return number of counters The number of counters for the registered pmu is needed in a few places so provide a helper function that returns this number. Signed-off-by: Matt Fleming Tested-by: Will Deacon Acked-by: Paul Mundt Acked-by: Peter Zijlstra Signed-off-by: Robert Richter --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 716f99b682c..1a021924718 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -849,6 +849,7 @@ extern int perf_max_events; extern const struct pmu *hw_perf_event_init(struct perf_event *event); +extern int perf_num_counters(void); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); extern void perf_event_task_tick(struct task_struct *task); -- cgit v1.2.3-18-g5258 From 6370a6ad3b53df90b4700977f7718118a2cd524a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 11 Oct 2010 15:12:27 +0200 Subject: workqueue: add and use WQ_MEM_RECLAIM flag Add WQ_MEM_RECLAIM flag which currently maps to WQ_RESCUER, mark WQ_RESCUER as internal and replace all external WQ_RESCUER usages to WQ_MEM_RECLAIM. This makes the API users express the intent of the workqueue instead of indicating the internal mechanism used to guarantee forward progress. This is also to make it cleaner to add more semantics to WQ_MEM_RECLAIM. For example, if deemed necessary, memory reclaim workqueues can be made highpri. This patch doesn't introduce any functional change. Signed-off-by: Tejun Heo Cc: Jeff Garzik Cc: Dave Chinner Cc: Steven Whitehouse --- include/linux/workqueue.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index e33ff4a9170..03bbe903e5c 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -243,11 +243,12 @@ enum { WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ WQ_FREEZEABLE = 1 << 2, /* freeze during suspend */ - WQ_RESCUER = 1 << 3, /* has an rescue worker */ + WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ WQ_DYING = 1 << 6, /* internal: workqueue is dying */ + WQ_RESCUER = 1 << 7, /* internal: workqueue has rescuer */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ @@ -309,7 +310,7 @@ __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, /** * alloc_ordered_workqueue - allocate an ordered workqueue * @name: name of the workqueue - * @flags: WQ_* flags (only WQ_FREEZEABLE and WQ_RESCUER are meaningful) + * @flags: WQ_* flags (only WQ_FREEZEABLE and WQ_MEM_RECLAIM are meaningful) * * Allocate an ordered workqueue. An ordered workqueue executes at * most one work item at any given time in the queued order. They are @@ -325,11 +326,11 @@ alloc_ordered_workqueue(const char *name, unsigned int flags) } #define create_workqueue(name) \ - alloc_workqueue((name), WQ_RESCUER, 1) + alloc_workqueue((name), WQ_MEM_RECLAIM, 1) #define create_freezeable_workqueue(name) \ - alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_RESCUER, 1) + alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1) #define create_singlethread_workqueue(name) \ - alloc_workqueue((name), WQ_UNBOUND | WQ_RESCUER, 1) + alloc_workqueue((name), WQ_UNBOUND | WQ_MEM_RECLAIM, 1) extern void destroy_workqueue(struct workqueue_struct *wq); -- cgit v1.2.3-18-g5258 From 84c7991059c9c4530cc911137c5bf508a41ed129 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 3 Oct 2010 21:41:13 +0100 Subject: perf: New helper function for pmu name Introduce perf_pmu_name() helper function that returns the name of the pmu. This gives us a generic way to get the name of a pmu regardless of how an architecture identifies it internally. Signed-off-by: Matt Fleming Acked-by: Peter Zijlstra Acked-by: Paul Mundt Signed-off-by: Robert Richter --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1a021924718..33f08dafda2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -850,6 +850,7 @@ extern int perf_max_events; extern const struct pmu *hw_perf_event_init(struct perf_event *event); extern int perf_num_counters(void); +extern const char *perf_pmu_name(void); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); extern void perf_event_task_tick(struct task_struct *task); -- cgit v1.2.3-18-g5258 From 56946331b28d53232115a155ba662ab3dc598952 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 8 Oct 2010 21:42:17 +0100 Subject: oprofile: Make op_name_from_perf_id() global Make op_name_from_perf_id() global so that we have a way for each architecture to construct an oprofile name for op->cpu_type. We need to remove the argument from the function prototype so that we can hide all implementation details inside the function. Signed-off-by: Matt Fleming Signed-off-by: Robert Richter --- include/linux/oprofile.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 5171639ecf0..1574d4aca72 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -185,4 +185,8 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val); int oprofile_add_data64(struct op_entry *entry, u64 val); int oprofile_write_commit(struct op_entry *entry); +#ifdef CONFIG_PERF_EVENTS +char *op_name_from_perf_id(void); +#endif /* CONFIG_PERF_EVENTS */ + #endif /* OPROFILE_H */ -- cgit v1.2.3-18-g5258 From 3d90a00763b51e1db344a7430c966be723b67a29 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 27 Sep 2010 20:45:08 +0100 Subject: oprofile: Abstract the perf-events backend Move the perf-events backend from arch/arm/oprofile into drivers/oprofile so that the code can be shared between architectures. This allows each architecture to maintain only a single copy of the PMU accessor functions instead of one for both perf and OProfile. It also becomes possible for other architectures to delete much of their OProfile code in favour of the common code now available in drivers/oprofile/oprofile_perf.c. Signed-off-by: Matt Fleming Tested-by: Will Deacon Signed-off-by: Robert Richter --- include/linux/oprofile.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 1574d4aca72..d67a8330b41 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -15,6 +15,7 @@ #include #include +#include #include /* Each escaped entry is prefixed by ESCAPE_CODE @@ -186,6 +187,8 @@ int oprofile_add_data64(struct op_entry *entry, u64 val); int oprofile_write_commit(struct op_entry *entry); #ifdef CONFIG_PERF_EVENTS +int __init oprofile_perf_init(struct oprofile_operations *ops); +void __exit oprofile_perf_exit(void); char *op_name_from_perf_id(void); #endif /* CONFIG_PERF_EVENTS */ -- cgit v1.2.3-18-g5258 From 34d101dd6204bd100fc2e6f7b5f9a10f959ce2c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Oct 2010 09:16:57 -0700 Subject: neigh: speedup neigh_hh_init() When a new dst is used to send a frame, neigh_resolve_output() tries to associate an struct hh_cache to this dst, calling neigh_hh_init() with the neigh rwlock write locked. Most of the time, hh_cache is already known and linked into neighbour, so we find it and increment its refcount. This patch changes the logic so that we call neigh_hh_init() with neighbour lock read locked only, so that fast path can be run in parallel by concurrent cpus. This brings part of the speedup we got with commit c7d4426a98a5f (introduce DST_NOCACHE flag) for non cached dsts, even for cached ones, removing one of the contention point that routers hit on multiqueue enabled machines. Further improvements would need to use a seqlock instead of an rwlock to protect neigh->ha[], to not dirty neigh too often and remove two atomic ops. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6abcef67b17..4160db3721b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -281,6 +281,12 @@ struct hh_cache { unsigned long hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)]; }; +static inline void hh_cache_put(struct hh_cache *hh) +{ + if (atomic_dec_and_test(&hh->hh_refcnt)) + kfree(hh); +} + /* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much. * Alternative is: * dev->hard_header_len ? (dev->hard_header_len + -- cgit v1.2.3-18-g5258 From 5a5c731aa59cc2c44ca20f45b1a577cd4f5435e2 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 7 Oct 2010 16:39:20 -0700 Subject: wireless: Set some stats used by /proc/net/wireless (wext) Some stats for /proc/net/wireless (and wext in general) are not being set. This patch addresses a few of those with values easily obtained from mac80211 core. Signed-off-by: Ben Greear Signed-off-by: John W. Linville --- include/net/cfg80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0778d04b3bb..f920a06f363 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -403,6 +403,7 @@ struct station_parameters { * @STATION_INFO_TX_PACKETS: @tx_packets filled * @STATION_INFO_TX_RETRIES: @tx_retries filled * @STATION_INFO_TX_FAILED: @tx_failed filled + * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -417,6 +418,7 @@ enum station_info_flags { STATION_INFO_TX_PACKETS = 1<<9, STATION_INFO_TX_RETRIES = 1<<10, STATION_INFO_TX_FAILED = 1<<11, + STATION_INFO_RX_DROP_MISC = 1<<12, }; /** @@ -468,6 +470,7 @@ struct rate_info { * @tx_packets: packets transmitted to this station * @tx_retries: cumulative retry counts * @tx_failed: number of failed transmissions (retries exceeded, no ACK) + * @rx_dropped_misc: Dropped for un-specified reason. * @generation: generation number for nl80211 dumps. * This number should increase every time the list of stations * changes, i.e. when a station is added or removed, so that @@ -487,6 +490,7 @@ struct station_info { u32 tx_packets; u32 tx_retries; u32 tx_failed; + u32 rx_dropped_misc; int generation; }; -- cgit v1.2.3-18-g5258 From 8610c29a2c9f273886b1c31ae4d92c69d4326262 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 9 Oct 2010 02:39:29 +0200 Subject: cfg80211: add channel utilization stats to the survey command Using these, user space can calculate a relative channel utilization with arbitrary intervals by regularly taking snapshots of the survey results. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/linux/nl80211.h | 15 +++++++++++++++ include/net/cfg80211.h | 20 ++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index c08709fe36f..0edb2566c14 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1413,6 +1413,16 @@ enum nl80211_reg_rule_flags { * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) * @NL80211_SURVEY_INFO_IN_USE: channel is currently being used + * @NL80211_SURVEY_INFO_CHANNEL_TIME: amount of time (in ms) that the radio + * spent on this channel + * @NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY: amount of the time the primary + * channel was sensed busy (either due to activity or energy detect) + * @NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY: amount of time the extension + * channel was sensed busy + * @NL80211_SURVEY_INFO_CHANNEL_TIME_RX: amount of time the radio spent + * receiving data + * @NL80211_SURVEY_INFO_CHANNEL_TIME_TX: amount of time the radio spent + * transmitting data * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number * currently defined * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use @@ -1422,6 +1432,11 @@ enum nl80211_survey_info { NL80211_SURVEY_INFO_FREQUENCY, NL80211_SURVEY_INFO_NOISE, NL80211_SURVEY_INFO_IN_USE, + NL80211_SURVEY_INFO_CHANNEL_TIME, + NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY, + NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY, + NL80211_SURVEY_INFO_CHANNEL_TIME_RX, + NL80211_SURVEY_INFO_CHANNEL_TIME_TX, /* keep last */ __NL80211_SURVEY_INFO_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f920a06f363..24d5b586927 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -294,6 +294,11 @@ struct key_params { * * @SURVEY_INFO_NOISE_DBM: noise (in dBm) was filled in * @SURVEY_INFO_IN_USE: channel is currently being used + * @SURVEY_INFO_CHANNEL_TIME: channel active time (in ms) was filled in + * @SURVEY_INFO_CHANNEL_TIME_BUSY: channel busy time was filled in + * @SURVEY_INFO_CHANNEL_TIME_EXT_BUSY: extension channel busy time was filled in + * @SURVEY_INFO_CHANNEL_TIME_RX: channel receive time was filled in + * @SURVEY_INFO_CHANNEL_TIME_TX: channel transmit time was filled in * * Used by the driver to indicate which info in &struct survey_info * it has filled in during the get_survey(). @@ -301,6 +306,11 @@ struct key_params { enum survey_info_flags { SURVEY_INFO_NOISE_DBM = 1<<0, SURVEY_INFO_IN_USE = 1<<1, + SURVEY_INFO_CHANNEL_TIME = 1<<2, + SURVEY_INFO_CHANNEL_TIME_BUSY = 1<<3, + SURVEY_INFO_CHANNEL_TIME_EXT_BUSY = 1<<4, + SURVEY_INFO_CHANNEL_TIME_RX = 1<<5, + SURVEY_INFO_CHANNEL_TIME_TX = 1<<6, }; /** @@ -310,6 +320,11 @@ enum survey_info_flags { * @filled: bitflag of flags from &enum survey_info_flags * @noise: channel noise in dBm. This and all following fields are * optional + * @channel_time: amount of time in ms the radio spent on the channel + * @channel_time_busy: amount of time the primary channel was sensed busy + * @channel_time_ext_busy: amount of time the extension channel was sensed busy + * @channel_time_rx: amount of time the radio spent receiving data + * @channel_time_tx: amount of time the radio spent transmitting data * * Used by dump_survey() to report back per-channel survey information. * @@ -318,6 +333,11 @@ enum survey_info_flags { */ struct survey_info { struct ieee80211_channel *channel; + u64 channel_time; + u64 channel_time_busy; + u64 channel_time_ext_busy; + u64 channel_time_rx; + u64 channel_time_tx; u32 filled; s8 noise; }; -- cgit v1.2.3-18-g5258 From cfdfa4d3a0c7aa1287c61326a7714f262466157a Mon Sep 17 00:00:00 2001 From: Steve deRosier Date: Sat, 9 Oct 2010 17:23:28 -0700 Subject: mac80211: Update mesh constants to approved IEEE ANA values This patch updates IEEE802.11 mesh constants to be consistent with newly approved values. It modifies some values, as well as adds many new constants in preparation for updating mesh code to the current 802.11s drafts. ANA numbers were taken from: https://mentor.ieee.org/802.11/dcn/09/11-09-0031-12-0000-ana-database-assigned-number-authority.xls A few notes are in order: 1. This will break backwards compatibility with existing Linux kernels as over-the-air constants have changed. 2. Some old and obsolete constants have been retained for now as the mesh code itself hasn't been updated yet to the new 802.11s draft. This was desired to keep the existing mesh scheme working until it can be updated. Adding the approved values is the first step in updating the mesh code. 3. Obsolete constants have been clearly marked. 4. All ANA approved 802.11s constants have been added. Signed-off-by: Steve deRosier Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 71 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 97b2eae6a22..ed5a03cbe18 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -986,6 +986,7 @@ struct ieee80211_ht_info { #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 #define WLAN_AUTH_FT 2 +#define WLAN_AUTH_SAE 3 #define WLAN_AUTH_LEAP 128 #define WLAN_AUTH_CHALLENGE_LEN 128 @@ -1072,6 +1073,10 @@ enum ieee80211_statuscode { WLAN_STATUS_NO_DIRECT_LINK = 48, WLAN_STATUS_STA_NOT_PRESENT = 49, WLAN_STATUS_STA_NOT_QSTA = 50, + /* 802.11s */ + WLAN_STATUS_ANTI_CLOG_REQUIRED = 76, + WLAN_STATUS_FCG_NOT_SUPP = 78, + WLAN_STATUS_STA_NO_TBTT = 78, }; @@ -1112,6 +1117,22 @@ enum ieee80211_reasoncode { WLAN_REASON_QSTA_REQUIRE_SETUP = 38, WLAN_REASON_QSTA_TIMEOUT = 39, WLAN_REASON_QSTA_CIPHER_NOT_SUPP = 45, + /* 802.11s */ + WLAN_REASON_MESH_PEER_CANCELED = 52, + WLAN_REASON_MESH_MAX_PEERS = 53, + WLAN_REASON_MESH_CONFIG = 54, + WLAN_REASON_MESH_CLOSE = 55, + WLAN_REASON_MESH_MAX_RETRIES = 56, + WLAN_REASON_MESH_CONFIRM_TIMEOUT = 57, + WLAN_REASON_MESH_INVALID_GTK = 58, + WLAN_REASON_MESH_INCONSISTENT_PARAM = 59, + WLAN_REASON_MESH_INVALID_SECURITY = 60, + WLAN_REASON_MESH_PATH_ERROR = 61, + WLAN_REASON_MESH_PATH_NOFORWARD = 62, + WLAN_REASON_MESH_PATH_DEST_UNREACHABLE = 63, + WLAN_REASON_MAC_EXISTS_IN_MBSS = 64, + WLAN_REASON_MESH_CHAN_REGULATORY = 65, + WLAN_REASON_MESH_CHAN = 66, }; @@ -1139,20 +1160,33 @@ enum ieee80211_eid { WLAN_EID_TS_DELAY = 43, WLAN_EID_TCLAS_PROCESSING = 44, WLAN_EID_QOS_CAPA = 46, - /* 802.11s - * - * All mesh EID numbers are pending IEEE 802.11 ANA approval. - * The numbers have been incremented from those suggested in - * 802.11s/D2.0 so that MESH_CONFIG does not conflict with - * EXT_SUPP_RATES. + /* 802.11s */ + WLAN_EID_MESH_CONFIG = 113, + WLAN_EID_MESH_ID = 114, + WLAN_EID_LINK_METRIC_REPORT = 115, + WLAN_EID_CONGESTION_NOTIFICATION = 116, + /* Note that the Peer Link IE has been replaced with the similar + * Peer Management IE. We will keep the former definition until mesh + * code is changed to comply with latest 802.11s drafts. */ - WLAN_EID_MESH_CONFIG = 51, - WLAN_EID_MESH_ID = 52, - WLAN_EID_PEER_LINK = 55, - WLAN_EID_PREQ = 68, - WLAN_EID_PREP = 69, - WLAN_EID_PERR = 70, - WLAN_EID_RANN = 49, /* compatible with FreeBSD */ + WLAN_EID_PEER_LINK = 55, /* no longer in 802.11s drafts */ + WLAN_EID_PEER_MGMT = 117, + WLAN_EID_CHAN_SWITCH_PARAM = 118, + WLAN_EID_MESH_AWAKE_WINDOW = 119, + WLAN_EID_BEACON_TIMING = 120, + WLAN_EID_MCCAOP_SETUP_REQ = 121, + WLAN_EID_MCCAOP_SETUP_RESP = 122, + WLAN_EID_MCCAOP_ADVERT = 123, + WLAN_EID_MCCAOP_TEARDOWN = 124, + WLAN_EID_GANN = 125, + WLAN_EID_RANN = 126, + WLAN_EID_PREQ = 130, + WLAN_EID_PREP = 131, + WLAN_EID_PERR = 132, + WLAN_EID_PXU = 137, + WLAN_EID_PXUC = 138, + WLAN_EID_AUTH_MESH_PEER_EXCH = 139, + WLAN_EID_MIC = 140, WLAN_EID_PWR_CONSTRAINT = 32, WLAN_EID_PWR_CAPABILITY = 33, @@ -1211,9 +1245,14 @@ enum ieee80211_category { WLAN_CATEGORY_HT = 7, WLAN_CATEGORY_SA_QUERY = 8, WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9, + WLAN_CATEGORY_MESH_ACTION = 13, + WLAN_CATEGORY_MULTIHOP_ACTION = 14, + WLAN_CATEGORY_SELF_PROTECTED = 15, WLAN_CATEGORY_WMM = 17, - WLAN_CATEGORY_MESH_PLINK = 30, /* Pending ANA approval */ - WLAN_CATEGORY_MESH_PATH_SEL = 32, /* Pending ANA approval */ + /* TODO: remove MESH_PLINK and MESH_PATH_SEL after */ + /* mesh is updated to current 802.11s draft */ + WLAN_CATEGORY_MESH_PLINK = 30, + WLAN_CATEGORY_MESH_PATH_SEL = 32, WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126, WLAN_CATEGORY_VENDOR_SPECIFIC = 127, }; @@ -1351,6 +1390,8 @@ enum ieee80211_sa_query_action { /* AKM suite selectors */ #define WLAN_AKM_SUITE_8021X 0x000FAC01 #define WLAN_AKM_SUITE_PSK 0x000FAC02 +#define WLAN_AKM_SUITE_SAE 0x000FAC08 +#define WLAN_AKM_SUITE_FT_OVER_SAE 0x000FAC09 #define WLAN_MAX_KEY_LEN 32 -- cgit v1.2.3-18-g5258 From 0ed8ddf4045fcfcac36bad753dc4046118c603ec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Oct 2010 10:44:07 +0000 Subject: neigh: Protect neigh->ha[] with a seqlock Add a seqlock in struct neighbour to protect neigh->ha[], and avoid dirtying neighbour in stress situation (many different flows / dsts) Dirtying takes place because of read_lock(&n->lock) and n->used writes. Switching to a seqlock, and writing n->used only on jiffies changes permits less dirtying. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index a4538d55370..f04e7a2522c 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -105,6 +105,7 @@ struct neighbour { atomic_t refcnt; atomic_t probes; rwlock_t lock; + seqlock_t ha_lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; struct hh_cache *hh; int (*output)(struct sk_buff *skb); @@ -302,7 +303,10 @@ static inline void neigh_confirm(struct neighbour *neigh) static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { - neigh->used = jiffies; + unsigned long now = ACCESS_ONCE(jiffies); + + if (neigh->used != now) + neigh->used = now; if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) return __neigh_event_send(neigh, skb); return 0; @@ -373,4 +377,14 @@ struct neighbour_cb { #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) +static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, + const struct net_device *dev) +{ + unsigned int seq; + + do { + seq = read_seqbegin(&n->ha_lock); + memcpy(dst, n->ha, dev->addr_len); + } while (read_seqretry(&n->ha_lock, seq)); +} #endif -- cgit v1.2.3-18-g5258 From fc66f95c68b6d4535a0ea2ea15d5cf626e310956 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Oct 2010 06:37:34 +0000 Subject: net dst: use a percpu_counter to track entries struct dst_ops tracks number of allocated dst in an atomic_t field, subject to high cache line contention in stress workload. Switch to a percpu_counter, to reduce number of time we need to dirty a central location. Place it on a separate cache line to avoid dirtying read only fields. Stress test : (Sending 160.000.000 UDP frames, IP route cache disabled, dual E5540 @2.53GHz, 32bit kernel, FIB_TRIE, SLUB/NUMA) Before: real 0m51.179s user 0m15.329s sys 10m15.942s After: real 0m45.570s user 0m15.525s sys 9m56.669s With a small reordering of struct neighbour fields, subject of a following patch, (to separate refcnt from other read mostly fields) real 0m41.841s user 0m15.261s sys 8m45.949s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst_ops.h | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index d1ff9b7e99b..1fa5306e3e2 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -1,6 +1,7 @@ #ifndef _NET_DST_OPS_H #define _NET_DST_OPS_H #include +#include struct dst_entry; struct kmem_cachep; @@ -22,7 +23,41 @@ struct dst_ops { void (*update_pmtu)(struct dst_entry *dst, u32 mtu); int (*local_out)(struct sk_buff *skb); - atomic_t entries; struct kmem_cache *kmem_cachep; + + struct percpu_counter pcpuc_entries ____cacheline_aligned_in_smp; }; + +static inline int dst_entries_get_fast(struct dst_ops *dst) +{ + return percpu_counter_read_positive(&dst->pcpuc_entries); +} + +static inline int dst_entries_get_slow(struct dst_ops *dst) +{ + int res; + + local_bh_disable(); + res = percpu_counter_sum_positive(&dst->pcpuc_entries); + local_bh_enable(); + return res; +} + +static inline void dst_entries_add(struct dst_ops *dst, int val) +{ + local_bh_disable(); + percpu_counter_add(&dst->pcpuc_entries, val); + local_bh_enable(); +} + +static inline int dst_entries_init(struct dst_ops *dst) +{ + return percpu_counter_init(&dst->pcpuc_entries, 0); +} + +static inline void dst_entries_destroy(struct dst_ops *dst) +{ + percpu_counter_destroy(&dst->pcpuc_entries); +} + #endif -- cgit v1.2.3-18-g5258 From 29e29f27486ed7074df259b3eda8656bb014e9b5 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 1 Oct 2010 09:15:41 +0100 Subject: ARM: 6421/1: amba-pl011: add missing ST specific registers The ST Micro derivates have several extra interesting registers that we may soon use for something interesting so may just as well define them in the header. Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Russell King --- include/linux/amba/serial.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index e1b634b635f..6021588ba0a 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -32,7 +32,9 @@ #define UART01x_RSR 0x04 /* Receive status register (Read). */ #define UART01x_ECR 0x04 /* Error clear register (Write). */ #define UART010_LCRH 0x08 /* Line control register, high byte. */ +#define ST_UART011_DMAWM 0x08 /* DMA watermark configure register. */ #define UART010_LCRM 0x0C /* Line control register, middle byte. */ +#define ST_UART011_TIMEOUT 0x0C /* Timeout period register. */ #define UART010_LCRL 0x10 /* Line control register, low byte. */ #define UART010_CR 0x14 /* Control register. */ #define UART01x_FR 0x18 /* Flag register (Read only). */ @@ -51,6 +53,15 @@ #define UART011_MIS 0x40 /* Masked interrupt status. */ #define UART011_ICR 0x44 /* Interrupt clear register. */ #define UART011_DMACR 0x48 /* DMA control register. */ +#define ST_UART011_XFCR 0x50 /* XON/XOFF control register. */ +#define ST_UART011_XON1 0x54 /* XON1 register. */ +#define ST_UART011_XON2 0x58 /* XON2 register. */ +#define ST_UART011_XOFF1 0x5C /* XON1 register. */ +#define ST_UART011_XOFF2 0x60 /* XON2 register. */ +#define ST_UART011_ITCR 0x80 /* Integration test control register. */ +#define ST_UART011_ITIP 0x84 /* Integration test input register. */ +#define ST_UART011_ABCR 0x100 /* Autobaud control register. */ +#define ST_UART011_ABIMSC 0x15C /* Autobaud interrupt mask/clear register. */ #define UART011_DR_OE (1 << 11) #define UART011_DR_BE (1 << 10) -- cgit v1.2.3-18-g5258 From e37ef961e50d74f55e9edb48e54dd2e7963aad39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Oct 2010 12:20:54 +0000 Subject: neigh: reorder struct neighbour fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le mardi 12 octobre 2010 à 00:02 +0200, Eric Dumazet a écrit : > Here is the followup patch. > > Thanks ! > Oops, this was an old version, the up2date ones also took care of "used" field. I guess its time for a sleep, sorry again. [PATCH net-next V2] neigh: reorder struct neighbour fields (refcnt) and (ha_lock, ha, used, dev, output, ops, primary_key) should be placed on a separate cache lines. refcnt can be often written, while other fields are mostly read. This gave me good result on stress test : before: real 0m45.570s user 0m15.525s sys 9m56.669s After: real 0m41.841s user 0m15.261s sys 8m45.949s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index f04e7a2522c..55590ab16b3 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -94,8 +94,6 @@ struct neighbour { struct neighbour __rcu *next; struct neigh_table *tbl; struct neigh_parms *parms; - struct net_device *dev; - unsigned long used; unsigned long confirmed; unsigned long updated; __u8 flags; @@ -103,16 +101,18 @@ struct neighbour { __u8 type; __u8 dead; atomic_t refcnt; + struct sk_buff_head arp_queue; + struct timer_list timer; + unsigned long used; atomic_t probes; rwlock_t lock; seqlock_t ha_lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; struct hh_cache *hh; int (*output)(struct sk_buff *skb); - struct sk_buff_head arp_queue; - struct timer_list timer; const struct neigh_ops *ops; struct rcu_head rcu; + struct net_device *dev; u8 primary_key[0]; }; -- cgit v1.2.3-18-g5258 From 7c5347733dcc4ba0bac0baf86d99fae0561f33b7 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 11 Oct 2010 18:13:31 -0400 Subject: fanotify: disable fanotify syscalls This patch disables the fanotify syscalls by just not building them and letting the cond_syscall() statements in kernel/sys_ni.c redirect them to sys_ni_syscall(). It was pointed out by Tvrtko Ursulin that the fanotify interface did not include an explicit prioritization between groups. This is necessary for fanotify to be usable for hierarchical storage management software, as they must get first access to the file, before inotify-like notifiers see the file. This feature can be added in an ABI compatible way in the next release (by using a number of bits in the flags field to carry the info) but it was suggested by Alan that maybe we should just hold off and do it in the next cycle, likely with an (new) explicit argument to the syscall. I don't like this approach best as I know people are already starting to use the current interface, but Alan is all wise and noone on list backed me up with just using what we have. I feel this is needlessly ripping the rug out from under people at the last minute, but if others think it needs to be a new argument it might be the best way forward. Three choices: Go with what we got (and implement the new feature next cycle). Add a new field right now (and implement the new feature next cycle). Wait till next cycle to release the ABI (and implement the new feature next cycle). This is number 3. Signed-off-by: Eric Paris Signed-off-by: Linus Torvalds --- include/linux/Kbuild | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 626b629429f..4e8ea8c8ec1 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -118,7 +118,6 @@ header-y += eventpoll.h header-y += ext2_fs.h header-y += fadvise.h header-y += falloc.h -header-y += fanotify.h header-y += fb.h header-y += fcntl.h header-y += fd.h -- cgit v1.2.3-18-g5258 From e144710b302525de5b90b9c3ba43562458d8957f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 1 Oct 2010 16:03:45 +0200 Subject: genirq: Distangle irq.h Move irq_desc and internal functions out of irq.h Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- include/linux/irq.h | 292 +++--------------------------------------------- include/linux/irqdesc.h | 171 ++++++++++++++++++++++++++++ 2 files changed, 184 insertions(+), 279 deletions(-) create mode 100644 include/linux/irqdesc.h (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 82ed8231394..f5827abbc03 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -80,7 +80,6 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, # define IRQ_NO_BALANCING_MASK IRQ_NO_BALANCING #endif -struct proc_dir_entry; struct msi_desc; /** @@ -202,152 +201,36 @@ struct irq_chip { #endif }; -struct timer_rand_state; -struct irq_2_iommu; -/** - * struct irq_desc - interrupt descriptor - * @irq_data: per irq and chip data passed down to chip functions - * @timer_rand_state: pointer to timer rand state struct - * @kstat_irqs: irq stats per cpu - * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] - * @action: the irq action chain - * @status: status information - * @depth: disable-depth, for nested irq_disable() calls - * @wake_depth: enable depth, for multiple set_irq_wake() callers - * @irq_count: stats field to detect stalled irqs - * @last_unhandled: aging timer for unhandled count - * @irqs_unhandled: stats field for spurious unhandled interrupts - * @lock: locking for SMP - * @pending_mask: pending rebalanced interrupts - * @threads_active: number of irqaction threads currently running - * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers - * @dir: /proc/irq/ procfs entry - * @name: flow handler name for /proc/interrupts output - */ -struct irq_desc { - -#ifdef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED - struct irq_data irq_data; -#else - /* - * This union will go away, once we fixed the direct access to - * irq_desc all over the place. The direct fields are a 1:1 - * overlay of irq_data. - */ - union { - struct irq_data irq_data; - struct { - unsigned int irq; - unsigned int node; - struct irq_chip *chip; - void *handler_data; - void *chip_data; - struct msi_desc *msi_desc; -#ifdef CONFIG_SMP - cpumask_var_t affinity; -#endif -#ifdef CONFIG_INTR_REMAP - struct irq_2_iommu *irq_2_iommu; -#endif - }; - }; -#endif - - struct timer_rand_state *timer_rand_state; - unsigned int *kstat_irqs; - irq_flow_handler_t handle_irq; - struct irqaction *action; /* IRQ action list */ - unsigned int status; /* IRQ status */ - - unsigned int depth; /* nested irq disables */ - unsigned int wake_depth; /* nested wake enables */ - unsigned int irq_count; /* For detecting broken IRQs */ - unsigned long last_unhandled; /* Aging timer for unhandled count */ - unsigned int irqs_unhandled; - raw_spinlock_t lock; -#ifdef CONFIG_SMP - const struct cpumask *affinity_hint; -#ifdef CONFIG_GENERIC_PENDING_IRQ - cpumask_var_t pending_mask; -#endif -#endif - atomic_t threads_active; - wait_queue_head_t wait_for_threads; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *dir; -#endif - const char *name; -} ____cacheline_internodealigned_in_smp; - -extern void arch_init_copy_chip_data(struct irq_desc *old_desc, - struct irq_desc *desc, int node); -extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); - -#ifndef CONFIG_SPARSE_IRQ -extern struct irq_desc irq_desc[NR_IRQS]; -#endif - -#ifdef CONFIG_NUMA_IRQ_DESC -extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node); -#else -static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) -{ - return desc; -} -#endif - -extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); +/* This include will go away once we isolated irq_desc usage to core code */ +#include /* * Pick up the arch-dependent methods: */ #include +struct irqaction; extern int setup_irq(unsigned int irq, struct irqaction *new); extern void remove_irq(unsigned int irq, struct irqaction *act); #ifdef CONFIG_GENERIC_HARDIRQS #ifdef CONFIG_SMP - -#ifdef CONFIG_GENERIC_PENDING_IRQ - +# ifdef CONFIG_GENERIC_PENDING_IRQ void move_native_irq(int irq); void move_masked_irq(int irq); - -#else /* CONFIG_GENERIC_PENDING_IRQ */ - -static inline void move_irq(int irq) -{ -} - -static inline void move_native_irq(int irq) -{ -} - -static inline void move_masked_irq(int irq) -{ -} - -#endif /* CONFIG_GENERIC_PENDING_IRQ */ - -#else /* CONFIG_SMP */ - -#define move_native_irq(x) -#define move_masked_irq(x) - -#endif /* CONFIG_SMP */ +# else +static inline void move_irq(int irq) { } +static inline void move_native_irq(int irq) { } +static inline void move_masked_irq(int irq) { } +# endif +#else +static inline void move_native_irq(int irq) { } +static inline void move_masked_irq(int irq) { } +#endif extern int no_irq_affinity; -static inline int irq_balancing_disabled(unsigned int irq) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - return desc->status & IRQ_NO_BALANCING_MASK; -} - /* Handle irq action chains: */ extern irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action); @@ -363,42 +246,10 @@ extern void handle_percpu_irq(unsigned int irq, struct irq_desc *desc); extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc); extern void handle_nested_irq(unsigned int irq); -/* - * Monolithic do_IRQ implementation. - */ -#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ -extern unsigned int __do_IRQ(unsigned int irq); -#endif - -/* - * Architectures call this to let the generic IRQ layer - * handle an interrupt. If the descriptor is attached to an - * irqchip-style controller then we call the ->handle_irq() handler, - * and it calls __do_IRQ() if it's attached to an irqtype-style controller. - */ -static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc) -{ -#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ - desc->handle_irq(irq, desc); -#else - if (likely(desc->handle_irq)) - desc->handle_irq(irq, desc); - else - __do_IRQ(irq); -#endif -} - -static inline void generic_handle_irq(unsigned int irq) -{ - generic_handle_irq_desc(irq, irq_to_desc(irq)); -} - /* Handling of unhandled and spurious interrupts: */ extern void note_interrupt(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret); -/* Resending of interrupts :*/ -void check_irq_resend(struct irq_desc *desc, unsigned int irq); /* Enable/disable irq debugging output: */ extern int noirqdebug_setup(char *str); @@ -421,16 +272,6 @@ extern void __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, const char *name); -/* caller has locked the irq_desc and both params are valid */ -static inline void __set_irq_handler_unlocked(int irq, - irq_flow_handler_t handler) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - desc->handle_irq = handler; -} - /* * Set a highlevel flow handler for a given IRQ: */ @@ -462,13 +303,6 @@ extern unsigned int create_irq_nr(unsigned int irq_want, int node); extern int create_irq(void); extern void destroy_irq(unsigned int irq); -/* Test to see if a driver has successfully requested an irq */ -static inline int irq_has_action(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - return desc->action != NULL; -} - /* Dynamic irq helper functions */ extern void dynamic_irq_init(unsigned int irq); void dynamic_irq_init_keep_chip_data(unsigned int irq); @@ -487,108 +321,8 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); #define get_irq_data(irq) (irq_to_desc(irq)->irq_data.handler_data) #define get_irq_msi(irq) (irq_to_desc(irq)->irq_data.msi_desc) -#define get_irq_desc_chip(desc) ((desc)->irq_data.chip) -#define get_irq_desc_chip_data(desc) ((desc)->irq_data.chip_data) -#define get_irq_desc_data(desc) ((desc)->irq_data.handler_data) -#define get_irq_desc_msi(desc) ((desc)->irq_data.msi_desc) - #endif /* CONFIG_GENERIC_HARDIRQS */ #endif /* !CONFIG_S390 */ -#ifdef CONFIG_SMP -/** - * alloc_desc_masks - allocate cpumasks for irq_desc - * @desc: pointer to irq_desc struct - * @node: node which will be handling the cpumasks - * @boot: true if need bootmem - * - * Allocates affinity and pending_mask cpumask if required. - * Returns true if successful (or not required). - */ -static inline bool alloc_desc_masks(struct irq_desc *desc, int node, - bool boot) -{ - gfp_t gfp = GFP_ATOMIC; - - if (boot) - gfp = GFP_NOWAIT; - -#ifdef CONFIG_CPUMASK_OFFSTACK - if (!alloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node)) - return false; - -#ifdef CONFIG_GENERIC_PENDING_IRQ - if (!alloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { - free_cpumask_var(desc->irq_data.affinity); - return false; - } -#endif -#endif - return true; -} - -static inline void init_desc_masks(struct irq_desc *desc) -{ - cpumask_setall(desc->irq_data.affinity); -#ifdef CONFIG_GENERIC_PENDING_IRQ - cpumask_clear(desc->pending_mask); -#endif -} - -/** - * init_copy_desc_masks - copy cpumasks for irq_desc - * @old_desc: pointer to old irq_desc struct - * @new_desc: pointer to new irq_desc struct - * - * Insures affinity and pending_masks are copied to new irq_desc. - * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the - * irq_desc struct so the copy is redundant. - */ - -static inline void init_copy_desc_masks(struct irq_desc *old_desc, - struct irq_desc *new_desc) -{ -#ifdef CONFIG_CPUMASK_OFFSTACK - cpumask_copy(new_desc->irq_data.affinity, old_desc->irq_data.affinity); - -#ifdef CONFIG_GENERIC_PENDING_IRQ - cpumask_copy(new_desc->pending_mask, old_desc->pending_mask); -#endif -#endif -} - -static inline void free_desc_masks(struct irq_desc *old_desc, - struct irq_desc *new_desc) -{ - free_cpumask_var(old_desc->irq_data.affinity); - -#ifdef CONFIG_GENERIC_PENDING_IRQ - free_cpumask_var(old_desc->pending_mask); -#endif -} - -#else /* !CONFIG_SMP */ - -static inline bool alloc_desc_masks(struct irq_desc *desc, int node, - bool boot) -{ - return true; -} - -static inline void init_desc_masks(struct irq_desc *desc) -{ -} - -static inline void init_copy_desc_masks(struct irq_desc *old_desc, - struct irq_desc *new_desc) -{ -} - -static inline void free_desc_masks(struct irq_desc *old_desc, - struct irq_desc *new_desc) -{ -} -#endif /* CONFIG_SMP */ - #endif /* _LINUX_IRQ_H */ diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h new file mode 100644 index 00000000000..22e426fdd30 --- /dev/null +++ b/include/linux/irqdesc.h @@ -0,0 +1,171 @@ +#ifndef _LINUX_IRQDESC_H +#define _LINUX_IRQDESC_H + +/* + * Core internal functions to deal with irq descriptors + * + * This include will move to kernel/irq once we cleaned up the tree. + * For now it's included from + */ + +struct proc_dir_entry; +struct timer_rand_state; +struct irq_2_iommu; +/** + * struct irq_desc - interrupt descriptor + * @irq_data: per irq and chip data passed down to chip functions + * @timer_rand_state: pointer to timer rand state struct + * @kstat_irqs: irq stats per cpu + * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] + * @action: the irq action chain + * @status: status information + * @depth: disable-depth, for nested irq_disable() calls + * @wake_depth: enable depth, for multiple set_irq_wake() callers + * @irq_count: stats field to detect stalled irqs + * @last_unhandled: aging timer for unhandled count + * @irqs_unhandled: stats field for spurious unhandled interrupts + * @lock: locking for SMP + * @pending_mask: pending rebalanced interrupts + * @threads_active: number of irqaction threads currently running + * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers + * @dir: /proc/irq/ procfs entry + * @name: flow handler name for /proc/interrupts output + */ +struct irq_desc { + +#ifdef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED + struct irq_data irq_data; +#else + /* + * This union will go away, once we fixed the direct access to + * irq_desc all over the place. The direct fields are a 1:1 + * overlay of irq_data. + */ + union { + struct irq_data irq_data; + struct { + unsigned int irq; + unsigned int node; + struct irq_chip *chip; + void *handler_data; + void *chip_data; + struct msi_desc *msi_desc; +#ifdef CONFIG_SMP + cpumask_var_t affinity; +#endif +#ifdef CONFIG_INTR_REMAP + struct irq_2_iommu *irq_2_iommu; +#endif + }; + }; +#endif + + struct timer_rand_state *timer_rand_state; + unsigned int *kstat_irqs; + irq_flow_handler_t handle_irq; + struct irqaction *action; /* IRQ action list */ + unsigned int status; /* IRQ status */ + + unsigned int depth; /* nested irq disables */ + unsigned int wake_depth; /* nested wake enables */ + unsigned int irq_count; /* For detecting broken IRQs */ + unsigned long last_unhandled; /* Aging timer for unhandled count */ + unsigned int irqs_unhandled; + raw_spinlock_t lock; +#ifdef CONFIG_SMP + const struct cpumask *affinity_hint; +#ifdef CONFIG_GENERIC_PENDING_IRQ + cpumask_var_t pending_mask; +#endif +#endif + atomic_t threads_active; + wait_queue_head_t wait_for_threads; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *dir; +#endif + const char *name; +} ____cacheline_internodealigned_in_smp; + +extern void arch_init_copy_chip_data(struct irq_desc *old_desc, + struct irq_desc *desc, int node); +extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); + +#ifndef CONFIG_SPARSE_IRQ +extern struct irq_desc irq_desc[NR_IRQS]; +#endif + +#ifdef CONFIG_NUMA_IRQ_DESC +extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node); +#else +static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) +{ + return desc; +} +#endif + +extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); + +#ifdef CONFIG_GENERIC_HARDIRQS + +#define get_irq_desc_chip(desc) ((desc)->irq_data.chip) +#define get_irq_desc_chip_data(desc) ((desc)->irq_data.chip_data) +#define get_irq_desc_data(desc) ((desc)->irq_data.handler_data) +#define get_irq_desc_msi(desc) ((desc)->irq_data.msi_desc) + +/* + * Monolithic do_IRQ implementation. + */ +#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ +extern unsigned int __do_IRQ(unsigned int irq); +#endif + +/* + * Architectures call this to let the generic IRQ layer + * handle an interrupt. If the descriptor is attached to an + * irqchip-style controller then we call the ->handle_irq() handler, + * and it calls __do_IRQ() if it's attached to an irqtype-style controller. + */ +static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc) +{ +#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ + desc->handle_irq(irq, desc); +#else + if (likely(desc->handle_irq)) + desc->handle_irq(irq, desc); + else + __do_IRQ(irq); +#endif +} + +static inline void generic_handle_irq(unsigned int irq) +{ + generic_handle_irq_desc(irq, irq_to_desc(irq)); +} + +/* Test to see if a driver has successfully requested an irq */ +static inline int irq_has_action(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + return desc->action != NULL; +} + +static inline int irq_balancing_disabled(unsigned int irq) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + return desc->status & IRQ_NO_BALANCING_MASK; +} + +/* caller has locked the irq_desc and both params are valid */ +static inline void __set_irq_handler_unlocked(int irq, + irq_flow_handler_t handler) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + desc->handle_irq = handler; +} +#endif + +#endif -- cgit v1.2.3-18-g5258 From 3a3856d00c74560a7b8d9f8a13c1ca94ee786b78 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Oct 2010 13:47:12 +0200 Subject: genirq: Remove unsused inline move_irq() has no users. Remove it and simplify the ifdef forrest while at it. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- include/linux/irq.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index f5827abbc03..80fdab208c1 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -215,15 +215,9 @@ extern void remove_irq(unsigned int irq, struct irqaction *act); #ifdef CONFIG_GENERIC_HARDIRQS -#ifdef CONFIG_SMP -# ifdef CONFIG_GENERIC_PENDING_IRQ +#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) void move_native_irq(int irq); void move_masked_irq(int irq); -# else -static inline void move_irq(int irq) { } -static inline void move_native_irq(int irq) { } -static inline void move_masked_irq(int irq) { } -# endif #else static inline void move_native_irq(int irq) { } static inline void move_masked_irq(int irq) { } -- cgit v1.2.3-18-g5258 From 442471848f5abb55b99cba1229301655f67492b4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 28 Sep 2010 10:40:18 +0200 Subject: genirq: Provide status modifier Provide a irq_desc.status modifier function to cleanup the direct access to irq_desc in arch and driver code. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- include/linux/irq.h | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 80fdab208c1..e7e7ac83edd 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -72,6 +72,10 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_ONESHOT 0x08000000 /* IRQ is not unmasked after hardirq */ #define IRQ_NESTED_THREAD 0x10000000 /* IRQ is nested into another, no own handler thread */ +#define IRQF_MODIFY_MASK \ + (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ + IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL) + #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) # define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) @@ -289,8 +293,27 @@ set_irq_chained_handler(unsigned int irq, extern void set_irq_nested_thread(unsigned int irq, int nest); -extern void set_irq_noprobe(unsigned int irq); -extern void set_irq_probe(unsigned int irq); +void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set); + +static inline void irq_set_status_flags(unsigned int irq, unsigned long set) +{ + irq_modify_status(irq, 0, set); +} + +static inline void irq_clear_status_flags(unsigned int irq, unsigned long clr) +{ + irq_modify_status(irq, clr, 0); +} + +static inline void set_irq_noprobe(unsigned int irq) +{ + irq_modify_status(irq, 0, IRQ_NOPROBE); +} + +static inline void set_irq_probe(unsigned int irq) +{ + irq_modify_status(irq, IRQ_NOPROBE, 0); +} /* Handle dynamic irq creation and destruction */ extern unsigned int create_irq_nr(unsigned int irq_want, int node); -- cgit v1.2.3-18-g5258 From f303a6dd127b5ec6de90d1cd79ed19820c7e9658 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 28 Sep 2010 17:34:01 +0200 Subject: genirq: Sanitize irq_data accessors Get the data structure from the core and provide inline wrappers to access the irq_data members. Provide accessor inlines for irq_data as well. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- include/linux/irq.h | 62 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index e7e7ac83edd..bea40556c5a 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -85,6 +85,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #endif struct msi_desc; +struct irq_2_iommu; /** * struct irq_data - per irq and irq chip data passed down to chip functions @@ -332,11 +333,64 @@ extern int set_irq_data(unsigned int irq, void *data); extern int set_irq_chip_data(unsigned int irq, void *data); extern int set_irq_type(unsigned int irq, unsigned int type); extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); +extern struct irq_data *irq_get_irq_data(unsigned int irq); -#define get_irq_chip(irq) (irq_to_desc(irq)->irq_data.chip) -#define get_irq_chip_data(irq) (irq_to_desc(irq)->irq_data.chip_data) -#define get_irq_data(irq) (irq_to_desc(irq)->irq_data.handler_data) -#define get_irq_msi(irq) (irq_to_desc(irq)->irq_data.msi_desc) +static inline struct irq_chip *get_irq_chip(unsigned int irq) +{ + struct irq_data *d = irq_get_irq_data(irq); + return d ? d->chip : NULL; +} + +static inline struct irq_chip *irq_data_get_irq_chip(struct irq_data *d) +{ + return d->chip; +} + +static inline void *get_irq_chip_data(unsigned int irq) +{ + struct irq_data *d = irq_get_irq_data(irq); + return d ? d->chip_data : NULL; +} + +static inline void *irq_data_get_irq_chip_data(struct irq_data *d) +{ + return d->chip_data; +} + +static inline void *get_irq_data(unsigned int irq) +{ + struct irq_data *d = irq_get_irq_data(irq); + return d ? d->handler_data : NULL; +} + +static inline void *irq_data_get_irq_data(struct irq_data *d) +{ + return d->handler_data; +} + +static inline struct msi_desc *get_irq_msi(unsigned int irq) +{ + struct irq_data *d = irq_get_irq_data(irq); + return d ? d->msi_desc : NULL; +} + +static inline struct msi_desc *irq_data_get_msi(struct irq_data *d) +{ + return d->msi_desc; +} + +#ifdef CONFIG_INTR_REMAP +static inline struct irq_2_iommu *get_irq_iommu(unsigned int irq) +{ + struct irq_data *d = irq_get_irq_data(irq); + return d ? d->irq_2_iommu : NULL; +} + +static inline struct irq_2_iommu *irq_data_get_iommu(struct irq_data *d) +{ + return d->irq_2_iommu; +} +#endif #endif /* CONFIG_GENERIC_HARDIRQS */ -- cgit v1.2.3-18-g5258 From 154cd387cdf0e5566ce523cbddf92dd2a062dfd6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 22 Sep 2010 15:58:45 +0200 Subject: genirq: Remove early_init_irq_lock_class() early_init_irq_lock_class() is called way before anything touches the irq descriptors. In case of SPARSE_IRQ=y this is a NOP operation because the radix tree is empty at this point. For the SPARSE_IRQ=n case it's sufficient to set the lock class in early_init_irq(). Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- include/linux/lockdep.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 06aed8305bf..17d050ce7ab 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -424,14 +424,6 @@ do { \ #endif /* CONFIG_LOCKDEP */ -#ifdef CONFIG_GENERIC_HARDIRQS -extern void early_init_irq_lock_class(void); -#else -static inline void early_init_irq_lock_class(void) -{ -} -#endif - #ifdef CONFIG_TRACE_IRQFLAGS extern void early_boot_irqs_off(void); extern void early_boot_irqs_on(void); -- cgit v1.2.3-18-g5258 From 1318a481fc37c503a901b96ae06b692ca2b21af5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 27 Sep 2010 21:01:37 +0200 Subject: genirq: Provide default irq init flags Arch code sets it's own irq_desc.status flags right after boot and for dynamically allocated interrupts. That might involve iterating over a huge array. Allow ARCH_IRQ_INIT_FLAGS to set separate flags aside of IRQ_DISABLED which is the default. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- include/linux/irq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index bea40556c5a..30a300991ed 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -214,6 +214,12 @@ struct irq_chip { */ #include +#ifndef ARCH_IRQ_INIT_FLAGS +# define ARCH_IRQ_INIT_FLAGS 0 +#endif + +#define IRQ_DEFAULT_INIT_FLAGS (IRQ_DISABLED | ARCH_IRQ_INIT_FLAGS) + struct irqaction; extern int setup_irq(unsigned int irq, struct irqaction *new); extern void remove_irq(unsigned int irq, struct irqaction *act); -- cgit v1.2.3-18-g5258 From 1f5a5b87f78fade3ae48dfd55e8765d1d622ea4e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 27 Sep 2010 17:48:26 +0200 Subject: genirq: Implement a sane sparse_irq allocator The current sparse_irq allocator has several short comings due to failures in the design or the lack of it: - Requires iteration over the number of active irqs to find a free slot (Some architectures have grown their own workarounds for this) - Removal of entries is not possible - Racy between create_irq_nr and destroy_irq (plugged by horrible callbacks) - Migration of active irq descriptors is not possible - No bulk allocation of irq ranges - Sprinkeled irq_desc references all over the place outside of kernel/irq/ (The previous chip functions series is addressing this issue) Implement a sane allocator which fixes the above short comings (though migration of active descriptors needs a full tree wide cleanup of the direct and mostly unlocked access to irq_desc). The new allocator still uses a radix_tree, but uses a bitmap for keeping track of allocated irq numbers. That allows: - Fast lookup of a free slot - Allows the removal of descriptors - Prevents the create/destroy race - Bulk allocation of consecutive irq ranges - Basic design is ready for migration of life descriptors after further cleanups The bitmap is also used in the SPARSE_IRQ=n case for lookup and raceless (de)allocation of irq numbers. So it removes the requirement for looping through the descriptor array to find slots. Right now it uses sparse_irq_lock to protect the bitmap and the radix tree, but after cleaning up all users we should be able convert that to a mutex and to switch the radix_tree and decriptor allocations to GFP_KERNEL. [ Folded in a bugfix from Yinghai Lu ] Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- include/linux/irq.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 30a300991ed..cefacf928b3 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -398,6 +398,29 @@ static inline struct irq_2_iommu *irq_data_get_iommu(struct irq_data *d) } #endif +int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node); +void irq_free_descs(unsigned int irq, unsigned int cnt); + +static inline int irq_alloc_desc(int node) +{ + return irq_alloc_descs(-1, 0, 1, node); +} + +static inline int irq_alloc_desc_at(unsigned int at, int node) +{ + return irq_alloc_descs(at, at, 1, node); +} + +static inline int irq_alloc_desc_from(unsigned int from, int node) +{ + return irq_alloc_descs(-1, from, 1, node); +} + +static inline void irq_free_desc(unsigned int irq) +{ + irq_free_descs(irq, 1); +} + #endif /* CONFIG_GENERIC_HARDIRQS */ #endif /* !CONFIG_S390 */ -- cgit v1.2.3-18-g5258 From a98d24b71b6e229965f18dc00d28dc71cb8fe324 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 30 Sep 2010 10:45:07 +0200 Subject: genirq: Implement sane enumeration Use the allocator bitmap to lookup active interrupts. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- include/linux/irqnr.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 7bf89bc8cbc..05aa8c23483 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -25,6 +25,7 @@ extern int nr_irqs; extern struct irq_desc *irq_to_desc(unsigned int irq); +unsigned int irq_get_next_irq(unsigned int offset); # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ @@ -47,6 +48,10 @@ extern struct irq_desc *irq_to_desc(unsigned int irq); #define irq_node(irq) 0 #endif +# define for_each_active_irq(irq) \ + for (irq = irq_get_next_irq(0); irq < nr_irqs; \ + irq = irq_get_next_irq(irq + 1)) + #endif /* CONFIG_GENERIC_HARDIRQS */ #define for_each_irq_nr(irq) \ -- cgit v1.2.3-18-g5258 From 06f6c3399e9f9ff6eafc200e80f9226c3cee0eaf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Oct 2010 12:31:46 +0200 Subject: genirq: Implement irq reservation Mark a range of interrupts as allocated. In the SPARSE_IRQ=n case we need this to update the bitmap for the legacy irqs so the enumerator via irq_get_next_irq() works. Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index cefacf928b3..096b74d5d0d 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -400,6 +400,7 @@ static inline struct irq_2_iommu *irq_data_get_iommu(struct irq_data *d) int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node); void irq_free_descs(unsigned int irq, unsigned int cnt); +int irq_reserve_irqs(unsigned int from, unsigned int cnt); static inline int irq_alloc_desc(int node) { -- cgit v1.2.3-18-g5258 From b683de2b3cb17bb10fa6fd4af614dc75b5749fe0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 27 Sep 2010 20:55:03 +0200 Subject: genirq: Query arch for number of early descriptors sparse irq sets up NR_IRQS_LEGACY irq descriptors and archs then go ahead and allocate more. Use the unused return value of arch_probe_nr_irqs() to let the architecture return the number of early allocations. Fix up all users. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- include/linux/irq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 096b74d5d0d..ef878823ee3 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -214,6 +214,10 @@ struct irq_chip { */ #include +#ifndef NR_IRQS_LEGACY +# define NR_IRQS_LEGACY 0 +#endif + #ifndef ARCH_IRQ_INIT_FLAGS # define ARCH_IRQ_INIT_FLAGS 0 #endif -- cgit v1.2.3-18-g5258 From 1c9db52534a2c0e9776788cd34ccc193289fc18c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 28 Sep 2010 16:46:51 +0200 Subject: pci: Convert msi to new irq_chip functions Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Jesse Barnes Cc: Benjamin Herrenschmidt Cc: "David S. Miller" Cc: Tony Luck Cc: Russell King --- include/linux/msi.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 91b05c17185..329d17c395a 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -11,8 +11,9 @@ struct msi_msg { /* Helper functions */ struct irq_desc; -extern void mask_msi_irq(unsigned int irq); -extern void unmask_msi_irq(unsigned int irq); +struct irq_data; +extern void mask_msi_irq(struct irq_data *data); +extern void unmask_msi_irq(struct irq_data *data); extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); extern void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); -- cgit v1.2.3-18-g5258 From 39431acb1a4c464e62471cb3058b8ffffb9244db Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 28 Sep 2010 19:09:51 +0200 Subject: pci: Cleanup the irq_desc mess in msi Handing down irq_desc to msi just so that msi can access irq_desc.irq_data.msi_desc is a pretty stupid idea. The calling code can hand down a pointer to msi_desc so msi code does not need to know about the irq descriptor at all. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Jesse Barnes --- include/linux/msi.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 329d17c395a..05acced439a 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -10,13 +10,13 @@ struct msi_msg { }; /* Helper functions */ -struct irq_desc; struct irq_data; +struct msi_desc; extern void mask_msi_irq(struct irq_data *data); extern void unmask_msi_irq(struct irq_data *data); -extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); -extern void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); -extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); +extern void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); +extern void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); +extern void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); extern void read_msi_msg(unsigned int irq, struct msi_msg *msg); extern void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); -- cgit v1.2.3-18-g5258 From 5c2837fbaa609e615ef9a1c58a4cd26ce90be35b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 28 Sep 2010 17:15:11 +0200 Subject: dmar: Convert to new irq chip functions Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: David Woodhouse --- include/linux/dmar.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index d7cecc90ed3..cb86aa1ca43 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -187,8 +187,9 @@ static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev) /* Can't use the common MSI interrupt functions * since DMAR is not a pci device */ -extern void dmar_msi_unmask(unsigned int irq); -extern void dmar_msi_mask(unsigned int irq); +struct irq_data; +extern void dmar_msi_unmask(struct irq_data *data); +extern void dmar_msi_mask(struct irq_data *data); extern void dmar_msi_read(int irq, struct msi_msg *msg); extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); -- cgit v1.2.3-18-g5258 From e9f7ac664bfc36685a8eb3315ec21c067d0cee36 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 28 Sep 2010 17:22:09 +0200 Subject: ht: Convert to new irq_chip functions Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Jesse Barnes --- include/linux/htirq.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/htirq.h b/include/linux/htirq.h index c96ea46737d..70a1dbbf209 100644 --- a/include/linux/htirq.h +++ b/include/linux/htirq.h @@ -9,8 +9,9 @@ struct ht_irq_msg { /* Helper functions.. */ void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); -void mask_ht_irq(unsigned int irq); -void unmask_ht_irq(unsigned int irq); +struct irq_data; +void mask_ht_irq(struct irq_data *data); +void unmask_ht_irq(struct irq_data *data); /* The arch hook for getting things started */ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev); -- cgit v1.2.3-18-g5258 From d0ad63927c6d4d511e172c78ba4a623539ef6901 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Oct 2010 18:41:37 +0200 Subject: pci: intr_remap: Remove unused functions No users. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Suresh Siddha Cc: David Woodhouse Cc: Jesse Barnes --- include/linux/dmar.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index cb86aa1ca43..200439ec7c4 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -119,8 +119,6 @@ extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count); extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 sub_handle); extern int map_irq_to_irte_handle(int irq, u16 *sub_handle); -extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index); -extern int flush_irte(int irq); extern int free_irte(int irq); extern int irq_remapped(int irq); -- cgit v1.2.3-18-g5258 From 423f085952fd7253407cb92984cc2d495a564481 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 10 Oct 2010 11:39:09 +0200 Subject: x86: Embedd irq_2_iommu into irq_cfg That interrupt remapping code is x86 specific and tied to the io_apic code. No need for separate allocator functions in the interrupt remapping code. This allows to simplify the code and irq_2_iommu is small (13 bytes on 64bit) so it's not a real problem even if interrupt remapping is runtime disabled. If it's compile time disabled the impact is zero. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Suresh Siddha Cc: David Woodhouse Cc: Jesse Barnes --- include/linux/dmar.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 200439ec7c4..4475f8cf7a6 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -106,6 +106,7 @@ struct irte { __u64 high; }; }; + #ifdef CONFIG_INTR_REMAP extern int intr_remapping_enabled; extern int intr_remapping_supported(void); -- cgit v1.2.3-18-g5258 From 1a0730d6649113c820217387a011a17dd4aff3ad Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Oct 2010 11:55:37 +0200 Subject: x86: Speed up the irq_remapped check in hot pathes irq_2_iommu is in struct irq_cfg, so we can do the irq_remapped check based on irq_cfg instead of going through a lookup function. That's especially interesting in the eoi_ioapic_irq() hotpath. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Suresh Siddha Cc: David Woodhouse Cc: Jesse Barnes --- include/linux/dmar.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 4475f8cf7a6..51651b76d40 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -122,7 +122,6 @@ extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, extern int map_irq_to_irte_handle(int irq, u16 *sub_handle); extern int free_irte(int irq); -extern int irq_remapped(int irq); extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); extern struct intel_iommu *map_ioapic_to_ir(int apic); extern struct intel_iommu *map_hpet_to_ir(u8 id); @@ -176,7 +175,6 @@ static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev) return 0; } -#define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) #define disable_intr_remapping() (0) #define reenable_intr_remapping(mode) (0) -- cgit v1.2.3-18-g5258 From 10ba1e0eeef6a3c9453d96364e28cb4d911e1ac3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Oct 2010 12:21:18 +0200 Subject: genirq: Remove irq_2_iommu irq_2_iommu is now in the x86 code where it belongs. Remove all leftovers. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Suresh Siddha Cc: David Woodhouse Cc: Jesse Barnes --- include/linux/irq.h | 18 ------------------ include/linux/irqdesc.h | 4 ---- 2 files changed, 22 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index ef878823ee3..49702b22883 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -85,7 +85,6 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #endif struct msi_desc; -struct irq_2_iommu; /** * struct irq_data - per irq and irq chip data passed down to chip functions @@ -97,7 +96,6 @@ struct irq_2_iommu; * methods, to allow shared chip implementations * @msi_desc: MSI descriptor * @affinity: IRQ affinity on SMP - * @irq_2_iommu: iommu with this irq * * The fields here need to overlay the ones in irq_desc until we * cleaned up the direct references and switched everything over to @@ -113,9 +111,6 @@ struct irq_data { #ifdef CONFIG_SMP cpumask_var_t affinity; #endif -#ifdef CONFIG_INTR_REMAP - struct irq_2_iommu *irq_2_iommu; -#endif }; /** @@ -389,19 +384,6 @@ static inline struct msi_desc *irq_data_get_msi(struct irq_data *d) return d->msi_desc; } -#ifdef CONFIG_INTR_REMAP -static inline struct irq_2_iommu *get_irq_iommu(unsigned int irq) -{ - struct irq_data *d = irq_get_irq_data(irq); - return d ? d->irq_2_iommu : NULL; -} - -static inline struct irq_2_iommu *irq_data_get_iommu(struct irq_data *d) -{ - return d->irq_2_iommu; -} -#endif - int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node); void irq_free_descs(unsigned int irq, unsigned int cnt); int irq_reserve_irqs(unsigned int from, unsigned int cnt); diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 22e426fdd30..f77dc5618d7 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -10,7 +10,6 @@ struct proc_dir_entry; struct timer_rand_state; -struct irq_2_iommu; /** * struct irq_desc - interrupt descriptor * @irq_data: per irq and chip data passed down to chip functions @@ -52,9 +51,6 @@ struct irq_desc { struct msi_desc *msi_desc; #ifdef CONFIG_SMP cpumask_var_t affinity; -#endif -#ifdef CONFIG_INTR_REMAP - struct irq_2_iommu *irq_2_iommu; #endif }; }; -- cgit v1.2.3-18-g5258 From b7d0d8258a9f71949b810e0f82a3d75088f4d364 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 29 Sep 2010 18:44:23 +0200 Subject: genirq: Remove arch_init_chip_data() This function should have not been there in the first place. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- include/linux/interrupt.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a0384a4d1e6..19988983aea 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -641,11 +641,8 @@ static inline void init_irq_proc(void) struct seq_file; int show_interrupts(struct seq_file *p, void *v); -struct irq_desc; - extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); -extern int arch_init_chip_data(struct irq_desc *desc, int node); #endif -- cgit v1.2.3-18-g5258 From b7b29338dc7111ed8bd4d6555d84afae13ebe752 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 29 Sep 2010 18:46:55 +0200 Subject: genirq: Sanitize dynamic irq handling Use the cleanup functions of the dynamic allocator. No need to have separate implementations. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- include/linux/irq.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 49702b22883..e9639115dff 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -326,11 +326,15 @@ extern unsigned int create_irq_nr(unsigned int irq_want, int node); extern int create_irq(void); extern void destroy_irq(unsigned int irq); -/* Dynamic irq helper functions */ -extern void dynamic_irq_init(unsigned int irq); -void dynamic_irq_init_keep_chip_data(unsigned int irq); +/* + * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and + * irq_free_desc instead. + */ extern void dynamic_irq_cleanup(unsigned int irq); -void dynamic_irq_cleanup_keep_chip_data(unsigned int irq); +static inline void dynamic_irq_init(unsigned int irq) +{ + dynamic_irq_cleanup(irq); +} /* Set/get chip/data for an IRQ: */ extern int set_irq_chip(unsigned int irq, struct irq_chip *chip); -- cgit v1.2.3-18-g5258 From 78f90d91f395cd0dc1ef3f21e0c5cd6fd50d202c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 29 Sep 2010 17:18:47 +0200 Subject: genirq: Remove the now unused sparse irq leftovers The move_irq_desc() function was only used due to the problem that the allocator did not free the old descriptors. So the descriptors had to be moved in create_irq_nr(). That's history. The code would have never been able to move active interrupt descriptors on affinity settings. That can be done in a completely different way w/o all this horror. Remove all of it. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- include/linux/irqdesc.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index f77dc5618d7..979c68cc745 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -82,24 +82,16 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; -extern void arch_init_copy_chip_data(struct irq_desc *old_desc, - struct irq_desc *desc, int node); -extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); - #ifndef CONFIG_SPARSE_IRQ extern struct irq_desc irq_desc[NR_IRQS]; #endif -#ifdef CONFIG_NUMA_IRQ_DESC -extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node); -#else +/* Will be removed once the last users in power and sh are gone */ +extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) { return desc; } -#endif - -extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); #ifdef CONFIG_GENERIC_HARDIRQS -- cgit v1.2.3-18-g5258 From 8f1e1742233cd1c3444dfc6c945a2efb2814e157 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 9 Aug 2010 17:38:10 -0400 Subject: Bluetooth: HCI devices are either BR/EDR or AMP radios HCI transport drivers may not know what type of radio an AMP device has so only say whether they're BR/EDR or AMP devices. Signed-off-by: David Vrabel Signed-off-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- include/net/bluetooth/hci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index bcbdd6d4e6d..e30e0083434 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -54,7 +54,7 @@ /* HCI controller types */ #define HCI_BREDR 0x00 -#define HCI_80211 0x01 +#define HCI_AMP 0x01 /* HCI device quirks */ enum { -- cgit v1.2.3-18-g5258 From fb3d8eb47ce377d6d7a8fc58b8046ea9eb376a28 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 9 Aug 2010 17:42:21 -0400 Subject: Bluetooth: Support SDIO devices that are AMP controllers Signed-off-by: David Vrabel Signed-off-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- include/linux/mmc/sdio_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 33b2ea09a4a..a36ab3bc7b0 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -18,6 +18,7 @@ #define SDIO_CLASS_PHS 0x06 /* PHS standard interface */ #define SDIO_CLASS_WLAN 0x07 /* WLAN interface */ #define SDIO_CLASS_ATA 0x08 /* Embedded SDIO-ATA std interface */ +#define SDIO_CLASS_BT_AMP 0x09 /* Type-A Bluetooth AMP interface */ /* * Vendors and devices. Sort key: vendor first, device next. -- cgit v1.2.3-18-g5258 From 796c86eec84ddfd02281c5071838ed1fefda6b90 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Wed, 8 Sep 2010 10:05:27 -0700 Subject: Bluetooth: Add common code for stream-oriented recvmsg() This commit adds a bt_sock_stream_recvmsg() function for use by any Bluetooth code that uses SOCK_STREAM sockets. This code is copied from rfcomm_sock_recvmsg() with minimal modifications to remove RFCOMM-specific functionality and improve readability. L2CAP (with the SOCK_STREAM socket type) and RFCOMM have common needs when it comes to reading data. Proper stream read semantics require that applications can read from a stream one byte at a time and not lose any data. The RFCOMM code already operated on and pulled data from the underlying L2CAP socket, so very few changes were required to make the code more generic for use with non-RFCOMM data over L2CAP. Applications that need more awareness of L2CAP frame boundaries are still free to use SOCK_SEQPACKET sockets, and may verify that they connection did not fall back to basic mode by calling getsockopt(). Signed-off-by: Mat Martineau Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- include/net/bluetooth/bluetooth.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 30fce0128dd..d81ea799770 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -126,6 +126,8 @@ int bt_sock_unregister(int proto); void bt_sock_link(struct bt_sock_list *l, struct sock *s); void bt_sock_unlink(struct bt_sock_list *l, struct sock *s); int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags); +int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags); uint bt_sock_poll(struct file * file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); -- cgit v1.2.3-18-g5258 From 534c92fde74c8d2143fc15b5f1d957f42cb43570 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Fri, 1 Oct 2010 12:05:11 +0300 Subject: Bluetooth: clean up rfcomm code Remove dead code and unused rfcomm thread events Signed-off-by: Andrei Emeltchenko Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- include/net/bluetooth/rfcomm.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index a140847d622..71047bc0af8 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -213,11 +213,6 @@ struct rfcomm_dlc { #define RFCOMM_DEFER_SETUP 8 /* Scheduling flags and events */ -#define RFCOMM_SCHED_STATE 0 -#define RFCOMM_SCHED_RX 1 -#define RFCOMM_SCHED_TX 2 -#define RFCOMM_SCHED_TIMEO 3 -#define RFCOMM_SCHED_AUTH 4 #define RFCOMM_SCHED_WAKEUP 31 /* MSC exchange flags */ -- cgit v1.2.3-18-g5258 From 29b4433d991c88d86ca48a4c1cc33c671475be4b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Oct 2010 10:22:12 +0000 Subject: net: percpu net_device refcount We tried very hard to remove all possible dev_hold()/dev_put() pairs in network stack, using RCU conversions. There is still an unavoidable device refcount change for every dst we create/destroy, and this can slow down some workloads (routers or some app servers, mmap af_packet) We can switch to a percpu refcount implementation, now dynamic per_cpu infrastructure is mature. On a 64 cpus machine, this consumes 256 bytes per device. On x86, dev_hold(dev) code : before lock incl 0x280(%ebx) after: movl 0x260(%ebx),%eax incl fs:(%eax) Stress bench : (Sending 160.000.000 UDP frames, IP route cache disabled, dual E5540 @2.53GHz, 32bit kernel, FIB_TRIE) Before: real 1m1.662s user 0m14.373s sys 12m55.960s After: real 0m51.179s user 0m15.329s sys 10m15.942s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4160db3721b..14fbb04c459 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1026,7 +1026,7 @@ struct net_device { struct timer_list watchdog_timer; /* Number of references to this device */ - atomic_t refcnt ____cacheline_aligned_in_smp; + int __percpu *pcpu_refcnt; /* delayed register/unregister */ struct list_head todo_list; @@ -1330,6 +1330,7 @@ static inline void unregister_netdevice(struct net_device *dev) unregister_netdevice_queue(dev, NULL); } +extern int netdev_refcnt_read(const struct net_device *dev); extern void free_netdev(struct net_device *dev); extern void synchronize_net(void); extern int register_netdevice_notifier(struct notifier_block *nb); @@ -1798,7 +1799,7 @@ extern void netdev_run_todo(void); */ static inline void dev_put(struct net_device *dev) { - atomic_dec(&dev->refcnt); + irqsafe_cpu_dec(*dev->pcpu_refcnt); } /** @@ -1809,7 +1810,7 @@ static inline void dev_put(struct net_device *dev) */ static inline void dev_hold(struct net_device *dev) { - atomic_inc(&dev->refcnt); + irqsafe_cpu_inc(*dev->pcpu_refcnt); } /* Carrier loss detection, dial on demand. The functions netif_carrier_on -- cgit v1.2.3-18-g5258 From c7fc2de0c83dbd2eaf759c5cd0e2b9cf1eb4df3a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 12 Oct 2010 14:07:09 -0700 Subject: memblock, bootmem: Round pfn properly for memory and reserved regions We need to round memory regions correctly -- specifically, we need to round reserved region in the more expansive direction (lower limit down, upper limit up) whereas usable memory regions need to be rounded in the more restrictive direction (lower limit up, upper limit down). This introduces two set of inlines: memblock_region_memory_base_pfn() memblock_region_memory_end_pfn() memblock_region_reserved_base_pfn() memblock_region_reserved_end_pfn() Although they are antisymmetric (and therefore are technically duplicates) the use of the different inlines explicitly documents the programmer's intention. The lack of proper rounding caused a bug on ARM, which was then found to also affect other architectures. Reported-by: Russell King Signed-off-by: Yinghai Lu LKML-Reference: <4CB4CDFD.4020105@kernel.org> Cc: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 5096458c753..62a10c2a11f 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -111,40 +111,39 @@ extern void memblock_set_current_limit(phys_addr_t limit); */ /** - * memblock_region_base_pfn - Return the lowest pfn intersecting with the region + * memblock_region_memory_base_pfn - Return the lowest pfn intersecting with the memory region * @reg: memblock_region structure */ -static inline unsigned long memblock_region_base_pfn(const struct memblock_region *reg) +static inline unsigned long memblock_region_memory_base_pfn(const struct memblock_region *reg) { - return reg->base >> PAGE_SHIFT; + return PFN_UP(reg->base); } /** - * memblock_region_last_pfn - Return the highest pfn intersecting with the region + * memblock_region_memory_end_pfn - Return the end_pfn this region * @reg: memblock_region structure */ -static inline unsigned long memblock_region_last_pfn(const struct memblock_region *reg) +static inline unsigned long memblock_region_memory_end_pfn(const struct memblock_region *reg) { - return (reg->base + reg->size - 1) >> PAGE_SHIFT; + return PFN_DOWN(reg->base + reg->size); } /** - * memblock_region_end_pfn - Return the pfn of the first page following the region - * but not intersecting it + * memblock_region_reserved_base_pfn - Return the lowest pfn intersecting with the reserved region * @reg: memblock_region structure */ -static inline unsigned long memblock_region_end_pfn(const struct memblock_region *reg) +static inline unsigned long memblock_region_reserved_base_pfn(const struct memblock_region *reg) { - return memblock_region_last_pfn(reg) + 1; + return PFN_DOWN(reg->base); } /** - * memblock_region_pages - Return the number of pages covering a region + * memblock_region_reserved_end_pfn - Return the end_pfn this region * @reg: memblock_region structure */ -static inline unsigned long memblock_region_pages(const struct memblock_region *reg) +static inline unsigned long memblock_region_reserved_end_pfn(const struct memblock_region *reg) { - return memblock_region_end_pfn(reg) - memblock_region_end_pfn(reg); + return PFN_UP(reg->base + reg->size); } #define for_each_memblock(memblock_type, region) \ -- cgit v1.2.3-18-g5258 From b1b6b9aa6fd32db97469e65d301ebc32dcd67992 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 29 Sep 2010 17:31:35 +0900 Subject: spi/pl022: add PrimeCell generic DMA support This extends the PL022 SSP/SPI driver with generic DMA engine support using the PrimeCell DMA engine interface. Also fix up the test code for the U300 platform. Signed-off-by: Linus Walleij Signed-off-by: Grant Likely --- include/linux/amba/pl022.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h index abf26cc47a2..db6a191ddcf 100644 --- a/include/linux/amba/pl022.h +++ b/include/linux/amba/pl022.h @@ -228,6 +228,7 @@ enum ssp_chip_select { }; +struct dma_chan; /** * struct pl022_ssp_master - device.platform_data for SPI controller devices. * @num_chipselect: chipselects are used to distinguish individual @@ -235,11 +236,16 @@ enum ssp_chip_select { * each slave has a chipselect signal, but it's common that not * every chipselect is connected to a slave. * @enable_dma: if true enables DMA driven transfers. + * @dma_rx_param: parameter to locate an RX DMA channel. + * @dma_tx_param: parameter to locate a TX DMA channel. */ struct pl022_ssp_controller { u16 bus_id; u8 num_chipselect; u8 enable_dma:1; + bool (*dma_filter)(struct dma_chan *chan, void *filter_param); + void *dma_rx_param; + void *dma_tx_param; }; /** -- cgit v1.2.3-18-g5258 From bde435a9ca376d0b7809768ca803dbf14416b9c1 Mon Sep 17 00:00:00 2001 From: Kevin Wells Date: Thu, 16 Sep 2010 06:18:50 -0700 Subject: spi/pl022: Add spi->mode support to AMBA SPI driver This patch adds spi->mode support for the AMBA pl022 driver and allows spidev to correctly alter SPI modes. Unused fields used in the pl022 header file for the pl022_config_chip have been removed. The ab8500 client driver selects the data transfer size instead of the platform data. For platforms that use the amba pl022 driver, the unused fields in the controller data structure have been removed and the .mode field in the SPI board info structure is used instead. Signed-off-by: Kevin Wells Tested-by: Linus Walleij Acked-by: Linus Walleij Signed-off-by: Grant Likely --- include/linux/amba/pl022.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h index db6a191ddcf..bf143663df8 100644 --- a/include/linux/amba/pl022.h +++ b/include/linux/amba/pl022.h @@ -277,19 +277,13 @@ struct pl022_ssp_controller { */ struct pl022_config_chip { struct device *dev; - enum ssp_loopback lbm; enum ssp_interface iface; enum ssp_hierarchy hierarchy; bool slave_tx_disable; struct ssp_clock_params clk_freq; - enum ssp_rx_endian endian_rx; - enum ssp_tx_endian endian_tx; - enum ssp_data_size data_size; enum ssp_mode com_mode; enum ssp_rx_level_trig rx_lev_trig; enum ssp_tx_level_trig tx_lev_trig; - enum ssp_spi_clk_phase clk_phase; - enum ssp_spi_clk_pol clk_pol; enum ssp_microwire_ctrl_len ctrl_len; enum ssp_microwire_wait_state wait_state; enum ssp_duplex duplex; -- cgit v1.2.3-18-g5258 From 5a1c98be1de165c8ad1bd5343a5d779230669489 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 1 Oct 2010 11:47:32 +0200 Subject: spi/pl022: get rid of chipinfo dev pointer What is the dev pointer doing inside the platform data anyway. We have another pointer to the actual device at hand, use that. Signed-off-by: Linus Walleij Signed-off-by: Grant Likely --- include/linux/amba/pl022.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h index bf143663df8..4ce98f54186 100644 --- a/include/linux/amba/pl022.h +++ b/include/linux/amba/pl022.h @@ -276,7 +276,6 @@ struct pl022_ssp_controller { * @dma_config: DMA configuration for SSP controller and peripheral */ struct pl022_config_chip { - struct device *dev; enum ssp_interface iface; enum ssp_hierarchy hierarchy; bool slave_tx_disable; -- cgit v1.2.3-18-g5258 From f90c34bd658d240cb5ebc5fe0a17796e590c6ec8 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Sun, 10 Oct 2010 21:49:45 -0600 Subject: of/promtree: no longer call prom_ functions directly; use an ops structure Rather than assuming an architecture defines prom_getchild and friends, define an ops struct with hooks for the various prom functions that pdt.c needs. This ops struct is filled in by the arch-(and sometimes firmware-)specific code, and passed to of_pdt_build_devicetree. Update sparc code to define the ops struct as well. Signed-off-by: Andres Salomon Acked-by: David S. Miller Signed-off-by: Grant Likely --- include/linux/of_pdt.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/of_pdt.h b/include/linux/of_pdt.h index c0a8774e45d..303c5ffae9b 100644 --- a/include/linux/of_pdt.h +++ b/include/linux/of_pdt.h @@ -13,10 +13,28 @@ #ifndef _LINUX_OF_PDT_H #define _LINUX_OF_PDT_H +/* overridable operations for calling into the PROM */ +struct of_pdt_ops { + /* + * buf should be 32 bytes; return 0 on success. + * If prev is NULL, the first property will be returned. + */ + int (*nextprop)(phandle node, char *prev, char *buf); + + /* for both functions, return proplen on success; -1 on error */ + int (*getproplen)(phandle node, const char *prop); + int (*getproperty)(phandle node, const char *prop, char *buf, + int bufsize); + + /* phandles are 0 if no child or sibling exists */ + phandle (*getchild)(phandle parent); + phandle (*getsibling)(phandle node); +}; + extern void *prom_early_alloc(unsigned long size); /* for building the device tree */ -extern void of_pdt_build_devicetree(phandle root_node); +extern void of_pdt_build_devicetree(phandle root_node, struct of_pdt_ops *ops); extern void (*prom_build_more)(struct device_node *dp, struct device_node ***nextp); -- cgit v1.2.3-18-g5258 From ed41850298f7a55519de0b8573e217ed8a45c199 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Sun, 10 Oct 2010 21:51:25 -0600 Subject: of/promtree: add of_pdt namespace to pdt code For symbols still lacking namespace qualifiers, add an of_pdt_ prefix. Signed-off-by: Andres Salomon Acked-by: David S. Miller Signed-off-by: Grant Likely --- include/linux/of_pdt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/of_pdt.h b/include/linux/of_pdt.h index 303c5ffae9b..0f7d0c56348 100644 --- a/include/linux/of_pdt.h +++ b/include/linux/of_pdt.h @@ -36,7 +36,7 @@ extern void *prom_early_alloc(unsigned long size); /* for building the device tree */ extern void of_pdt_build_devicetree(phandle root_node, struct of_pdt_ops *ops); -extern void (*prom_build_more)(struct device_node *dp, +extern void (*of_pdt_build_more)(struct device_node *dp, struct device_node ***nextp); #endif /* _LINUX_OF_PDT_H */ -- cgit v1.2.3-18-g5258 From e2f2a93b6384cfe0face0be595bfbda1475d864b Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Sun, 10 Oct 2010 21:52:57 -0600 Subject: of/promtree: add package-to-path support to pdt package-to-path is a PROM function which tells us the real (full) name of the node. This provides a hook for that in the prom ops struct, and makes use of it in the pdt code when attempting to determine a node's name. If the hook is available, try using it (falling back to looking at the "name" property if it fails). Signed-off-by: Andres Salomon Signed-off-by: Grant Likely --- include/linux/of_pdt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/of_pdt.h b/include/linux/of_pdt.h index 0f7d0c56348..c65a18a0cfd 100644 --- a/include/linux/of_pdt.h +++ b/include/linux/of_pdt.h @@ -29,6 +29,9 @@ struct of_pdt_ops { /* phandles are 0 if no child or sibling exists */ phandle (*getchild)(phandle parent); phandle (*getsibling)(phandle node); + + /* return 0 on success; fill in 'len' with number of bytes in path */ + int (*pkg2path)(phandle node, char *buf, const int buflen, int *len); }; extern void *prom_early_alloc(unsigned long size); -- cgit v1.2.3-18-g5258 From 52f6537cb2f0b461a9ce3457c01a6cfa2ae0bb22 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Sun, 10 Oct 2010 21:35:05 -0600 Subject: of/irq: remove references to NO_IRQ in drivers/of/platform.c Instead of referencing NO_IRQ in platform.c, define some helper functions in irq.c to call instead from platform.c. Keep NO_IRQ usage local to irq.c, and define NO_IRQ if not defined in headers. Signed-off-by: Andres Salomon Signed-off-by: Grant Likely --- include/linux/of_irq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 5929781c104..090cbaa4bd3 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -64,6 +64,9 @@ extern unsigned int irq_create_of_mapping(struct device_node *controller, unsigned int intsize); extern int of_irq_to_resource(struct device_node *dev, int index, struct resource *r); +extern int of_irq_count(struct device_node *dev); +extern int of_irq_to_resource_table(struct device_node *dev, + struct resource *res, int nr_irqs); #endif /* CONFIG_OF_IRQ */ #endif /* CONFIG_OF */ -- cgit v1.2.3-18-g5258 From 69395396a0a8866f30d59c66b7be1912ccb5d160 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Oct 2010 07:44:36 +0000 Subject: sh: remove name and id from struct clk Remove "name" and "id" from drivers/sh/ struct clk. The struct clk members "name" and "id" are not used now when matching is done through clkdev. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- include/linux/sh_clk.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h index 875ce50719a..ecdfea54a49 100644 --- a/include/linux/sh_clk.h +++ b/include/linux/sh_clk.h @@ -21,9 +21,6 @@ struct clk_ops { struct clk { struct list_head node; - const char *name; - int id; - struct clk *parent; struct clk **parent_table; /* list of parents to */ unsigned short parent_num; /* choose between */ -- cgit v1.2.3-18-g5258 From 4780c8df3856398020be2928d9e9fa8c457a09a4 Mon Sep 17 00:00:00 2001 From: Naveen Kumar Gaddipati Date: Mon, 4 Oct 2010 22:32:48 -0700 Subject: Input: add ROHM BU21013 touch panel controller support Add the ROHM BU21013 capacitive touch panel controller support with i2c interface. Acked-by: Linus Walleij Signed-off-by: Naveen Kumar Gaddipati Acked-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- include/linux/input/bu21013.h | 44 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 include/linux/input/bu21013.h (limited to 'include') diff --git a/include/linux/input/bu21013.h b/include/linux/input/bu21013.h new file mode 100644 index 00000000000..e470d387dd4 --- /dev/null +++ b/include/linux/input/bu21013.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) ST-Ericsson SA 2010 + * Author: Naveen Kumar G for ST-Ericsson + * License terms:GNU General Public License (GPL) version 2 + */ + +#ifndef _BU21013_H +#define _BU21013_H + +/** + * struct bu21013_platform_device - Handle the platform data + * @cs_en: pointer to the cs enable function + * @cs_dis: pointer to the cs disable function + * @irq_read_val: pointer to read the pen irq value function + * @x_max_res: xmax resolution + * @y_max_res: ymax resolution + * @touch_x_max: touch x max + * @touch_y_max: touch y max + * @cs_pin: chip select pin + * @irq: irq pin + * @ext_clk: external clock flag + * @x_flip: x flip flag + * @y_flip: y flip flag + * @wakeup: wakeup flag + * + * This is used to handle the platform data + */ +struct bu21013_platform_device { + int (*cs_en)(int reset_pin); + int (*cs_dis)(int reset_pin); + int (*irq_read_val)(void); + int x_max_res; + int y_max_res; + int touch_x_max; + int touch_y_max; + unsigned int cs_pin; + unsigned int irq; + bool ext_clk; + bool x_flip; + bool y_flip; + bool wakeup; +}; + +#endif -- cgit v1.2.3-18-g5258 From 0982258264d2f615612ab957634efdeb874f47c8 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Mon, 4 Oct 2010 21:46:10 -0700 Subject: Input: serio - support multiple child devices per single parent Some (rare) serio devices need to have multiple serio children. One of the examples is PS/2 multiplexer present on several TQC STKxxx boards, which connect PS/2 keyboard and mouse to single tty port. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/serio.h b/include/linux/serio.h index 111ad501b05..109b237603b 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -41,7 +41,9 @@ struct serio { int (*start)(struct serio *); void (*stop)(struct serio *); - struct serio *parent, *child; + struct serio *parent; + struct list_head child_node; /* Entry in parent->children list */ + struct list_head children; unsigned int depth; /* level of nesting in serio hierarchy */ struct serio_driver *drv; /* accessed from interrupt, must be protected by serio->lock and serio->sem */ -- cgit v1.2.3-18-g5258 From 12b00c2c025b8af697d9a022ea2e928cad889ef1 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 13 Oct 2010 15:56:56 +0200 Subject: netfilter: xtables: resolve indirect macros 1/3 Many of the used macros are just there for userspace compatibility. Substitute the in-kernel code to directly use the terminal macro and stuff the defines into #ifndef __KERNEL__ sections. Signed-off-by: Jan Engelhardt --- include/linux/netfilter_arp/arp_tables.h | 10 ++++++---- include/linux/netfilter_ipv4/ip_tables.h | 10 ++++++---- include/linux/netfilter_ipv6/ip6_tables.h | 11 ++++++----- 3 files changed, 18 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index e9948c0560f..81938600470 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -21,8 +21,10 @@ #include +#ifndef __KERNEL__ #define ARPT_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN #define ARPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN +#endif #define ARPT_DEV_ADDR_LEN_MAX 16 @@ -134,7 +136,7 @@ struct arpt_entry /* The argument to ARPT_SO_GET_INFO */ struct arpt_getinfo { /* Which table: caller fills this in. */ - char name[ARPT_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* Kernel fills these in. */ /* Which hook entry points are valid: bitmask */ @@ -156,7 +158,7 @@ struct arpt_getinfo { /* The argument to ARPT_SO_SET_REPLACE. */ struct arpt_replace { /* Which table. */ - char name[ARPT_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* Which hook entry points are valid: bitmask. You can't change this. */ @@ -191,7 +193,7 @@ struct arpt_replace { /* The argument to ARPT_SO_GET_ENTRIES. */ struct arpt_get_entries { /* Which table: user fills this in. */ - char name[ARPT_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* User fills this in: total entry size. */ unsigned int size; @@ -230,7 +232,7 @@ struct arpt_standard { struct arpt_error_target { struct arpt_entry_target target; - char errorname[ARPT_FUNCTION_MAXNAMELEN]; + char errorname[XT_FUNCTION_MAXNAMELEN]; }; struct arpt_error { diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 704a7b6e816..1b7cdf1137e 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -27,12 +27,14 @@ #include +#ifndef __KERNEL__ #define IPT_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN #define IPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN #define ipt_match xt_match #define ipt_target xt_target #define ipt_table xt_table #define ipt_get_revision xt_get_revision +#endif /* Yes, Virginia, you have to zero the padding. */ struct ipt_ip { @@ -146,7 +148,7 @@ struct ipt_icmp { /* The argument to IPT_SO_GET_INFO */ struct ipt_getinfo { /* Which table: caller fills this in. */ - char name[IPT_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* Kernel fills these in. */ /* Which hook entry points are valid: bitmask */ @@ -168,7 +170,7 @@ struct ipt_getinfo { /* The argument to IPT_SO_SET_REPLACE. */ struct ipt_replace { /* Which table. */ - char name[IPT_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* Which hook entry points are valid: bitmask. You can't change this. */ @@ -202,7 +204,7 @@ struct ipt_replace { /* The argument to IPT_SO_GET_ENTRIES. */ struct ipt_get_entries { /* Which table: user fills this in. */ - char name[IPT_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* User fills this in: total entry size. */ unsigned int size; @@ -254,7 +256,7 @@ struct ipt_standard { struct ipt_error_target { struct ipt_entry_target target; - char errorname[IPT_FUNCTION_MAXNAMELEN]; + char errorname[XT_FUNCTION_MAXNAMELEN]; }; struct ipt_error { diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 18442ff19c0..abe31d020e3 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -27,13 +27,14 @@ #include +#ifndef __KERNEL__ #define IP6T_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN #define IP6T_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN - #define ip6t_match xt_match #define ip6t_target xt_target #define ip6t_table xt_table #define ip6t_get_revision xt_get_revision +#endif /* Yes, Virginia, you have to zero the padding. */ struct ip6t_ip6 { @@ -117,7 +118,7 @@ struct ip6t_standard { struct ip6t_error_target { struct ip6t_entry_target target; - char errorname[IP6T_FUNCTION_MAXNAMELEN]; + char errorname[XT_FUNCTION_MAXNAMELEN]; }; struct ip6t_error { @@ -203,7 +204,7 @@ struct ip6t_icmp { /* The argument to IP6T_SO_GET_INFO */ struct ip6t_getinfo { /* Which table: caller fills this in. */ - char name[IP6T_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* Kernel fills these in. */ /* Which hook entry points are valid: bitmask */ @@ -225,7 +226,7 @@ struct ip6t_getinfo { /* The argument to IP6T_SO_SET_REPLACE. */ struct ip6t_replace { /* Which table. */ - char name[IP6T_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* Which hook entry points are valid: bitmask. You can't change this. */ @@ -259,7 +260,7 @@ struct ip6t_replace { /* The argument to IP6T_SO_GET_ENTRIES. */ struct ip6t_get_entries { /* Which table: user fills this in. */ - char name[IP6T_TABLE_MAXNAMELEN]; + char name[XT_TABLE_MAXNAMELEN]; /* User fills this in: total entry size. */ unsigned int size; -- cgit v1.2.3-18-g5258 From 87a2e70db62fec7348c6e5545eb7b7650c33d81b Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 13 Oct 2010 16:11:22 +0200 Subject: netfilter: xtables: resolve indirect macros 2/3 Signed-off-by: Jan Engelhardt --- include/linux/netfilter_arp/arp_tables.h | 15 +++++++-------- include/linux/netfilter_ipv4/ip_tables.h | 18 ++++++++---------- include/linux/netfilter_ipv6/ip6_tables.h | 20 +++++++++----------- 3 files changed, 24 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 81938600470..7e193c9241b 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -24,6 +24,8 @@ #ifndef __KERNEL__ #define ARPT_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN #define ARPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN +#define arpt_entry_target xt_entry_target +#define arpt_standard_target xt_standard_target #endif #define ARPT_DEV_ADDR_LEN_MAX 16 @@ -65,9 +67,6 @@ struct arpt_arp { u_int16_t invflags; }; -#define arpt_entry_target xt_entry_target -#define arpt_standard_target xt_standard_target - /* Values for "flag" field in struct arpt_ip (general arp structure). * No flags defined yet. */ @@ -208,7 +207,7 @@ struct arpt_get_entries { #define ARPT_ERROR_TARGET XT_ERROR_TARGET /* Helper functions */ -static __inline__ struct arpt_entry_target *arpt_get_target(struct arpt_entry *e) +static __inline__ struct xt_entry_target *arpt_get_target(struct arpt_entry *e) { return (void *)e + e->target_offset; } @@ -227,11 +226,11 @@ static __inline__ struct arpt_entry_target *arpt_get_target(struct arpt_entry *e /* Standard entry. */ struct arpt_standard { struct arpt_entry entry; - struct arpt_standard_target target; + struct xt_standard_target target; }; struct arpt_error_target { - struct arpt_entry_target target; + struct xt_entry_target target; char errorname[XT_FUNCTION_MAXNAMELEN]; }; @@ -250,7 +249,7 @@ struct arpt_error { { \ .entry = ARPT_ENTRY_INIT(sizeof(struct arpt_standard)), \ .target = XT_TARGET_INIT(ARPT_STANDARD_TARGET, \ - sizeof(struct arpt_standard_target)), \ + sizeof(struct xt_standard_target)), \ .target.verdict = -(__verdict) - 1, \ } @@ -287,7 +286,7 @@ struct compat_arpt_entry { unsigned char elems[0]; }; -static inline struct arpt_entry_target * +static inline struct xt_entry_target * compat_arpt_get_target(struct compat_arpt_entry *e) { return (void *)e + e->target_offset; diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 1b7cdf1137e..ec506918a9b 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -34,6 +34,10 @@ #define ipt_target xt_target #define ipt_table xt_table #define ipt_get_revision xt_get_revision +#define ipt_entry_match xt_entry_match +#define ipt_entry_target xt_entry_target +#define ipt_standard_target xt_standard_target +#define ipt_counters xt_counters #endif /* Yes, Virginia, you have to zero the padding. */ @@ -54,12 +58,6 @@ struct ipt_ip { u_int8_t invflags; }; -#define ipt_entry_match xt_entry_match -#define ipt_entry_target xt_entry_target -#define ipt_standard_target xt_standard_target - -#define ipt_counters xt_counters - /* Values for "flag" field in struct ipt_ip (general ip structure). */ #define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ #define IPT_F_GOTO 0x02 /* Set if jump is a goto */ @@ -219,7 +217,7 @@ struct ipt_get_entries { #define IPT_ERROR_TARGET XT_ERROR_TARGET /* Helper functions */ -static __inline__ struct ipt_entry_target * +static __inline__ struct xt_entry_target * ipt_get_target(struct ipt_entry *e) { return (void *)e + e->target_offset; @@ -251,11 +249,11 @@ extern void ipt_unregister_table(struct net *net, struct xt_table *table); /* Standard entry. */ struct ipt_standard { struct ipt_entry entry; - struct ipt_standard_target target; + struct xt_standard_target target; }; struct ipt_error_target { - struct ipt_entry_target target; + struct xt_entry_target target; char errorname[XT_FUNCTION_MAXNAMELEN]; }; @@ -309,7 +307,7 @@ struct compat_ipt_entry { }; /* Helper functions */ -static inline struct ipt_entry_target * +static inline struct xt_entry_target * compat_ipt_get_target(struct compat_ipt_entry *e) { return (void *)e + e->target_offset; diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index abe31d020e3..40d11fa0584 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -34,6 +34,10 @@ #define ip6t_target xt_target #define ip6t_table xt_table #define ip6t_get_revision xt_get_revision +#define ip6t_entry_match xt_entry_match +#define ip6t_entry_target xt_entry_target +#define ip6t_standard_target xt_standard_target +#define ip6t_counters xt_counters #endif /* Yes, Virginia, you have to zero the padding. */ @@ -63,12 +67,6 @@ struct ip6t_ip6 { u_int8_t invflags; }; -#define ip6t_entry_match xt_entry_match -#define ip6t_entry_target xt_entry_target -#define ip6t_standard_target xt_standard_target - -#define ip6t_counters xt_counters - /* Values for "flag" field in struct ip6t_ip6 (general ip6 structure). */ #define IP6T_F_PROTO 0x01 /* Set if rule cares about upper protocols */ @@ -113,11 +111,11 @@ struct ip6t_entry { /* Standard entry */ struct ip6t_standard { struct ip6t_entry entry; - struct ip6t_standard_target target; + struct xt_standard_target target; }; struct ip6t_error_target { - struct ip6t_entry_target target; + struct xt_entry_target target; char errorname[XT_FUNCTION_MAXNAMELEN]; }; @@ -136,7 +134,7 @@ struct ip6t_error { { \ .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_standard)), \ .target = XT_TARGET_INIT(IP6T_STANDARD_TARGET, \ - sizeof(struct ip6t_standard_target)), \ + sizeof(struct xt_standard_target)), \ .target.verdict = -(__verdict) - 1, \ } @@ -275,7 +273,7 @@ struct ip6t_get_entries { #define IP6T_ERROR_TARGET XT_ERROR_TARGET /* Helper functions */ -static __inline__ struct ip6t_entry_target * +static __inline__ struct xt_entry_target * ip6t_get_target(struct ip6t_entry *e) { return (void *)e + e->target_offset; @@ -332,7 +330,7 @@ struct compat_ip6t_entry { unsigned char elems[0]; }; -static inline struct ip6t_entry_target * +static inline struct xt_entry_target * compat_ip6t_get_target(struct compat_ip6t_entry *e) { return (void *)e + e->target_offset; -- cgit v1.2.3-18-g5258 From 243bf6e29eef642de0ff62f1ebf58bc2396d6d6e Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 13 Oct 2010 16:28:00 +0200 Subject: netfilter: xtables: resolve indirect macros 3/3 --- include/linux/netfilter_arp/arp_tables.h | 33 +++++---------- include/linux/netfilter_ipv4/ip_tables.h | 69 ++++++++++++++----------------- include/linux/netfilter_ipv6/ip6_tables.h | 69 +++++++++++-------------------- 3 files changed, 67 insertions(+), 104 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 7e193c9241b..6e2341a811d 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -26,6 +26,14 @@ #define ARPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN #define arpt_entry_target xt_entry_target #define arpt_standard_target xt_standard_target +#define ARPT_CONTINUE XT_CONTINUE +#define ARPT_RETURN XT_RETURN +#define arpt_counters_info xt_counters_info +#define arpt_counters xt_counters +#define ARPT_STANDARD_TARGET XT_STANDARD_TARGET +#define ARPT_ERROR_TARGET XT_ERROR_TARGET +#define ARPT_ENTRY_ITERATE(entries, size, fn, args...) \ + XT_ENTRY_ITERATE(struct arpt_entry, entries, size, fn, ## args) #endif #define ARPT_DEV_ADDR_LEN_MAX 16 @@ -126,12 +134,6 @@ struct arpt_entry #define ARPT_SO_GET_REVISION_TARGET (ARPT_BASE_CTL + 3) #define ARPT_SO_GET_MAX (ARPT_SO_GET_REVISION_TARGET) -/* CONTINUE verdict for targets */ -#define ARPT_CONTINUE XT_CONTINUE - -/* For standard target */ -#define ARPT_RETURN XT_RETURN - /* The argument to ARPT_SO_GET_INFO */ struct arpt_getinfo { /* Which table: caller fills this in. */ @@ -185,10 +187,6 @@ struct arpt_replace { struct arpt_entry entries[0]; }; -/* The argument to ARPT_SO_ADD_COUNTERS. */ -#define arpt_counters_info xt_counters_info -#define arpt_counters xt_counters - /* The argument to ARPT_SO_GET_ENTRIES. */ struct arpt_get_entries { /* Which table: user fills this in. */ @@ -201,23 +199,12 @@ struct arpt_get_entries { struct arpt_entry entrytable[0]; }; -/* Standard return verdict, or do jump. */ -#define ARPT_STANDARD_TARGET XT_STANDARD_TARGET -/* Error verdict. */ -#define ARPT_ERROR_TARGET XT_ERROR_TARGET - /* Helper functions */ static __inline__ struct xt_entry_target *arpt_get_target(struct arpt_entry *e) { return (void *)e + e->target_offset; } -#ifndef __KERNEL__ -/* fn returns 0 to continue iteration */ -#define ARPT_ENTRY_ITERATE(entries, size, fn, args...) \ - XT_ENTRY_ITERATE(struct arpt_entry, entries, size, fn, ## args) -#endif - /* * Main firewall chains definitions and global var's definitions. */ @@ -248,7 +235,7 @@ struct arpt_error { #define ARPT_STANDARD_INIT(__verdict) \ { \ .entry = ARPT_ENTRY_INIT(sizeof(struct arpt_standard)), \ - .target = XT_TARGET_INIT(ARPT_STANDARD_TARGET, \ + .target = XT_TARGET_INIT(XT_STANDARD_TARGET, \ sizeof(struct xt_standard_target)), \ .target.verdict = -(__verdict) - 1, \ } @@ -256,7 +243,7 @@ struct arpt_error { #define ARPT_ERROR_INIT \ { \ .entry = ARPT_ENTRY_INIT(sizeof(struct arpt_error)), \ - .target = XT_TARGET_INIT(ARPT_ERROR_TARGET, \ + .target = XT_TARGET_INIT(XT_ERROR_TARGET, \ sizeof(struct arpt_error_target)), \ .target.errorname = "ERROR", \ } diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index ec506918a9b..ee54b3b7e23 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -38,6 +38,36 @@ #define ipt_entry_target xt_entry_target #define ipt_standard_target xt_standard_target #define ipt_counters xt_counters +#define IPT_CONTINUE XT_CONTINUE +#define IPT_RETURN XT_RETURN + +/* This group is older than old (iptables < v1.4.0-rc1~89) */ +#include +#define ipt_udp xt_udp +#define ipt_tcp xt_tcp +#define IPT_TCP_INV_SRCPT XT_TCP_INV_SRCPT +#define IPT_TCP_INV_DSTPT XT_TCP_INV_DSTPT +#define IPT_TCP_INV_FLAGS XT_TCP_INV_FLAGS +#define IPT_TCP_INV_OPTION XT_TCP_INV_OPTION +#define IPT_TCP_INV_MASK XT_TCP_INV_MASK +#define IPT_UDP_INV_SRCPT XT_UDP_INV_SRCPT +#define IPT_UDP_INV_DSTPT XT_UDP_INV_DSTPT +#define IPT_UDP_INV_MASK XT_UDP_INV_MASK + +/* The argument to IPT_SO_ADD_COUNTERS. */ +#define ipt_counters_info xt_counters_info +/* Standard return verdict, or do jump. */ +#define IPT_STANDARD_TARGET XT_STANDARD_TARGET +/* Error verdict. */ +#define IPT_ERROR_TARGET XT_ERROR_TARGET + +/* fn returns 0 to continue iteration */ +#define IPT_MATCH_ITERATE(e, fn, args...) \ + XT_MATCH_ITERATE(struct ipt_entry, e, fn, ## args) + +/* fn returns 0 to continue iteration */ +#define IPT_ENTRY_ITERATE(entries, size, fn, args...) \ + XT_ENTRY_ITERATE(struct ipt_entry, entries, size, fn, ## args) #endif /* Yes, Virginia, you have to zero the padding. */ @@ -116,23 +146,6 @@ struct ipt_entry { #define IPT_SO_GET_REVISION_TARGET (IPT_BASE_CTL + 3) #define IPT_SO_GET_MAX IPT_SO_GET_REVISION_TARGET -#define IPT_CONTINUE XT_CONTINUE -#define IPT_RETURN XT_RETURN - -#include -#define ipt_udp xt_udp -#define ipt_tcp xt_tcp - -#define IPT_TCP_INV_SRCPT XT_TCP_INV_SRCPT -#define IPT_TCP_INV_DSTPT XT_TCP_INV_DSTPT -#define IPT_TCP_INV_FLAGS XT_TCP_INV_FLAGS -#define IPT_TCP_INV_OPTION XT_TCP_INV_OPTION -#define IPT_TCP_INV_MASK XT_TCP_INV_MASK - -#define IPT_UDP_INV_SRCPT XT_UDP_INV_SRCPT -#define IPT_UDP_INV_DSTPT XT_UDP_INV_DSTPT -#define IPT_UDP_INV_MASK XT_UDP_INV_MASK - /* ICMP matching stuff */ struct ipt_icmp { u_int8_t type; /* type to match */ @@ -196,9 +209,6 @@ struct ipt_replace { struct ipt_entry entries[0]; }; -/* The argument to IPT_SO_ADD_COUNTERS. */ -#define ipt_counters_info xt_counters_info - /* The argument to IPT_SO_GET_ENTRIES. */ struct ipt_get_entries { /* Which table: user fills this in. */ @@ -211,11 +221,6 @@ struct ipt_get_entries { struct ipt_entry entrytable[0]; }; -/* Standard return verdict, or do jump. */ -#define IPT_STANDARD_TARGET XT_STANDARD_TARGET -/* Error verdict. */ -#define IPT_ERROR_TARGET XT_ERROR_TARGET - /* Helper functions */ static __inline__ struct xt_entry_target * ipt_get_target(struct ipt_entry *e) @@ -223,16 +228,6 @@ ipt_get_target(struct ipt_entry *e) return (void *)e + e->target_offset; } -#ifndef __KERNEL__ -/* fn returns 0 to continue iteration */ -#define IPT_MATCH_ITERATE(e, fn, args...) \ - XT_MATCH_ITERATE(struct ipt_entry, e, fn, ## args) - -/* fn returns 0 to continue iteration */ -#define IPT_ENTRY_ITERATE(entries, size, fn, args...) \ - XT_ENTRY_ITERATE(struct ipt_entry, entries, size, fn, ## args) -#endif - /* * Main firewall chains definitions and global var's definitions. */ @@ -271,7 +266,7 @@ struct ipt_error { #define IPT_STANDARD_INIT(__verdict) \ { \ .entry = IPT_ENTRY_INIT(sizeof(struct ipt_standard)), \ - .target = XT_TARGET_INIT(IPT_STANDARD_TARGET, \ + .target = XT_TARGET_INIT(XT_STANDARD_TARGET, \ sizeof(struct xt_standard_target)), \ .target.verdict = -(__verdict) - 1, \ } @@ -279,7 +274,7 @@ struct ipt_error { #define IPT_ERROR_INIT \ { \ .entry = IPT_ENTRY_INIT(sizeof(struct ipt_error)), \ - .target = XT_TARGET_INIT(IPT_ERROR_TARGET, \ + .target = XT_TARGET_INIT(XT_ERROR_TARGET, \ sizeof(struct ipt_error_target)), \ .target.errorname = "ERROR", \ } diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 40d11fa0584..ac2b411ea63 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -38,6 +38,29 @@ #define ip6t_entry_target xt_entry_target #define ip6t_standard_target xt_standard_target #define ip6t_counters xt_counters +#define IP6T_CONTINUE XT_CONTINUE +#define IP6T_RETURN XT_RETURN + +/* Pre-iptables-1.4.0 */ +#include +#define ip6t_tcp xt_tcp +#define ip6t_udp xt_udp +#define IP6T_TCP_INV_SRCPT XT_TCP_INV_SRCPT +#define IP6T_TCP_INV_DSTPT XT_TCP_INV_DSTPT +#define IP6T_TCP_INV_FLAGS XT_TCP_INV_FLAGS +#define IP6T_TCP_INV_OPTION XT_TCP_INV_OPTION +#define IP6T_TCP_INV_MASK XT_TCP_INV_MASK +#define IP6T_UDP_INV_SRCPT XT_UDP_INV_SRCPT +#define IP6T_UDP_INV_DSTPT XT_UDP_INV_DSTPT +#define IP6T_UDP_INV_MASK XT_UDP_INV_MASK + +#define ip6t_counters_info xt_counters_info +#define IP6T_STANDARD_TARGET XT_STANDARD_TARGET +#define IP6T_ERROR_TARGET XT_ERROR_TARGET +#define IP6T_MATCH_ITERATE(e, fn, args...) \ + XT_MATCH_ITERATE(struct ip6t_entry, e, fn, ## args) +#define IP6T_ENTRY_ITERATE(entries, size, fn, args...) \ + XT_ENTRY_ITERATE(struct ip6t_entry, entries, size, fn, ## args) #endif /* Yes, Virginia, you have to zero the padding. */ @@ -133,7 +156,7 @@ struct ip6t_error { #define IP6T_STANDARD_INIT(__verdict) \ { \ .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_standard)), \ - .target = XT_TARGET_INIT(IP6T_STANDARD_TARGET, \ + .target = XT_TARGET_INIT(XT_STANDARD_TARGET, \ sizeof(struct xt_standard_target)), \ .target.verdict = -(__verdict) - 1, \ } @@ -141,7 +164,7 @@ struct ip6t_error { #define IP6T_ERROR_INIT \ { \ .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_error)), \ - .target = XT_TARGET_INIT(IP6T_ERROR_TARGET, \ + .target = XT_TARGET_INIT(XT_ERROR_TARGET, \ sizeof(struct ip6t_error_target)), \ .target.errorname = "ERROR", \ } @@ -165,30 +188,6 @@ struct ip6t_error { #define IP6T_SO_GET_REVISION_TARGET (IP6T_BASE_CTL + 5) #define IP6T_SO_GET_MAX IP6T_SO_GET_REVISION_TARGET -/* CONTINUE verdict for targets */ -#define IP6T_CONTINUE XT_CONTINUE - -/* For standard target */ -#define IP6T_RETURN XT_RETURN - -/* TCP/UDP matching stuff */ -#include - -#define ip6t_tcp xt_tcp -#define ip6t_udp xt_udp - -/* Values for "inv" field in struct ipt_tcp. */ -#define IP6T_TCP_INV_SRCPT XT_TCP_INV_SRCPT -#define IP6T_TCP_INV_DSTPT XT_TCP_INV_DSTPT -#define IP6T_TCP_INV_FLAGS XT_TCP_INV_FLAGS -#define IP6T_TCP_INV_OPTION XT_TCP_INV_OPTION -#define IP6T_TCP_INV_MASK XT_TCP_INV_MASK - -/* Values for "invflags" field in struct ipt_udp. */ -#define IP6T_UDP_INV_SRCPT XT_UDP_INV_SRCPT -#define IP6T_UDP_INV_DSTPT XT_UDP_INV_DSTPT -#define IP6T_UDP_INV_MASK XT_UDP_INV_MASK - /* ICMP matching stuff */ struct ip6t_icmp { u_int8_t type; /* type to match */ @@ -252,9 +251,6 @@ struct ip6t_replace { struct ip6t_entry entries[0]; }; -/* The argument to IP6T_SO_ADD_COUNTERS. */ -#define ip6t_counters_info xt_counters_info - /* The argument to IP6T_SO_GET_ENTRIES. */ struct ip6t_get_entries { /* Which table: user fills this in. */ @@ -267,11 +263,6 @@ struct ip6t_get_entries { struct ip6t_entry entrytable[0]; }; -/* Standard return verdict, or do jump. */ -#define IP6T_STANDARD_TARGET XT_STANDARD_TARGET -/* Error verdict. */ -#define IP6T_ERROR_TARGET XT_ERROR_TARGET - /* Helper functions */ static __inline__ struct xt_entry_target * ip6t_get_target(struct ip6t_entry *e) @@ -279,16 +270,6 @@ ip6t_get_target(struct ip6t_entry *e) return (void *)e + e->target_offset; } -#ifndef __KERNEL__ -/* fn returns 0 to continue iteration */ -#define IP6T_MATCH_ITERATE(e, fn, args...) \ - XT_MATCH_ITERATE(struct ip6t_entry, e, fn, ## args) - -/* fn returns 0 to continue iteration */ -#define IP6T_ENTRY_ITERATE(entries, size, fn, args...) \ - XT_ENTRY_ITERATE(struct ip6t_entry, entries, size, fn, ## args) -#endif - /* * Main firewall chains definitions and global var's definitions. */ -- cgit v1.2.3-18-g5258 From 75f0a0fd787bfa3ea1a916ca632a5b9e0007cbb7 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 13 Oct 2010 16:37:45 +0200 Subject: netfilter: xtables: unify {ip,ip6,arp}t_error_target Unification of struct *_error_target was forgotten in v2.6.16-1689-g1e30a01. Signed-off-by: Jan Engelhardt --- include/linux/netfilter/x_tables.h | 5 +++++ include/linux/netfilter_arp/arp_tables.h | 10 +++------- include/linux/netfilter_ipv4/ip_tables.h | 10 +++------- include/linux/netfilter_ipv6/ip6_tables.h | 10 +++------- 4 files changed, 14 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 24e5d01d27d..742bec05144 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -66,6 +66,11 @@ struct xt_standard_target { int verdict; }; +struct xt_error_target { + struct xt_entry_target target; + char errorname[XT_FUNCTION_MAXNAMELEN]; +}; + /* The argument to IPT_SO_GET_REVISION_*. Returns highest revision * kernel supports, if >= revision. */ struct xt_get_revision { diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 6e2341a811d..f02d57436a3 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -26,6 +26,7 @@ #define ARPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN #define arpt_entry_target xt_entry_target #define arpt_standard_target xt_standard_target +#define arpt_error_target xt_error_target #define ARPT_CONTINUE XT_CONTINUE #define ARPT_RETURN XT_RETURN #define arpt_counters_info xt_counters_info @@ -216,14 +217,9 @@ struct arpt_standard { struct xt_standard_target target; }; -struct arpt_error_target { - struct xt_entry_target target; - char errorname[XT_FUNCTION_MAXNAMELEN]; -}; - struct arpt_error { struct arpt_entry entry; - struct arpt_error_target target; + struct xt_error_target target; }; #define ARPT_ENTRY_INIT(__size) \ @@ -244,7 +240,7 @@ struct arpt_error { { \ .entry = ARPT_ENTRY_INIT(sizeof(struct arpt_error)), \ .target = XT_TARGET_INIT(XT_ERROR_TARGET, \ - sizeof(struct arpt_error_target)), \ + sizeof(struct xt_error_target)), \ .target.errorname = "ERROR", \ } diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index ee54b3b7e23..d0fef0a436f 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -37,6 +37,7 @@ #define ipt_entry_match xt_entry_match #define ipt_entry_target xt_entry_target #define ipt_standard_target xt_standard_target +#define ipt_error_target xt_error_target #define ipt_counters xt_counters #define IPT_CONTINUE XT_CONTINUE #define IPT_RETURN XT_RETURN @@ -247,14 +248,9 @@ struct ipt_standard { struct xt_standard_target target; }; -struct ipt_error_target { - struct xt_entry_target target; - char errorname[XT_FUNCTION_MAXNAMELEN]; -}; - struct ipt_error { struct ipt_entry entry; - struct ipt_error_target target; + struct xt_error_target target; }; #define IPT_ENTRY_INIT(__size) \ @@ -275,7 +271,7 @@ struct ipt_error { { \ .entry = IPT_ENTRY_INIT(sizeof(struct ipt_error)), \ .target = XT_TARGET_INIT(XT_ERROR_TARGET, \ - sizeof(struct ipt_error_target)), \ + sizeof(struct xt_error_target)), \ .target.errorname = "ERROR", \ } diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index ac2b411ea63..dca11186e52 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -37,6 +37,7 @@ #define ip6t_entry_match xt_entry_match #define ip6t_entry_target xt_entry_target #define ip6t_standard_target xt_standard_target +#define ip6t_error_target xt_error_target #define ip6t_counters xt_counters #define IP6T_CONTINUE XT_CONTINUE #define IP6T_RETURN XT_RETURN @@ -137,14 +138,9 @@ struct ip6t_standard { struct xt_standard_target target; }; -struct ip6t_error_target { - struct xt_entry_target target; - char errorname[XT_FUNCTION_MAXNAMELEN]; -}; - struct ip6t_error { struct ip6t_entry entry; - struct ip6t_error_target target; + struct xt_error_target target; }; #define IP6T_ENTRY_INIT(__size) \ @@ -165,7 +161,7 @@ struct ip6t_error { { \ .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_error)), \ .target = XT_TARGET_INIT(XT_ERROR_TARGET, \ - sizeof(struct ip6t_error_target)), \ + sizeof(struct xt_error_target)), \ .target.errorname = "ERROR", \ } -- cgit v1.2.3-18-g5258 From 9ecdafd883db3c43296797382fc0b2c868144070 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 13 Oct 2010 16:42:02 +0200 Subject: netfilter: xtables: remove unused defines Signed-off-by: Jan Engelhardt --- include/linux/netfilter_arp/arp_tables.h | 4 ---- include/linux/netfilter_ipv4/ip_tables.h | 4 ---- include/linux/netfilter_ipv6/ip6_tables.h | 4 ---- 3 files changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index f02d57436a3..adbf4bff87e 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -255,8 +255,6 @@ extern unsigned int arpt_do_table(struct sk_buff *skb, const struct net_device *out, struct xt_table *table); -#define ARPT_ALIGN(s) XT_ALIGN(s) - #ifdef CONFIG_COMPAT #include @@ -275,8 +273,6 @@ compat_arpt_get_target(struct compat_arpt_entry *e) return (void *)e + e->target_offset; } -#define COMPAT_ARPT_ALIGN(s) COMPAT_XT_ALIGN(s) - #endif /* CONFIG_COMPAT */ #endif /*__KERNEL__*/ #endif /* _ARPTABLES_H */ diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index d0fef0a436f..64a5d95c58e 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -282,8 +282,6 @@ extern unsigned int ipt_do_table(struct sk_buff *skb, const struct net_device *out, struct xt_table *table); -#define IPT_ALIGN(s) XT_ALIGN(s) - #ifdef CONFIG_COMPAT #include @@ -304,8 +302,6 @@ compat_ipt_get_target(struct compat_ipt_entry *e) return (void *)e + e->target_offset; } -#define COMPAT_IPT_ALIGN(s) COMPAT_XT_ALIGN(s) - #endif /* CONFIG_COMPAT */ #endif /*__KERNEL__*/ #endif /* _IPTABLES_H */ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index dca11186e52..c9784f7a9c1 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -292,8 +292,6 @@ extern int ip6t_ext_hdr(u8 nexthdr); extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, int target, unsigned short *fragoff); -#define IP6T_ALIGN(s) XT_ALIGN(s) - #ifdef CONFIG_COMPAT #include @@ -313,8 +311,6 @@ compat_ip6t_get_target(struct compat_ip6t_entry *e) return (void *)e + e->target_offset; } -#define COMPAT_IP6T_ALIGN(s) COMPAT_XT_ALIGN(s) - #endif /* CONFIG_COMPAT */ #endif /*__KERNEL__*/ #endif /* _IP6_TABLES_H */ -- cgit v1.2.3-18-g5258 From 892b6f90db81cccb723d5d92f4fddc2d68b206e1 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 13 Oct 2010 21:18:03 +0200 Subject: block: Ensure physical block size is unsigned int Physical block size was declared unsigned int to accomodate the maximum size reported by READ CAPACITY(16). Make sure we use the right type in the related functions. Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Cc: stable@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1341df5806d..8f3dd981b97 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -860,7 +860,7 @@ extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); -extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); +extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); @@ -1013,7 +1013,7 @@ static inline unsigned int queue_physical_block_size(struct request_queue *q) return q->limits.physical_block_size; } -static inline int bdev_physical_block_size(struct block_device *bdev) +static inline unsigned int bdev_physical_block_size(struct block_device *bdev) { return queue_physical_block_size(bdev_get_queue(bdev)); } -- cgit v1.2.3-18-g5258 From 10d8dad8453f8648a448960d7a2d3d983dfe0ed3 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 12 Oct 2010 07:07:42 +0200 Subject: wext: fix alignment problem in serializing 'struct iw_point' wext: fix alignment problem in serializing 'struct iw_point' This fixes a typo in the definition of the serialized length of struct iw_point: a) wireless.h is exported to userspace, the typo causes IW_EV_POINT_PK_LEN to be 12 on 64-bit, and 8 on 32-bit systems (causing misalignment); b) in compat-64 mode iwe_stream_add_point() memcpys overlap (see below). The second case in in compat-64 mode looks like (variable names are as in include/net/iw_handler.h:iwe_stream_add_point()): point_len = IW_EV_COMPAT_POINT_LEN = 8 lcp_len = IW_EV_COMPAT_LCP_LEN = 4 2nd memcpy: IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN = 12 - 4 = 8 IW_EV_LCP_PK_LEN <--------------> *---> 'extra' data area +-------+-------+-------+-------+---------------+------- ...-+ | len | cmd |length | flags | (empty) -> extra ... | +-------+-------+-------+-------+---------------+------- ...-+ 2 2 2 2 4 lcp_len <--------------> <-!! OVERLAP !!> <--1st memcpy--><------- 2nd memcpy -----------> <---- 3rd memcpy ------- ... > <--------- point_len ----------> This case could cause overrun whenever iw_point.length < 4. The other two cases are - * 32-bit systems: IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN = 8 - 4 = 4, the second memcpy copies exactly the 4 required bytes; * 64-bit systems: IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN = 12 - 4 = 8, the second memcpy copies a superfluous (but non overlapping) 4 bytes. The patch changes IW_EV_POINT_PK_LEN to be 8, so that in all 3 cases always only the requested iw_point.{length,flags} (both __u16) are copied, avoiding overrrun (compat-64) and superfluous copy (64-bit). In addition, the userspace header is sanitized (in agreement with version 30 of the wireless tools). Many thanks to Johannes Berg for help and review with this patch. Signed-off-by: Gerrit Renker Signed-off-by: John W. Linville --- include/linux/wireless.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/wireless.h b/include/linux/wireless.h index e6827eedf18..4395b28bb86 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -1157,6 +1157,6 @@ struct __compat_iw_event { #define IW_EV_PARAM_PK_LEN (IW_EV_LCP_PK_LEN + sizeof(struct iw_param)) #define IW_EV_ADDR_PK_LEN (IW_EV_LCP_PK_LEN + sizeof(struct sockaddr)) #define IW_EV_QUAL_PK_LEN (IW_EV_LCP_PK_LEN + sizeof(struct iw_quality)) -#define IW_EV_POINT_PK_LEN (IW_EV_LCP_LEN + 4) +#define IW_EV_POINT_PK_LEN (IW_EV_LCP_PK_LEN + 4) #endif /* _LINUX_WIRELESS_H */ -- cgit v1.2.3-18-g5258 From 271733cf844a2f5f186ef3b40c26d6397b71039a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Oct 2010 12:06:23 +0200 Subject: cfg80211: notify drivers about frame registrations Drivers may need to adjust their filters according to frame registrations, so notify them about them. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 24d5b586927..2a7936d7851 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1147,6 +1147,9 @@ struct cfg80211_pmksa { * allows the driver to adjust the dynamic ps timeout value. * @set_cqm_rssi_config: Configure connection quality monitor RSSI threshold. * + * @mgmt_frame_register: Notify driver that a management frame type was + * registered. Note that this callback may not sleep, and cannot run + * concurrently with itself. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy); @@ -1297,6 +1300,10 @@ struct cfg80211_ops { int (*set_cqm_rssi_config)(struct wiphy *wiphy, struct net_device *dev, s32 rssi_thold, u32 rssi_hyst); + + void (*mgmt_frame_register)(struct wiphy *wiphy, + struct net_device *dev, + u16 frame_type, bool reg); }; /* -- cgit v1.2.3-18-g5258 From 7be5086d4cb7cceb71d724a9524d5e927785d04f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Oct 2010 12:06:24 +0200 Subject: mac80211: add probe request filter flag Using the frame registration notification, we can see when probe requests are requested and notify the low-level driver via filtering. The flag is also set in AP and IBSS modes. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 33aa2e39147..9fdf982d128 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1478,12 +1478,14 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * honour this flag if possible. * * @FIF_CONTROL: pass control frames (except for PS Poll), if PROMISC_IN_BSS - * is not set then only those addressed to this station. + * is not set then only those addressed to this station. * * @FIF_OTHER_BSS: pass frames destined to other BSSes * - * @FIF_PSPOLL: pass PS Poll frames, if PROMISC_IN_BSS is not set then only - * those addressed to this station. + * @FIF_PSPOLL: pass PS Poll frames, if PROMISC_IN_BSS is not set then only + * those addressed to this station. + * + * @FIF_PROBE_REQ: pass probe request frames */ enum ieee80211_filter_flags { FIF_PROMISC_IN_BSS = 1<<0, @@ -1494,6 +1496,7 @@ enum ieee80211_filter_flags { FIF_CONTROL = 1<<5, FIF_OTHER_BSS = 1<<6, FIF_PSPOLL = 1<<7, + FIF_PROBE_REQ = 1<<8, }; /** -- cgit v1.2.3-18-g5258 From b3d6255388de0680a14f0907deb7b7f4fa0d25d5 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Tue, 12 Oct 2010 20:14:43 +0000 Subject: Phonet: 'connect' socket implementation for Pipe controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on suggestion by Rémi Denis-Courmont to implement 'connect' for Pipe controller logic, this patch implements 'connect' socket call for the Pipe controller logic. The patch does following:- - Removes setsockopts for PNPIPE_CREATE and PNPIPE_DESTROY - Adds setsockopt for setting the Pipe handle value - Implements connect socket call - Updates the Pipe controller logic User-space should now follow below sequence with Pipe controller:- -socket -bind -setsockopt for PNPIPE_PIPE_HANDLE -connect -setsockopt for PNPIPE_ENCAP_IP -setsockopt for PNPIPE_ENABLE GPRS/3G data has been tested working fine with this. Signed-off-by: Kumar Sanghvi Acked-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 3 +-- include/net/phonet/pep.h | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index e27cbf93174..26c8df78691 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -36,10 +36,9 @@ /* Socket options for SOL_PNPIPE level */ #define PNPIPE_ENCAP 1 #define PNPIPE_IFINDEX 2 -#define PNPIPE_CREATE 3 +#define PNPIPE_PIPE_HANDLE 3 #define PNPIPE_ENABLE 4 /* unused slot */ -#define PNPIPE_DESTROY 6 #define PNADDR_ANY 0 #define PNADDR_BROADCAST 0xFC diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index def6cfa3f45..b60b28c99e8 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -46,8 +46,8 @@ struct pep_sock { u8 init_enable; /* auto-enable at creation */ u8 aligned; #ifdef CONFIG_PHONET_PIPECTRLR - u16 remote_pep; - u8 pipe_state; + u8 pipe_state; + struct sockaddr_pn remote_pep; #endif }; -- cgit v1.2.3-18-g5258 From 4f0e332239e2b5f79757cb8f8f3db16c66f5d220 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Fri, 24 Sep 2010 13:34:42 -0500 Subject: powerpc/fsl-booke: Add PCI device ids for P2040/P3041/P5010/P5020 QoirQ chips Signed-off-by: Kumar Gala --- include/linux/pci_ids.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 570fddeb038..f69dfe5e01e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2315,6 +2315,14 @@ #define PCI_DEVICE_ID_P4080 0x0401 #define PCI_DEVICE_ID_P4040E 0x0408 #define PCI_DEVICE_ID_P4040 0x0409 +#define PCI_DEVICE_ID_P2040E 0x0410 +#define PCI_DEVICE_ID_P2040 0x0411 +#define PCI_DEVICE_ID_P3041E 0x041E +#define PCI_DEVICE_ID_P3041 0x041F +#define PCI_DEVICE_ID_P5020E 0x0420 +#define PCI_DEVICE_ID_P5020 0x0421 +#define PCI_DEVICE_ID_P5010E 0x0428 +#define PCI_DEVICE_ID_P5010 0x0429 #define PCI_DEVICE_ID_MPC8641 0x7010 #define PCI_DEVICE_ID_MPC8641D 0x7011 #define PCI_DEVICE_ID_MPC8610 0x7018 -- cgit v1.2.3-18-g5258 From 087a4eb55971dfcc8df18312faf9393d0a479f3a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 14 Oct 2010 12:10:30 +0900 Subject: stopmachine: Define __stop_machine when CONFIG_STOP_MACHINE=n Define dummy __stop_machine() function even when CONFIG_STOP_MACHINE=n. This getcpu-required version of stop_machine() will be used from poke_text_smp(). Signed-off-by: Masami Hiramatsu Acked-by: Tejun Heo Cc: Rusty Russell Cc: Ananth N Mavinakayanahalli Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Peter Zijlstra LKML-Reference: <20101014031030.4100.34156.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Ingo Molnar --- include/linux/stop_machine.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 6b524a0d02e..1808960c505 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -126,8 +126,8 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); #else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ -static inline int stop_machine(int (*fn)(void *), void *data, - const struct cpumask *cpus) +static inline int __stop_machine(int (*fn)(void *), void *data, + const struct cpumask *cpus) { int ret; local_irq_disable(); @@ -136,5 +136,11 @@ static inline int stop_machine(int (*fn)(void *), void *data, return ret; } +static inline int stop_machine(int (*fn)(void *), void *data, + const struct cpumask *cpus) +{ + return __stop_machine(fn, data, cpus); +} + #endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ #endif /* _LINUX_STOP_MACHINE */ -- cgit v1.2.3-18-g5258 From 265be2d09853d425ad14a61cda0ca63345613d0c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 31 May 2010 10:14:17 +0200 Subject: drbd: Finished the "on-no-data-accessible suspend-io;" functionality When no data is accessible (no connection to the peer, nor a local disk) allow the user to select to freeze all IO operations instead of getting IO errors. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 5 +++++ include/linux/drbd_limits.h | 1 + include/linux/drbd_nl.h | 1 + 3 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 479ee3a1d90..7be069fcca5 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -91,6 +91,11 @@ enum drbd_after_sb_p { ASB_VIOLENTLY }; +enum drbd_on_no_data { + OND_IO_ERROR, + OND_SUSPEND_IO +}; + /* KEEP the order, do not delete or insert. Only append. */ enum drbd_ret_codes { ERR_CODE_BASE = 100, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 440b42e38e8..7eb1e98009e 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -128,6 +128,7 @@ #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT #define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT +#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR #define DRBD_MAX_BIO_BVECS_MIN 0 #define DRBD_MAX_BIO_BVECS_MAX 128 diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index 5f042810a56..9aebd0d80a5 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -87,6 +87,7 @@ NL_PACKET(syncer_conf, 8, NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32) NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX) NL_BIT( 65, T_MAY_IGNORE, use_rle) + NL_INTEGER( 75, T_MAY_IGNORE, on_no_data) ) NL_PACKET(invalidate, 9, ) -- cgit v1.2.3-18-g5258 From 47ff2d0a8e7ce87fed180729e8341f650bf585c8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 18 Jun 2010 13:56:57 +0200 Subject: drbd: Do not allow a fencing-policy of resource-and-stonith with protocol A Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 7be069fcca5..0b2bfb58d9c 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -145,6 +145,7 @@ enum drbd_ret_codes { ERR_CONNECTED = 151, /* DRBD 8.3 only */ ERR_PERM = 152, ERR_NEED_APV_93 = 153, + ERR_STONITH_AND_PROT_A = 154, /* insert new ones above this line */ AFTER_LAST_ERR_CODE -- cgit v1.2.3-18-g5258 From 9a31d7164d409ca59cfadb7957ac7b0acf4545b8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 5 Jul 2010 13:42:03 +0200 Subject: drbd: New sync parameters for the smart resync rate controller Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 24 ++++++++++++------------ include/linux/drbd_nl.h | 4 ++++ 2 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 7eb1e98009e..06dbba47a8e 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -134,21 +134,21 @@ #define DRBD_MAX_BIO_BVECS_MAX 128 #define DRBD_MAX_BIO_BVECS_DEF 0 -#define DRBD_DP_VOLUME_MIN 4 -#define DRBD_DP_VOLUME_MAX 1048576 -#define DRBD_DP_VOLUME_DEF 16384 +#define DRBD_C_PLAN_AHEAD_MIN 0 +#define DRBD_C_PLAN_AHEAD_MAX 300 +#define DRBD_C_PLAN_AHEAD_DEF 0 /* RS rate controller disabled by default */ -#define DRBD_DP_INTERVAL_MIN 1 -#define DRBD_DP_INTERVAL_MAX 600 -#define DRBD_DP_INTERVAL_DEF 5 +#define DRBD_C_DELAY_TARGET_MIN 1 +#define DRBD_C_DELAY_TARGET_MAX 100 +#define DRBD_C_DELAY_TARGET_DEF 10 -#define DRBD_RS_THROTTLE_TH_MIN 1 -#define DRBD_RS_THROTTLE_TH_MAX 600 -#define DRBD_RS_THROTTLE_TH_DEF 20 +#define DRBD_C_FILL_TARGET_MIN 0 +#define DRBD_C_FILL_TARGET_MAX 100000 +#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */ -#define DRBD_RS_HOLD_OFF_TH_MIN 1 -#define DRBD_RS_HOLD_OFF_TH_MAX 6000 -#define DRBD_RS_HOLD_OFF_TH_DEF 100 +#define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */ +#define DRBD_C_MAX_RATE_MAX (4 << 20) +#define DRBD_C_MAX_RATE_DEF 102400 #undef RANGE #endif diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index 9aebd0d80a5..e23683c87ca 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -88,6 +88,10 @@ NL_PACKET(syncer_conf, 8, NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX) NL_BIT( 65, T_MAY_IGNORE, use_rle) NL_INTEGER( 75, T_MAY_IGNORE, on_no_data) + NL_INTEGER( 76, T_MAY_IGNORE, c_plan_ahead) + NL_INTEGER( 77, T_MAY_IGNORE, c_delay_target) + NL_INTEGER( 78, T_MAY_IGNORE, c_fill_target) + NL_INTEGER( 79, T_MAY_IGNORE, c_max_rate) ) NL_PACKET(invalidate, 9, ) -- cgit v1.2.3-18-g5258 From 0f0601f4ea2f53cfd8bcae060fb03d9bbde070ec Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 11 Aug 2010 23:40:24 +0200 Subject: drbd: new configuration parameter c-min-rate We now track the data rate of locally submitted resync related requests, and can thus detect non-resync activity on the lower level device. If the current sync rate is above c-min-rate, and the lower level device appears to be busy, we throttle the resyncer. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 4 ++++ include/linux/drbd_nl.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 06dbba47a8e..0b24ded6fff 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -150,5 +150,9 @@ #define DRBD_C_MAX_RATE_MAX (4 << 20) #define DRBD_C_MAX_RATE_DEF 102400 +#define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */ +#define DRBD_C_MIN_RATE_MAX (4 << 20) +#define DRBD_C_MIN_RATE_DEF 4096 + #undef RANGE #endif diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index e23683c87ca..ade91107c9a 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -92,6 +92,7 @@ NL_PACKET(syncer_conf, 8, NL_INTEGER( 77, T_MAY_IGNORE, c_delay_target) NL_INTEGER( 78, T_MAY_IGNORE, c_fill_target) NL_INTEGER( 79, T_MAY_IGNORE, c_max_rate) + NL_INTEGER( 80, T_MAY_IGNORE, c_min_rate) ) NL_PACKET(invalidate, 9, ) -- cgit v1.2.3-18-g5258 From 0b70a13dac014ec9274640b9e945bde493ba365e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 20 Aug 2010 13:36:10 +0200 Subject: drbd: Sending of big packets, for payloads from 64KByte to 4GByte Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 0b2bfb58d9c..89718a39791 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -318,6 +318,8 @@ enum drbd_timeout_flag { #define DRBD_MAGIC 0x83740267 #define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC) +#define DRBD_MAGIC_BIG 0x835a +#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG) /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 -- cgit v1.2.3-18-g5258 From fb22c402ffdf61dd121795b5809de587185d5240 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 8 Sep 2010 23:20:21 +0200 Subject: drbd: Track the reasons to suspend IO in dedicated state bits There are three ways to get IO suspended: * Loss of any access to data * Fence-peer-handler running * User requested to suspend IO Track those in different bits, so that one condition clearing its state bit does not interfere with the other two conditions. Only when the user resumes IO he overrules all three bits. The fact is hidden from the user, he sees only a single suspend bit. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 89718a39791..5e72a5d3d48 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -232,13 +232,17 @@ union drbd_state { unsigned conn:5 ; /* 17/32 cstates */ unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ - unsigned susp:1 ; /* 2/2 IO suspended no/yes */ + unsigned susp:1 ; /* 2/2 IO suspended no/yes (by user) */ unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ unsigned peer_isp:1 ; unsigned user_isp:1 ; - unsigned _pad:11; /* 0 unused */ + unsigned susp_nod:1 ; /* IO suspended because no data */ + unsigned susp_fen:1 ; /* IO suspended because fence peer handler runs*/ + unsigned _pad:9; /* 0 unused */ #elif defined(__BIG_ENDIAN_BITFIELD) - unsigned _pad:11; /* 0 unused */ + unsigned _pad:9; + unsigned susp_fen:1 ; + unsigned susp_nod:1 ; unsigned user_isp:1 ; unsigned peer_isp:1 ; unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ -- cgit v1.2.3-18-g5258 From 00b425377d60e67e86721d4ce6d7cbf131a5d0fd Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 5 Oct 2010 11:19:39 +0200 Subject: drbd: Allow larger values for c-fill-target. Connections through a compressing proxy might have more bits on the fly. 500MByte instead of 50MByte Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 0b24ded6fff..4ac33f34b77 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -143,7 +143,7 @@ #define DRBD_C_DELAY_TARGET_DEF 10 #define DRBD_C_FILL_TARGET_MIN 0 -#define DRBD_C_FILL_TARGET_MAX 100000 +#define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */ #define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */ #define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */ -- cgit v1.2.3-18-g5258 From 22cc37a943832c948808884604ec6f5ff2594c1d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 14 Sep 2010 20:40:41 +0200 Subject: drbd: fix unlikely access after free and list corruption Various cleanup paths have been incomplete, for the very unlikely case that we cannot allocate enough bios from process context when submitting on behalf of the peer or resync process. Never observed. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 5e72a5d3d48..da7d9bd4f3f 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,10 +53,10 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.8.1" +#define REL_VERSION "8.3.9rc1" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 94 +#define PRO_VERSION_MAX 95 enum drbd_io_error_p { -- cgit v1.2.3-18-g5258 From 0eead9ab41da33644ae2c97c57ad03da636a0422 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 14 Oct 2010 10:57:40 -0700 Subject: Don't dump task struct in a.out core-dumps akiphie points out that a.out core-dumps have that odd task struct dumping that was never used and was never really a good idea (it goes back into the mists of history, probably the original core-dumping code). Just remove it. Also do the access_ok() check on dump_write(). It probably doesn't matter (since normal filesystems all seem to do it anyway), but he points out that it's normally done by the VFS layer, so ... [ I suspect that we should possibly do "vfs_write()" instead of calling ->write directly. That also does the whole fsnotify and write statistics thing, which may or may not be a good idea. ] And just to be anal, do this all for the x86-64 32-bit a.out emulation code too, even though it's not enabled (and won't currently even compile) Reported-by: akiphie Signed-off-by: Linus Torvalds --- include/linux/coredump.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 8ba66a9d902..59579cfee6a 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -11,7 +11,7 @@ */ static inline int dump_write(struct file *file, const void *addr, int nr) { - return file->f_op->write(file, addr, nr, &file->f_pos) == nr; + return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; } static inline int dump_seek(struct file *file, loff_t off) -- cgit v1.2.3-18-g5258 From 3aa0ce825ade0cf5506e32ccf51d01fc8d22a9cf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 14 Oct 2010 14:32:06 -0700 Subject: Un-inline the core-dump helper functions Tony Luck reports that the addition of the access_ok() check in commit 0eead9ab41da ("Don't dump task struct in a.out core-dumps") broke the ia64 compile due to missing the necessary header file includes. Rather than add yet another include () to make everything happy, just uninline the silly core dump helper functions and move the bodies to fs/exec.c where they make a lot more sense. dump_seek() in particular was too big to be an inline function anyway, and none of them are in any way performance-critical. And we really don't need to mess up our include file headers more than they already are. Reported-and-tested-by: Tony Luck Signed-off-by: Linus Torvalds --- include/linux/coredump.h | 34 ++-------------------------------- 1 file changed, 2 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 59579cfee6a..ba4b85a6d9b 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -9,37 +9,7 @@ * These are the only things you should do on a core-file: use only these * functions to write out all the necessary info. */ -static inline int dump_write(struct file *file, const void *addr, int nr) -{ - return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; -} - -static inline int dump_seek(struct file *file, loff_t off) -{ - int ret = 1; - - if (file->f_op->llseek && file->f_op->llseek != no_llseek) { - if (file->f_op->llseek(file, off, SEEK_CUR) < 0) - return 0; - } else { - char *buf = (char *)get_zeroed_page(GFP_KERNEL); - - if (!buf) - return 0; - while (off > 0) { - unsigned long n = off; - - if (n > PAGE_SIZE) - n = PAGE_SIZE; - if (!dump_write(file, buf, n)) { - ret = 0; - break; - } - off -= n; - } - free_page((unsigned long)buf); - } - return ret; -} +extern int dump_write(struct file *file, const void *addr, int nr); +extern int dump_seek(struct file *file, loff_t off); #endif /* _LINUX_COREDUMP_H */ -- cgit v1.2.3-18-g5258 From 28085bc5de19cad365bcff98e9c8785c397c7c36 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 15 Oct 2010 16:46:37 +0900 Subject: sh: clkfwk: support clock remapping. This implements support for ioremapping of register windows that encapsulate clock control registers used by a struct clk, with transparent sibling inheritance. Root clocks at the top of a given topology often encapsulate the entire register space of all of their sibling clocks, so this mapping can be done once and handed down. A given clock enable/disable case maps out to a single bit in a shared register, so this prevents creating multiple overlapping mappings. The mapping case breaks down in to a couple of different situations: - Sibling clocks without a specific mapping. - Root clocks without a specific mapping. - Any of sibling/root clocks with a specific mapping. Sibling clocks with no specified mapping will grovel up the clock chain and install the root clock mapping unconditionally at registration time. Root clocks without their own mappings have a dummy BSS-initialized mapping inserted that is handed down the chain just like any other mapping. This permits all of the sibling clock ops to read/write using the mapping offsets without any special configuration, enabling them to not care whether access ultimately goes through translatable or untranslatable memory. Any clock with its own mapping will have the window initialized at registration time and be ready for use by its clock ops. Failure to establish the mapping will prevent registration, so no additional sanity checks are needed. Sibling clocks that double as parents for the moment will not propagate their mapping down, but this is easily tunable if the need arises. All clock mappings are kref refcounted, with each instance of mapping inheritance incrementing the refcount. Tested-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- include/linux/sh_clk.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h index ecdfea54a49..8ae37707a4a 100644 --- a/include/linux/sh_clk.h +++ b/include/linux/sh_clk.h @@ -4,11 +4,20 @@ #include #include #include +#include +#include #include #include struct clk; +struct clk_mapping { + phys_addr_t phys; + void __iomem *base; + unsigned long len; + struct kref ref; +}; + struct clk_ops { void (*init)(struct clk *clk); int (*enable)(struct clk *clk); @@ -42,6 +51,7 @@ struct clk { unsigned long arch_flags; void *priv; struct dentry *dentry; + struct clk_mapping *mapping; struct cpufreq_frequency_table *freq_table; }; -- cgit v1.2.3-18-g5258 From be70e2671b95a8982ff133ebaafff6399ad393d4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 14 Oct 2010 11:58:20 +0200 Subject: dynamic_debug.h: Fix dynamic_dev_dbg() macro if CONFIG_DYNAMIC_DEBUG not set Signed-off-by: Philipp Reisner --- include/linux/dynamic_debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 52c0da4bdd1..81bc20e36ec 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -80,7 +80,7 @@ static inline int ddebug_remove_module(const char *mod) #define dynamic_pr_debug(fmt, ...) \ do { if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0) -#define dynamic_dev_dbg(dev, format, ...) \ +#define dynamic_dev_dbg(dev, fmt, ...) \ do { if (0) dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); } while (0) #endif -- cgit v1.2.3-18-g5258 From f586903d27e2503a3e7d427b3d665bbaf1b7f4d4 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 15 Oct 2010 18:17:35 +0900 Subject: sh: clkfwk: Abstract rate rounding helper. Presently the only assisted rate rounding is frequency table backed, but there are cases where it's impractical to use a frequency table for certain clocks (such as the FSIDIV case, which supports 65535 divisors), and we wish to reuse the same rate rounding algorithm. This breaks out the core of the rate rounding logic in to its own helper routine and shuffles the frequency table logic around, switching to using an iterator for the generic helper routine. Signed-off-by: Paul Mundt --- include/linux/sh_clk.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h index 8ae37707a4a..49f6e9b6eda 100644 --- a/include/linux/sh_clk.h +++ b/include/linux/sh_clk.h @@ -53,6 +53,7 @@ struct clk { struct dentry *dentry; struct clk_mapping *mapping; struct cpufreq_frequency_table *freq_table; + unsigned int nr_freqs; }; #define CLK_ENABLE_ON_INIT (1 << 0) -- cgit v1.2.3-18-g5258 From 8e122db61c98debbc35e26dd29504958cbcf2cbb Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 15 Oct 2010 18:33:24 +0900 Subject: sh: clkfwk: Add a helper for rate rounding by divisor ranges. This adds a new clk_rate_div_range_round() for implementing rate rounding by divisor ranges. This can be used trivially by clocks that support arbitrary ranged divisors without the need for rate table construction. This should only be used by clocks that both have large divisor ranges in addition to clocks that will never be arbitrarily scaled, as the lack of a backing frequency table will prevent cpufreq from being able to do much of anything with them. Primarily intended for use as a ->recalc helper. Signed-off-by: Paul Mundt --- include/linux/sh_clk.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h index 49f6e9b6eda..4dca992f309 100644 --- a/include/linux/sh_clk.h +++ b/include/linux/sh_clk.h @@ -119,6 +119,9 @@ int clk_rate_table_find(struct clk *clk, struct cpufreq_frequency_table *freq_table, unsigned long rate); +long clk_rate_div_range_round(struct clk *clk, unsigned int div_min, + unsigned int div_max, unsigned long rate); + #define SH_CLK_MSTP32(_parent, _enable_reg, _enable_bit, _flags) \ { \ .parent = _parent, \ -- cgit v1.2.3-18-g5258 From b3b3a9b63f2deacfd59137e3781211d21a568ca9 Mon Sep 17 00:00:00 2001 From: Anand Gadiyar Date: Thu, 14 Oct 2010 11:31:42 -0400 Subject: oprofile: fix linker errors Commit e9677b3ce (oprofile, ARM: Use oprofile_arch_exit() to cleanup on failure) caused oprofile_perf_exit to be called in the cleanup path of oprofile_perf_init. The __exit tag for oprofile_perf_exit should therefore be dropped. The same has to be done for exit_driverfs as well, as this function is called from oprofile_perf_exit. Else, we get the following two linker errors. LD .tmp_vmlinux1 `oprofile_perf_exit' referenced in section `.init.text' of arch/arm/oprofile/built-in.o: defined in discarded section `.exit.text' of arch/arm/oprofile/built-in.o make: *** [.tmp_vmlinux1] Error 1 LD .tmp_vmlinux1 `exit_driverfs' referenced in section `.text' of arch/arm/oprofile/built-in.o: defined in discarded section `.exit.text' of arch/arm/oprofile/built-in.o make: *** [.tmp_vmlinux1] Error 1 Signed-off-by: Anand Gadiyar Cc: Will Deacon Signed-off-by: Robert Richter --- include/linux/oprofile.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index d67a8330b41..32fb81212fd 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -188,7 +188,7 @@ int oprofile_write_commit(struct op_entry *entry); #ifdef CONFIG_PERF_EVENTS int __init oprofile_perf_init(struct oprofile_operations *ops); -void __exit oprofile_perf_exit(void); +void oprofile_perf_exit(void); char *op_name_from_perf_id(void); #endif /* CONFIG_PERF_EVENTS */ -- cgit v1.2.3-18-g5258 From 5dbfe7aedf54aa7f62fd659e34371d4ea0e7bffe Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 15 Oct 2010 09:52:46 +0200 Subject: drbd: add race-breaker to drbd_go_diskless This adds a necessary race breaker to these commits: drbd: fix for possible deadlock on IO error during resync drbd: drop wrong debug asserts, fix recently introduced race What we do is get a refcount, check the state, then depending on the state and the requested minimum disk state, either hold it (success), or give it back immediately (failed "try lock"). Some code paths (flushing of drbd metadata) may still grab and hold a refcount even if we are D_FAILED (application IO won't). So even if we hit local_cnt == 0 once after being D_FAILED, we still need to wait for that again after we changed to D_DISKLESS. Once local_cnt reaches 0 while we are D_DISKLESS, we can be sure that no one will look at the protected members anymore, so only then is it safe to free them. We cannot easily convert to standard locking primitives here, as we want to be able to use it in atomic context (we always do a "try lock"), as well as hold references for a "long time" (from IO submission to completion callback). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index da7d9bd4f3f..9b2a0158f39 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.9rc1" +#define REL_VERSION "8.3.9rc2" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 95 -- cgit v1.2.3-18-g5258 From 495d2b3883682fcd1c3dee3a45e38fd00154ae25 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 15 Oct 2010 15:49:20 +0200 Subject: block: Make the integrity mapped property a bio flag Previously we tracked whether the integrity metadata had been remapped using a request flag. This was fine for low-level retries. However, if an I/O was redriven by upper layers we would end up remapping again, causing the retry to fail. Deprecate the REQ_INTEGRITY flag and introduce BIO_MAPPED_INTEGRITY which enables filesystems to notify lower layers that the bio in question has already been remapped. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 10a0c291b55..d36629620a4 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -97,6 +97,7 @@ struct bio { #define BIO_NULL_MAPPED 9 /* contains invalid user pages */ #define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */ #define BIO_QUIET 11 /* Make BIO Quiet */ +#define BIO_MAPPED_INTEGRITY 12/* integrity metadata has been remapped */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) /* @@ -148,7 +149,6 @@ enum rq_flag_bits { __REQ_ORDERED_COLOR, /* is before or after barrier */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ - __REQ_INTEGRITY, /* integrity metadata has been remapped */ __REQ_FLUSH, /* request for cache flush */ __REQ_IO_STAT, /* account I/O stat */ __REQ_MIXED_MERGE, /* merge of different types, fail separately */ @@ -190,7 +190,6 @@ enum rq_flag_bits { #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) #define REQ_ALLOCED (1 << __REQ_ALLOCED) #define REQ_COPY_USER (1 << __REQ_COPY_USER) -#define REQ_INTEGRITY (1 << __REQ_INTEGRITY) #define REQ_FLUSH (1 << __REQ_FLUSH) #define REQ_IO_STAT (1 << __REQ_IO_STAT) #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) -- cgit v1.2.3-18-g5258 From 40ffa93791985ab300fd488072e9f37ccf72e88c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2010 21:08:14 +0200 Subject: x86: Remove stale pmtimer_64.c This file is unused since the apic unification in 2.6.29, but nobody noticed. Signed-off-by: Thomas Gleixner --- include/linux/acpi_pmtmr.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/acpi_pmtmr.h b/include/linux/acpi_pmtmr.h index 7e3d2859be5..1d0ef1ae803 100644 --- a/include/linux/acpi_pmtmr.h +++ b/include/linux/acpi_pmtmr.h @@ -25,8 +25,6 @@ static inline u32 acpi_pm_read_early(void) return acpi_pm_read_verified() & ACPI_PM_MASK; } -extern void pmtimer_wait(unsigned); - #else static inline u32 acpi_pm_read_early(void) -- cgit v1.2.3-18-g5258 From 79b5dc0c64d88cda3da23b2e22a5cec0964372ac Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 15 Oct 2010 14:34:14 -0700 Subject: types.h: define __aligned_u64 and expose to userspace We currently have a kernel internal type called aligned_u64 which aligns __u64's on 8 bytes boundaries even on systems which would normally align them on 4 byte boundaries. This patch creates a new type __aligned_u64 which does the same thing but which is exposed to userspace rather than being kernel internal. [akpm: merge early as both the net and audit trees want this] [akpm@linux-foundation.org: enhance the comment describing the reasons for using aligned_u64. Via Andreas and Andi.] Based-on-patch-by: Andreas Gruenbacher Signed-off-by: Eric Paris Cc: Jan Engelhardt Cc: David Miller Cc: Andi Kleen Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/types.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/types.h b/include/linux/types.h index 01a082f56ef..357dbc19606 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -121,7 +121,15 @@ typedef __u64 u_int64_t; typedef __s64 int64_t; #endif -/* this is a special 64bit data type that is 8-byte aligned */ +/* + * aligned_u64 should be used in defining kernel<->userspace ABIs to avoid + * common 32/64-bit compat problems. + * 64-bit values align to 4-byte boundaries on x86_32 (and possibly other + * architectures) and to 8-byte boundaries on 64-bit architetures. The new + * aligned_64 type enforces 8-byte alignment so that structs containing + * aligned_64 values have the same alignment on 32-bit and 64-bit architectures. + * No conversions are necessary between 32-bit user-space and a 64-bit kernel. + */ #define aligned_u64 __u64 __attribute__((aligned(8))) #define aligned_be64 __be64 __attribute__((aligned(8))) #define aligned_le64 __le64 __attribute__((aligned(8))) @@ -178,6 +186,11 @@ typedef __u64 __bitwise __be64; typedef __u16 __bitwise __sum16; typedef __u32 __bitwise __wsum; +/* this is a special 64bit data type that is 8-byte aligned */ +#define __aligned_u64 __u64 __attribute__((aligned(8))) +#define __aligned_be64 __be64 __attribute__((aligned(8))) +#define __aligned_le64 __le64 __attribute__((aligned(8))) + #ifdef __KERNEL__ typedef unsigned __bitwise__ gfp_t; typedef unsigned __bitwise__ fmode_t; -- cgit v1.2.3-18-g5258 From 564824b0c52c34692d804bb6ea214451615b0b50 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Oct 2010 19:05:25 +0000 Subject: net: allocate skbs on local node commit b30973f877 (node-aware skb allocation) spread a wrong habit of allocating net drivers skbs on a given memory node : The one closest to the NIC hardware. This is wrong because as soon as we try to scale network stack, we need to use many cpus to handle traffic and hit slub/slab management on cross-node allocations/frees when these cpus have to alloc/free skbs bound to a central node. skb allocated in RX path are ephemeral, they have a very short lifetime : Extra cost to maintain NUMA affinity is too expensive. What appeared as a nice idea four years ago is in fact a bad one. In 2010, NIC hardwares are multiqueue, or we use RPS to spread the load, and two 10Gb NIC might deliver more than 28 million packets per second, needing all the available cpus. Cost of cross-node handling in network and vm stacks outperforms the small benefit hardware had when doing its DMA transfert in its 'local' memory node at RX time. Even trying to differentiate the two allocations done for one skb (the sk_buff on local node, the data part on NIC hardware node) is not enough to bring good performance. Signed-off-by: Eric Dumazet Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0b53c43ac92..05a358f1ba1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -496,13 +496,13 @@ extern struct sk_buff *__alloc_skb(unsigned int size, static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) { - return __alloc_skb(size, priority, 0, -1); + return __alloc_skb(size, priority, 0, NUMA_NO_NODE); } static inline struct sk_buff *alloc_skb_fclone(unsigned int size, gfp_t priority) { - return __alloc_skb(size, priority, 1, -1); + return __alloc_skb(size, priority, 1, NUMA_NO_NODE); } extern bool skb_recycle_check(struct sk_buff *skb, int skb_size); @@ -1563,13 +1563,25 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, return skb; } -extern struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask); +/** + * __netdev_alloc_page - allocate a page for ps-rx on a specific device + * @dev: network device to receive on + * @gfp_mask: alloc_pages_node mask + * + * Allocate a new page. dev currently unused. + * + * %NULL is returned if there is no free memory. + */ +static inline struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask) +{ + return alloc_pages_node(NUMA_NO_NODE, gfp_mask, 0); +} /** * netdev_alloc_page - allocate a page for ps-rx on a specific device * @dev: network device to receive on * - * Allocate a new page node local to the specified device. + * Allocate a new page. dev currently unused. * * %NULL is returned if there is no free memory. */ -- cgit v1.2.3-18-g5258 From 31e3c3f6f1f9b154981a0e6620df700463db30ee Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 13 Oct 2010 13:20:35 +0000 Subject: tipc: cleanup function namespace Do some cleanups of TIPC based on make namespacecheck 1. Don't export unused symbols 2. Eliminate dead code 3. Make functions and variables local 4. Rename buf_acquire to tipc_buf_acquire since it is used in several files Compile tested only. This make break out of tree kernel modules that depend on TIPC routines. Signed-off-by: Stephen Hemminger Acked-by: Jon Maloy Acked-by: Paul Gortmaker Signed-off-by: David S. Miller --- include/net/tipc/tipc.h | 71 -------------------------------------------- include/net/tipc/tipc_port.h | 2 -- 2 files changed, 73 deletions(-) (limited to 'include') diff --git a/include/net/tipc/tipc.h b/include/net/tipc/tipc.h index 15af6dca0b4..1e0645e1eed 100644 --- a/include/net/tipc/tipc.h +++ b/include/net/tipc/tipc.h @@ -50,8 +50,6 @@ * TIPC operating mode routines */ -u32 tipc_get_addr(void); - #define TIPC_NOT_RUNNING 0 #define TIPC_NODE_MODE 1 #define TIPC_NET_MODE 2 @@ -62,8 +60,6 @@ int tipc_attach(unsigned int *userref, tipc_mode_event, void *usr_handle); void tipc_detach(unsigned int userref); -int tipc_get_mode(void); - /* * TIPC port manipulation routines */ @@ -153,12 +149,6 @@ int tipc_disconnect(u32 portref); int tipc_shutdown(u32 ref); -int tipc_isconnected(u32 portref, int *isconnected); - -int tipc_peer(u32 portref, struct tipc_portid *peer); - -int tipc_ref_valid(u32 portref); - /* * TIPC messaging routines */ @@ -170,38 +160,12 @@ int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect); -int tipc_send_buf(u32 portref, - struct sk_buff *buf, - unsigned int dsz); - int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain, unsigned int num_sect, struct iovec const *msg_sect); -int tipc_send_buf2name(u32 portref, - struct tipc_name const *name, - u32 domain, - struct sk_buff *buf, - unsigned int dsz); - -int tipc_forward2name(u32 portref, - struct tipc_name const *name, - u32 domain, - unsigned int section_count, - struct iovec const *msg_sect, - struct tipc_portid const *origin, - unsigned int importance); - -int tipc_forward_buf2name(u32 portref, - struct tipc_name const *name, - u32 domain, - struct sk_buff *buf, - unsigned int dsz, - struct tipc_portid const *orig, - unsigned int importance); - int tipc_send2port(u32 portref, struct tipc_portid const *dest, unsigned int num_sect, @@ -212,46 +176,11 @@ int tipc_send_buf2port(u32 portref, struct sk_buff *buf, unsigned int dsz); -int tipc_forward2port(u32 portref, - struct tipc_portid const *dest, - unsigned int num_sect, - struct iovec const *msg_sect, - struct tipc_portid const *origin, - unsigned int importance); - -int tipc_forward_buf2port(u32 portref, - struct tipc_portid const *dest, - struct sk_buff *buf, - unsigned int dsz, - struct tipc_portid const *orig, - unsigned int importance); - int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, u32 domain, /* currently unused */ unsigned int section_count, struct iovec const *msg); - -#if 0 -int tipc_multicast_buf(u32 portref, - struct tipc_name_seq const *seq, - u32 domain, - void *buf, - unsigned int size); -#endif - -/* - * TIPC subscription routines - */ - -int tipc_ispublished(struct tipc_name const *name); - -/* - * Get number of available nodes within specified domain (excluding own node) - */ - -unsigned int tipc_available_nodes(const u32 domain); - #endif #endif diff --git a/include/net/tipc/tipc_port.h b/include/net/tipc/tipc_port.h index c54917cbfa4..1893aaf4942 100644 --- a/include/net/tipc/tipc_port.h +++ b/include/net/tipc/tipc_port.h @@ -88,8 +88,6 @@ void tipc_acknowledge(u32 port_ref,u32 ack); struct tipc_port *tipc_get_port(const u32 ref); -void *tipc_get_handle(const u32 ref); - /* * The following routines require that the port be locked on entry */ -- cgit v1.2.3-18-g5258 From 074037ec79bea73edf1b1ec72fef1010e83e3cc5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 22 Sep 2010 22:09:10 +0200 Subject: PM / Wakeup: Introduce wakeup source objects and event statistics (v3) Introduce struct wakeup_source for representing system wakeup sources within the kernel and for collecting statistics related to them. Make the recently introduced helper functions pm_wakeup_event(), pm_stay_awake() and pm_relax() use struct wakeup_source objects internally, so that wakeup statistics associated with wakeup devices can be collected and reported in a consistent way (the definition of pm_relax() is changed, which is harmless, because this function is not called directly by anyone yet). Introduce new wakeup-related sysfs device attributes in /sys/devices/.../power for reporting the device wakeup statistics. Change the global wakeup events counters event_count and events_in_progress into atomic variables, so that it is not necessary to acquire a global spinlock in pm_wakeup_event(), pm_stay_awake() and pm_relax(), which should allow us to avoid lock contention in these functions on SMP systems with many wakeup devices. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- include/linux/pm.h | 16 ++---- include/linux/pm_wakeup.h | 127 +++++++++++++++++++++++++++++++++++++--------- include/linux/suspend.h | 4 +- 3 files changed, 109 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index 52e8c55ff31..a84118911ce 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -448,23 +448,24 @@ enum rpm_request { RPM_REQ_RESUME, }; +struct wakeup_source; + struct dev_pm_info { pm_message_t power_state; unsigned int can_wakeup:1; - unsigned int should_wakeup:1; unsigned async_suspend:1; enum dpm_state status; /* Owned by the PM core */ + spinlock_t lock; #ifdef CONFIG_PM_SLEEP struct list_head entry; struct completion completion; - unsigned long wakeup_count; + struct wakeup_source *wakeup; #endif #ifdef CONFIG_PM_RUNTIME struct timer_list suspend_timer; unsigned long timer_expires; struct work_struct work; wait_queue_head_t wait_queue; - spinlock_t lock; atomic_t usage_count; atomic_t child_count; unsigned int disable_depth:3; @@ -559,11 +560,6 @@ extern void __suspend_report_result(const char *function, void *fn, int ret); } while (0) extern void device_pm_wait_for_dev(struct device *sub, struct device *dev); - -/* drivers/base/power/wakeup.c */ -extern void pm_wakeup_event(struct device *dev, unsigned int msec); -extern void pm_stay_awake(struct device *dev); -extern void pm_relax(void); #else /* !CONFIG_PM_SLEEP */ #define device_pm_lock() do {} while (0) @@ -577,10 +573,6 @@ static inline int dpm_suspend_start(pm_message_t state) #define suspend_report_result(fn, ret) do {} while (0) static inline void device_pm_wait_for_dev(struct device *a, struct device *b) {} - -static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {} -static inline void pm_stay_awake(struct device *dev) {} -static inline void pm_relax(void) {} #endif /* !CONFIG_PM_SLEEP */ /* How to reorder dpm_list after device_move() */ diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 76aca48722a..9cff00dd6b6 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -2,6 +2,7 @@ * pm_wakeup.h - Power management wakeup interface * * Copyright (C) 2008 Alan Stern + * Copyright (C) 2010 Rafael J. Wysocki, Novell Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -27,19 +28,77 @@ #include -#ifdef CONFIG_PM - -/* Changes to device_may_wakeup take effect on the next pm state change. +/** + * struct wakeup_source - Representation of wakeup sources * - * By default, most devices should leave wakeup disabled. The exceptions - * are devices that everyone expects to be wakeup sources: keyboards, - * power buttons, possibly network interfaces, etc. + * @total_time: Total time this wakeup source has been active. + * @max_time: Maximum time this wakeup source has been continuously active. + * @last_time: Monotonic clock when the wakeup source's was activated last time. + * @event_count: Number of signaled wakeup events. + * @active_count: Number of times the wakeup sorce was activated. + * @relax_count: Number of times the wakeup sorce was deactivated. + * @hit_count: Number of times the wakeup sorce might abort system suspend. + * @active: Status of the wakeup source. */ -static inline void device_init_wakeup(struct device *dev, bool val) +struct wakeup_source { + char *name; + struct list_head entry; + spinlock_t lock; + struct timer_list timer; + unsigned long timer_expires; + ktime_t total_time; + ktime_t max_time; + ktime_t last_time; + unsigned long event_count; + unsigned long active_count; + unsigned long relax_count; + unsigned long hit_count; + unsigned int active:1; +}; + +#ifdef CONFIG_PM_SLEEP + +/* + * Changes to device_may_wakeup take effect on the next pm state change. + */ + +static inline void device_set_wakeup_capable(struct device *dev, bool capable) +{ + dev->power.can_wakeup = capable; +} + +static inline bool device_can_wakeup(struct device *dev) +{ + return dev->power.can_wakeup; +} + + + +static inline bool device_may_wakeup(struct device *dev) { - dev->power.can_wakeup = dev->power.should_wakeup = val; + return dev->power.can_wakeup && !!dev->power.wakeup; } +/* drivers/base/power/wakeup.c */ +extern struct wakeup_source *wakeup_source_create(const char *name); +extern void wakeup_source_destroy(struct wakeup_source *ws); +extern void wakeup_source_add(struct wakeup_source *ws); +extern void wakeup_source_remove(struct wakeup_source *ws); +extern struct wakeup_source *wakeup_source_register(const char *name); +extern void wakeup_source_unregister(struct wakeup_source *ws); +extern int device_wakeup_enable(struct device *dev); +extern int device_wakeup_disable(struct device *dev); +extern int device_init_wakeup(struct device *dev, bool val); +extern int device_set_wakeup_enable(struct device *dev, bool enable); +extern void __pm_stay_awake(struct wakeup_source *ws); +extern void pm_stay_awake(struct device *dev); +extern void __pm_relax(struct wakeup_source *ws); +extern void pm_relax(struct device *dev); +extern void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec); +extern void pm_wakeup_event(struct device *dev, unsigned int msec); + +#else /* !CONFIG_PM_SLEEP */ + static inline void device_set_wakeup_capable(struct device *dev, bool capable) { dev->power.can_wakeup = capable; @@ -50,43 +109,63 @@ static inline bool device_can_wakeup(struct device *dev) return dev->power.can_wakeup; } -static inline void device_set_wakeup_enable(struct device *dev, bool enable) +static inline bool device_may_wakeup(struct device *dev) { - dev->power.should_wakeup = enable; + return false; } -static inline bool device_may_wakeup(struct device *dev) +static inline struct wakeup_source *wakeup_source_create(const char *name) { - return dev->power.can_wakeup && dev->power.should_wakeup; + return NULL; } -#else /* !CONFIG_PM */ +static inline void wakeup_source_destroy(struct wakeup_source *ws) {} + +static inline void wakeup_source_add(struct wakeup_source *ws) {} -/* For some reason the following routines work even without CONFIG_PM */ -static inline void device_init_wakeup(struct device *dev, bool val) +static inline void wakeup_source_remove(struct wakeup_source *ws) {} + +static inline struct wakeup_source *wakeup_source_register(const char *name) { - dev->power.can_wakeup = val; + return NULL; } -static inline void device_set_wakeup_capable(struct device *dev, bool capable) +static inline void wakeup_source_unregister(struct wakeup_source *ws) {} + +static inline int device_wakeup_enable(struct device *dev) { - dev->power.can_wakeup = capable; + return -EINVAL; } -static inline bool device_can_wakeup(struct device *dev) +static inline int device_wakeup_disable(struct device *dev) { - return dev->power.can_wakeup; + return 0; } -static inline void device_set_wakeup_enable(struct device *dev, bool enable) +static inline int device_init_wakeup(struct device *dev, bool val) { + dev->power.can_wakeup = val; + return val ? -EINVAL : 0; } -static inline bool device_may_wakeup(struct device *dev) + +static inline int device_set_wakeup_enable(struct device *dev, bool enable) { - return false; + return -EINVAL; } -#endif /* !CONFIG_PM */ +static inline void __pm_stay_awake(struct wakeup_source *ws) {} + +static inline void pm_stay_awake(struct device *dev) {} + +static inline void __pm_relax(struct wakeup_source *ws) {} + +static inline void pm_relax(struct device *dev) {} + +static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) {} + +static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {} + +#endif /* !CONFIG_PM_SLEEP */ #endif /* _LINUX_PM_WAKEUP_H */ diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 4af270ec220..6b1712c5110 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -293,8 +293,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern bool events_check_enabled; extern bool pm_check_wakeup_events(void); -extern bool pm_get_wakeup_count(unsigned long *count); -extern bool pm_save_wakeup_count(unsigned long count); +extern bool pm_get_wakeup_count(unsigned int *count); +extern bool pm_save_wakeup_count(unsigned int count); #else /* !CONFIG_PM_SLEEP */ static inline int register_pm_notifier(struct notifier_block *nb) -- cgit v1.2.3-18-g5258 From 098dff738abbeaea15fc95c4f4fdaee1e9bbea75 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 22 Sep 2010 22:10:57 +0200 Subject: PM: Fix potential issue with failing asynchronous suspend There is a potential issue with the asynchronous suspend code that a device driver suspending asynchronously may not notice that it should back off. There are two failing scenarions, (1) when the driver is waiting for a driver suspending synchronously to complete and that second driver returns error code, in which case async_error won't be set and the waiting driver will continue suspending and (2) after the driver has called device_pm_wait_for_dev() and the waited for driver returns error code, in which case the caller of device_pm_wait_for_dev() will not know that there was an error and will continue suspending. To fix this issue make __device_suspend() set async_error, so async_suspend() doesn't need to set it any more, and make device_pm_wait_for_dev() return async_error, so that its callers can check whether or not they should continue suspending. No more changes are necessary, since device_pm_wait_for_dev() is not used by any drivers' suspend routines. Reported-by: Colin Cross Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- include/linux/pm.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index a84118911ce..1abfe84f447 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -559,7 +559,7 @@ extern void __suspend_report_result(const char *function, void *fn, int ret); __suspend_report_result(__func__, fn, ret); \ } while (0) -extern void device_pm_wait_for_dev(struct device *sub, struct device *dev); +extern int device_pm_wait_for_dev(struct device *sub, struct device *dev); #else /* !CONFIG_PM_SLEEP */ #define device_pm_lock() do {} while (0) @@ -572,7 +572,10 @@ static inline int dpm_suspend_start(pm_message_t state) #define suspend_report_result(fn, ret) do {} while (0) -static inline void device_pm_wait_for_dev(struct device *a, struct device *b) {} +static inline int device_pm_wait_for_dev(struct device *a, struct device *b) +{ + return 0; +} #endif /* !CONFIG_PM_SLEEP */ /* How to reorder dpm_list after device_move() */ -- cgit v1.2.3-18-g5258 From 69d44ffbd772bede8c2a6d182e6e14f94826520b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 25 Sep 2010 23:34:22 +0200 Subject: sysfs: Add sysfs_merge_group() and sysfs_unmerge_group() This patch (as1420) adds sysfs_merge_group() and sysfs_unmerge_group() functions, allowing drivers easily to add and remove sets of attributes to a pre-existing attribute group directory. Signed-off-by: Alan Stern Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- include/linux/sysfs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 96eb576d82f..30b881555fa 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -164,6 +164,10 @@ int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group); void sysfs_remove_file_from_group(struct kobject *kobj, const struct attribute *attr, const char *group); +int sysfs_merge_group(struct kobject *kobj, + const struct attribute_group *grp); +void sysfs_unmerge_group(struct kobject *kobj, + const struct attribute_group *grp); void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); void sysfs_notify_dirent(struct sysfs_dirent *sd); @@ -302,6 +306,17 @@ static inline void sysfs_remove_file_from_group(struct kobject *kobj, { } +static inline int sysfs_merge_group(struct kobject *kobj, + const struct attribute_group *grp) +{ + return 0; +} + +static inline void sysfs_unmerge_group(struct kobject *kobj, + const struct attribute_group *grp) +{ +} + static inline void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr) { -- cgit v1.2.3-18-g5258 From 3f9af0513ae5b1f185302c2d0ba656640926d970 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 25 Sep 2010 23:34:54 +0200 Subject: PM / Runtime: Replace boolean arguments with bitflags The "from_wq" argument in __pm_runtime_suspend() and __pm_runtime_resume() supposedly indicates whether or not the function was called by the PM workqueue thread, but in fact it isn't always used this way. It really indicates whether or not the function should return early if the requested operation is already in progress. Along with this badly-named boolean argument, later patches in this series will add several other boolean arguments to these functions and others. Therefore this patch (as1422) begins the conversion process by replacing from_wq with a bitflag argument. The same bitflags are also used in __pm_runtime_get() and __pm_runtime_put(), where they indicate whether or not the operation should be asynchronous. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 6e81888c622..c030cac59aa 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -12,6 +12,11 @@ #include #include +/* Runtime PM flag argument bits */ +#define RPM_ASYNC 0x01 /* Request is asynchronous */ +#define RPM_NOWAIT 0x02 /* Don't wait for concurrent + state change */ + #ifdef CONFIG_PM_RUNTIME extern struct workqueue_struct *pm_wq; @@ -22,8 +27,8 @@ extern int pm_runtime_resume(struct device *dev); extern int pm_request_idle(struct device *dev); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int pm_request_resume(struct device *dev); -extern int __pm_runtime_get(struct device *dev, bool sync); -extern int __pm_runtime_put(struct device *dev, bool sync); +extern int __pm_runtime_get(struct device *dev, int rpmflags); +extern int __pm_runtime_put(struct device *dev, int rpmflags); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); extern void pm_runtime_enable(struct device *dev); @@ -81,8 +86,10 @@ static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) return -ENOSYS; } static inline int pm_request_resume(struct device *dev) { return 0; } -static inline int __pm_runtime_get(struct device *dev, bool sync) { return 1; } -static inline int __pm_runtime_put(struct device *dev, bool sync) { return 0; } +static inline int __pm_runtime_get(struct device *dev, int rpmflags) + { return 1; } +static inline int __pm_runtime_put(struct device *dev, int rpmflags) + { return 0; } static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } @@ -107,22 +114,22 @@ static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } static inline int pm_runtime_get(struct device *dev) { - return __pm_runtime_get(dev, false); + return __pm_runtime_get(dev, RPM_ASYNC); } static inline int pm_runtime_get_sync(struct device *dev) { - return __pm_runtime_get(dev, true); + return __pm_runtime_get(dev, 0); } static inline int pm_runtime_put(struct device *dev) { - return __pm_runtime_put(dev, false); + return __pm_runtime_put(dev, RPM_ASYNC); } static inline int pm_runtime_put_sync(struct device *dev) { - return __pm_runtime_put(dev, true); + return __pm_runtime_put(dev, 0); } static inline int pm_runtime_set_active(struct device *dev) -- cgit v1.2.3-18-g5258 From 140a6c945211ee911dec776fafa52e03a7d7bb9a Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 25 Sep 2010 23:35:07 +0200 Subject: PM / Runtime: Combine runtime PM entry points This patch (as1424) combines the various public entry points for the runtime PM routines into three simple functions: one for idle, one for suspend, and one for resume. A new bitflag specifies whether or not to increment or decrement the usage_count field. The new entry points are named __pm_runtime_idle, __pm_runtime_suspend, and __pm_runtime_resume, to reflect that they are trampolines. Simultaneously, the corresponding internal routines are renamed to rpm_idle, rpm_suspend, and rpm_resume. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 66 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index c030cac59aa..5869d87fffa 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -16,19 +16,17 @@ #define RPM_ASYNC 0x01 /* Request is asynchronous */ #define RPM_NOWAIT 0x02 /* Don't wait for concurrent state change */ +#define RPM_GET_PUT 0x04 /* Increment/decrement the + usage_count */ #ifdef CONFIG_PM_RUNTIME extern struct workqueue_struct *pm_wq; -extern int pm_runtime_idle(struct device *dev); -extern int pm_runtime_suspend(struct device *dev); -extern int pm_runtime_resume(struct device *dev); -extern int pm_request_idle(struct device *dev); +extern int __pm_runtime_idle(struct device *dev, int rpmflags); +extern int __pm_runtime_suspend(struct device *dev, int rpmflags); +extern int __pm_runtime_resume(struct device *dev, int rpmflags); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); -extern int pm_request_resume(struct device *dev); -extern int __pm_runtime_get(struct device *dev, int rpmflags); -extern int __pm_runtime_put(struct device *dev, int rpmflags); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); extern void pm_runtime_enable(struct device *dev); @@ -77,19 +75,22 @@ static inline bool pm_runtime_suspended(struct device *dev) #else /* !CONFIG_PM_RUNTIME */ -static inline int pm_runtime_idle(struct device *dev) { return -ENOSYS; } -static inline int pm_runtime_suspend(struct device *dev) { return -ENOSYS; } -static inline int pm_runtime_resume(struct device *dev) { return 0; } -static inline int pm_request_idle(struct device *dev) { return -ENOSYS; } +static inline int __pm_runtime_idle(struct device *dev, int rpmflags) +{ + return -ENOSYS; +} +static inline int __pm_runtime_suspend(struct device *dev, int rpmflags) +{ + return -ENOSYS; +} +static inline int __pm_runtime_resume(struct device *dev, int rpmflags) +{ + return 1; +} static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) { return -ENOSYS; } -static inline int pm_request_resume(struct device *dev) { return 0; } -static inline int __pm_runtime_get(struct device *dev, int rpmflags) - { return 1; } -static inline int __pm_runtime_put(struct device *dev, int rpmflags) - { return 0; } static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } @@ -112,24 +113,49 @@ static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } #endif /* !CONFIG_PM_RUNTIME */ +static inline int pm_runtime_idle(struct device *dev) +{ + return __pm_runtime_idle(dev, 0); +} + +static inline int pm_runtime_suspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, 0); +} + +static inline int pm_runtime_resume(struct device *dev) +{ + return __pm_runtime_resume(dev, 0); +} + +static inline int pm_request_idle(struct device *dev) +{ + return __pm_runtime_idle(dev, RPM_ASYNC); +} + +static inline int pm_request_resume(struct device *dev) +{ + return __pm_runtime_resume(dev, RPM_ASYNC); +} + static inline int pm_runtime_get(struct device *dev) { - return __pm_runtime_get(dev, RPM_ASYNC); + return __pm_runtime_resume(dev, RPM_GET_PUT | RPM_ASYNC); } static inline int pm_runtime_get_sync(struct device *dev) { - return __pm_runtime_get(dev, 0); + return __pm_runtime_resume(dev, RPM_GET_PUT); } static inline int pm_runtime_put(struct device *dev) { - return __pm_runtime_put(dev, RPM_ASYNC); + return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } static inline int pm_runtime_put_sync(struct device *dev) { - return __pm_runtime_put(dev, 0); + return __pm_runtime_idle(dev, RPM_GET_PUT); } static inline int pm_runtime_set_active(struct device *dev) -- cgit v1.2.3-18-g5258 From 7490e44239e60293bca0c2663229050c36c660c2 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 25 Sep 2010 23:35:15 +0200 Subject: PM / Runtime: Add no_callbacks flag Some devices, such as USB interfaces, cannot be power-managed independently of their parents, i.e., they cannot be put in low power while the parent remains at full power. This patch (as1425) creates a new "no_callbacks" flag, which tells the PM core not to invoke the runtime-PM callback routines for the such devices but instead to assume that the callbacks always succeed. In addition, the non-debugging runtime-PM sysfs attributes for the devices are removed, since they are pretty much meaningless. The advantage of this scheme comes not so much from avoiding the callbacks themselves, but rather from the fact that without the need for a process context in which to run the callbacks, more work can be done in interrupt context. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 7 +++++++ include/linux/pm_runtime.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index 1abfe84f447..abd81ffaba3 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -41,6 +41,12 @@ extern void (*pm_power_off_prepare)(void); struct device; +#ifdef CONFIG_PM +extern const char power_group_name[]; /* = "power" */ +#else +#define power_group_name NULL +#endif + typedef struct pm_message { int event; } pm_message_t; @@ -475,6 +481,7 @@ struct dev_pm_info { unsigned int deferred_resume:1; unsigned int run_wake:1; unsigned int runtime_auto:1; + unsigned int no_callbacks:1; enum rpm_request request; enum rpm_status runtime_status; int runtime_error; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 5869d87fffa..8ca52f7c357 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -36,6 +36,7 @@ extern void pm_runtime_forbid(struct device *dev); extern int pm_generic_runtime_idle(struct device *dev); extern int pm_generic_runtime_suspend(struct device *dev); extern int pm_generic_runtime_resume(struct device *dev); +extern void pm_runtime_no_callbacks(struct device *dev); static inline bool pm_children_suspended(struct device *dev) { @@ -110,6 +111,7 @@ static inline bool pm_runtime_suspended(struct device *dev) { return false; } static inline int pm_generic_runtime_idle(struct device *dev) { return 0; } static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } +static inline void pm_runtime_no_callbacks(struct device *dev) {} #endif /* !CONFIG_PM_RUNTIME */ -- cgit v1.2.3-18-g5258 From 15bcb91d7e607d8a2e060f01f7784a7454668da4 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 25 Sep 2010 23:35:21 +0200 Subject: PM / Runtime: Implement autosuspend support This patch (as1427) implements the "autosuspend" facility for runtime PM. A few new fields are added to the dev_pm_info structure and several new PM helper functions are defined, for telling the PM core whether or not a device uses autosuspend, for setting the autosuspend delay, and for marking periods of device activity. Drivers that do not want to use autosuspend can continue using the same helper functions as before; their behavior will not change. In addition, drivers supporting autosuspend can also call the old helper functions to get the old behavior. The details are all explained in Documentation/power/runtime_pm.txt and Documentation/ABI/testing/sysfs-devices-power. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 8 ++++++++ include/linux/pm_runtime.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index abd81ffaba3..40f3f45702b 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -444,6 +444,9 @@ enum rpm_status { * * RPM_REQ_SUSPEND Run the device bus type's ->runtime_suspend() callback * + * RPM_REQ_AUTOSUSPEND Same as RPM_REQ_SUSPEND, but not until the device has + * been inactive for as long as power.autosuspend_delay + * * RPM_REQ_RESUME Run the device bus type's ->runtime_resume() callback */ @@ -451,6 +454,7 @@ enum rpm_request { RPM_REQ_NONE = 0, RPM_REQ_IDLE, RPM_REQ_SUSPEND, + RPM_REQ_AUTOSUSPEND, RPM_REQ_RESUME, }; @@ -482,9 +486,13 @@ struct dev_pm_info { unsigned int run_wake:1; unsigned int runtime_auto:1; unsigned int no_callbacks:1; + unsigned int use_autosuspend:1; + unsigned int timer_autosuspends:1; enum rpm_request request; enum rpm_status runtime_status; int runtime_error; + int autosuspend_delay; + unsigned long last_busy; unsigned long active_jiffies; unsigned long suspended_jiffies; unsigned long accounting_timestamp; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 8ca52f7c357..99ed1aa8f93 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -12,12 +12,15 @@ #include #include +#include + /* Runtime PM flag argument bits */ #define RPM_ASYNC 0x01 /* Request is asynchronous */ #define RPM_NOWAIT 0x02 /* Don't wait for concurrent state change */ #define RPM_GET_PUT 0x04 /* Increment/decrement the usage_count */ +#define RPM_AUTO 0x08 /* Use autosuspend_delay */ #ifdef CONFIG_PM_RUNTIME @@ -37,6 +40,9 @@ extern int pm_generic_runtime_idle(struct device *dev); extern int pm_generic_runtime_suspend(struct device *dev); extern int pm_generic_runtime_resume(struct device *dev); extern void pm_runtime_no_callbacks(struct device *dev); +extern void __pm_runtime_use_autosuspend(struct device *dev, bool use); +extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); +extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev); static inline bool pm_children_suspended(struct device *dev) { @@ -74,6 +80,11 @@ static inline bool pm_runtime_suspended(struct device *dev) return dev->power.runtime_status == RPM_SUSPENDED; } +static inline void pm_runtime_mark_last_busy(struct device *dev) +{ + ACCESS_ONCE(dev->power.last_busy) = jiffies; +} + #else /* !CONFIG_PM_RUNTIME */ static inline int __pm_runtime_idle(struct device *dev, int rpmflags) @@ -113,6 +124,14 @@ static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } static inline void pm_runtime_no_callbacks(struct device *dev) {} +static inline void pm_runtime_mark_last_busy(struct device *dev) {} +static inline void __pm_runtime_use_autosuspend(struct device *dev, + bool use) {} +static inline void pm_runtime_set_autosuspend_delay(struct device *dev, + int delay) {} +static inline unsigned long pm_runtime_autosuspend_expiration( + struct device *dev) { return 0; } + #endif /* !CONFIG_PM_RUNTIME */ static inline int pm_runtime_idle(struct device *dev) @@ -125,6 +144,11 @@ static inline int pm_runtime_suspend(struct device *dev) return __pm_runtime_suspend(dev, 0); } +static inline int pm_runtime_autosuspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, RPM_AUTO); +} + static inline int pm_runtime_resume(struct device *dev) { return __pm_runtime_resume(dev, 0); @@ -155,11 +179,22 @@ static inline int pm_runtime_put(struct device *dev) return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } +static inline int pm_runtime_put_autosuspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, + RPM_GET_PUT | RPM_ASYNC | RPM_AUTO); +} + static inline int pm_runtime_put_sync(struct device *dev) { return __pm_runtime_idle(dev, RPM_GET_PUT); } +static inline int pm_runtime_put_sync_autosuspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_AUTO); +} + static inline int pm_runtime_set_active(struct device *dev) { return __pm_runtime_set_status(dev, RPM_ACTIVE); @@ -175,4 +210,14 @@ static inline void pm_runtime_disable(struct device *dev) __pm_runtime_disable(dev, true); } +static inline void pm_runtime_use_autosuspend(struct device *dev) +{ + __pm_runtime_use_autosuspend(dev, true); +} + +static inline void pm_runtime_dont_use_autosuspend(struct device *dev) +{ + __pm_runtime_use_autosuspend(dev, false); +} + #endif -- cgit v1.2.3-18-g5258 From 5fc62aad4e7779c2f04691e48b351d08c050c1f1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 29 Sep 2010 00:12:22 +0200 Subject: PM: runtime: add missed pm_request_autosuspend The patch "PM / Runtime: Implement autosuspend support" introduces "autosuspend" facility for runtime PM, but misses helper function of pm_request_autosuspend, so add it. Signed-off-by: Ming Lei Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 99ed1aa8f93..3ec2358f869 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -164,6 +164,11 @@ static inline int pm_request_resume(struct device *dev) return __pm_runtime_resume(dev, RPM_ASYNC); } +static inline int pm_request_autosuspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, RPM_ASYNC | RPM_AUTO); +} + static inline int pm_runtime_get(struct device *dev) { return __pm_runtime_resume(dev, RPM_GET_PUT | RPM_ASYNC); -- cgit v1.2.3-18-g5258 From dbeeec5fe868f2e2e92fe94daa2c5a047240fdc4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 4 Oct 2010 22:07:32 +0200 Subject: PM: Allow wakeup events to abort freezing of tasks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If there is a wakeup event during the freezing of tasks, suspend or hibernation will fail anyway. Since try_to_freeze_tasks() can take up to 20 seconds to complete or fail, aborting it as soon as a wakeup event is detected improves the worst case wakeup latency. Based on a patch from Arve Hjønnevåg. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek --- include/linux/suspend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 6b1712c5110..26697514c5e 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -308,6 +308,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) } #define pm_notifier(fn, pri) do { (void)(fn); } while (0) + +static inline bool pm_check_wakeup_events(void) { return true; } #endif /* !CONFIG_PM_SLEEP */ extern struct mutex pm_mutex; -- cgit v1.2.3-18-g5258 From d33ac60beaf2c7dee5cd90aba7c1eb385dd70937 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 12 Oct 2010 00:00:25 +0200 Subject: PM: Add sysfs attr for rechecking dev hash from PM trace If the device which fails to resume is part of a loadable kernel module it won't be checked at startup against the magic number stored in the RTC. Add a read-only sysfs attribute /sys/power/pm_trace_dev_match which contains a list of newline separated devices (usually just the one) which currently match the last magic number. This allows the device which is failing to resume to be found after the modules are loaded again. Signed-off-by: James Hogan Signed-off-by: Rafael J. Wysocki --- include/linux/resume-trace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h index bc8c3881c72..f31db236878 100644 --- a/include/linux/resume-trace.h +++ b/include/linux/resume-trace.h @@ -3,6 +3,7 @@ #ifdef CONFIG_PM_TRACE #include +#include extern int pm_trace_enabled; @@ -14,6 +15,7 @@ static inline int pm_trace_is_enabled(void) struct device; extern void set_trace_device(struct device *); extern void generate_resume_trace(const void *tracedata, unsigned int user); +extern int show_trace_dev_match(char *buf, size_t size); #define TRACE_DEVICE(dev) do { \ if (pm_trace_enabled) \ -- cgit v1.2.3-18-g5258 From e1f60b292ffd61151403327aa19ff7a1871820bd Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Wed, 13 Oct 2010 00:13:10 +0200 Subject: PM: Introduce library for device-specific OPPs (v7) SoCs have a standard set of tuples consisting of frequency and voltage pairs that the device will support per voltage domain. These are called Operating Performance Points or OPPs. The actual definitions of OPP varies over silicon versions. For a specific domain, we can have a set of {frequency, voltage} pairs. As the kernel boots and more information is available, a default set of these are activated based on the precise nature of device. Further on operation, based on conditions prevailing in the system (such as temperature), some OPP availability may be temporarily controlled by the SoC frameworks. To implement an OPP, some sort of power management support is necessary hence this library depends on CONFIG_PM. Contributions include: Sanjeev Premi for the initial concept: http://patchwork.kernel.org/patch/50998/ Kevin Hilman for converting original design to device-based. Kevin Hilman and Paul Walmsey for cleaning up many of the function abstractions, improvements and data structure handling. Romit Dasgupta for using enums instead of opp pointers. Thara Gopinath, Eduardo Valentin and Vishwanath BS for fixes and cleanups. Linus Walleij for recommending this layer be made generic for usage in other architectures beyond OMAP and ARM. Mark Brown, Andrew Morton, Rafael J. Wysocki, Paul E. McKenney for valuable improvements. Discussions and comments from: http://marc.info/?l=linux-omap&m=126033945313269&w=2 http://marc.info/?l=linux-omap&m=125482970102327&w=2 http://marc.info/?t=125809247500002&r=1&w=2 http://marc.info/?l=linux-omap&m=126025973426007&w=2 http://marc.info/?t=128152609200064&r=1&w=2 http://marc.info/?t=128468723000002&r=1&w=2 incorporated. v1: http://marc.info/?t=128468723000002&r=1&w=2 Signed-off-by: Nishanth Menon Signed-off-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/opp.h | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 include/linux/opp.h (limited to 'include') diff --git a/include/linux/opp.h b/include/linux/opp.h new file mode 100644 index 00000000000..5449945d589 --- /dev/null +++ b/include/linux/opp.h @@ -0,0 +1,105 @@ +/* + * Generic OPP Interface + * + * Copyright (C) 2009-2010 Texas Instruments Incorporated. + * Nishanth Menon + * Romit Dasgupta + * Kevin Hilman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_OPP_H__ +#define __LINUX_OPP_H__ + +#include +#include + +struct opp; + +#if defined(CONFIG_PM_OPP) + +unsigned long opp_get_voltage(struct opp *opp); + +unsigned long opp_get_freq(struct opp *opp); + +int opp_get_opp_count(struct device *dev); + +struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq, + bool available); + +struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq); + +struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq); + +int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt); + +int opp_enable(struct device *dev, unsigned long freq); + +int opp_disable(struct device *dev, unsigned long freq); + +#else +static inline unsigned long opp_get_voltage(struct opp *opp) +{ + return 0; +} + +static inline unsigned long opp_get_freq(struct opp *opp) +{ + return 0; +} + +static inline int opp_get_opp_count(struct device *dev) +{ + return 0; +} + +static inline struct opp *opp_find_freq_exact(struct device *dev, + unsigned long freq, bool available) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct opp *opp_find_freq_floor(struct device *dev, + unsigned long *freq) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct opp *opp_find_freq_ceil(struct device *dev, + unsigned long *freq) +{ + return ERR_PTR(-EINVAL); +} + +static inline int opp_add(struct device *dev, unsigned long freq, + unsigned long u_volt) +{ + return -EINVAL; +} + +static inline int opp_enable(struct device *dev, unsigned long freq) +{ + return 0; +} + +static inline int opp_disable(struct device *dev, unsigned long freq) +{ + return 0; +} +#endif /* CONFIG_PM */ + +#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP) +int opp_init_cpufreq_table(struct device *dev, + struct cpufreq_frequency_table **table); +#else +static inline int opp_init_cpufreq_table(struct device *dev, + struct cpufreq_frequency_table **table) +{ + return -EINVAL; +} +#endif /* CONFIG_CPU_FREQ */ + +#endif /* __LINUX_OPP_H__ */ -- cgit v1.2.3-18-g5258 From 8e602ce2980fd6941dc0d3dda12e5095e8206f34 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Oct 2010 05:56:18 +0000 Subject: netns: reorder fields in struct net In a network bench, I noticed an unfortunate false sharing between 'loopback_dev' and 'count' fields in "struct net". 'count' is written each time a socket is created or destroyed, while loopback_dev might be often read in routing code. Move loopback_dev in a read mostly section of "struct net" Note: struct netns_xfrm is cache line aligned on SMP. (It contains a "struct dst_ops") Move it at the end to avoid holes, and reduce sizeof(struct net) by 128 bytes on ia32. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/net_namespace.h | 17 ++++++++++------- include/net/netns/xfrm.h | 9 +++++---- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bd10a790899..65af9a07cf7 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -41,6 +41,8 @@ struct net { * destroy on demand */ #endif + spinlock_t rules_mod_lock; + struct list_head list; /* list of network namespaces */ struct list_head cleanup_list; /* namespaces on death row */ struct list_head exit_list; /* Use only net_mutex */ @@ -52,7 +54,8 @@ struct net { struct ctl_table_set sysctls; #endif - struct net_device *loopback_dev; /* The loopback */ + struct sock *rtnl; /* rtnetlink socket */ + struct sock *genl_sock; struct list_head dev_base_head; struct hlist_head *dev_name_head; @@ -60,11 +63,9 @@ struct net { /* core fib_rules */ struct list_head rules_ops; - spinlock_t rules_mod_lock; - struct sock *rtnl; /* rtnetlink socket */ - struct sock *genl_sock; + struct net_device *loopback_dev; /* The loopback */ struct netns_core core; struct netns_mib mib; struct netns_packet packet; @@ -84,13 +85,15 @@ struct net { struct sock *nfnl; struct sock *nfnl_stash; #endif -#ifdef CONFIG_XFRM - struct netns_xfrm xfrm; -#endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; #endif struct net_generic *gen; + + /* Note : following structs are cache line aligned */ +#ifdef CONFIG_XFRM + struct netns_xfrm xfrm; +#endif }; diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 74f119a2829..748f91f87cd 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -43,10 +43,6 @@ struct netns_xfrm { unsigned int policy_count[XFRM_POLICY_MAX * 2]; struct work_struct policy_hash_work; - struct dst_ops xfrm4_dst_ops; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct dst_ops xfrm6_dst_ops; -#endif struct sock *nlsk; struct sock *nlsk_stash; @@ -58,6 +54,11 @@ struct netns_xfrm { #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_hdr; #endif + + struct dst_ops xfrm4_dst_ops; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct dst_ops xfrm6_dst_ops; +#endif }; #endif -- cgit v1.2.3-18-g5258 From 35dbc0e020c6587f78a6c17693beca73aead7b54 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 18 Oct 2010 03:09:39 -0400 Subject: asm-generic/io.h: allow people to override individual funcs For the Blackfin port, we can use much of the asm-generic/io.h header, but we still need to declare some of our own versions of functions. Like the __raw_read* and in/out "string" helpers. So let people do this easily for many of these funcs. Signed-off-by: Mike Frysinger Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 118601fce92..3577ca11a0b 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -19,7 +19,9 @@ #include #endif +#ifndef mmiowb #define mmiowb() do {} while (0) +#endif /*****************************************************************************/ /* @@ -28,39 +30,51 @@ * differently. On the simple architectures, we just read/write the * memory location directly. */ +#ifndef __raw_readb static inline u8 __raw_readb(const volatile void __iomem *addr) { return *(const volatile u8 __force *) addr; } +#endif +#ifndef __raw_readw static inline u16 __raw_readw(const volatile void __iomem *addr) { return *(const volatile u16 __force *) addr; } +#endif +#ifndef __raw_readl static inline u32 __raw_readl(const volatile void __iomem *addr) { return *(const volatile u32 __force *) addr; } +#endif #define readb __raw_readb #define readw(addr) __le16_to_cpu(__raw_readw(addr)) #define readl(addr) __le32_to_cpu(__raw_readl(addr)) +#ifndef __raw_writeb static inline void __raw_writeb(u8 b, volatile void __iomem *addr) { *(volatile u8 __force *) addr = b; } +#endif +#ifndef __raw_writew static inline void __raw_writew(u16 b, volatile void __iomem *addr) { *(volatile u16 __force *) addr = b; } +#endif +#ifndef __raw_writel static inline void __raw_writel(u32 b, volatile void __iomem *addr) { *(volatile u32 __force *) addr = b; } +#endif #define writeb __raw_writeb #define writew(b,addr) __raw_writew(__cpu_to_le16(b),addr) @@ -122,6 +136,7 @@ static inline void outl(u32 b, unsigned long addr) #define outw_p(x, addr) outw((x), (addr)) #define outl_p(x, addr) outl((x), (addr)) +#ifndef insb static inline void insb(unsigned long addr, void *buffer, int count) { if (count) { @@ -132,7 +147,9 @@ static inline void insb(unsigned long addr, void *buffer, int count) } while (--count); } } +#endif +#ifndef insw static inline void insw(unsigned long addr, void *buffer, int count) { if (count) { @@ -143,7 +160,9 @@ static inline void insw(unsigned long addr, void *buffer, int count) } while (--count); } } +#endif +#ifndef insl static inline void insl(unsigned long addr, void *buffer, int count) { if (count) { @@ -154,7 +173,9 @@ static inline void insl(unsigned long addr, void *buffer, int count) } while (--count); } } +#endif +#ifndef outsb static inline void outsb(unsigned long addr, const void *buffer, int count) { if (count) { @@ -164,7 +185,9 @@ static inline void outsb(unsigned long addr, const void *buffer, int count) } while (--count); } } +#endif +#ifndef outsw static inline void outsw(unsigned long addr, const void *buffer, int count) { if (count) { @@ -174,7 +197,9 @@ static inline void outsw(unsigned long addr, const void *buffer, int count) } while (--count); } } +#endif +#ifndef outsl static inline void outsl(unsigned long addr, const void *buffer, int count) { if (count) { @@ -184,6 +209,7 @@ static inline void outsl(unsigned long addr, const void *buffer, int count) } while (--count); } } +#endif #ifndef CONFIG_GENERIC_IOMAP #define ioread8(addr) readb(addr) -- cgit v1.2.3-18-g5258 From 631dd1a885b6d7e9f6f51b4e5b311c2bb04c323c Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Mon, 18 Oct 2010 11:03:14 +0200 Subject: Update broken web addresses in the kernel. The patch below updates broken web addresses in the kernel Signed-off-by: Justin P. Mattock Cc: Maciej W. Rozycki Cc: Geert Uytterhoeven Cc: Finn Thain Cc: Randy Dunlap Cc: Matt Turner Cc: Dimitry Torokhov Cc: Mike Frysinger Acked-by: Ben Pfaff Acked-by: Hans J. Koch Reviewed-by: Finn Thain Signed-off-by: Jiri Kosina --- include/crypto/gf128mul.h | 4 ++-- include/linux/fdreg.h | 2 +- include/linux/if_infiniband.h | 2 +- include/linux/n_r3964.h | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h index 4086b8ebfaf..da2530e34b2 100644 --- a/include/crypto/gf128mul.h +++ b/include/crypto/gf128mul.h @@ -54,8 +54,8 @@ /* Comment by Rik: * - * For some background on GF(2^128) see for example: http://- - * csrc.nist.gov/CryptoToolkit/modes/proposedmodes/gcm/gcm-revised-spec.pdf + * For some background on GF(2^128) see for example: + * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-revised-spec.pdf * * The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can * be mapped to computer memory in a variety of ways. Let's examine diff --git a/include/linux/fdreg.h b/include/linux/fdreg.h index c2eeb63b72d..61ce6416900 100644 --- a/include/linux/fdreg.h +++ b/include/linux/fdreg.h @@ -89,7 +89,7 @@ /* the following commands are new in the 82078. They are not used in the * floppy driver, except the first three. These commands may be useful for apps * which use the FDRAWCMD interface. For doc, get the 82078 spec sheets at - * http://www-techdoc.intel.com/docs/periph/fd_contr/datasheets/ */ + * http://www.intel.com/design/archives/periphrl/docs/29046803.htm */ #define FD_PARTID 0x18 /* part id ("extended" version cmd) */ #define FD_SAVE 0x2e /* save fdc regs for later restore */ diff --git a/include/linux/if_infiniband.h b/include/linux/if_infiniband.h index 3e659ec7dfd..7d958475d4a 100644 --- a/include/linux/if_infiniband.h +++ b/include/linux/if_infiniband.h @@ -5,7 +5,7 @@ * , or the OpenIB.org BSD * license, available in the LICENSE.TXT file accompanying this * software. These details are also available at - * . + * . * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/include/linux/n_r3964.h b/include/linux/n_r3964.h index de24af79ebd..54b8e0d8d91 100644 --- a/include/linux/n_r3964.h +++ b/include/linux/n_r3964.h @@ -4,7 +4,6 @@ * Copyright by * Philips Automation Projects * Kassel (Germany) - * http://www.pap-philips.de * ----------------------------------------------------------- * This software may be used and distributed according to the terms of * the GNU General Public License, incorporated herein by reference. -- cgit v1.2.3-18-g5258 From 43f974cdb4ab6d65f849610deb9ef738d62b2e65 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Mon, 18 Oct 2010 11:22:05 +0200 Subject: netfilter: install missing ebtables headers for userspace The ebt_ip6.h and ebt_nflog.h headers are not not known to Kbuild and therefore not installed by make headers_install. Fix that up. Signed-off-by: Nick Bowler Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/Kbuild | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter_bridge/Kbuild b/include/linux/netfilter_bridge/Kbuild index d4d78672873..e48f1a3f5a4 100644 --- a/include/linux/netfilter_bridge/Kbuild +++ b/include/linux/netfilter_bridge/Kbuild @@ -3,11 +3,13 @@ header-y += ebt_among.h header-y += ebt_arp.h header-y += ebt_arpreply.h header-y += ebt_ip.h +header-y += ebt_ip6.h header-y += ebt_limit.h header-y += ebt_log.h header-y += ebt_mark_m.h header-y += ebt_mark_t.h header-y += ebt_nat.h +header-y += ebt_nflog.h header-y += ebt_pkttype.h header-y += ebt_redirect.h header-y += ebt_stp.h -- cgit v1.2.3-18-g5258 From f1f8c6cbe6f08f93ac2a4ca19625891d8a82b7f8 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 18 Oct 2010 15:00:18 +0200 Subject: can: mcp251x: Don't use pdata->model for chip selection anymore Since commit e446630c960946b5c1762e4eadb618becef599e7, i.e. v2.6.35-rc1, the mcp251x chip model can be selected via the modalias member in the struct spi_board_info. The driver stores the actual model in the struct mcp251x_platform_data. From the driver point of view the platform_data should be read only. Since all in-tree users of the mcp251x have already been converted to the modalias method, this patch moves the "model" member from the struct mcp251x_platform_data to the driver's private data structure. Signed-off-by: Marc Kleine-Budde Cc: Christian Pellegrin Cc: Marc Zyngier --- include/linux/can/platform/mcp251x.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/can/platform/mcp251x.h b/include/linux/can/platform/mcp251x.h index dba28268e65..8e20540043f 100644 --- a/include/linux/can/platform/mcp251x.h +++ b/include/linux/can/platform/mcp251x.h @@ -12,7 +12,6 @@ /** * struct mcp251x_platform_data - MCP251X SPI CAN controller platform data * @oscillator_frequency: - oscillator frequency in Hz - * @model: - actual type of chip * @board_specific_setup: - called before probing the chip (power,reset) * @transceiver_enable: - called to power on/off the transceiver * @power_enable: - called to power on/off the mcp *and* the @@ -25,9 +24,6 @@ struct mcp251x_platform_data { unsigned long oscillator_frequency; - int model; -#define CAN_MCP251X_MCP2510 0x2510 -#define CAN_MCP251X_MCP2515 0x2515 int (*board_specific_setup)(struct spi_device *spi); int (*transceiver_enable)(int enable); int (*power_enable) (int enable); -- cgit v1.2.3-18-g5258 From c2355e1ab910278a94d487b78590ee3c8eecd08a Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 13 Oct 2010 16:01:49 +0000 Subject: bonding: Fix bonding drivers improper modification of netpoll structure The bonding driver currently modifies the netpoll structure in its xmit path while sending frames from netpoll. This is racy, as other cpus can access the netpoll structure in parallel. Since the bonding driver points np->dev to a slave device, other cpus can inadvertently attempt to send data directly to slave devices, leading to improper locking with the bonding master, lost frames, and deadlocks. This patch fixes that up. This patch also removes the real_dev pointer from the netpoll structure as that data is really only used by bonding in the poll_controller, and we can emulate its behavior by check each slave for IS_UP. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- include/linux/netpoll.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 50d8009be86..79358bb712c 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -14,7 +14,6 @@ struct netpoll { struct net_device *dev; - struct net_device *real_dev; char dev_name[IFNAMSIZ]; const char *name; void (*rx_hook)(struct netpoll *, int, char *, int); @@ -53,7 +52,13 @@ void netpoll_set_trap(int trap); void __netpoll_cleanup(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); int __netpoll_rx(struct sk_buff *skb); -void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb); +void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, + struct net_device *dev); +static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) +{ + netpoll_send_skb_on_dev(np, skb, np->dev); +} + #ifdef CONFIG_NETPOLL -- cgit v1.2.3-18-g5258 From fc58d12be416eb51932eec594667ca3181903b9e Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Mon, 18 Oct 2010 09:18:13 -0700 Subject: Input: serio - add support for PS2Mult multiplexer protocol PS2Mult is a simple serial protocol used for multiplexing several PS/2 streams into one serial data stream. It's used e.g. on TQM85xx series of boards. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/serio.h b/include/linux/serio.h index 109b237603b..e26f4788845 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -198,5 +198,6 @@ static inline void serio_continue_rx(struct serio *serio) #define SERIO_W8001 0x39 #define SERIO_DYNAPRO 0x3a #define SERIO_HAMPSHIRE 0x3b +#define SERIO_PS2MULT 0x3c #endif -- cgit v1.2.3-18-g5258 From 620162505e5d46bc4494b1761743e4b0b3bf8e16 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Tue, 5 Oct 2010 18:01:51 +0900 Subject: lockdep: Add improved subclass caching Current lockdep_map only caches one class with subclass == 0, and looks up hash table of classes when subclass != 0. It seems that this has no problem because the case of subclass != 0 is rare. But locks of struct rq are acquired with subclass == 1 when task migration is executed. Task migration is high frequent event, so I modified lockdep to cache subclasses. I measured the score of perf bench sched messaging. This patch has slightly but certain (order of milli seconds or 10 milli seconds) effect when lots of tasks are running. I'll show the result in the tail of this description. NR_LOCKDEP_CACHING_CLASSES specifies how many classes can be cached in the instances of lockdep_map. I discussed with Peter Zijlstra in LinuxCon Japan about this approach and he taught me that caching every subclasses(8) is cleary waste of memory. So number of cached classes should be configurable. === Score comparison of benchmarks === # "min" means best score, and "max" means worst score for i in `seq 1 10`; do ./perf bench -f simple sched messaging; done before: min: 0.565000, max: 0.583000, avg: 0.572500 after: min: 0.559000, max: 0.568000, avg: 0.563300 # with more processes for i in `seq 1 10`; do ./perf bench -f simple sched messaging -g 40; done before: min: 2.274000, max: 2.298000, avg: 2.286300 after: min: 2.242000, max: 2.270000, avg: 2.259700 Signed-off-by: Hitoshi Mitake Cc: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: <1286269311-28336-2-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 06aed8305bf..2186a64ee4b 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -31,6 +31,17 @@ extern int lock_stat; #define MAX_LOCKDEP_SUBCLASSES 8UL +/* + * NR_LOCKDEP_CACHING_CLASSES ... Number of classes + * cached in the instance of lockdep_map + * + * Currently main class (subclass == 0) and signle depth subclass + * are cached in lockdep_map. This optimization is mainly targeting + * on rq->lock. double_rq_lock() acquires this highly competitive with + * single depth. + */ +#define NR_LOCKDEP_CACHING_CLASSES 2 + /* * Lock-classes are keyed via unique addresses, by embedding the * lockclass-key into the kernel (or module) .data section. (For @@ -138,7 +149,7 @@ void clear_lock_stats(struct lock_class *class); */ struct lockdep_map { struct lock_class_key *key; - struct lock_class *class_cache; + struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; const char *name; #ifdef CONFIG_LOCK_STAT int cpu; -- cgit v1.2.3-18-g5258 From e360adbe29241a0194e10e20595360dd7b98a2b3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Oct 2010 14:01:34 +0800 Subject: irq_work: Add generic hardirq context callbacks Provide a mechanism that allows running code in IRQ context. It is most useful for NMI code that needs to interact with the rest of the system -- like wakeup a task to drain buffers. Perf currently has such a mechanism, so extract that and provide it as a generic feature, independent of perf so that others may also benefit. The IRQ context callback is generated through self-IPIs where possible, or on architectures like powerpc the decrementer (the built-in timer facility) is set to generate an interrupt immediately. Architectures that don't have anything like this get to do with a callback from the timer tick. These architectures can call irq_work_run() at the tail of any IRQ handlers that might enqueue such work (like the perf IRQ handler) to avoid undue latencies in processing the work. Signed-off-by: Peter Zijlstra Acked-by: Kyle McMartin Acked-by: Martin Schwidefsky [ various fixes ] Signed-off-by: Huang Ying LKML-Reference: <1287036094.7768.291.camel@yhuang-dev> Signed-off-by: Ingo Molnar --- include/linux/irq_work.h | 20 ++++++++++++++++++++ include/linux/perf_event.h | 11 ++--------- 2 files changed, 22 insertions(+), 9 deletions(-) create mode 100644 include/linux/irq_work.h (limited to 'include') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h new file mode 100644 index 00000000000..4fa09d4d0b7 --- /dev/null +++ b/include/linux/irq_work.h @@ -0,0 +1,20 @@ +#ifndef _LINUX_IRQ_WORK_H +#define _LINUX_IRQ_WORK_H + +struct irq_work { + struct irq_work *next; + void (*func)(struct irq_work *); +}; + +static inline +void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *)) +{ + entry->next = NULL; + entry->func = func; +} + +bool irq_work_queue(struct irq_work *entry); +void irq_work_run(void); +void irq_work_sync(struct irq_work *entry); + +#endif /* _LINUX_IRQ_WORK_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a9227e98520..2ebfc9ae475 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -486,6 +486,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include #include @@ -672,11 +673,6 @@ struct perf_buffer { void *data_pages[0]; }; -struct perf_pending_entry { - struct perf_pending_entry *next; - void (*func)(struct perf_pending_entry *); -}; - struct perf_sample_data; typedef void (*perf_overflow_handler_t)(struct perf_event *, int, @@ -784,7 +780,7 @@ struct perf_event { int pending_wakeup; int pending_kill; int pending_disable; - struct perf_pending_entry pending; + struct irq_work pending; atomic_t event_limit; @@ -898,8 +894,6 @@ extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); extern void perf_event_delayed_put(struct task_struct *task); -extern void set_perf_event_pending(void); -extern void perf_event_do_pending(void); extern void perf_event_print_debug(void); extern void perf_pmu_disable(struct pmu *pmu); extern void perf_pmu_enable(struct pmu *pmu); @@ -1078,7 +1072,6 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_delayed_put(struct task_struct *task) { } -static inline void perf_event_do_pending(void) { } static inline void perf_event_print_debug(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { return -EINVAL; } -- cgit v1.2.3-18-g5258 From d580ff8699e8811a9af37e9de4dea375401bdeec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Oct 2010 17:43:23 +0200 Subject: perf, hw_breakpoint: Fix crash in hw_breakpoint creation hw_breakpoint creation needs to account stuff per-task to ensure there is always sufficient hardware resources to back these things due to ptrace. With the perf per pmu context changes the event initialization no longer has access to the event context, for the simple reason that we need to first find the pmu (result of initialization) before we can find the context. This makes hw_breakpoints unhappy, because it can no longer do per task accounting, cure this by frobbing a task pointer in the event::hw bits for now... Signed-off-by: Peter Zijlstra Cc: Frederic Weisbecker LKML-Reference: <20101014203625.391543667@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2ebfc9ae475..97965fac55f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -536,6 +536,12 @@ struct hw_perf_event { struct { /* breakpoint */ struct arch_hw_breakpoint info; struct list_head bp_list; + /* + * Crufty hack to avoid the chicken and egg + * problem hw_breakpoint has with context + * creation and event initalization. + */ + struct task_struct *bp_target; }; #endif }; @@ -693,6 +699,7 @@ struct swevent_hlist { #define PERF_ATTACH_CONTEXT 0x01 #define PERF_ATTACH_GROUP 0x02 +#define PERF_ATTACH_TASK 0x04 /** * struct perf_event - performance event kernel representation: -- cgit v1.2.3-18-g5258 From 3b6e901f839f42afb40f614418df82c08b01320a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Oct 2010 21:10:38 +0200 Subject: jump_label: Use more consistent naming Now that there's still only a few users around, rename things to make them more consistent. Signed-off-by: Peter Zijlstra LKML-Reference: <20101014203625.448565169@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index b72cd9f92c2..81be4962b7a 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -25,10 +25,10 @@ extern void jump_label_update(unsigned long key, enum jump_label_type type); extern void jump_label_apply_nops(struct module *mod); extern int jump_label_text_reserved(void *start, void *end); -#define enable_jump_label(key) \ +#define jump_label_enable(key) \ jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); -#define disable_jump_label(key) \ +#define jump_label_disable(key) \ jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE); #else @@ -39,12 +39,12 @@ do { \ goto label; \ } while (0) -#define enable_jump_label(cond_var) \ +#define jump_label_enable(cond_var) \ do { \ *(cond_var) = 1; \ } while (0) -#define disable_jump_label(cond_var) \ +#define jump_label_disable(cond_var) \ do { \ *(cond_var) = 0; \ } while (0) -- cgit v1.2.3-18-g5258 From 8b92538d84e50062560ba33adbaed7887b6e4a42 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Oct 2010 21:39:02 +0200 Subject: jump_label: Add atomic_t interface Add an interface to allow usage of jump_labels with atomic counters. Signed-off-by: Peter Zijlstra Acked-by: Frederic Weisbecker LKML-Reference: <20101014203625.501657727@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/jump_label_ref.h | 44 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 include/linux/jump_label_ref.h (limited to 'include') diff --git a/include/linux/jump_label_ref.h b/include/linux/jump_label_ref.h new file mode 100644 index 00000000000..e5d012ad92c --- /dev/null +++ b/include/linux/jump_label_ref.h @@ -0,0 +1,44 @@ +#ifndef _LINUX_JUMP_LABEL_REF_H +#define _LINUX_JUMP_LABEL_REF_H + +#include +#include + +#ifdef HAVE_JUMP_LABEL + +static inline void jump_label_inc(atomic_t *key) +{ + if (atomic_add_return(1, key) == 1) + jump_label_enable(key); +} + +static inline void jump_label_dec(atomic_t *key) +{ + if (atomic_dec_and_test(key)) + jump_label_disable(key); +} + +#else /* !HAVE_JUMP_LABEL */ + +static inline void jump_label_inc(atomic_t *key) +{ + atomic_inc(key); +} + +static inline void jump_label_dec(atomic_t *key) +{ + atomic_dec(key); +} + +#undef JUMP_LABEL +#define JUMP_LABEL(key, label) \ +do { \ + if (unlikely(__builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(key), atomic_t *), \ + atomic_read((atomic_t *)(key)), *(key)))) \ + goto label; \ +} while (0) + +#endif /* HAVE_JUMP_LABEL */ + +#endif /* _LINUX_JUMP_LABEL_REF_H */ -- cgit v1.2.3-18-g5258 From 82cd6def9806dcb6a325fb6abbc1d61388a15f6a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Oct 2010 17:57:23 +0200 Subject: perf: Use jump_labels to optimize the scheduler hooks Trades a call + conditional + ret for an unconditional jmp. Acked-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: <20101014203625.501657727@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 97965fac55f..7f0e7f52af8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -487,6 +487,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include #include @@ -895,8 +896,30 @@ extern void perf_pmu_unregister(struct pmu *pmu); extern int perf_num_counters(void); extern const char *perf_pmu_name(void); -extern void perf_event_task_sched_in(struct task_struct *task); -extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); +extern void __perf_event_task_sched_in(struct task_struct *task); +extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); + +extern atomic_t perf_task_events; + +static inline void perf_event_task_sched_in(struct task_struct *task) +{ + JUMP_LABEL(&perf_task_events, have_events); + return; + +have_events: + __perf_event_task_sched_in(task); +} + +static inline +void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) +{ + JUMP_LABEL(&perf_task_events, have_events); + return; + +have_events: + __perf_event_task_sched_out(task, next); +} + extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); -- cgit v1.2.3-18-g5258 From 7e54a5a0b655734326dc78c2b5efc1eb35497bb6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Oct 2010 22:32:45 +0200 Subject: perf: Optimize sw events Acked-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7f0e7f52af8..3b80cbf509e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1012,18 +1012,20 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs) perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); } -static inline void +static __always_inline void perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { - if (atomic_read(&perf_swevent_enabled[event_id])) { - struct pt_regs hot_regs; - - if (!regs) { - perf_fetch_caller_regs(&hot_regs); - regs = &hot_regs; - } - __perf_sw_event(event_id, nr, nmi, regs, addr); + struct pt_regs hot_regs; + + JUMP_LABEL(&perf_swevent_enabled[event_id], have_event); + return; + +have_event: + if (!regs) { + perf_fetch_caller_regs(&hot_regs); + regs = &hot_regs; } + __perf_sw_event(event_id, nr, nmi, regs, addr); } extern void perf_event_mmap(struct vm_area_struct *vma); -- cgit v1.2.3-18-g5258 From ebf31f502492527e2b6b5e5cf85a4ebc7fc8a52e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 17 Oct 2010 12:15:00 +0200 Subject: jump_label: Add COND_STMT(), reducer wrappery The use of the JUMP_LABEL() construct ends up creating endless silly wrappers, create a higher level construct to reduce this clutter. Signed-off-by: Peter Zijlstra Cc: Jason Baron Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Paul Mackerras LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 10 ++++++++++ include/linux/perf_event.h | 12 ++---------- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 81be4962b7a..b67cb180e6e 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -61,4 +61,14 @@ static inline int jump_label_text_reserved(void *start, void *end) #endif +#define COND_STMT(key, stmt) \ +do { \ + __label__ jl_enabled; \ + JUMP_LABEL(key, jl_enabled); \ + if (0) { \ +jl_enabled: \ + stmt; \ + } \ +} while (0) + #endif diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3b80cbf509e..057bf22a832 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -903,21 +903,13 @@ extern atomic_t perf_task_events; static inline void perf_event_task_sched_in(struct task_struct *task) { - JUMP_LABEL(&perf_task_events, have_events); - return; - -have_events: - __perf_event_task_sched_in(task); + COND_STMT(&perf_task_events, __perf_event_task_sched_in(task)); } static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { - JUMP_LABEL(&perf_task_events, have_events); - return; - -have_events: - __perf_event_task_sched_out(task, next); + COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next)); } extern int perf_event_init_task(struct task_struct *child); -- cgit v1.2.3-18-g5258 From 75e1056f5c57050415b64cb761a3acc35d91f013 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Mon, 4 Oct 2010 17:03:16 -0700 Subject: sched: Fix softirq time accounting Peter Zijlstra found a bug in the way softirq time is accounted in VIRT_CPU_ACCOUNTING on this thread: http://lkml.indiana.edu/hypermail//linux/kernel/1009.2/01366.html The problem is, softirq processing uses local_bh_disable internally. There is no way, later in the flow, to differentiate between whether softirq is being processed or is it just that bh has been disabled. So, a hardirq when bh is disabled results in time being wrongly accounted as softirq. Looking at the code a bit more, the problem exists in !VIRT_CPU_ACCOUNTING as well. As account_system_time() in normal tick based accouting also uses softirq_count, which will be set even when not in softirq with bh disabled. Peter also suggested solution of using 2*SOFTIRQ_OFFSET as irq count for local_bh_{disable,enable} and using just SOFTIRQ_OFFSET while softirq processing. The patch below does that and adds API in_serving_softirq() which returns whether we are currently processing softirq or not. Also changes one of the usages of softirq_count in net/sched/cls_cgroup.c to in_serving_softirq. Looks like many usages of in_softirq really want in_serving_softirq. Those changes can be made individually on a case by case basis. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Peter Zijlstra LKML-Reference: <1286237003-12406-2-git-send-email-venki@google.com> Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 5 +++++ include/linux/sched.h | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index d5b387669da..e37a77cbd58 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -64,6 +64,8 @@ #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) #define NMI_OFFSET (1UL << NMI_SHIFT) +#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) + #ifndef PREEMPT_ACTIVE #define PREEMPT_ACTIVE_BITS 1 #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) @@ -82,10 +84,13 @@ /* * Are we doing bottom half or hardware interrupt processing? * Are we in a softirq context? Interrupt context? + * in_softirq - Are we currently processing softirq or have bh disabled? + * in_serving_softirq - Are we currently processing softirq? */ #define in_irq() (hardirq_count()) #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) +#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) /* * Are we in NMI context? diff --git a/include/linux/sched.h b/include/linux/sched.h index cdf56693ecb..8744e50cb08 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2366,9 +2366,9 @@ extern int __cond_resched_lock(spinlock_t *lock); extern int __cond_resched_softirq(void); -#define cond_resched_softirq() ({ \ - __might_sleep(__FILE__, __LINE__, SOFTIRQ_OFFSET); \ - __cond_resched_softirq(); \ +#define cond_resched_softirq() ({ \ + __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ + __cond_resched_softirq(); \ }) /* -- cgit v1.2.3-18-g5258 From e1e10a265d28273ab8c70be19d43dcbdeead6c5a Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Mon, 4 Oct 2010 17:03:17 -0700 Subject: sched: Consolidate account_system_vtime extern declaration Just a minor cleanup patch that makes things easier to the following patches. No functionality change in this patch. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Peter Zijlstra LKML-Reference: <1286237003-12406-3-git-send-email-venki@google.com> Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index e37a77cbd58..41367c5c3c6 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -141,6 +141,8 @@ struct task_struct; static inline void account_system_vtime(struct task_struct *tsk) { } +#else +extern void account_system_vtime(struct task_struct *tsk); #endif #if defined(CONFIG_NO_HZ) -- cgit v1.2.3-18-g5258 From 6cdd5199daf0cb7b0fcc8dca941af08492612887 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Mon, 4 Oct 2010 17:03:18 -0700 Subject: sched: Add a PF flag for ksoftirqd identification To account softirq time cleanly in scheduler, we need to identify whether softirq is invoked in ksoftirqd context or softirq at hardirq tail context. Add PF_KSOFTIRQD for that purpose. As all PF flag bits are currently taken, create space by moving one of the infrequently used bits (PF_THREAD_BOUND) down in task_struct to be along with some other state fields. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Peter Zijlstra LKML-Reference: <1286237003-12406-4-git-send-email-venki@google.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8744e50cb08..aca0ce67593 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1682,6 +1682,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * /* * Per process flags */ +#define PF_KSOFTIRQD 0x00000001 /* I am ksoftirqd */ #define PF_STARTING 0x00000002 /* being created */ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ -- cgit v1.2.3-18-g5258 From b52bfee445d315549d41eacf2fa7c156e7d153d5 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Mon, 4 Oct 2010 17:03:19 -0700 Subject: sched: Add IRQ_TIME_ACCOUNTING, finer accounting of irq time s390/powerpc/ia64 have support for CONFIG_VIRT_CPU_ACCOUNTING which does the fine granularity accounting of user, system, hardirq, softirq times. Adding that option on archs like x86 will be challenging however, given the state of TSC reliability on various platforms and also the overhead it will add in syscall entry exit. Instead, add a lighter variant that only does finer accounting of hardirq and softirq times, providing precise irq times (instead of timer tick based samples). This accounting is added with a new config option CONFIG_IRQ_TIME_ACCOUNTING so that there won't be any overhead for users not interested in paying the perf penalty. This accounting is based on sched_clock, with the code being generic. So, other archs may find it useful as well. This patch just adds the core logic and does not enable this logic yet. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Peter Zijlstra LKML-Reference: <1286237003-12406-5-git-send-email-venki@google.com> Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 2 +- include/linux/sched.h | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 41367c5c3c6..ff43e926844 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -137,7 +137,7 @@ extern void synchronize_irq(unsigned int irq); struct task_struct; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING) static inline void account_system_vtime(struct task_struct *tsk) { } diff --git a/include/linux/sched.h b/include/linux/sched.h index aca0ce67593..2cca9a92f5e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1826,6 +1826,19 @@ extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); #endif +#ifdef CONFIG_IRQ_TIME_ACCOUNTING +/* + * An i/f to runtime opt-in for irq time accounting based off of sched_clock. + * The reason for this explicit opt-in is not to have perf penalty with + * slow sched_clocks. + */ +extern void enable_sched_clock_irqtime(void); +extern void disable_sched_clock_irqtime(void); +#else +static inline void enable_sched_clock_irqtime(void) {} +static inline void disable_sched_clock_irqtime(void) {} +#endif + extern unsigned long long task_sched_runtime(struct task_struct *task); extern unsigned long long thread_group_sched_runtime(struct task_struct *task); -- cgit v1.2.3-18-g5258 From 7681bfeeccff5efa9eb29bf09249a3c400b15327 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Tue, 19 Oct 2010 09:05:00 +0200 Subject: block: fix accounting bug on cross partition merges /proc/diskstats would display a strange output as follows. $ cat /proc/diskstats |grep sda 8 0 sda 90524 7579 102154 20464 0 0 0 0 0 14096 20089 8 1 sda1 19085 1352 21841 4209 0 0 0 0 4294967064 15689 4293424691 ~~~~~~~~~~ 8 2 sda2 71252 3624 74891 15950 0 0 0 0 232 23995 1562390 8 3 sda3 54 487 2188 92 0 0 0 0 0 88 92 8 4 sda4 4 0 8 0 0 0 0 0 0 0 0 8 5 sda5 81 2027 2130 138 0 0 0 0 0 87 137 Its reason is the wrong way of accounting hd_struct->in_flight. When a bio is merged into a request belongs to different partition by ELEVATOR_FRONT_MERGE. The detailed root cause is as follows. Assuming that there are two partition, sda1 and sda2. 1. A request for sda2 is in request_queue. Hence sda1's hd_struct->in_flight is 0 and sda2's one is 1. | hd_struct->in_flight --------------------------- sda1 | 0 sda2 | 1 --------------------------- 2. A bio belongs to sda1 is issued and is merged into the request mentioned on step1 by ELEVATOR_BACK_MERGE. The first sector of the request is changed from sda2 region to sda1 region. However the two partition's hd_struct->in_flight are not changed. | hd_struct->in_flight --------------------------- sda1 | 0 sda2 | 1 --------------------------- 3. The request is finished and blk_account_io_done() is called. In this case, sda2's hd_struct->in_flight, not a sda1's one, is decremented. | hd_struct->in_flight --------------------------- sda1 | -1 sda2 | 1 --------------------------- The patch fixes the problem by caching the partition lookup inside the request structure, hence making sure that the increment and decrement will always happen on the same partition struct. This also speeds up IO with accounting enabled, since it cuts down on the number of lookups we have to do. When reloading partition tables, quiesce IO to ensure that no request references to the partition struct exists. When it is safe to free the partition table, the IO for that device is restarted again. Signed-off-by: Yasuaki Ishimatsu Cc: stable@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + include/linux/elevator.h | 2 ++ include/linux/genhd.h | 1 + 3 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8f3dd981b97..16f7f1be1ac 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -115,6 +115,7 @@ struct request { void *elevator_private3; struct gendisk *rq_disk; + struct hd_struct *part; unsigned long start_time; #ifdef CONFIG_BLK_CGROUP unsigned long long start_time_ns; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 2c958f4fce1..df1ee866d71 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -121,6 +121,8 @@ extern void elv_completed_request(struct request_queue *, struct request *); extern int elv_set_request(struct request_queue *, struct request *, gfp_t); extern void elv_put_request(struct request_queue *, struct request *); extern void elv_drain_elevator(struct request_queue *); +extern void elv_quiesce_start(struct request_queue *); +extern void elv_quiesce_end(struct request_queue *); /* * io scheduler registration diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 66e26b5a153..57647ecfc1b 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -140,6 +140,7 @@ struct disk_part_tbl { struct rcu_head rcu_head; int len; struct hd_struct *last_lookup; + struct gendisk *disk; struct hd_struct *part[]; }; -- cgit v1.2.3-18-g5258 From 6362beea8914cbd4630ccde3617d944aeca2d48f Mon Sep 17 00:00:00 2001 From: Mike Miller Date: Tue, 19 Oct 2010 09:40:34 +0200 Subject: cciss: fix PCI IDs for new Smart Array controllers cciss: fix PCI IDs for new controllers This patch fixes the botched up PCI IDs of new controllers. Please consider this patch for inclusion. Signed-off-by: Mike Miller Signed-off-by: Jens Axboe --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36ca..fe7d72ce33a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -739,6 +739,7 @@ #define PCI_DEVICE_ID_HP_CISSC 0x3230 #define PCI_DEVICE_ID_HP_CISSD 0x3238 #define PCI_DEVICE_ID_HP_CISSE 0x323a +#define PCI_DEVICE_ID_HP_CISSF 0x323b #define PCI_DEVICE_ID_HP_ZX2_IOC 0x4031 #define PCI_VENDOR_ID_PCTECH 0x1042 -- cgit v1.2.3-18-g5258 From ebbf41df4aabb6d506fa18ea8cb4c2b4388a18b9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 19 Oct 2010 10:19:06 +0200 Subject: netfilter: ctnetlink: add expectation deletion events This patch allows to listen to events that inform about expectations destroyed. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_common.h | 1 + include/net/netfilter/nf_conntrack_expect.h | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 23a1a08578a..50cdc2559a5 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -98,6 +98,7 @@ enum ip_conntrack_events { enum ip_conntrack_expect_events { IPEXP_NEW, /* new expectation */ + IPEXP_DESTROY, /* destroyed expectation */ }; /* expectation flags */ diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 416b8384448..0f8a8c58753 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -82,7 +82,13 @@ struct nf_conntrack_expect * nf_ct_find_expectation(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple); -void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); +void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, + u32 pid, int report); +static inline void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) +{ + nf_ct_unlink_expect_report(exp, 0, 0); +} + void nf_ct_remove_expectations(struct nf_conn *ct); void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); void nf_ct_remove_userspace_expectations(void); -- cgit v1.2.3-18-g5258 From 714f095f74582764d629785f03b459a3d0503624 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Tue, 19 Oct 2010 10:38:48 +0200 Subject: ipvs: IPv6 tunnel mode IPv6 encapsulation uses a bad source address for the tunnel. i.e. VIP will be used as local-addr and encap. dst addr. Decapsulation will not accept this. Example LVS (eth1 2003::2:0:1/96, VIP 2003::2:0:100) (eth0 2003::1:0:1/96) RS (ethX 2003::1:0:5/96) tcpdump 2003::2:0:100 > 2003::1:0:5: IP6 (hlim 63, next-header TCP (6) payload length: 40) 2003::3:0:10.50991 > 2003::2:0:100.http: Flags [S], cksum 0x7312 (correct), seq 3006460279, win 5760, options [mss 1440,sackOK,TS val 1904932 ecr 0,nop,wscale 3], length 0 In Linux IPv6 impl. you can't have a tunnel with an any cast address receiving packets (I have not tried to interpret RFC 2473) To have receive capabilities the tunnel must have: - Local address set as multicast addr or an unicast addr - Remote address set as an unicast addr. - Loop back addres or Link local address are not allowed. This causes us to setup a tunnel in the Real Server with the LVS as the remote address, here you can't use the VIP address since it's used inside the tunnel. Solution Use outgoing interface IPv6 address (match against the destination). i.e. use ip6_route_output() to look up the route cache and then use ipv6_dev_get_saddr(...) to set the source address of the encapsulated packet. Additionally, cache the results in new destination fields: dst_cookie and dst_saddr and properly check the returned dst from ip6_route_output. We now add xfrm_lookup call only for the tunneling method where the source address is a local one. Signed-off-by:Hans Schillstrom Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 52fbe2308c3..6e8a6192e57 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -529,6 +529,10 @@ struct ip_vs_dest { spinlock_t dst_lock; /* lock of dst_cache */ struct dst_entry *dst_cache; /* destination cache entry */ u32 dst_rtos; /* RT_TOS(tos) for dst */ + u32 dst_cookie; +#ifdef CONFIG_IP_VS_IPV6 + struct in6_addr dst_saddr; +#endif /* for virtual service */ struct ip_vs_service *svc; /* service it belongs to */ -- cgit v1.2.3-18-g5258 From daaae6b010ac0f60c9c35e481589966f9f1fcc22 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 19 Oct 2010 11:28:15 +0200 Subject: workqueue: remove in_workqueue_context() Commit a25909a4 (lockdep: Add an in_workqueue_context() lockdep-based test function) added in_workqueue_context() but there hasn't been any in-kernel user and the lockdep annotation in workqueue is scheduled to change. Remove the unused function. Signed-off-by: Tejun Heo Cc: Paul E. McKenney --- include/linux/workqueue.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 03bbe903e5c..070bb7a8893 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -430,8 +430,4 @@ extern bool freeze_workqueues_busy(void); extern void thaw_workqueues(void); #endif /* CONFIG_FREEZER */ -#ifdef CONFIG_LOCKDEP -int in_workqueue_context(struct workqueue_struct *wq); -#endif - #endif -- cgit v1.2.3-18-g5258 From e6484930d7c73d324bccda7d43d131088da697b9 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 18 Oct 2010 18:04:39 +0000 Subject: net: allocate tx queues in register_netdevice This patch introduces netif_alloc_netdev_queues which is called from register_device instead of alloc_netdev_mq. This makes TX queue allocation symmetric with RX allocation. Also, queue locks allocation is done in netdev_init_one_queue. Change set_real_num_tx_queues to fail if requested number < 1 or greater than number of allocated queues. Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 14fbb04c459..880d5656582 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1696,8 +1696,8 @@ static inline int netif_is_multiqueue(const struct net_device *dev) return dev->num_tx_queues > 1; } -extern void netif_set_real_num_tx_queues(struct net_device *dev, - unsigned int txq); +extern int netif_set_real_num_tx_queues(struct net_device *dev, + unsigned int txq); #ifdef CONFIG_RPS extern int netif_set_real_num_rx_queues(struct net_device *dev, -- cgit v1.2.3-18-g5258 From 27b75c95f10d249574d9c4cb9dab878107faede8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Oct 2010 05:44:11 +0000 Subject: net: avoid RCU for NOCACHE dst There is no point using RCU for dst we allocate for a very short time (used once). Change dst_release() to take DST_NOCACHE into account, but also change skb_dst_set_noref() to force a refcount increment for such dst. This is a _huge_ gain, because we dont waste memory to store xx thousand of dsts. Instead of queueing them to RCU, we can free them instantly. CPU caches can stay hot, re-using same memory blocks to hold temporary dsts. Note : remove unneeded smp_mb__before_atomic_dec(); in dst_release(), since atomic_dec_return() implies a full memory barrier. Stress test, 160.000.000 udp frames sent, IP route cache disabled (DDOS). Before: real 0m38.091s user 0m13.189s sys 7m53.018s After: real 0m29.946s user 0m12.157s sys 7m40.605s For reference, if IP route cache was enabled : real 0m32.030s user 0m10.521s sys 8m15.243s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 05a358f1ba1..e6ba898de61 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -460,19 +460,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) skb->_skb_refdst = (unsigned long)dst; } -/** - * skb_dst_set_noref - sets skb dst, without a reference - * @skb: buffer - * @dst: dst entry - * - * Sets skb dst, assuming a reference was not taken on dst - * skb_dst_drop() should not dst_release() this dst - */ -static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) -{ - WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); - skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; -} +extern void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst); /** * skb_dst_is_noref - Test if skb dst isnt refcounted -- cgit v1.2.3-18-g5258 From c957ef2c59e952803766ddc22e89981ab534606f Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 20 Oct 2010 11:07:02 +0800 Subject: percpu: Introduce a read-mostly percpu API Add a new readmostly percpu section and API. This can be used to avoid dirtying data lines which are generally not written to, which is especially important for data which may be accessed by processors other than the one for which the percpu area belongs to. [ hpa: moved it *after* the page-aligned section, for obvious reasons. ] Signed-off-by: Shaohua Li LKML-Reference: <1287544022.4571.7.camel@sli10-conroe.sh.intel.com> Cc: Eric Dumazet Signed-off-by: H. Peter Anvin --- include/asm-generic/vmlinux.lds.h | 4 ++++ include/linux/percpu-defs.h | 9 +++++++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8a92a170fb7..d7e7b21511b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -677,7 +677,9 @@ - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ *(.data..percpu..first) \ + . = ALIGN(PAGE_SIZE); \ *(.data..percpu..page_aligned) \ + *(.data..percpu..readmostly) \ *(.data..percpu) \ *(.data..percpu..shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ @@ -703,6 +705,8 @@ VMLINUX_SYMBOL(__per_cpu_load) = .; \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ *(.data..percpu..first) \ + . = ALIGN(PAGE_SIZE); \ + *(.data..percpu..readmostly) \ *(.data..percpu..page_aligned) \ *(.data..percpu) \ *(.data..percpu..shared_aligned) \ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index ce2dc655cd1..27ef6b190ea 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -138,6 +138,15 @@ DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ __aligned(PAGE_SIZE) +/* + * Declaration/definition used for per-CPU variables that must be read mostly. + */ +#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "..readmostly") + +#define DEFINE_PER_CPU_READ_MOSTLY(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "..readmostly") + /* * Intermodule exports for per-CPU variables. sparse forgets about * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to -- cgit v1.2.3-18-g5258 From 3d14c5d2b6e15c21d8e5467dc62d33127c23a644 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Tue, 6 Apr 2010 15:14:15 -0700 Subject: ceph: factor out libceph from Ceph file system This factors out protocol and low-level storage parts of ceph into a separate libceph module living in net/ceph and include/linux/ceph. This is mostly a matter of moving files around. However, a few key pieces of the interface change as well: - ceph_client becomes ceph_fs_client and ceph_client, where the latter captures the mon and osd clients, and the fs_client gets the mds client and file system specific pieces. - Mount option parsing and debugfs setup is correspondingly broken into two pieces. - The mon client gets a generic handler callback for otherwise unknown messages (mds map, in this case). - The basic supported/required feature bits can be expanded (and are by ceph_fs_client). No functional change, aside from some subtle error handling cases that got cleaned up in the refactoring process. Signed-off-by: Sage Weil --- include/linux/ceph/auth.h | 92 +++++ include/linux/ceph/buffer.h | 39 +++ include/linux/ceph/ceph_debug.h | 38 +++ include/linux/ceph/ceph_frag.h | 109 ++++++ include/linux/ceph/ceph_fs.h | 728 ++++++++++++++++++++++++++++++++++++++++ include/linux/ceph/ceph_hash.h | 13 + include/linux/ceph/debugfs.h | 33 ++ include/linux/ceph/decode.h | 201 +++++++++++ include/linux/ceph/libceph.h | 249 ++++++++++++++ include/linux/ceph/mdsmap.h | 62 ++++ include/linux/ceph/messenger.h | 261 ++++++++++++++ include/linux/ceph/mon_client.h | 122 +++++++ include/linux/ceph/msgpool.h | 25 ++ include/linux/ceph/msgr.h | 175 ++++++++++ include/linux/ceph/osd_client.h | 234 +++++++++++++ include/linux/ceph/osdmap.h | 130 +++++++ include/linux/ceph/pagelist.h | 54 +++ include/linux/ceph/rados.h | 405 ++++++++++++++++++++++ include/linux/ceph/types.h | 29 ++ include/linux/crush/crush.h | 180 ++++++++++ include/linux/crush/hash.h | 17 + include/linux/crush/mapper.h | 20 ++ 22 files changed, 3216 insertions(+) create mode 100644 include/linux/ceph/auth.h create mode 100644 include/linux/ceph/buffer.h create mode 100644 include/linux/ceph/ceph_debug.h create mode 100644 include/linux/ceph/ceph_frag.h create mode 100644 include/linux/ceph/ceph_fs.h create mode 100644 include/linux/ceph/ceph_hash.h create mode 100644 include/linux/ceph/debugfs.h create mode 100644 include/linux/ceph/decode.h create mode 100644 include/linux/ceph/libceph.h create mode 100644 include/linux/ceph/mdsmap.h create mode 100644 include/linux/ceph/messenger.h create mode 100644 include/linux/ceph/mon_client.h create mode 100644 include/linux/ceph/msgpool.h create mode 100644 include/linux/ceph/msgr.h create mode 100644 include/linux/ceph/osd_client.h create mode 100644 include/linux/ceph/osdmap.h create mode 100644 include/linux/ceph/pagelist.h create mode 100644 include/linux/ceph/rados.h create mode 100644 include/linux/ceph/types.h create mode 100644 include/linux/crush/crush.h create mode 100644 include/linux/crush/hash.h create mode 100644 include/linux/crush/mapper.h (limited to 'include') diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h new file mode 100644 index 00000000000..7fff521d7eb --- /dev/null +++ b/include/linux/ceph/auth.h @@ -0,0 +1,92 @@ +#ifndef _FS_CEPH_AUTH_H +#define _FS_CEPH_AUTH_H + +#include +#include + +/* + * Abstract interface for communicating with the authenticate module. + * There is some handshake that takes place between us and the monitor + * to acquire the necessary keys. These are used to generate an + * 'authorizer' that we use when connecting to a service (mds, osd). + */ + +struct ceph_auth_client; +struct ceph_authorizer; + +struct ceph_auth_client_ops { + const char *name; + + /* + * true if we are authenticated and can connect to + * services. + */ + int (*is_authenticated)(struct ceph_auth_client *ac); + + /* + * true if we should (re)authenticate, e.g., when our tickets + * are getting old and crusty. + */ + int (*should_authenticate)(struct ceph_auth_client *ac); + + /* + * build requests and process replies during monitor + * handshake. if handle_reply returns -EAGAIN, we build + * another request. + */ + int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end); + int (*handle_reply)(struct ceph_auth_client *ac, int result, + void *buf, void *end); + + /* + * Create authorizer for connecting to a service, and verify + * the response to authenticate the service. + */ + int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type, + struct ceph_authorizer **a, + void **buf, size_t *len, + void **reply_buf, size_t *reply_len); + int (*verify_authorizer_reply)(struct ceph_auth_client *ac, + struct ceph_authorizer *a, size_t len); + void (*destroy_authorizer)(struct ceph_auth_client *ac, + struct ceph_authorizer *a); + void (*invalidate_authorizer)(struct ceph_auth_client *ac, + int peer_type); + + /* reset when we (re)connect to a monitor */ + void (*reset)(struct ceph_auth_client *ac); + + void (*destroy)(struct ceph_auth_client *ac); +}; + +struct ceph_auth_client { + u32 protocol; /* CEPH_AUTH_* */ + void *private; /* for use by protocol implementation */ + const struct ceph_auth_client_ops *ops; /* null iff protocol==0 */ + + bool negotiating; /* true if negotiating protocol */ + const char *name; /* entity name */ + u64 global_id; /* our unique id in system */ + const char *secret; /* our secret key */ + unsigned want_keys; /* which services we want */ +}; + +extern struct ceph_auth_client *ceph_auth_init(const char *name, + const char *secret); +extern void ceph_auth_destroy(struct ceph_auth_client *ac); + +extern void ceph_auth_reset(struct ceph_auth_client *ac); + +extern int ceph_auth_build_hello(struct ceph_auth_client *ac, + void *buf, size_t len); +extern int ceph_handle_auth_reply(struct ceph_auth_client *ac, + void *buf, size_t len, + void *reply_buf, size_t reply_len); +extern int ceph_entity_name_encode(const char *name, void **p, void *end); + +extern int ceph_build_auth(struct ceph_auth_client *ac, + void *msg_buf, size_t msg_len); + +extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac); + +#endif diff --git a/include/linux/ceph/buffer.h b/include/linux/ceph/buffer.h new file mode 100644 index 00000000000..58d19014068 --- /dev/null +++ b/include/linux/ceph/buffer.h @@ -0,0 +1,39 @@ +#ifndef __FS_CEPH_BUFFER_H +#define __FS_CEPH_BUFFER_H + +#include +#include +#include +#include +#include + +/* + * a simple reference counted buffer. + * + * use kmalloc for small sizes (<= one page), vmalloc for larger + * sizes. + */ +struct ceph_buffer { + struct kref kref; + struct kvec vec; + size_t alloc_len; + bool is_vmalloc; +}; + +extern struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp); +extern void ceph_buffer_release(struct kref *kref); + +static inline struct ceph_buffer *ceph_buffer_get(struct ceph_buffer *b) +{ + kref_get(&b->kref); + return b; +} + +static inline void ceph_buffer_put(struct ceph_buffer *b) +{ + kref_put(&b->kref, ceph_buffer_release); +} + +extern int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end); + +#endif diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h new file mode 100644 index 00000000000..aa2e19182d9 --- /dev/null +++ b/include/linux/ceph/ceph_debug.h @@ -0,0 +1,38 @@ +#ifndef _FS_CEPH_DEBUG_H +#define _FS_CEPH_DEBUG_H + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#ifdef CONFIG_CEPH_LIB_PRETTYDEBUG + +/* + * wrap pr_debug to include a filename:lineno prefix on each line. + * this incurs some overhead (kernel size and execution time) due to + * the extra function call at each call site. + */ + +# if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) +extern const char *ceph_file_part(const char *s, int len); +# define dout(fmt, ...) \ + pr_debug("%.*s %12.12s:%-4d : " fmt, \ + 8 - (int)sizeof(KBUILD_MODNAME), " ", \ + ceph_file_part(__FILE__, sizeof(__FILE__)), \ + __LINE__, ##__VA_ARGS__) +# else +/* faux printk call just to see any compiler warnings. */ +# define dout(fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ + } while (0) +# endif + +#else + +/* + * or, just wrap pr_debug + */ +# define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__) + +#endif + +#endif diff --git a/include/linux/ceph/ceph_frag.h b/include/linux/ceph/ceph_frag.h new file mode 100644 index 00000000000..5babb8e9535 --- /dev/null +++ b/include/linux/ceph/ceph_frag.h @@ -0,0 +1,109 @@ +#ifndef FS_CEPH_FRAG_H +#define FS_CEPH_FRAG_H + +/* + * "Frags" are a way to describe a subset of a 32-bit number space, + * using a mask and a value to match against that mask. Any given frag + * (subset of the number space) can be partitioned into 2^n sub-frags. + * + * Frags are encoded into a 32-bit word: + * 8 upper bits = "bits" + * 24 lower bits = "value" + * (We could go to 5+27 bits, but who cares.) + * + * We use the _most_ significant bits of the 24 bit value. This makes + * values logically sort. + * + * Unfortunately, because the "bits" field is still in the high bits, we + * can't sort encoded frags numerically. However, it does allow you + * to feed encoded frags as values into frag_contains_value. + */ +static inline __u32 ceph_frag_make(__u32 b, __u32 v) +{ + return (b << 24) | + (v & (0xffffffu << (24-b)) & 0xffffffu); +} +static inline __u32 ceph_frag_bits(__u32 f) +{ + return f >> 24; +} +static inline __u32 ceph_frag_value(__u32 f) +{ + return f & 0xffffffu; +} +static inline __u32 ceph_frag_mask(__u32 f) +{ + return (0xffffffu << (24-ceph_frag_bits(f))) & 0xffffffu; +} +static inline __u32 ceph_frag_mask_shift(__u32 f) +{ + return 24 - ceph_frag_bits(f); +} + +static inline int ceph_frag_contains_value(__u32 f, __u32 v) +{ + return (v & ceph_frag_mask(f)) == ceph_frag_value(f); +} +static inline int ceph_frag_contains_frag(__u32 f, __u32 sub) +{ + /* is sub as specific as us, and contained by us? */ + return ceph_frag_bits(sub) >= ceph_frag_bits(f) && + (ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f); +} + +static inline __u32 ceph_frag_parent(__u32 f) +{ + return ceph_frag_make(ceph_frag_bits(f) - 1, + ceph_frag_value(f) & (ceph_frag_mask(f) << 1)); +} +static inline int ceph_frag_is_left_child(__u32 f) +{ + return ceph_frag_bits(f) > 0 && + (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 0; +} +static inline int ceph_frag_is_right_child(__u32 f) +{ + return ceph_frag_bits(f) > 0 && + (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 1; +} +static inline __u32 ceph_frag_sibling(__u32 f) +{ + return ceph_frag_make(ceph_frag_bits(f), + ceph_frag_value(f) ^ (0x1000000 >> ceph_frag_bits(f))); +} +static inline __u32 ceph_frag_left_child(__u32 f) +{ + return ceph_frag_make(ceph_frag_bits(f)+1, ceph_frag_value(f)); +} +static inline __u32 ceph_frag_right_child(__u32 f) +{ + return ceph_frag_make(ceph_frag_bits(f)+1, + ceph_frag_value(f) | (0x1000000 >> (1+ceph_frag_bits(f)))); +} +static inline __u32 ceph_frag_make_child(__u32 f, int by, int i) +{ + int newbits = ceph_frag_bits(f) + by; + return ceph_frag_make(newbits, + ceph_frag_value(f) | (i << (24 - newbits))); +} +static inline int ceph_frag_is_leftmost(__u32 f) +{ + return ceph_frag_value(f) == 0; +} +static inline int ceph_frag_is_rightmost(__u32 f) +{ + return ceph_frag_value(f) == ceph_frag_mask(f); +} +static inline __u32 ceph_frag_next(__u32 f) +{ + return ceph_frag_make(ceph_frag_bits(f), + ceph_frag_value(f) + (0x1000000 >> ceph_frag_bits(f))); +} + +/* + * comparator to sort frags logically, as when traversing the + * number space in ascending order... + */ +int ceph_frag_compare(__u32 a, __u32 b); + +#endif diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h new file mode 100644 index 00000000000..d5619ac8671 --- /dev/null +++ b/include/linux/ceph/ceph_fs.h @@ -0,0 +1,728 @@ +/* + * ceph_fs.h - Ceph constants and data types to share between kernel and + * user space. + * + * Most types in this file are defined as little-endian, and are + * primarily intended to describe data structures that pass over the + * wire or that are stored on disk. + * + * LGPL2 + */ + +#ifndef CEPH_FS_H +#define CEPH_FS_H + +#include "msgr.h" +#include "rados.h" + +/* + * subprotocol versions. when specific messages types or high-level + * protocols change, bump the affected components. we keep rev + * internal cluster protocols separately from the public, + * client-facing protocol. + */ +#define CEPH_OSD_PROTOCOL 8 /* cluster internal */ +#define CEPH_MDS_PROTOCOL 12 /* cluster internal */ +#define CEPH_MON_PROTOCOL 5 /* cluster internal */ +#define CEPH_OSDC_PROTOCOL 24 /* server/client */ +#define CEPH_MDSC_PROTOCOL 32 /* server/client */ +#define CEPH_MONC_PROTOCOL 15 /* server/client */ + + +#define CEPH_INO_ROOT 1 +#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ + +/* arbitrary limit on max # of monitors (cluster of 3 is typical) */ +#define CEPH_MAX_MON 31 + + +/* + * feature bits + */ +#define CEPH_FEATURE_UID (1<<0) +#define CEPH_FEATURE_NOSRCADDR (1<<1) +#define CEPH_FEATURE_MONCLOCKCHECK (1<<2) +#define CEPH_FEATURE_FLOCK (1<<3) + + +/* + * ceph_file_layout - describe data layout for a file/inode + */ +struct ceph_file_layout { + /* file -> object mapping */ + __le32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple + of page size. */ + __le32 fl_stripe_count; /* over this many objects */ + __le32 fl_object_size; /* until objects are this big, then move to + new objects */ + __le32 fl_cas_hash; /* 0 = none; 1 = sha256 */ + + /* pg -> disk layout */ + __le32 fl_object_stripe_unit; /* for per-object parity, if any */ + + /* object -> pg layout */ + __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */ + __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ +} __attribute__ ((packed)); + +#define CEPH_MIN_STRIPE_UNIT 65536 + +int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); + + +/* crypto algorithms */ +#define CEPH_CRYPTO_NONE 0x0 +#define CEPH_CRYPTO_AES 0x1 + +#define CEPH_AES_IV "cephsageyudagreg" + +/* security/authentication protocols */ +#define CEPH_AUTH_UNKNOWN 0x0 +#define CEPH_AUTH_NONE 0x1 +#define CEPH_AUTH_CEPHX 0x2 + +#define CEPH_AUTH_UID_DEFAULT ((__u64) -1) + + +/********************************************* + * message layer + */ + +/* + * message types + */ + +/* misc */ +#define CEPH_MSG_SHUTDOWN 1 +#define CEPH_MSG_PING 2 + +/* client <-> monitor */ +#define CEPH_MSG_MON_MAP 4 +#define CEPH_MSG_MON_GET_MAP 5 +#define CEPH_MSG_STATFS 13 +#define CEPH_MSG_STATFS_REPLY 14 +#define CEPH_MSG_MON_SUBSCRIBE 15 +#define CEPH_MSG_MON_SUBSCRIBE_ACK 16 +#define CEPH_MSG_AUTH 17 +#define CEPH_MSG_AUTH_REPLY 18 + +/* client <-> mds */ +#define CEPH_MSG_MDS_MAP 21 + +#define CEPH_MSG_CLIENT_SESSION 22 +#define CEPH_MSG_CLIENT_RECONNECT 23 + +#define CEPH_MSG_CLIENT_REQUEST 24 +#define CEPH_MSG_CLIENT_REQUEST_FORWARD 25 +#define CEPH_MSG_CLIENT_REPLY 26 +#define CEPH_MSG_CLIENT_CAPS 0x310 +#define CEPH_MSG_CLIENT_LEASE 0x311 +#define CEPH_MSG_CLIENT_SNAP 0x312 +#define CEPH_MSG_CLIENT_CAPRELEASE 0x313 + +/* pool ops */ +#define CEPH_MSG_POOLOP_REPLY 48 +#define CEPH_MSG_POOLOP 49 + + +/* osd */ +#define CEPH_MSG_OSD_MAP 41 +#define CEPH_MSG_OSD_OP 42 +#define CEPH_MSG_OSD_OPREPLY 43 + +/* pool operations */ +enum { + POOL_OP_CREATE = 0x01, + POOL_OP_DELETE = 0x02, + POOL_OP_AUID_CHANGE = 0x03, + POOL_OP_CREATE_SNAP = 0x11, + POOL_OP_DELETE_SNAP = 0x12, + POOL_OP_CREATE_UNMANAGED_SNAP = 0x21, + POOL_OP_DELETE_UNMANAGED_SNAP = 0x22, +}; + +struct ceph_mon_request_header { + __le64 have_version; + __le16 session_mon; + __le64 session_mon_tid; +} __attribute__ ((packed)); + +struct ceph_mon_statfs { + struct ceph_mon_request_header monhdr; + struct ceph_fsid fsid; +} __attribute__ ((packed)); + +struct ceph_statfs { + __le64 kb, kb_used, kb_avail; + __le64 num_objects; +} __attribute__ ((packed)); + +struct ceph_mon_statfs_reply { + struct ceph_fsid fsid; + __le64 version; + struct ceph_statfs st; +} __attribute__ ((packed)); + +const char *ceph_pool_op_name(int op); + +struct ceph_mon_poolop { + struct ceph_mon_request_header monhdr; + struct ceph_fsid fsid; + __le32 pool; + __le32 op; + __le64 auid; + __le64 snapid; + __le32 name_len; +} __attribute__ ((packed)); + +struct ceph_mon_poolop_reply { + struct ceph_mon_request_header monhdr; + struct ceph_fsid fsid; + __le32 reply_code; + __le32 epoch; + char has_data; + char data[0]; +} __attribute__ ((packed)); + +struct ceph_mon_unmanaged_snap { + __le64 snapid; +} __attribute__ ((packed)); + +struct ceph_osd_getmap { + struct ceph_mon_request_header monhdr; + struct ceph_fsid fsid; + __le32 start; +} __attribute__ ((packed)); + +struct ceph_mds_getmap { + struct ceph_mon_request_header monhdr; + struct ceph_fsid fsid; +} __attribute__ ((packed)); + +struct ceph_client_mount { + struct ceph_mon_request_header monhdr; +} __attribute__ ((packed)); + +struct ceph_mon_subscribe_item { + __le64 have_version; __le64 have; + __u8 onetime; +} __attribute__ ((packed)); + +struct ceph_mon_subscribe_ack { + __le32 duration; /* seconds */ + struct ceph_fsid fsid; +} __attribute__ ((packed)); + +/* + * mds states + * > 0 -> in + * <= 0 -> out + */ +#define CEPH_MDS_STATE_DNE 0 /* down, does not exist. */ +#define CEPH_MDS_STATE_STOPPED -1 /* down, once existed, but no subtrees. + empty log. */ +#define CEPH_MDS_STATE_BOOT -4 /* up, boot announcement. */ +#define CEPH_MDS_STATE_STANDBY -5 /* up, idle. waiting for assignment. */ +#define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */ +#define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */ +#define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */ + +#define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */ +#define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed + operations (import, rename, etc.) */ +#define CEPH_MDS_STATE_RECONNECT 10 /* up, reconnect to clients */ +#define CEPH_MDS_STATE_REJOIN 11 /* up, rejoining distributed cache */ +#define CEPH_MDS_STATE_CLIENTREPLAY 12 /* up, replaying client operations */ +#define CEPH_MDS_STATE_ACTIVE 13 /* up, active */ +#define CEPH_MDS_STATE_STOPPING 14 /* up, but exporting metadata */ + +extern const char *ceph_mds_state_name(int s); + + +/* + * metadata lock types. + * - these are bitmasks.. we can compose them + * - they also define the lock ordering by the MDS + * - a few of these are internal to the mds + */ +#define CEPH_LOCK_DVERSION 1 +#define CEPH_LOCK_DN 2 +#define CEPH_LOCK_ISNAP 16 +#define CEPH_LOCK_IVERSION 32 /* mds internal */ +#define CEPH_LOCK_IFILE 64 +#define CEPH_LOCK_IAUTH 128 +#define CEPH_LOCK_ILINK 256 +#define CEPH_LOCK_IDFT 512 /* dir frag tree */ +#define CEPH_LOCK_INEST 1024 /* mds internal */ +#define CEPH_LOCK_IXATTR 2048 +#define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ +#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ + +/* client_session ops */ +enum { + CEPH_SESSION_REQUEST_OPEN, + CEPH_SESSION_OPEN, + CEPH_SESSION_REQUEST_CLOSE, + CEPH_SESSION_CLOSE, + CEPH_SESSION_REQUEST_RENEWCAPS, + CEPH_SESSION_RENEWCAPS, + CEPH_SESSION_STALE, + CEPH_SESSION_RECALL_STATE, +}; + +extern const char *ceph_session_op_name(int op); + +struct ceph_mds_session_head { + __le32 op; + __le64 seq; + struct ceph_timespec stamp; + __le32 max_caps, max_leases; +} __attribute__ ((packed)); + +/* client_request */ +/* + * metadata ops. + * & 0x001000 -> write op + * & 0x010000 -> follow symlink (e.g. stat(), not lstat()). + & & 0x100000 -> use weird ino/path trace + */ +#define CEPH_MDS_OP_WRITE 0x001000 +enum { + CEPH_MDS_OP_LOOKUP = 0x00100, + CEPH_MDS_OP_GETATTR = 0x00101, + CEPH_MDS_OP_LOOKUPHASH = 0x00102, + CEPH_MDS_OP_LOOKUPPARENT = 0x00103, + + CEPH_MDS_OP_SETXATTR = 0x01105, + CEPH_MDS_OP_RMXATTR = 0x01106, + CEPH_MDS_OP_SETLAYOUT = 0x01107, + CEPH_MDS_OP_SETATTR = 0x01108, + CEPH_MDS_OP_SETFILELOCK= 0x01109, + CEPH_MDS_OP_GETFILELOCK= 0x00110, + + CEPH_MDS_OP_MKNOD = 0x01201, + CEPH_MDS_OP_LINK = 0x01202, + CEPH_MDS_OP_UNLINK = 0x01203, + CEPH_MDS_OP_RENAME = 0x01204, + CEPH_MDS_OP_MKDIR = 0x01220, + CEPH_MDS_OP_RMDIR = 0x01221, + CEPH_MDS_OP_SYMLINK = 0x01222, + + CEPH_MDS_OP_CREATE = 0x01301, + CEPH_MDS_OP_OPEN = 0x00302, + CEPH_MDS_OP_READDIR = 0x00305, + + CEPH_MDS_OP_LOOKUPSNAP = 0x00400, + CEPH_MDS_OP_MKSNAP = 0x01400, + CEPH_MDS_OP_RMSNAP = 0x01401, + CEPH_MDS_OP_LSSNAP = 0x00402, +}; + +extern const char *ceph_mds_op_name(int op); + + +#define CEPH_SETATTR_MODE 1 +#define CEPH_SETATTR_UID 2 +#define CEPH_SETATTR_GID 4 +#define CEPH_SETATTR_MTIME 8 +#define CEPH_SETATTR_ATIME 16 +#define CEPH_SETATTR_SIZE 32 +#define CEPH_SETATTR_CTIME 64 + +union ceph_mds_request_args { + struct { + __le32 mask; /* CEPH_CAP_* */ + } __attribute__ ((packed)) getattr; + struct { + __le32 mode; + __le32 uid; + __le32 gid; + struct ceph_timespec mtime; + struct ceph_timespec atime; + __le64 size, old_size; /* old_size needed by truncate */ + __le32 mask; /* CEPH_SETATTR_* */ + } __attribute__ ((packed)) setattr; + struct { + __le32 frag; /* which dir fragment */ + __le32 max_entries; /* how many dentries to grab */ + __le32 max_bytes; + } __attribute__ ((packed)) readdir; + struct { + __le32 mode; + __le32 rdev; + } __attribute__ ((packed)) mknod; + struct { + __le32 mode; + } __attribute__ ((packed)) mkdir; + struct { + __le32 flags; + __le32 mode; + __le32 stripe_unit; /* layout for newly created file */ + __le32 stripe_count; /* ... */ + __le32 object_size; + __le32 file_replication; + __le32 preferred; + } __attribute__ ((packed)) open; + struct { + __le32 flags; + } __attribute__ ((packed)) setxattr; + struct { + struct ceph_file_layout layout; + } __attribute__ ((packed)) setlayout; + struct { + __u8 rule; /* currently fcntl or flock */ + __u8 type; /* shared, exclusive, remove*/ + __le64 pid; /* process id requesting the lock */ + __le64 pid_namespace; + __le64 start; /* initial location to lock */ + __le64 length; /* num bytes to lock from start */ + __u8 wait; /* will caller wait for lock to become available? */ + } __attribute__ ((packed)) filelock_change; +} __attribute__ ((packed)); + +#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ +#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ + +struct ceph_mds_request_head { + __le64 oldest_client_tid; + __le32 mdsmap_epoch; /* on client */ + __le32 flags; /* CEPH_MDS_FLAG_* */ + __u8 num_retry, num_fwd; /* count retry, fwd attempts */ + __le16 num_releases; /* # include cap/lease release records */ + __le32 op; /* mds op code */ + __le32 caller_uid, caller_gid; + __le64 ino; /* use this ino for openc, mkdir, mknod, + etc. (if replaying) */ + union ceph_mds_request_args args; +} __attribute__ ((packed)); + +/* cap/lease release record */ +struct ceph_mds_request_release { + __le64 ino, cap_id; /* ino and unique cap id */ + __le32 caps, wanted; /* new issued, wanted */ + __le32 seq, issue_seq, mseq; + __le32 dname_seq; /* if releasing a dentry lease, a */ + __le32 dname_len; /* string follows. */ +} __attribute__ ((packed)); + +/* client reply */ +struct ceph_mds_reply_head { + __le32 op; + __le32 result; + __le32 mdsmap_epoch; + __u8 safe; /* true if committed to disk */ + __u8 is_dentry, is_target; /* true if dentry, target inode records + are included with reply */ +} __attribute__ ((packed)); + +/* one for each node split */ +struct ceph_frag_tree_split { + __le32 frag; /* this frag splits... */ + __le32 by; /* ...by this many bits */ +} __attribute__ ((packed)); + +struct ceph_frag_tree_head { + __le32 nsplits; /* num ceph_frag_tree_split records */ + struct ceph_frag_tree_split splits[]; +} __attribute__ ((packed)); + +/* capability issue, for bundling with mds reply */ +struct ceph_mds_reply_cap { + __le32 caps, wanted; /* caps issued, wanted */ + __le64 cap_id; + __le32 seq, mseq; + __le64 realm; /* snap realm */ + __u8 flags; /* CEPH_CAP_FLAG_* */ +} __attribute__ ((packed)); + +#define CEPH_CAP_FLAG_AUTH 1 /* cap is issued by auth mds */ + +/* inode record, for bundling with mds reply */ +struct ceph_mds_reply_inode { + __le64 ino; + __le64 snapid; + __le32 rdev; + __le64 version; /* inode version */ + __le64 xattr_version; /* version for xattr blob */ + struct ceph_mds_reply_cap cap; /* caps issued for this inode */ + struct ceph_file_layout layout; + struct ceph_timespec ctime, mtime, atime; + __le32 time_warp_seq; + __le64 size, max_size, truncate_size; + __le32 truncate_seq; + __le32 mode, uid, gid; + __le32 nlink; + __le64 files, subdirs, rbytes, rfiles, rsubdirs; /* dir stats */ + struct ceph_timespec rctime; + struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */ +} __attribute__ ((packed)); +/* followed by frag array, then symlink string, then xattr blob */ + +/* reply_lease follows dname, and reply_inode */ +struct ceph_mds_reply_lease { + __le16 mask; /* lease type(s) */ + __le32 duration_ms; /* lease duration */ + __le32 seq; +} __attribute__ ((packed)); + +struct ceph_mds_reply_dirfrag { + __le32 frag; /* fragment */ + __le32 auth; /* auth mds, if this is a delegation point */ + __le32 ndist; /* number of mds' this is replicated on */ + __le32 dist[]; +} __attribute__ ((packed)); + +#define CEPH_LOCK_FCNTL 1 +#define CEPH_LOCK_FLOCK 2 + +#define CEPH_LOCK_SHARED 1 +#define CEPH_LOCK_EXCL 2 +#define CEPH_LOCK_UNLOCK 4 + +struct ceph_filelock { + __le64 start;/* file offset to start lock at */ + __le64 length; /* num bytes to lock; 0 for all following start */ + __le64 client; /* which client holds the lock */ + __le64 pid; /* process id holding the lock on the client */ + __le64 pid_namespace; + __u8 type; /* shared lock, exclusive lock, or unlock */ +} __attribute__ ((packed)); + + +/* file access modes */ +#define CEPH_FILE_MODE_PIN 0 +#define CEPH_FILE_MODE_RD 1 +#define CEPH_FILE_MODE_WR 2 +#define CEPH_FILE_MODE_RDWR 3 /* RD | WR */ +#define CEPH_FILE_MODE_LAZY 4 /* lazy io */ +#define CEPH_FILE_MODE_NUM 8 /* bc these are bit fields.. mostly */ + +int ceph_flags_to_mode(int flags); + + +/* capability bits */ +#define CEPH_CAP_PIN 1 /* no specific capabilities beyond the pin */ + +/* generic cap bits */ +#define CEPH_CAP_GSHARED 1 /* client can reads */ +#define CEPH_CAP_GEXCL 2 /* client can read and update */ +#define CEPH_CAP_GCACHE 4 /* (file) client can cache reads */ +#define CEPH_CAP_GRD 8 /* (file) client can read */ +#define CEPH_CAP_GWR 16 /* (file) client can write */ +#define CEPH_CAP_GBUFFER 32 /* (file) client can buffer writes */ +#define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */ +#define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */ + +/* per-lock shift */ +#define CEPH_CAP_SAUTH 2 +#define CEPH_CAP_SLINK 4 +#define CEPH_CAP_SXATTR 6 +#define CEPH_CAP_SFILE 8 +#define CEPH_CAP_SFLOCK 20 + +#define CEPH_CAP_BITS 22 + +/* composed values */ +#define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) +#define CEPH_CAP_AUTH_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SAUTH) +#define CEPH_CAP_LINK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SLINK) +#define CEPH_CAP_LINK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SLINK) +#define CEPH_CAP_XATTR_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SXATTR) +#define CEPH_CAP_XATTR_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SXATTR) +#define CEPH_CAP_FILE(x) (x << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_CACHE (CEPH_CAP_GCACHE << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_RD (CEPH_CAP_GRD << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_WR (CEPH_CAP_GWR << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE) +#define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE) +#define CEPH_CAP_FLOCK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFLOCK) +#define CEPH_CAP_FLOCK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFLOCK) + + +/* cap masks (for getattr) */ +#define CEPH_STAT_CAP_INODE CEPH_CAP_PIN +#define CEPH_STAT_CAP_TYPE CEPH_CAP_PIN /* mode >> 12 */ +#define CEPH_STAT_CAP_SYMLINK CEPH_CAP_PIN +#define CEPH_STAT_CAP_UID CEPH_CAP_AUTH_SHARED +#define CEPH_STAT_CAP_GID CEPH_CAP_AUTH_SHARED +#define CEPH_STAT_CAP_MODE CEPH_CAP_AUTH_SHARED +#define CEPH_STAT_CAP_NLINK CEPH_CAP_LINK_SHARED +#define CEPH_STAT_CAP_LAYOUT CEPH_CAP_FILE_SHARED +#define CEPH_STAT_CAP_MTIME CEPH_CAP_FILE_SHARED +#define CEPH_STAT_CAP_SIZE CEPH_CAP_FILE_SHARED +#define CEPH_STAT_CAP_ATIME CEPH_CAP_FILE_SHARED /* fixme */ +#define CEPH_STAT_CAP_XATTR CEPH_CAP_XATTR_SHARED +#define CEPH_STAT_CAP_INODE_ALL (CEPH_CAP_PIN | \ + CEPH_CAP_AUTH_SHARED | \ + CEPH_CAP_LINK_SHARED | \ + CEPH_CAP_FILE_SHARED | \ + CEPH_CAP_XATTR_SHARED) + +#define CEPH_CAP_ANY_SHARED (CEPH_CAP_AUTH_SHARED | \ + CEPH_CAP_LINK_SHARED | \ + CEPH_CAP_XATTR_SHARED | \ + CEPH_CAP_FILE_SHARED) +#define CEPH_CAP_ANY_RD (CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_RD | \ + CEPH_CAP_FILE_CACHE) + +#define CEPH_CAP_ANY_EXCL (CEPH_CAP_AUTH_EXCL | \ + CEPH_CAP_LINK_EXCL | \ + CEPH_CAP_XATTR_EXCL | \ + CEPH_CAP_FILE_EXCL) +#define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | \ + CEPH_CAP_FILE_EXCL) +#define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) +#define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \ + CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \ + CEPH_CAP_PIN) + +#define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \ + CEPH_LOCK_IXATTR) + +int ceph_caps_for_mode(int mode); + +enum { + CEPH_CAP_OP_GRANT, /* mds->client grant */ + CEPH_CAP_OP_REVOKE, /* mds->client revoke */ + CEPH_CAP_OP_TRUNC, /* mds->client trunc notify */ + CEPH_CAP_OP_EXPORT, /* mds has exported the cap */ + CEPH_CAP_OP_IMPORT, /* mds has imported the cap */ + CEPH_CAP_OP_UPDATE, /* client->mds update */ + CEPH_CAP_OP_DROP, /* client->mds drop cap bits */ + CEPH_CAP_OP_FLUSH, /* client->mds cap writeback */ + CEPH_CAP_OP_FLUSH_ACK, /* mds->client flushed */ + CEPH_CAP_OP_FLUSHSNAP, /* client->mds flush snapped metadata */ + CEPH_CAP_OP_FLUSHSNAP_ACK, /* mds->client flushed snapped metadata */ + CEPH_CAP_OP_RELEASE, /* client->mds release (clean) cap */ + CEPH_CAP_OP_RENEW, /* client->mds renewal request */ +}; + +extern const char *ceph_cap_op_name(int op); + +/* + * caps message, used for capability callbacks, acks, requests, etc. + */ +struct ceph_mds_caps { + __le32 op; /* CEPH_CAP_OP_* */ + __le64 ino, realm; + __le64 cap_id; + __le32 seq, issue_seq; + __le32 caps, wanted, dirty; /* latest issued/wanted/dirty */ + __le32 migrate_seq; + __le64 snap_follows; + __le32 snap_trace_len; + + /* authlock */ + __le32 uid, gid, mode; + + /* linklock */ + __le32 nlink; + + /* xattrlock */ + __le32 xattr_len; + __le64 xattr_version; + + /* filelock */ + __le64 size, max_size, truncate_size; + __le32 truncate_seq; + struct ceph_timespec mtime, atime, ctime; + struct ceph_file_layout layout; + __le32 time_warp_seq; +} __attribute__ ((packed)); + +/* cap release msg head */ +struct ceph_mds_cap_release { + __le32 num; /* number of cap_items that follow */ +} __attribute__ ((packed)); + +struct ceph_mds_cap_item { + __le64 ino; + __le64 cap_id; + __le32 migrate_seq, seq; +} __attribute__ ((packed)); + +#define CEPH_MDS_LEASE_REVOKE 1 /* mds -> client */ +#define CEPH_MDS_LEASE_RELEASE 2 /* client -> mds */ +#define CEPH_MDS_LEASE_RENEW 3 /* client <-> mds */ +#define CEPH_MDS_LEASE_REVOKE_ACK 4 /* client -> mds */ + +extern const char *ceph_lease_op_name(int o); + +/* lease msg header */ +struct ceph_mds_lease { + __u8 action; /* CEPH_MDS_LEASE_* */ + __le16 mask; /* which lease */ + __le64 ino; + __le64 first, last; /* snap range */ + __le32 seq; + __le32 duration_ms; /* duration of renewal */ +} __attribute__ ((packed)); +/* followed by a __le32+string for dname */ + +/* client reconnect */ +struct ceph_mds_cap_reconnect { + __le64 cap_id; + __le32 wanted; + __le32 issued; + __le64 snaprealm; + __le64 pathbase; /* base ino for our path to this ino */ + __le32 flock_len; /* size of flock state blob, if any */ +} __attribute__ ((packed)); +/* followed by flock blob */ + +struct ceph_mds_cap_reconnect_v1 { + __le64 cap_id; + __le32 wanted; + __le32 issued; + __le64 size; + struct ceph_timespec mtime, atime; + __le64 snaprealm; + __le64 pathbase; /* base ino for our path to this ino */ +} __attribute__ ((packed)); + +struct ceph_mds_snaprealm_reconnect { + __le64 ino; /* snap realm base */ + __le64 seq; /* snap seq for this snap realm */ + __le64 parent; /* parent realm */ +} __attribute__ ((packed)); + +/* + * snaps + */ +enum { + CEPH_SNAP_OP_UPDATE, /* CREATE or DESTROY */ + CEPH_SNAP_OP_CREATE, + CEPH_SNAP_OP_DESTROY, + CEPH_SNAP_OP_SPLIT, +}; + +extern const char *ceph_snap_op_name(int o); + +/* snap msg header */ +struct ceph_mds_snap_head { + __le32 op; /* CEPH_SNAP_OP_* */ + __le64 split; /* ino to split off, if any */ + __le32 num_split_inos; /* # inos belonging to new child realm */ + __le32 num_split_realms; /* # child realms udner new child realm */ + __le32 trace_len; /* size of snap trace blob */ +} __attribute__ ((packed)); +/* followed by split ino list, then split realms, then the trace blob */ + +/* + * encode info about a snaprealm, as viewed by a client + */ +struct ceph_mds_snap_realm { + __le64 ino; /* ino */ + __le64 created; /* snap: when created */ + __le64 parent; /* ino: parent realm */ + __le64 parent_since; /* snap: same parent since */ + __le64 seq; /* snap: version */ + __le32 num_snaps; + __le32 num_prior_parent_snaps; +} __attribute__ ((packed)); +/* followed by my snap list, then prior parent snap list */ + +#endif diff --git a/include/linux/ceph/ceph_hash.h b/include/linux/ceph/ceph_hash.h new file mode 100644 index 00000000000..d099c3f9023 --- /dev/null +++ b/include/linux/ceph/ceph_hash.h @@ -0,0 +1,13 @@ +#ifndef FS_CEPH_HASH_H +#define FS_CEPH_HASH_H + +#define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */ +#define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */ + +extern unsigned ceph_str_hash_linux(const char *s, unsigned len); +extern unsigned ceph_str_hash_rjenkins(const char *s, unsigned len); + +extern unsigned ceph_str_hash(int type, const char *s, unsigned len); +extern const char *ceph_str_hash_name(int type); + +#endif diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h new file mode 100644 index 00000000000..2a79702e092 --- /dev/null +++ b/include/linux/ceph/debugfs.h @@ -0,0 +1,33 @@ +#ifndef _FS_CEPH_DEBUGFS_H +#define _FS_CEPH_DEBUGFS_H + +#include "ceph_debug.h" +#include "types.h" + +#define CEPH_DEFINE_SHOW_FUNC(name) \ +static int name##_open(struct inode *inode, struct file *file) \ +{ \ + struct seq_file *sf; \ + int ret; \ + \ + ret = single_open(file, name, NULL); \ + sf = file->private_data; \ + sf->private = inode->i_private; \ + return ret; \ +} \ + \ +static const struct file_operations name##_fops = { \ + .open = name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +}; + +/* debugfs.c */ +extern int ceph_debugfs_init(void); +extern void ceph_debugfs_cleanup(void); +extern int ceph_debugfs_client_init(struct ceph_client *client); +extern void ceph_debugfs_client_cleanup(struct ceph_client *client); + +#endif + diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h new file mode 100644 index 00000000000..c5b6939fb32 --- /dev/null +++ b/include/linux/ceph/decode.h @@ -0,0 +1,201 @@ +#ifndef __CEPH_DECODE_H +#define __CEPH_DECODE_H + +#include +#include + +#include "types.h" + +/* + * in all cases, + * void **p pointer to position pointer + * void *end pointer to end of buffer (last byte + 1) + */ + +static inline u64 ceph_decode_64(void **p) +{ + u64 v = get_unaligned_le64(*p); + *p += sizeof(u64); + return v; +} +static inline u32 ceph_decode_32(void **p) +{ + u32 v = get_unaligned_le32(*p); + *p += sizeof(u32); + return v; +} +static inline u16 ceph_decode_16(void **p) +{ + u16 v = get_unaligned_le16(*p); + *p += sizeof(u16); + return v; +} +static inline u8 ceph_decode_8(void **p) +{ + u8 v = *(u8 *)*p; + (*p)++; + return v; +} +static inline void ceph_decode_copy(void **p, void *pv, size_t n) +{ + memcpy(pv, *p, n); + *p += n; +} + +/* + * bounds check input. + */ +#define ceph_decode_need(p, end, n, bad) \ + do { \ + if (unlikely(*(p) + (n) > (end))) \ + goto bad; \ + } while (0) + +#define ceph_decode_64_safe(p, end, v, bad) \ + do { \ + ceph_decode_need(p, end, sizeof(u64), bad); \ + v = ceph_decode_64(p); \ + } while (0) +#define ceph_decode_32_safe(p, end, v, bad) \ + do { \ + ceph_decode_need(p, end, sizeof(u32), bad); \ + v = ceph_decode_32(p); \ + } while (0) +#define ceph_decode_16_safe(p, end, v, bad) \ + do { \ + ceph_decode_need(p, end, sizeof(u16), bad); \ + v = ceph_decode_16(p); \ + } while (0) +#define ceph_decode_8_safe(p, end, v, bad) \ + do { \ + ceph_decode_need(p, end, sizeof(u8), bad); \ + v = ceph_decode_8(p); \ + } while (0) + +#define ceph_decode_copy_safe(p, end, pv, n, bad) \ + do { \ + ceph_decode_need(p, end, n, bad); \ + ceph_decode_copy(p, pv, n); \ + } while (0) + +/* + * struct ceph_timespec <-> struct timespec + */ +static inline void ceph_decode_timespec(struct timespec *ts, + const struct ceph_timespec *tv) +{ + ts->tv_sec = le32_to_cpu(tv->tv_sec); + ts->tv_nsec = le32_to_cpu(tv->tv_nsec); +} +static inline void ceph_encode_timespec(struct ceph_timespec *tv, + const struct timespec *ts) +{ + tv->tv_sec = cpu_to_le32(ts->tv_sec); + tv->tv_nsec = cpu_to_le32(ts->tv_nsec); +} + +/* + * sockaddr_storage <-> ceph_sockaddr + */ +static inline void ceph_encode_addr(struct ceph_entity_addr *a) +{ + __be16 ss_family = htons(a->in_addr.ss_family); + a->in_addr.ss_family = *(__u16 *)&ss_family; +} +static inline void ceph_decode_addr(struct ceph_entity_addr *a) +{ + __be16 ss_family = *(__be16 *)&a->in_addr.ss_family; + a->in_addr.ss_family = ntohs(ss_family); + WARN_ON(a->in_addr.ss_family == 512); +} + +/* + * encoders + */ +static inline void ceph_encode_64(void **p, u64 v) +{ + put_unaligned_le64(v, (__le64 *)*p); + *p += sizeof(u64); +} +static inline void ceph_encode_32(void **p, u32 v) +{ + put_unaligned_le32(v, (__le32 *)*p); + *p += sizeof(u32); +} +static inline void ceph_encode_16(void **p, u16 v) +{ + put_unaligned_le16(v, (__le16 *)*p); + *p += sizeof(u16); +} +static inline void ceph_encode_8(void **p, u8 v) +{ + *(u8 *)*p = v; + (*p)++; +} +static inline void ceph_encode_copy(void **p, const void *s, int len) +{ + memcpy(*p, s, len); + *p += len; +} + +/* + * filepath, string encoders + */ +static inline void ceph_encode_filepath(void **p, void *end, + u64 ino, const char *path) +{ + u32 len = path ? strlen(path) : 0; + BUG_ON(*p + sizeof(ino) + sizeof(len) + len > end); + ceph_encode_8(p, 1); + ceph_encode_64(p, ino); + ceph_encode_32(p, len); + if (len) + memcpy(*p, path, len); + *p += len; +} + +static inline void ceph_encode_string(void **p, void *end, + const char *s, u32 len) +{ + BUG_ON(*p + sizeof(len) + len > end); + ceph_encode_32(p, len); + if (len) + memcpy(*p, s, len); + *p += len; +} + +#define ceph_encode_need(p, end, n, bad) \ + do { \ + if (unlikely(*(p) + (n) > (end))) \ + goto bad; \ + } while (0) + +#define ceph_encode_64_safe(p, end, v, bad) \ + do { \ + ceph_encode_need(p, end, sizeof(u64), bad); \ + ceph_encode_64(p, v); \ + } while (0) +#define ceph_encode_32_safe(p, end, v, bad) \ + do { \ + ceph_encode_need(p, end, sizeof(u32), bad); \ + ceph_encode_32(p, v); \ + } while (0) +#define ceph_encode_16_safe(p, end, v, bad) \ + do { \ + ceph_encode_need(p, end, sizeof(u16), bad); \ + ceph_encode_16(p, v); \ + } while (0) + +#define ceph_encode_copy_safe(p, end, pv, n, bad) \ + do { \ + ceph_encode_need(p, end, n, bad); \ + ceph_encode_copy(p, pv, n); \ + } while (0) +#define ceph_encode_string_safe(p, end, s, n, bad) \ + do { \ + ceph_encode_need(p, end, n, bad); \ + ceph_encode_string(p, end, s, n); \ + } while (0) + + +#endif diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h new file mode 100644 index 00000000000..f22b2e94168 --- /dev/null +++ b/include/linux/ceph/libceph.h @@ -0,0 +1,249 @@ +#ifndef _FS_CEPH_LIBCEPH_H +#define _FS_CEPH_LIBCEPH_H + +#include "ceph_debug.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "types.h" +#include "messenger.h" +#include "msgpool.h" +#include "mon_client.h" +#include "osd_client.h" +#include "ceph_fs.h" + +/* + * Supported features + */ +#define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR +#define CEPH_FEATURE_REQUIRED_DEFAULT CEPH_FEATURE_NOSRCADDR + +/* + * mount options + */ +#define CEPH_OPT_FSID (1<<0) +#define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ +#define CEPH_OPT_MYIP (1<<2) /* specified my ip */ +#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ + +#define CEPH_OPT_DEFAULT (0); + +#define ceph_set_opt(client, opt) \ + (client)->options->flags |= CEPH_OPT_##opt; +#define ceph_test_opt(client, opt) \ + (!!((client)->options->flags & CEPH_OPT_##opt)) + +struct ceph_options { + int flags; + struct ceph_fsid fsid; + struct ceph_entity_addr my_addr; + int mount_timeout; + int osd_idle_ttl; + int osd_timeout; + int osd_keepalive_timeout; + + /* + * any type that can't be simply compared or doesn't need need + * to be compared should go beyond this point, + * ceph_compare_options() should be updated accordingly + */ + + struct ceph_entity_addr *mon_addr; /* should be the first + pointer type of args */ + int num_mon; + char *name; + char *secret; +}; + +/* + * defaults + */ +#define CEPH_MOUNT_TIMEOUT_DEFAULT 60 +#define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ +#define CEPH_OSD_KEEPALIVE_DEFAULT 5 +#define CEPH_OSD_IDLE_TTL_DEFAULT 60 +#define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ + +#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) +#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) + +#define CEPH_AUTH_NAME_DEFAULT "guest" + +/* + * Delay telling the MDS we no longer want caps, in case we reopen + * the file. Delay a minimum amount of time, even if we send a cap + * message for some other reason. Otherwise, take the oppotunity to + * update the mds to avoid sending another message later. + */ +#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ +#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ + +#define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) + +/* mount state */ +enum { + CEPH_MOUNT_MOUNTING, + CEPH_MOUNT_MOUNTED, + CEPH_MOUNT_UNMOUNTING, + CEPH_MOUNT_UNMOUNTED, + CEPH_MOUNT_SHUTDOWN, +}; + +/* + * subtract jiffies + */ +static inline unsigned long time_sub(unsigned long a, unsigned long b) +{ + BUG_ON(time_after(b, a)); + return (long)a - (long)b; +} + +struct ceph_mds_client; + +/* + * per client state + * + * possibly shared by multiple mount points, if they are + * mounting the same ceph filesystem/cluster. + */ +struct ceph_client { + struct ceph_fsid fsid; + bool have_fsid; + + void *private; + + struct ceph_options *options; + + struct mutex mount_mutex; /* serialize mount attempts */ + wait_queue_head_t auth_wq; + int auth_err; + + int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *); + + u32 supported_features; + u32 required_features; + + struct ceph_messenger *msgr; /* messenger instance */ + struct ceph_mon_client monc; + struct ceph_osd_client osdc; + +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_dir; + struct dentry *debugfs_monmap; + struct dentry *debugfs_osdmap; +#endif +}; + + + +/* + * snapshots + */ + +/* + * A "snap context" is the set of existing snapshots when we + * write data. It is used by the OSD to guide its COW behavior. + * + * The ceph_snap_context is refcounted, and attached to each dirty + * page, indicating which context the dirty data belonged when it was + * dirtied. + */ +struct ceph_snap_context { + atomic_t nref; + u64 seq; + int num_snaps; + u64 snaps[]; +}; + +static inline struct ceph_snap_context * +ceph_get_snap_context(struct ceph_snap_context *sc) +{ + /* + printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), + atomic_read(&sc->nref)+1); + */ + if (sc) + atomic_inc(&sc->nref); + return sc; +} + +static inline void ceph_put_snap_context(struct ceph_snap_context *sc) +{ + if (!sc) + return; + /* + printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), + atomic_read(&sc->nref)-1); + */ + if (atomic_dec_and_test(&sc->nref)) { + /*printk(" deleting snap_context %p\n", sc);*/ + kfree(sc); + } +} + +/* + * calculate the number of pages a given length and offset map onto, + * if we align the data. + */ +static inline int calc_pages_for(u64 off, u64 len) +{ + return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - + (off >> PAGE_CACHE_SHIFT); +} + +/* ceph_common.c */ +extern const char *ceph_msg_type_name(int type); +extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); +extern struct kmem_cache *ceph_inode_cachep; +extern struct kmem_cache *ceph_cap_cachep; +extern struct kmem_cache *ceph_dentry_cachep; +extern struct kmem_cache *ceph_file_cachep; + +extern int ceph_parse_options(struct ceph_options **popt, char *options, + const char *dev_name, const char *dev_name_end, + int (*parse_extra_token)(char *c, void *private), + void *private); +extern void ceph_destroy_options(struct ceph_options *opt); +extern int ceph_compare_options(struct ceph_options *new_opt, + struct ceph_client *client); +extern struct ceph_client *ceph_create_client(struct ceph_options *opt, + void *private); +extern u64 ceph_client_id(struct ceph_client *client); +extern void ceph_destroy_client(struct ceph_client *client); +extern int __ceph_open_session(struct ceph_client *client, + unsigned long started); +extern int ceph_open_session(struct ceph_client *client); + +/* pagevec.c */ +extern void ceph_release_page_vector(struct page **pages, int num_pages); + +extern struct page **ceph_get_direct_page_vector(const char __user *data, + int num_pages, + loff_t off, size_t len); +extern void ceph_put_page_vector(struct page **pages, int num_pages); +extern void ceph_release_page_vector(struct page **pages, int num_pages); +extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); +extern int ceph_copy_user_to_page_vector(struct page **pages, + const char __user *data, + loff_t off, size_t len); +extern int ceph_copy_to_page_vector(struct page **pages, + const char *data, + loff_t off, size_t len); +extern int ceph_copy_from_page_vector(struct page **pages, + char *data, + loff_t off, size_t len); +extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, + loff_t off, size_t len); +extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); + + +#endif /* _FS_CEPH_SUPER_H */ diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h new file mode 100644 index 00000000000..4c5cb0880bb --- /dev/null +++ b/include/linux/ceph/mdsmap.h @@ -0,0 +1,62 @@ +#ifndef _FS_CEPH_MDSMAP_H +#define _FS_CEPH_MDSMAP_H + +#include "types.h" + +/* + * mds map - describe servers in the mds cluster. + * + * we limit fields to those the client actually xcares about + */ +struct ceph_mds_info { + u64 global_id; + struct ceph_entity_addr addr; + s32 state; + int num_export_targets; + bool laggy; + u32 *export_targets; +}; + +struct ceph_mdsmap { + u32 m_epoch, m_client_epoch, m_last_failure; + u32 m_root; + u32 m_session_timeout; /* seconds */ + u32 m_session_autoclose; /* seconds */ + u64 m_max_file_size; + u32 m_max_mds; /* size of m_addr, m_state arrays */ + struct ceph_mds_info *m_info; + + /* which object pools file data can be stored in */ + int m_num_data_pg_pools; + u32 *m_data_pg_pools; + u32 m_cas_pg_pool; +}; + +static inline struct ceph_entity_addr * +ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) +{ + if (w >= m->m_max_mds) + return NULL; + return &m->m_info[w].addr; +} + +static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) +{ + BUG_ON(w < 0); + if (w >= m->m_max_mds) + return CEPH_MDS_STATE_DNE; + return m->m_info[w].state; +} + +static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) +{ + if (w >= 0 && w < m->m_max_mds) + return m->m_info[w].laggy; + return false; +} + +extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); +extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); +extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); + +#endif diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h new file mode 100644 index 00000000000..5956d62c305 --- /dev/null +++ b/include/linux/ceph/messenger.h @@ -0,0 +1,261 @@ +#ifndef __FS_CEPH_MESSENGER_H +#define __FS_CEPH_MESSENGER_H + +#include +#include +#include +#include +#include +#include +#include + +#include "types.h" +#include "buffer.h" + +struct ceph_msg; +struct ceph_connection; + +extern struct workqueue_struct *ceph_msgr_wq; /* receive work queue */ + +/* + * Ceph defines these callbacks for handling connection events. + */ +struct ceph_connection_operations { + struct ceph_connection *(*get)(struct ceph_connection *); + void (*put)(struct ceph_connection *); + + /* handle an incoming message. */ + void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m); + + /* authorize an outgoing connection */ + int (*get_authorizer) (struct ceph_connection *con, + void **buf, int *len, int *proto, + void **reply_buf, int *reply_len, int force_new); + int (*verify_authorizer_reply) (struct ceph_connection *con, int len); + int (*invalidate_authorizer)(struct ceph_connection *con); + + /* protocol version mismatch */ + void (*bad_proto) (struct ceph_connection *con); + + /* there was some error on the socket (disconnect, whatever) */ + void (*fault) (struct ceph_connection *con); + + /* a remote host as terminated a message exchange session, and messages + * we sent (or they tried to send us) may be lost. */ + void (*peer_reset) (struct ceph_connection *con); + + struct ceph_msg * (*alloc_msg) (struct ceph_connection *con, + struct ceph_msg_header *hdr, + int *skip); +}; + +/* use format string %s%d */ +#define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num) + +struct ceph_messenger { + struct ceph_entity_inst inst; /* my name+address */ + struct ceph_entity_addr my_enc_addr; + struct page *zero_page; /* used in certain error cases */ + + bool nocrc; + + /* + * the global_seq counts connections i (attempt to) initiate + * in order to disambiguate certain connect race conditions. + */ + u32 global_seq; + spinlock_t global_seq_lock; + + u32 supported_features; + u32 required_features; +}; + +/* + * a single message. it contains a header (src, dest, message type, etc.), + * footer (crc values, mainly), a "front" message body, and possibly a + * data payload (stored in some number of pages). + */ +struct ceph_msg { + struct ceph_msg_header hdr; /* header */ + struct ceph_msg_footer footer; /* footer */ + struct kvec front; /* unaligned blobs of message */ + struct ceph_buffer *middle; + struct page **pages; /* data payload. NOT OWNER. */ + unsigned nr_pages; /* size of page array */ + struct ceph_pagelist *pagelist; /* instead of pages */ + struct list_head list_head; + struct kref kref; + struct bio *bio; /* instead of pages/pagelist */ + struct bio *bio_iter; /* bio iterator */ + int bio_seg; /* current bio segment */ + struct ceph_pagelist *trail; /* the trailing part of the data */ + bool front_is_vmalloc; + bool more_to_follow; + bool needs_out_seq; + int front_max; + + struct ceph_msgpool *pool; +}; + +struct ceph_msg_pos { + int page, page_pos; /* which page; offset in page */ + int data_pos; /* offset in data payload */ + int did_page_crc; /* true if we've calculated crc for current page */ +}; + +/* ceph connection fault delay defaults, for exponential backoff */ +#define BASE_DELAY_INTERVAL (HZ/2) +#define MAX_DELAY_INTERVAL (5 * 60 * HZ) + +/* + * ceph_connection state bit flags + * + * QUEUED and BUSY are used together to ensure that only a single + * thread is currently opening, reading or writing data to the socket. + */ +#define LOSSYTX 0 /* we can close channel or drop messages on errors */ +#define CONNECTING 1 +#define NEGOTIATING 2 +#define KEEPALIVE_PENDING 3 +#define WRITE_PENDING 4 /* we have data ready to send */ +#define QUEUED 5 /* there is work queued on this connection */ +#define BUSY 6 /* work is being done */ +#define STANDBY 8 /* no outgoing messages, socket closed. we keep + * the ceph_connection around to maintain shared + * state with the peer. */ +#define CLOSED 10 /* we've closed the connection */ +#define SOCK_CLOSED 11 /* socket state changed to closed */ +#define OPENING 13 /* open connection w/ (possibly new) peer */ +#define DEAD 14 /* dead, about to kfree */ + +/* + * A single connection with another host. + * + * We maintain a queue of outgoing messages, and some session state to + * ensure that we can preserve the lossless, ordered delivery of + * messages in the case of a TCP disconnect. + */ +struct ceph_connection { + void *private; + atomic_t nref; + + const struct ceph_connection_operations *ops; + + struct ceph_messenger *msgr; + struct socket *sock; + unsigned long state; /* connection state (see flags above) */ + const char *error_msg; /* error message, if any */ + + struct ceph_entity_addr peer_addr; /* peer address */ + struct ceph_entity_name peer_name; /* peer name */ + struct ceph_entity_addr peer_addr_for_me; + unsigned peer_features; + u32 connect_seq; /* identify the most recent connection + attempt for this connection, client */ + u32 peer_global_seq; /* peer's global seq for this connection */ + + int auth_retry; /* true if we need a newer authorizer */ + void *auth_reply_buf; /* where to put the authorizer reply */ + int auth_reply_buf_len; + + struct mutex mutex; + + /* out queue */ + struct list_head out_queue; + struct list_head out_sent; /* sending or sent but unacked */ + u64 out_seq; /* last message queued for send */ + bool out_keepalive_pending; + + u64 in_seq, in_seq_acked; /* last message received, acked */ + + /* connection negotiation temps */ + char in_banner[CEPH_BANNER_MAX_LEN]; + union { + struct { /* outgoing connection */ + struct ceph_msg_connect out_connect; + struct ceph_msg_connect_reply in_reply; + }; + struct { /* incoming */ + struct ceph_msg_connect in_connect; + struct ceph_msg_connect_reply out_reply; + }; + }; + struct ceph_entity_addr actual_peer_addr; + + /* message out temps */ + struct ceph_msg *out_msg; /* sending message (== tail of + out_sent) */ + bool out_msg_done; + struct ceph_msg_pos out_msg_pos; + + struct kvec out_kvec[8], /* sending header/footer data */ + *out_kvec_cur; + int out_kvec_left; /* kvec's left in out_kvec */ + int out_skip; /* skip this many bytes */ + int out_kvec_bytes; /* total bytes left */ + bool out_kvec_is_msg; /* kvec refers to out_msg */ + int out_more; /* there is more data after the kvecs */ + __le64 out_temp_ack; /* for writing an ack */ + + /* message in temps */ + struct ceph_msg_header in_hdr; + struct ceph_msg *in_msg; + struct ceph_msg_pos in_msg_pos; + u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */ + + char in_tag; /* protocol control byte */ + int in_base_pos; /* bytes read */ + __le64 in_temp_ack; /* for reading an ack */ + + struct delayed_work work; /* send|recv work */ + unsigned long delay; /* current delay interval */ +}; + + +extern const char *ceph_pr_addr(const struct sockaddr_storage *ss); +extern int ceph_parse_ips(const char *c, const char *end, + struct ceph_entity_addr *addr, + int max_count, int *count); + + +extern int ceph_msgr_init(void); +extern void ceph_msgr_exit(void); +extern void ceph_msgr_flush(void); + +extern struct ceph_messenger *ceph_messenger_create( + struct ceph_entity_addr *myaddr, + u32 features, u32 required); +extern void ceph_messenger_destroy(struct ceph_messenger *); + +extern void ceph_con_init(struct ceph_messenger *msgr, + struct ceph_connection *con); +extern void ceph_con_open(struct ceph_connection *con, + struct ceph_entity_addr *addr); +extern bool ceph_con_opened(struct ceph_connection *con); +extern void ceph_con_close(struct ceph_connection *con); +extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg); +extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg); +extern void ceph_con_revoke_message(struct ceph_connection *con, + struct ceph_msg *msg); +extern void ceph_con_keepalive(struct ceph_connection *con); +extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); +extern void ceph_con_put(struct ceph_connection *con); + +extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags); +extern void ceph_msg_kfree(struct ceph_msg *m); + + +static inline struct ceph_msg *ceph_msg_get(struct ceph_msg *msg) +{ + kref_get(&msg->kref); + return msg; +} +extern void ceph_msg_last_put(struct kref *kref); +static inline void ceph_msg_put(struct ceph_msg *msg) +{ + kref_put(&msg->kref, ceph_msg_last_put); +} + +extern void ceph_msg_dump(struct ceph_msg *msg); + +#endif diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h new file mode 100644 index 00000000000..545f8591778 --- /dev/null +++ b/include/linux/ceph/mon_client.h @@ -0,0 +1,122 @@ +#ifndef _FS_CEPH_MON_CLIENT_H +#define _FS_CEPH_MON_CLIENT_H + +#include +#include +#include + +#include "messenger.h" + +struct ceph_client; +struct ceph_mount_args; +struct ceph_auth_client; + +/* + * The monitor map enumerates the set of all monitors. + */ +struct ceph_monmap { + struct ceph_fsid fsid; + u32 epoch; + u32 num_mon; + struct ceph_entity_inst mon_inst[0]; +}; + +struct ceph_mon_client; +struct ceph_mon_generic_request; + + +/* + * Generic mechanism for resending monitor requests. + */ +typedef void (*ceph_monc_request_func_t)(struct ceph_mon_client *monc, + int newmon); + +/* a pending monitor request */ +struct ceph_mon_request { + struct ceph_mon_client *monc; + struct delayed_work delayed_work; + unsigned long delay; + ceph_monc_request_func_t do_request; +}; + +/* + * ceph_mon_generic_request is being used for the statfs and poolop requests + * which are bening done a bit differently because we need to get data back + * to the caller + */ +struct ceph_mon_generic_request { + struct kref kref; + u64 tid; + struct rb_node node; + int result; + void *buf; + int buf_len; + struct completion completion; + struct ceph_msg *request; /* original request */ + struct ceph_msg *reply; /* and reply */ +}; + +struct ceph_mon_client { + struct ceph_client *client; + struct ceph_monmap *monmap; + + struct mutex mutex; + struct delayed_work delayed_work; + + struct ceph_auth_client *auth; + struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe, *m_subscribe_ack; + int pending_auth; + + bool hunting; + int cur_mon; /* last monitor i contacted */ + unsigned long sub_sent, sub_renew_after; + struct ceph_connection *con; + bool have_fsid; + + /* pending generic requests */ + struct rb_root generic_request_tree; + int num_generic_requests; + u64 last_tid; + + /* mds/osd map */ + int want_mdsmap; + int want_next_osdmap; /* 1 = want, 2 = want+asked */ + u32 have_osdmap, have_mdsmap; + +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_file; +#endif +}; + +extern struct ceph_monmap *ceph_monmap_decode(void *p, void *end); +extern int ceph_monmap_contains(struct ceph_monmap *m, + struct ceph_entity_addr *addr); + +extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl); +extern void ceph_monc_stop(struct ceph_mon_client *monc); + +/* + * The model here is to indicate that we need a new map of at least + * epoch @want, and also call in when we receive a map. We will + * periodically rerequest the map from the monitor cluster until we + * get what we want. + */ +extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have); +extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have); + +extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc); + +extern int ceph_monc_do_statfs(struct ceph_mon_client *monc, + struct ceph_statfs *buf); + +extern int ceph_monc_open_session(struct ceph_mon_client *monc); + +extern int ceph_monc_validate_auth(struct ceph_mon_client *monc); + +extern int ceph_monc_create_snapid(struct ceph_mon_client *monc, + u32 pool, u64 *snapid); + +extern int ceph_monc_delete_snapid(struct ceph_mon_client *monc, + u32 pool, u64 snapid); + +#endif diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h new file mode 100644 index 00000000000..a362605f936 --- /dev/null +++ b/include/linux/ceph/msgpool.h @@ -0,0 +1,25 @@ +#ifndef _FS_CEPH_MSGPOOL +#define _FS_CEPH_MSGPOOL + +#include +#include "messenger.h" + +/* + * we use memory pools for preallocating messages we may receive, to + * avoid unexpected OOM conditions. + */ +struct ceph_msgpool { + const char *name; + mempool_t *pool; + int front_len; /* preallocated payload size */ +}; + +extern int ceph_msgpool_init(struct ceph_msgpool *pool, + int front_len, int size, bool blocking, + const char *name); +extern void ceph_msgpool_destroy(struct ceph_msgpool *pool); +extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *, + int front_len); +extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *); + +#endif diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h new file mode 100644 index 00000000000..680d3d648ca --- /dev/null +++ b/include/linux/ceph/msgr.h @@ -0,0 +1,175 @@ +#ifndef CEPH_MSGR_H +#define CEPH_MSGR_H + +/* + * Data types for message passing layer used by Ceph. + */ + +#define CEPH_MON_PORT 6789 /* default monitor port */ + +/* + * client-side processes will try to bind to ports in this + * range, simply for the benefit of tools like nmap or wireshark + * that would like to identify the protocol. + */ +#define CEPH_PORT_FIRST 6789 +#define CEPH_PORT_START 6800 /* non-monitors start here */ +#define CEPH_PORT_LAST 6900 + +/* + * tcp connection banner. include a protocol version. and adjust + * whenever the wire protocol changes. try to keep this string length + * constant. + */ +#define CEPH_BANNER "ceph v027" +#define CEPH_BANNER_MAX_LEN 30 + + +/* + * Rollover-safe type and comparator for 32-bit sequence numbers. + * Comparator returns -1, 0, or 1. + */ +typedef __u32 ceph_seq_t; + +static inline __s32 ceph_seq_cmp(__u32 a, __u32 b) +{ + return (__s32)a - (__s32)b; +} + + +/* + * entity_name -- logical name for a process participating in the + * network, e.g. 'mds0' or 'osd3'. + */ +struct ceph_entity_name { + __u8 type; /* CEPH_ENTITY_TYPE_* */ + __le64 num; +} __attribute__ ((packed)); + +#define CEPH_ENTITY_TYPE_MON 0x01 +#define CEPH_ENTITY_TYPE_MDS 0x02 +#define CEPH_ENTITY_TYPE_OSD 0x04 +#define CEPH_ENTITY_TYPE_CLIENT 0x08 +#define CEPH_ENTITY_TYPE_AUTH 0x20 + +#define CEPH_ENTITY_TYPE_ANY 0xFF + +extern const char *ceph_entity_type_name(int type); + +/* + * entity_addr -- network address + */ +struct ceph_entity_addr { + __le32 type; + __le32 nonce; /* unique id for process (e.g. pid) */ + struct sockaddr_storage in_addr; +} __attribute__ ((packed)); + +struct ceph_entity_inst { + struct ceph_entity_name name; + struct ceph_entity_addr addr; +} __attribute__ ((packed)); + + +/* used by message exchange protocol */ +#define CEPH_MSGR_TAG_READY 1 /* server->client: ready for messages */ +#define CEPH_MSGR_TAG_RESETSESSION 2 /* server->client: reset, try again */ +#define CEPH_MSGR_TAG_WAIT 3 /* server->client: wait for racing + incoming connection */ +#define CEPH_MSGR_TAG_RETRY_SESSION 4 /* server->client + cseq: try again + with higher cseq */ +#define CEPH_MSGR_TAG_RETRY_GLOBAL 5 /* server->client + gseq: try again + with higher gseq */ +#define CEPH_MSGR_TAG_CLOSE 6 /* closing pipe */ +#define CEPH_MSGR_TAG_MSG 7 /* message */ +#define CEPH_MSGR_TAG_ACK 8 /* message ack */ +#define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ +#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ +#define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ +#define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ + + +/* + * connection negotiation + */ +struct ceph_msg_connect { + __le64 features; /* supported feature bits */ + __le32 host_type; /* CEPH_ENTITY_TYPE_* */ + __le32 global_seq; /* count connections initiated by this host */ + __le32 connect_seq; /* count connections initiated in this session */ + __le32 protocol_version; + __le32 authorizer_protocol; + __le32 authorizer_len; + __u8 flags; /* CEPH_MSG_CONNECT_* */ +} __attribute__ ((packed)); + +struct ceph_msg_connect_reply { + __u8 tag; + __le64 features; /* feature bits for this session */ + __le32 global_seq; + __le32 connect_seq; + __le32 protocol_version; + __le32 authorizer_len; + __u8 flags; +} __attribute__ ((packed)); + +#define CEPH_MSG_CONNECT_LOSSY 1 /* messages i send may be safely dropped */ + + +/* + * message header + */ +struct ceph_msg_header_old { + __le64 seq; /* message seq# for this session */ + __le64 tid; /* transaction id */ + __le16 type; /* message type */ + __le16 priority; /* priority. higher value == higher priority */ + __le16 version; /* version of message encoding */ + + __le32 front_len; /* bytes in main payload */ + __le32 middle_len;/* bytes in middle payload */ + __le32 data_len; /* bytes of data payload */ + __le16 data_off; /* sender: include full offset; + receiver: mask against ~PAGE_MASK */ + + struct ceph_entity_inst src, orig_src; + __le32 reserved; + __le32 crc; /* header crc32c */ +} __attribute__ ((packed)); + +struct ceph_msg_header { + __le64 seq; /* message seq# for this session */ + __le64 tid; /* transaction id */ + __le16 type; /* message type */ + __le16 priority; /* priority. higher value == higher priority */ + __le16 version; /* version of message encoding */ + + __le32 front_len; /* bytes in main payload */ + __le32 middle_len;/* bytes in middle payload */ + __le32 data_len; /* bytes of data payload */ + __le16 data_off; /* sender: include full offset; + receiver: mask against ~PAGE_MASK */ + + struct ceph_entity_name src; + __le32 reserved; + __le32 crc; /* header crc32c */ +} __attribute__ ((packed)); + +#define CEPH_MSG_PRIO_LOW 64 +#define CEPH_MSG_PRIO_DEFAULT 127 +#define CEPH_MSG_PRIO_HIGH 196 +#define CEPH_MSG_PRIO_HIGHEST 255 + +/* + * follows data payload + */ +struct ceph_msg_footer { + __le32 front_crc, middle_crc, data_crc; + __u8 flags; +} __attribute__ ((packed)); + +#define CEPH_MSG_FOOTER_COMPLETE (1<<0) /* msg wasn't aborted */ +#define CEPH_MSG_FOOTER_NOCRC (1<<1) /* no data crc */ + + +#endif diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h new file mode 100644 index 00000000000..6c91fb032c3 --- /dev/null +++ b/include/linux/ceph/osd_client.h @@ -0,0 +1,234 @@ +#ifndef _FS_CEPH_OSD_CLIENT_H +#define _FS_CEPH_OSD_CLIENT_H + +#include +#include +#include +#include + +#include "types.h" +#include "osdmap.h" +#include "messenger.h" + +struct ceph_msg; +struct ceph_snap_context; +struct ceph_osd_request; +struct ceph_osd_client; +struct ceph_authorizer; +struct ceph_pagelist; + +/* + * completion callback for async writepages + */ +typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *, + struct ceph_msg *); + +/* a given osd we're communicating with */ +struct ceph_osd { + atomic_t o_ref; + struct ceph_osd_client *o_osdc; + int o_osd; + int o_incarnation; + struct rb_node o_node; + struct ceph_connection o_con; + struct list_head o_requests; + struct list_head o_osd_lru; + struct ceph_authorizer *o_authorizer; + void *o_authorizer_buf, *o_authorizer_reply_buf; + size_t o_authorizer_buf_len, o_authorizer_reply_buf_len; + unsigned long lru_ttl; + int o_marked_for_keepalive; + struct list_head o_keepalive_item; +}; + +/* an in-flight request */ +struct ceph_osd_request { + u64 r_tid; /* unique for this client */ + struct rb_node r_node; + struct list_head r_req_lru_item; + struct list_head r_osd_item; + struct ceph_osd *r_osd; + struct ceph_pg r_pgid; + int r_pg_osds[CEPH_PG_MAX_SIZE]; + int r_num_pg_osds; + + struct ceph_connection *r_con_filling_msg; + + struct ceph_msg *r_request, *r_reply; + int r_result; + int r_flags; /* any additional flags for the osd */ + u32 r_sent; /* >0 if r_request is sending/sent */ + int r_got_reply; + + struct ceph_osd_client *r_osdc; + struct kref r_kref; + bool r_mempool; + struct completion r_completion, r_safe_completion; + ceph_osdc_callback_t r_callback, r_safe_callback; + struct ceph_eversion r_reassert_version; + struct list_head r_unsafe_item; + + struct inode *r_inode; /* for use by callbacks */ + void *r_priv; /* ditto */ + + char r_oid[40]; /* object name */ + int r_oid_len; + unsigned long r_stamp; /* send OR check time */ + bool r_resend; /* msg send failed, needs retry */ + + struct ceph_file_layout r_file_layout; + struct ceph_snap_context *r_snapc; /* snap context for writes */ + unsigned r_num_pages; /* size of page array (follows) */ + struct page **r_pages; /* pages for data payload */ + int r_pages_from_pool; + int r_own_pages; /* if true, i own page list */ +#ifdef CONFIG_BLOCK + struct bio *r_bio; /* instead of pages */ +#endif + + struct ceph_pagelist *r_trail; /* trailing part of the data */ +}; + +struct ceph_osd_client { + struct ceph_client *client; + + struct ceph_osdmap *osdmap; /* current map */ + struct rw_semaphore map_sem; + struct completion map_waiters; + u64 last_requested_map; + + struct mutex request_mutex; + struct rb_root osds; /* osds */ + struct list_head osd_lru; /* idle osds */ + u64 timeout_tid; /* tid of timeout triggering rq */ + u64 last_tid; /* tid of last request */ + struct rb_root requests; /* pending requests */ + struct list_head req_lru; /* pending requests lru */ + int num_requests; + struct delayed_work timeout_work; + struct delayed_work osds_timeout_work; +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_file; +#endif + + mempool_t *req_mempool; + + struct ceph_msgpool msgpool_op; + struct ceph_msgpool msgpool_op_reply; +}; + +struct ceph_osd_req_op { + u16 op; /* CEPH_OSD_OP_* */ + u32 flags; /* CEPH_OSD_FLAG_* */ + union { + struct { + u64 offset, length; + u64 truncate_size; + u32 truncate_seq; + } extent; + struct { + const char *name; + u32 name_len; + const char *val; + u32 value_len; + __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ + __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ + } xattr; + struct { + const char *class_name; + __u8 class_len; + const char *method_name; + __u8 method_len; + __u8 argc; + const char *indata; + u32 indata_len; + } cls; + struct { + u64 cookie, count; + } pgls; + struct { + u64 snapid; + } snap; + }; + u32 payload_len; +}; + +extern int ceph_osdc_init(struct ceph_osd_client *osdc, + struct ceph_client *client); +extern void ceph_osdc_stop(struct ceph_osd_client *osdc); + +extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, + struct ceph_msg *msg); +extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, + struct ceph_msg *msg); + +extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc, + struct ceph_file_layout *layout, + u64 snapid, + u64 off, u64 *plen, u64 *bno, + struct ceph_osd_request *req, + struct ceph_osd_req_op *op); + +extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, + int flags, + struct ceph_snap_context *snapc, + struct ceph_osd_req_op *ops, + bool use_mempool, + gfp_t gfp_flags, + struct page **pages, + struct bio *bio); + +extern void ceph_osdc_build_request(struct ceph_osd_request *req, + u64 off, u64 *plen, + struct ceph_osd_req_op *src_ops, + struct ceph_snap_context *snapc, + struct timespec *mtime, + const char *oid, + int oid_len); + +extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, + struct ceph_file_layout *layout, + struct ceph_vino vino, + u64 offset, u64 *len, int op, int flags, + struct ceph_snap_context *snapc, + int do_sync, u32 truncate_seq, + u64 truncate_size, + struct timespec *mtime, + bool use_mempool, int num_reply); + +static inline void ceph_osdc_get_request(struct ceph_osd_request *req) +{ + kref_get(&req->r_kref); +} +extern void ceph_osdc_release_request(struct kref *kref); +static inline void ceph_osdc_put_request(struct ceph_osd_request *req) +{ + kref_put(&req->r_kref, ceph_osdc_release_request); +} + +extern int ceph_osdc_start_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req, + bool nofail); +extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req); +extern void ceph_osdc_sync(struct ceph_osd_client *osdc); + +extern int ceph_osdc_readpages(struct ceph_osd_client *osdc, + struct ceph_vino vino, + struct ceph_file_layout *layout, + u64 off, u64 *plen, + u32 truncate_seq, u64 truncate_size, + struct page **pages, int nr_pages); + +extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, + struct ceph_vino vino, + struct ceph_file_layout *layout, + struct ceph_snap_context *sc, + u64 off, u64 len, + u32 truncate_seq, u64 truncate_size, + struct timespec *mtime, + struct page **pages, int nr_pages, + int flags, int do_sync, bool nofail); + +#endif + diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h new file mode 100644 index 00000000000..ba4c205cbb0 --- /dev/null +++ b/include/linux/ceph/osdmap.h @@ -0,0 +1,130 @@ +#ifndef _FS_CEPH_OSDMAP_H +#define _FS_CEPH_OSDMAP_H + +#include +#include "types.h" +#include "ceph_fs.h" +#include + +/* + * The osd map describes the current membership of the osd cluster and + * specifies the mapping of objects to placement groups and placement + * groups to (sets of) osds. That is, it completely specifies the + * (desired) distribution of all data objects in the system at some + * point in time. + * + * Each map version is identified by an epoch, which increases monotonically. + * + * The map can be updated either via an incremental map (diff) describing + * the change between two successive epochs, or as a fully encoded map. + */ +struct ceph_pg_pool_info { + struct rb_node node; + int id; + struct ceph_pg_pool v; + int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; + char *name; +}; + +struct ceph_pg_mapping { + struct rb_node node; + struct ceph_pg pgid; + int len; + int osds[]; +}; + +struct ceph_osdmap { + struct ceph_fsid fsid; + u32 epoch; + u32 mkfs_epoch; + struct ceph_timespec created, modified; + + u32 flags; /* CEPH_OSDMAP_* */ + + u32 max_osd; /* size of osd_state, _offload, _addr arrays */ + u8 *osd_state; /* CEPH_OSD_* */ + u32 *osd_weight; /* 0 = failed, 0x10000 = 100% normal */ + struct ceph_entity_addr *osd_addr; + + struct rb_root pg_temp; + struct rb_root pg_pools; + u32 pool_max; + + /* the CRUSH map specifies the mapping of placement groups to + * the list of osds that store+replicate them. */ + struct crush_map *crush; +}; + +/* + * file layout helpers + */ +#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit)) +#define ceph_file_layout_stripe_count(l) \ + ((__s32)le32_to_cpu((l).fl_stripe_count)) +#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size)) +#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash)) +#define ceph_file_layout_object_su(l) \ + ((__s32)le32_to_cpu((l).fl_object_stripe_unit)) +#define ceph_file_layout_pg_preferred(l) \ + ((__s32)le32_to_cpu((l).fl_pg_preferred)) +#define ceph_file_layout_pg_pool(l) \ + ((__s32)le32_to_cpu((l).fl_pg_pool)) + +static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l) +{ + return le32_to_cpu(l->fl_stripe_unit) * + le32_to_cpu(l->fl_stripe_count); +} + +/* "period" == bytes before i start on a new set of objects */ +static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l) +{ + return le32_to_cpu(l->fl_object_size) * + le32_to_cpu(l->fl_stripe_count); +} + + +static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd) +{ + return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP); +} + +static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) +{ + return map && (map->flags & flag); +} + +extern char *ceph_osdmap_state_str(char *str, int len, int state); + +static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, + int osd) +{ + if (osd >= map->max_osd) + return NULL; + return &map->osd_addr[osd]; +} + +extern struct ceph_osdmap *osdmap_decode(void **p, void *end); +extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, + struct ceph_osdmap *map, + struct ceph_messenger *msgr); +extern void ceph_osdmap_destroy(struct ceph_osdmap *map); + +/* calculate mapping of a file extent to an object */ +extern void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, + u64 off, u64 *plen, + u64 *bno, u64 *oxoff, u64 *oxlen); + +/* calculate mapping of object to a placement group */ +extern int ceph_calc_object_layout(struct ceph_object_layout *ol, + const char *oid, + struct ceph_file_layout *fl, + struct ceph_osdmap *osdmap); +extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, + int *acting); +extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, + struct ceph_pg pgid); + +extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); + +#endif diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h new file mode 100644 index 00000000000..cc9327aa1c9 --- /dev/null +++ b/include/linux/ceph/pagelist.h @@ -0,0 +1,54 @@ +#ifndef __FS_CEPH_PAGELIST_H +#define __FS_CEPH_PAGELIST_H + +#include + +struct ceph_pagelist { + struct list_head head; + void *mapped_tail; + size_t length; + size_t room; +}; + +static inline void ceph_pagelist_init(struct ceph_pagelist *pl) +{ + INIT_LIST_HEAD(&pl->head); + pl->mapped_tail = NULL; + pl->length = 0; + pl->room = 0; +} +extern int ceph_pagelist_release(struct ceph_pagelist *pl); + +extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l); + +static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v) +{ + __le64 ev = cpu_to_le64(v); + return ceph_pagelist_append(pl, &ev, sizeof(ev)); +} +static inline int ceph_pagelist_encode_32(struct ceph_pagelist *pl, u32 v) +{ + __le32 ev = cpu_to_le32(v); + return ceph_pagelist_append(pl, &ev, sizeof(ev)); +} +static inline int ceph_pagelist_encode_16(struct ceph_pagelist *pl, u16 v) +{ + __le16 ev = cpu_to_le16(v); + return ceph_pagelist_append(pl, &ev, sizeof(ev)); +} +static inline int ceph_pagelist_encode_8(struct ceph_pagelist *pl, u8 v) +{ + return ceph_pagelist_append(pl, &v, 1); +} +static inline int ceph_pagelist_encode_string(struct ceph_pagelist *pl, + char *s, size_t len) +{ + int ret = ceph_pagelist_encode_32(pl, len); + if (ret) + return ret; + if (len) + return ceph_pagelist_append(pl, s, len); + return 0; +} + +#endif diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h new file mode 100644 index 00000000000..6d5247f2e81 --- /dev/null +++ b/include/linux/ceph/rados.h @@ -0,0 +1,405 @@ +#ifndef CEPH_RADOS_H +#define CEPH_RADOS_H + +/* + * Data types for the Ceph distributed object storage layer RADOS + * (Reliable Autonomic Distributed Object Store). + */ + +#include "msgr.h" + +/* + * osdmap encoding versions + */ +#define CEPH_OSDMAP_INC_VERSION 5 +#define CEPH_OSDMAP_INC_VERSION_EXT 5 +#define CEPH_OSDMAP_VERSION 5 +#define CEPH_OSDMAP_VERSION_EXT 5 + +/* + * fs id + */ +struct ceph_fsid { + unsigned char fsid[16]; +}; + +static inline int ceph_fsid_compare(const struct ceph_fsid *a, + const struct ceph_fsid *b) +{ + return memcmp(a, b, sizeof(*a)); +} + +/* + * ino, object, etc. + */ +typedef __le64 ceph_snapid_t; +#define CEPH_SNAPDIR ((__u64)(-1)) /* reserved for hidden .snap dir */ +#define CEPH_NOSNAP ((__u64)(-2)) /* "head", "live" revision */ +#define CEPH_MAXSNAP ((__u64)(-3)) /* largest valid snapid */ + +struct ceph_timespec { + __le32 tv_sec; + __le32 tv_nsec; +} __attribute__ ((packed)); + + +/* + * object layout - how objects are mapped into PGs + */ +#define CEPH_OBJECT_LAYOUT_HASH 1 +#define CEPH_OBJECT_LAYOUT_LINEAR 2 +#define CEPH_OBJECT_LAYOUT_HASHINO 3 + +/* + * pg layout -- how PGs are mapped onto (sets of) OSDs + */ +#define CEPH_PG_LAYOUT_CRUSH 0 +#define CEPH_PG_LAYOUT_HASH 1 +#define CEPH_PG_LAYOUT_LINEAR 2 +#define CEPH_PG_LAYOUT_HYBRID 3 + +#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */ + +/* + * placement group. + * we encode this into one __le64. + */ +struct ceph_pg { + __le16 preferred; /* preferred primary osd */ + __le16 ps; /* placement seed */ + __le32 pool; /* object pool */ +} __attribute__ ((packed)); + +/* + * pg_pool is a set of pgs storing a pool of objects + * + * pg_num -- base number of pseudorandomly placed pgs + * + * pgp_num -- effective number when calculating pg placement. this + * is used for pg_num increases. new pgs result in data being "split" + * into new pgs. for this to proceed smoothly, new pgs are intiially + * colocated with their parents; that is, pgp_num doesn't increase + * until the new pgs have successfully split. only _then_ are the new + * pgs placed independently. + * + * lpg_num -- localized pg count (per device). replicas are randomly + * selected. + * + * lpgp_num -- as above. + */ +#define CEPH_PG_TYPE_REP 1 +#define CEPH_PG_TYPE_RAID4 2 +#define CEPH_PG_POOL_VERSION 2 +struct ceph_pg_pool { + __u8 type; /* CEPH_PG_TYPE_* */ + __u8 size; /* number of osds in each pg */ + __u8 crush_ruleset; /* crush placement rule */ + __u8 object_hash; /* hash mapping object name to ps */ + __le32 pg_num, pgp_num; /* number of pg's */ + __le32 lpg_num, lpgp_num; /* number of localized pg's */ + __le32 last_change; /* most recent epoch changed */ + __le64 snap_seq; /* seq for per-pool snapshot */ + __le32 snap_epoch; /* epoch of last snap */ + __le32 num_snaps; + __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */ + __le64 auid; /* who owns the pg */ +} __attribute__ ((packed)); + +/* + * stable_mod func is used to control number of placement groups. + * similar to straight-up modulo, but produces a stable mapping as b + * increases over time. b is the number of bins, and bmask is the + * containing power of 2 minus 1. + * + * b <= bmask and bmask=(2**n)-1 + * e.g., b=12 -> bmask=15, b=123 -> bmask=127 + */ +static inline int ceph_stable_mod(int x, int b, int bmask) +{ + if ((x & bmask) < b) + return x & bmask; + else + return x & (bmask >> 1); +} + +/* + * object layout - how a given object should be stored. + */ +struct ceph_object_layout { + struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */ + __le32 ol_stripe_unit; /* for per-object parity, if any */ +} __attribute__ ((packed)); + +/* + * compound epoch+version, used by storage layer to serialize mutations + */ +struct ceph_eversion { + __le32 epoch; + __le64 version; +} __attribute__ ((packed)); + +/* + * osd map bits + */ + +/* status bits */ +#define CEPH_OSD_EXISTS 1 +#define CEPH_OSD_UP 2 + +/* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ +#define CEPH_OSD_IN 0x10000 +#define CEPH_OSD_OUT 0 + + +/* + * osd map flag bits + */ +#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */ +#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */ +#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ +#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ +#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ + +/* + * osd ops + */ +#define CEPH_OSD_OP_MODE 0xf000 +#define CEPH_OSD_OP_MODE_RD 0x1000 +#define CEPH_OSD_OP_MODE_WR 0x2000 +#define CEPH_OSD_OP_MODE_RMW 0x3000 +#define CEPH_OSD_OP_MODE_SUB 0x4000 + +#define CEPH_OSD_OP_TYPE 0x0f00 +#define CEPH_OSD_OP_TYPE_LOCK 0x0100 +#define CEPH_OSD_OP_TYPE_DATA 0x0200 +#define CEPH_OSD_OP_TYPE_ATTR 0x0300 +#define CEPH_OSD_OP_TYPE_EXEC 0x0400 +#define CEPH_OSD_OP_TYPE_PG 0x0500 + +enum { + /** data **/ + /* read */ + CEPH_OSD_OP_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 1, + CEPH_OSD_OP_STAT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 2, + + /* fancy read */ + CEPH_OSD_OP_MASKTRUNC = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 4, + + /* write */ + CEPH_OSD_OP_WRITE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1, + CEPH_OSD_OP_WRITEFULL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 2, + CEPH_OSD_OP_TRUNCATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 3, + CEPH_OSD_OP_ZERO = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 4, + CEPH_OSD_OP_DELETE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 5, + + /* fancy write */ + CEPH_OSD_OP_APPEND = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 6, + CEPH_OSD_OP_STARTSYNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 7, + CEPH_OSD_OP_SETTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 8, + CEPH_OSD_OP_TRIMTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 9, + + CEPH_OSD_OP_TMAPUP = CEPH_OSD_OP_MODE_RMW | CEPH_OSD_OP_TYPE_DATA | 10, + CEPH_OSD_OP_TMAPPUT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 11, + CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12, + + CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13, + CEPH_OSD_OP_ROLLBACK= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 14, + + /** attrs **/ + /* read */ + CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, + CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2, + CEPH_OSD_OP_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 3, + + /* write */ + CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1, + CEPH_OSD_OP_SETXATTRS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 2, + CEPH_OSD_OP_RESETXATTRS = CEPH_OSD_OP_MODE_WR|CEPH_OSD_OP_TYPE_ATTR | 3, + CEPH_OSD_OP_RMXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 4, + + /** subop **/ + CEPH_OSD_OP_PULL = CEPH_OSD_OP_MODE_SUB | 1, + CEPH_OSD_OP_PUSH = CEPH_OSD_OP_MODE_SUB | 2, + CEPH_OSD_OP_BALANCEREADS = CEPH_OSD_OP_MODE_SUB | 3, + CEPH_OSD_OP_UNBALANCEREADS = CEPH_OSD_OP_MODE_SUB | 4, + CEPH_OSD_OP_SCRUB = CEPH_OSD_OP_MODE_SUB | 5, + + /** lock **/ + CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, + CEPH_OSD_OP_WRUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 2, + CEPH_OSD_OP_RDLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 3, + CEPH_OSD_OP_RDUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 4, + CEPH_OSD_OP_UPLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 5, + CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, + + /** exec **/ + CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, + + /** pg **/ + CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, +}; + +static inline int ceph_osd_op_type_lock(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_LOCK; +} +static inline int ceph_osd_op_type_data(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_DATA; +} +static inline int ceph_osd_op_type_attr(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_ATTR; +} +static inline int ceph_osd_op_type_exec(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_EXEC; +} +static inline int ceph_osd_op_type_pg(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; +} + +static inline int ceph_osd_op_mode_subop(int op) +{ + return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_SUB; +} +static inline int ceph_osd_op_mode_read(int op) +{ + return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD; +} +static inline int ceph_osd_op_mode_modify(int op) +{ + return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; +} + +/* + * note that the following tmap stuff is also defined in the ceph librados.h + * any modification here needs to be updated there + */ +#define CEPH_OSD_TMAP_HDR 'h' +#define CEPH_OSD_TMAP_SET 's' +#define CEPH_OSD_TMAP_RM 'r' + +extern const char *ceph_osd_op_name(int op); + + +/* + * osd op flags + * + * An op may be READ, WRITE, or READ|WRITE. + */ +enum { + CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */ + CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */ + CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */ + CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */ + CEPH_OSD_FLAG_READ = 16, /* op may read */ + CEPH_OSD_FLAG_WRITE = 32, /* op may write */ + CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ + CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ + CEPH_OSD_FLAG_BALANCE_READS = 256, + CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ + CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ + CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ + CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ +}; + +enum { + CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ +}; + +#define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ +#define EBLACKLISTED ESHUTDOWN /* blacklisted */ + +/* xattr comparison */ +enum { + CEPH_OSD_CMPXATTR_OP_NOP = 0, + CEPH_OSD_CMPXATTR_OP_EQ = 1, + CEPH_OSD_CMPXATTR_OP_NE = 2, + CEPH_OSD_CMPXATTR_OP_GT = 3, + CEPH_OSD_CMPXATTR_OP_GTE = 4, + CEPH_OSD_CMPXATTR_OP_LT = 5, + CEPH_OSD_CMPXATTR_OP_LTE = 6 +}; + +enum { + CEPH_OSD_CMPXATTR_MODE_STRING = 1, + CEPH_OSD_CMPXATTR_MODE_U64 = 2 +}; + +/* + * an individual object operation. each may be accompanied by some data + * payload + */ +struct ceph_osd_op { + __le16 op; /* CEPH_OSD_OP_* */ + __le32 flags; /* CEPH_OSD_FLAG_* */ + union { + struct { + __le64 offset, length; + __le64 truncate_size; + __le32 truncate_seq; + } __attribute__ ((packed)) extent; + struct { + __le32 name_len; + __le32 value_len; + __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ + __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ + } __attribute__ ((packed)) xattr; + struct { + __u8 class_len; + __u8 method_len; + __u8 argc; + __le32 indata_len; + } __attribute__ ((packed)) cls; + struct { + __le64 cookie, count; + } __attribute__ ((packed)) pgls; + struct { + __le64 snapid; + } __attribute__ ((packed)) snap; + }; + __le32 payload_len; +} __attribute__ ((packed)); + +/* + * osd request message header. each request may include multiple + * ceph_osd_op object operations. + */ +struct ceph_osd_request_head { + __le32 client_inc; /* client incarnation */ + struct ceph_object_layout layout; /* pgid */ + __le32 osdmap_epoch; /* client's osdmap epoch */ + + __le32 flags; + + struct ceph_timespec mtime; /* for mutations only */ + struct ceph_eversion reassert_version; /* if we are replaying op */ + + __le32 object_len; /* length of object name */ + + __le64 snapid; /* snapid to read */ + __le64 snap_seq; /* writer's snap context */ + __le32 num_snaps; + + __le16 num_ops; + struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */ +} __attribute__ ((packed)); + +struct ceph_osd_reply_head { + __le32 client_inc; /* client incarnation */ + __le32 flags; + struct ceph_object_layout layout; + __le32 osdmap_epoch; + struct ceph_eversion reassert_version; /* for replaying uncommitted */ + + __le32 result; /* result code */ + + __le32 object_len; /* length of object name */ + __le32 num_ops; + struct ceph_osd_op ops[0]; /* ops[], object */ +} __attribute__ ((packed)); + + +#endif diff --git a/include/linux/ceph/types.h b/include/linux/ceph/types.h new file mode 100644 index 00000000000..28b35a005ec --- /dev/null +++ b/include/linux/ceph/types.h @@ -0,0 +1,29 @@ +#ifndef _FS_CEPH_TYPES_H +#define _FS_CEPH_TYPES_H + +/* needed before including ceph_fs.h */ +#include +#include +#include +#include + +#include "ceph_fs.h" +#include "ceph_frag.h" +#include "ceph_hash.h" + +/* + * Identify inodes by both their ino AND snapshot id (a u64). + */ +struct ceph_vino { + u64 ino; + u64 snap; +}; + + +/* context for the caps reservation mechanism */ +struct ceph_cap_reservation { + int count; +}; + + +#endif diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h new file mode 100644 index 00000000000..97e435b191f --- /dev/null +++ b/include/linux/crush/crush.h @@ -0,0 +1,180 @@ +#ifndef CEPH_CRUSH_CRUSH_H +#define CEPH_CRUSH_CRUSH_H + +#include + +/* + * CRUSH is a pseudo-random data distribution algorithm that + * efficiently distributes input values (typically, data objects) + * across a heterogeneous, structured storage cluster. + * + * The algorithm was originally described in detail in this paper + * (although the algorithm has evolved somewhat since then): + * + * http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf + * + * LGPL2 + */ + + +#define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */ + + +#define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */ +#define CRUSH_MAX_SET 10 /* max size of a mapping result */ + + +/* + * CRUSH uses user-defined "rules" to describe how inputs should be + * mapped to devices. A rule consists of sequence of steps to perform + * to generate the set of output devices. + */ +struct crush_rule_step { + __u32 op; + __s32 arg1; + __s32 arg2; +}; + +/* step op codes */ +enum { + CRUSH_RULE_NOOP = 0, + CRUSH_RULE_TAKE = 1, /* arg1 = value to start with */ + CRUSH_RULE_CHOOSE_FIRSTN = 2, /* arg1 = num items to pick */ + /* arg2 = type */ + CRUSH_RULE_CHOOSE_INDEP = 3, /* same */ + CRUSH_RULE_EMIT = 4, /* no args */ + CRUSH_RULE_CHOOSE_LEAF_FIRSTN = 6, + CRUSH_RULE_CHOOSE_LEAF_INDEP = 7, +}; + +/* + * for specifying choose num (arg1) relative to the max parameter + * passed to do_rule + */ +#define CRUSH_CHOOSE_N 0 +#define CRUSH_CHOOSE_N_MINUS(x) (-(x)) + +/* + * The rule mask is used to describe what the rule is intended for. + * Given a ruleset and size of output set, we search through the + * rule list for a matching rule_mask. + */ +struct crush_rule_mask { + __u8 ruleset; + __u8 type; + __u8 min_size; + __u8 max_size; +}; + +struct crush_rule { + __u32 len; + struct crush_rule_mask mask; + struct crush_rule_step steps[0]; +}; + +#define crush_rule_size(len) (sizeof(struct crush_rule) + \ + (len)*sizeof(struct crush_rule_step)) + + + +/* + * A bucket is a named container of other items (either devices or + * other buckets). Items within a bucket are chosen using one of a + * few different algorithms. The table summarizes how the speed of + * each option measures up against mapping stability when items are + * added or removed. + * + * Bucket Alg Speed Additions Removals + * ------------------------------------------------ + * uniform O(1) poor poor + * list O(n) optimal poor + * tree O(log n) good good + * straw O(n) optimal optimal + */ +enum { + CRUSH_BUCKET_UNIFORM = 1, + CRUSH_BUCKET_LIST = 2, + CRUSH_BUCKET_TREE = 3, + CRUSH_BUCKET_STRAW = 4 +}; +extern const char *crush_bucket_alg_name(int alg); + +struct crush_bucket { + __s32 id; /* this'll be negative */ + __u16 type; /* non-zero; type=0 is reserved for devices */ + __u8 alg; /* one of CRUSH_BUCKET_* */ + __u8 hash; /* which hash function to use, CRUSH_HASH_* */ + __u32 weight; /* 16-bit fixed point */ + __u32 size; /* num items */ + __s32 *items; + + /* + * cached random permutation: used for uniform bucket and for + * the linear search fallback for the other bucket types. + */ + __u32 perm_x; /* @x for which *perm is defined */ + __u32 perm_n; /* num elements of *perm that are permuted/defined */ + __u32 *perm; +}; + +struct crush_bucket_uniform { + struct crush_bucket h; + __u32 item_weight; /* 16-bit fixed point; all items equally weighted */ +}; + +struct crush_bucket_list { + struct crush_bucket h; + __u32 *item_weights; /* 16-bit fixed point */ + __u32 *sum_weights; /* 16-bit fixed point. element i is sum + of weights 0..i, inclusive */ +}; + +struct crush_bucket_tree { + struct crush_bucket h; /* note: h.size is _tree_ size, not number of + actual items */ + __u8 num_nodes; + __u32 *node_weights; +}; + +struct crush_bucket_straw { + struct crush_bucket h; + __u32 *item_weights; /* 16-bit fixed point */ + __u32 *straws; /* 16-bit fixed point */ +}; + + + +/* + * CRUSH map includes all buckets, rules, etc. + */ +struct crush_map { + struct crush_bucket **buckets; + struct crush_rule **rules; + + /* + * Parent pointers to identify the parent bucket a device or + * bucket in the hierarchy. If an item appears more than + * once, this is the _last_ time it appeared (where buckets + * are processed in bucket id order, from -1 on down to + * -max_buckets. + */ + __u32 *bucket_parents; + __u32 *device_parents; + + __s32 max_buckets; + __u32 max_rules; + __s32 max_devices; +}; + + +/* crush.c */ +extern int crush_get_bucket_item_weight(struct crush_bucket *b, int pos); +extern void crush_calc_parents(struct crush_map *map); +extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b); +extern void crush_destroy_bucket_list(struct crush_bucket_list *b); +extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b); +extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b); +extern void crush_destroy_bucket(struct crush_bucket *b); +extern void crush_destroy(struct crush_map *map); + +#endif diff --git a/include/linux/crush/hash.h b/include/linux/crush/hash.h new file mode 100644 index 00000000000..91e884230d5 --- /dev/null +++ b/include/linux/crush/hash.h @@ -0,0 +1,17 @@ +#ifndef CEPH_CRUSH_HASH_H +#define CEPH_CRUSH_HASH_H + +#define CRUSH_HASH_RJENKINS1 0 + +#define CRUSH_HASH_DEFAULT CRUSH_HASH_RJENKINS1 + +extern const char *crush_hash_name(int type); + +extern __u32 crush_hash32(int type, __u32 a); +extern __u32 crush_hash32_2(int type, __u32 a, __u32 b); +extern __u32 crush_hash32_3(int type, __u32 a, __u32 b, __u32 c); +extern __u32 crush_hash32_4(int type, __u32 a, __u32 b, __u32 c, __u32 d); +extern __u32 crush_hash32_5(int type, __u32 a, __u32 b, __u32 c, __u32 d, + __u32 e); + +#endif diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h new file mode 100644 index 00000000000..c46b99c18bb --- /dev/null +++ b/include/linux/crush/mapper.h @@ -0,0 +1,20 @@ +#ifndef CEPH_CRUSH_MAPPER_H +#define CEPH_CRUSH_MAPPER_H + +/* + * CRUSH functions for find rules and then mapping an input to an + * output set. + * + * LGPL2 + */ + +#include "crush.h" + +extern int crush_find_rule(struct crush_map *map, int pool, int type, int size); +extern int crush_do_rule(struct crush_map *map, + int ruleno, + int x, int *result, int result_max, + int forcefeed, /* -1 for none */ + __u32 *weights); + +#endif -- cgit v1.2.3-18-g5258 From ac0b74d8a1ced8ea86147467daf06b15b130dd94 Mon Sep 17 00:00:00 2001 From: Greg Farnum Date: Fri, 17 Sep 2010 10:10:55 -0700 Subject: ceph: add pagelist_reserve, pagelist_truncate, pagelist_set_cursor These facilitate preallocation of pages so that we can encode into the pagelist in an atomic context. Signed-off-by: Greg Farnum Signed-off-by: Sage Weil --- include/linux/ceph/pagelist.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h index cc9327aa1c9..9660d6b0a35 100644 --- a/include/linux/ceph/pagelist.h +++ b/include/linux/ceph/pagelist.h @@ -8,6 +8,14 @@ struct ceph_pagelist { void *mapped_tail; size_t length; size_t room; + struct list_head free_list; + size_t num_pages_free; +}; + +struct ceph_pagelist_cursor { + struct ceph_pagelist *pl; /* pagelist, for error checking */ + struct list_head *page_lru; /* page in list */ + size_t room; /* room remaining to reset to */ }; static inline void ceph_pagelist_init(struct ceph_pagelist *pl) @@ -16,11 +24,24 @@ static inline void ceph_pagelist_init(struct ceph_pagelist *pl) pl->mapped_tail = NULL; pl->length = 0; pl->room = 0; + INIT_LIST_HEAD(&pl->free_list); + pl->num_pages_free = 0; } + extern int ceph_pagelist_release(struct ceph_pagelist *pl); extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l); +extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space); + +extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl); + +extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl, + struct ceph_pagelist_cursor *c); + +extern int ceph_pagelist_truncate(struct ceph_pagelist *pl, + struct ceph_pagelist_cursor *c); + static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v) { __le64 ev = cpu_to_le64(v); -- cgit v1.2.3-18-g5258 From 571dba52a34015a5a7aa5d480a86936878444a6f Mon Sep 17 00:00:00 2001 From: Greg Farnum Date: Fri, 24 Sep 2010 14:56:40 -0700 Subject: ceph: add CEPH_MDS_OP_SETDIRLAYOUT and associated ioctl. Signed-off-by: Sage Weil --- include/linux/ceph/ceph_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index d5619ac8671..c3c74aef289 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -299,6 +299,7 @@ enum { CEPH_MDS_OP_SETATTR = 0x01108, CEPH_MDS_OP_SETFILELOCK= 0x01109, CEPH_MDS_OP_GETFILELOCK= 0x00110, + CEPH_MDS_OP_SETDIRLAYOUT=0x0110a, CEPH_MDS_OP_MKNOD = 0x01201, CEPH_MDS_OP_LINK = 0x01202, -- cgit v1.2.3-18-g5258 From b0ae19811375031ae3b3fecc65b702a9c6e5cc28 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Fri, 15 Oct 2010 04:21:18 +0900 Subject: security: remove unused parameter from security_task_setscheduler() All security modules shouldn't change sched_param parameter of security_task_setscheduler(). This is not only meaningless, but also make a harmful result if caller pass a static variable. This patch remove policy and sched_param parameter from security_task_setscheduler() becuase none of security module is using it. Cc: James Morris Signed-off-by: KOSAKI Motohiro Signed-off-by: James Morris --- include/linux/security.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index a22219afff0..294a0b22812 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -74,7 +74,7 @@ extern int cap_file_mmap(struct file *file, unsigned long reqprot, extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); -extern int cap_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp); +extern int cap_task_setscheduler(struct task_struct *p); extern int cap_task_setioprio(struct task_struct *p, int ioprio); extern int cap_task_setnice(struct task_struct *p, int nice); extern int cap_syslog(int type, bool from_file); @@ -1501,8 +1501,7 @@ struct security_operations { int (*task_getioprio) (struct task_struct *p); int (*task_setrlimit) (struct task_struct *p, unsigned int resource, struct rlimit *new_rlim); - int (*task_setscheduler) (struct task_struct *p, int policy, - struct sched_param *lp); + int (*task_setscheduler) (struct task_struct *p); int (*task_getscheduler) (struct task_struct *p); int (*task_movememory) (struct task_struct *p); int (*task_kill) (struct task_struct *p, @@ -1752,8 +1751,7 @@ int security_task_setioprio(struct task_struct *p, int ioprio); int security_task_getioprio(struct task_struct *p); int security_task_setrlimit(struct task_struct *p, unsigned int resource, struct rlimit *new_rlim); -int security_task_setscheduler(struct task_struct *p, - int policy, struct sched_param *lp); +int security_task_setscheduler(struct task_struct *p); int security_task_getscheduler(struct task_struct *p); int security_task_movememory(struct task_struct *p); int security_task_kill(struct task_struct *p, struct siginfo *info, @@ -2320,11 +2318,9 @@ static inline int security_task_setrlimit(struct task_struct *p, return 0; } -static inline int security_task_setscheduler(struct task_struct *p, - int policy, - struct sched_param *lp) +static inline int security_task_setscheduler(struct task_struct *p) { - return cap_task_setscheduler(p, policy, lp); + return cap_task_setscheduler(p); } static inline int security_task_getscheduler(struct task_struct *p) -- cgit v1.2.3-18-g5258 From 2606fd1fa5710205b23ee859563502aa18362447 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 13 Oct 2010 16:24:41 -0400 Subject: secmark: make secmark object handling generic Right now secmark has lots of direct selinux calls. Use all LSM calls and remove all SELinux specific knowledge. The only SELinux specific knowledge we leave is the mode. The only point is to make sure that other LSMs at least test this generic code before they assume it works. (They may also have to make changes if they do not represent labels as strings) Signed-off-by: Eric Paris Acked-by: Paul Moore Acked-by: Patrick McHardy Signed-off-by: James Morris --- include/linux/netfilter/xt_SECMARK.h | 12 ++----- include/linux/security.h | 25 ++++++++++++++ include/linux/selinux.h | 63 ------------------------------------ 3 files changed, 28 insertions(+), 72 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/xt_SECMARK.h b/include/linux/netfilter/xt_SECMARK.h index 6fcd3448b18..989092bd627 100644 --- a/include/linux/netfilter/xt_SECMARK.h +++ b/include/linux/netfilter/xt_SECMARK.h @@ -11,18 +11,12 @@ * packets are being marked for. */ #define SECMARK_MODE_SEL 0x01 /* SELinux */ -#define SECMARK_SELCTX_MAX 256 - -struct xt_secmark_target_selinux_info { - __u32 selsid; - char selctx[SECMARK_SELCTX_MAX]; -}; +#define SECMARK_SECCTX_MAX 256 struct xt_secmark_target_info { __u8 mode; - union { - struct xt_secmark_target_selinux_info sel; - } u; + __u32 secid; + char secctx[SECMARK_SECCTX_MAX]; }; #endif /*_XT_SECMARK_H_target */ diff --git a/include/linux/security.h b/include/linux/security.h index 294a0b22812..d70adc394f6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -959,6 +959,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Sets the new child socket's sid to the openreq sid. * @inet_conn_established: * Sets the connection's peersid to the secmark on skb. + * @secmark_relabel_packet: + * check if the process should be allowed to relabel packets to the given secid + * @security_secmark_refcount_inc + * tells the LSM to increment the number of secmark labeling rules loaded + * @security_secmark_refcount_dec + * tells the LSM to decrement the number of secmark labeling rules loaded * @req_classify_flow: * Sets the flow's sid to the openreq sid. * @tun_dev_create: @@ -1593,6 +1599,9 @@ struct security_operations { struct request_sock *req); void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req); void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb); + int (*secmark_relabel_packet) (u32 secid); + void (*secmark_refcount_inc) (void); + void (*secmark_refcount_dec) (void); void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl); int (*tun_dev_create)(void); void (*tun_dev_post_create)(struct sock *sk); @@ -2547,6 +2556,9 @@ void security_inet_csk_clone(struct sock *newsk, const struct request_sock *req); void security_inet_conn_established(struct sock *sk, struct sk_buff *skb); +int security_secmark_relabel_packet(u32 secid); +void security_secmark_refcount_inc(void); +void security_secmark_refcount_dec(void); int security_tun_dev_create(void); void security_tun_dev_post_create(struct sock *sk); int security_tun_dev_attach(struct sock *sk); @@ -2701,6 +2713,19 @@ static inline void security_inet_conn_established(struct sock *sk, { } +static inline int security_secmark_relabel_packet(u32 secid) +{ + return 0; +} + +static inline void security_secmark_refcount_inc(void) +{ +} + +static inline void security_secmark_refcount_dec(void) +{ +} + static inline int security_tun_dev_create(void) { return 0; diff --git a/include/linux/selinux.h b/include/linux/selinux.h index 82e0f26a129..44f45961269 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h @@ -20,75 +20,12 @@ struct kern_ipc_perm; #ifdef CONFIG_SECURITY_SELINUX -/** - * selinux_string_to_sid - map a security context string to a security ID - * @str: the security context string to be mapped - * @sid: ID value returned via this. - * - * Returns 0 if successful, with the SID stored in sid. A value - * of zero for sid indicates no SID could be determined (but no error - * occurred). - */ -int selinux_string_to_sid(char *str, u32 *sid); - -/** - * selinux_secmark_relabel_packet_permission - secmark permission check - * @sid: SECMARK ID value to be applied to network packet - * - * Returns 0 if the current task is allowed to set the SECMARK label of - * packets with the supplied security ID. Note that it is implicit that - * the packet is always being relabeled from the default unlabeled value, - * and that the access control decision is made in the AVC. - */ -int selinux_secmark_relabel_packet_permission(u32 sid); - -/** - * selinux_secmark_refcount_inc - increments the secmark use counter - * - * SELinux keeps track of the current SECMARK targets in use so it knows - * when to apply SECMARK label access checks to network packets. This - * function incements this reference count to indicate that a new SECMARK - * target has been configured. - */ -void selinux_secmark_refcount_inc(void); - -/** - * selinux_secmark_refcount_dec - decrements the secmark use counter - * - * SELinux keeps track of the current SECMARK targets in use so it knows - * when to apply SECMARK label access checks to network packets. This - * function decements this reference count to indicate that one of the - * existing SECMARK targets has been removed/flushed. - */ -void selinux_secmark_refcount_dec(void); - /** * selinux_is_enabled - is SELinux enabled? */ bool selinux_is_enabled(void); #else -static inline int selinux_string_to_sid(const char *str, u32 *sid) -{ - *sid = 0; - return 0; -} - -static inline int selinux_secmark_relabel_packet_permission(u32 sid) -{ - return 0; -} - -static inline void selinux_secmark_refcount_inc(void) -{ - return; -} - -static inline void selinux_secmark_refcount_dec(void) -{ - return; -} - static inline bool selinux_is_enabled(void) { return false; -- cgit v1.2.3-18-g5258 From d5630b9d276bd389299ffea620b7c340ab19bcf5 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 13 Oct 2010 16:24:48 -0400 Subject: security: secid_to_secctx returns len when data is NULL With the (long ago) interface change to have the secid_to_secctx functions do the string allocation instead of having the caller do the allocation we lost the ability to query the security server for the length of the upcoming string. The SECMARK code would like to allocate a netlink skb with enough length to hold the string but it is just too unclean to do the string allocation twice or to do the allocation the first time and hold onto the string and slen. This patch adds the ability to call security_secid_to_secctx() with a NULL data pointer and it will just set the slen pointer. Signed-off-by: Eric Paris Reviewed-by: Paul Moore Signed-off-by: James Morris --- include/linux/security.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index d70adc394f6..b8246a8df7d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1285,9 +1285,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Return 0 if permission is granted. * * @secid_to_secctx: - * Convert secid to security context. + * Convert secid to security context. If secdata is NULL the length of + * the result will be returned in seclen, but no secdata will be returned. + * This does mean that the length could change between calls to check the + * length and the next call which actually allocates and returns the secdata. * @secid contains the security ID. * @secdata contains the pointer that stores the converted security context. + * @seclen pointer which contains the length of the data * @secctx_to_secid: * Convert security context to secid. * @secid contains the pointer to the generated security ID. -- cgit v1.2.3-18-g5258 From 1cc63249adfa957b34ca51effdee90ff8261d63f Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 13 Oct 2010 16:24:54 -0400 Subject: conntrack: export lsm context rather than internal secid via netlink The conntrack code can export the internal secid to userspace. These are dynamic, can change on lsm changes, and have no meaning in userspace. We should instead be sending lsm contexts to userspace instead. This patch sends the secctx (rather than secid) to userspace over the netlink socket. We use a new field CTA_SECCTX and stop using the the old CTA_SECMARK field since it did not send particularly useful information. Signed-off-by: Eric Paris Reviewed-by: Paul Moore Acked-by: Patrick McHardy Signed-off-by: James Morris --- include/linux/netfilter/nfnetlink_conntrack.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 9ed534c991b..70cd0603911 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -39,8 +39,9 @@ enum ctattr_type { CTA_TUPLE_MASTER, CTA_NAT_SEQ_ADJ_ORIG, CTA_NAT_SEQ_ADJ_REPLY, - CTA_SECMARK, + CTA_SECMARK, /* obsolete */ CTA_ZONE, + CTA_SECCTX, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) @@ -172,4 +173,11 @@ enum ctattr_help { }; #define CTA_HELP_MAX (__CTA_HELP_MAX - 1) +enum ctattr_secctx { + CTA_SECCTX_UNSPEC, + CTA_SECCTX_NAME, + __CTA_SECCTX_MAX +}; +#define CTA_SECCTX_MAX (__CTA_SECCTX_MAX - 1) + #endif /* _IPCONNTRACK_NETLINK_H */ -- cgit v1.2.3-18-g5258 From 686a0f3d71203bbfcc186900bbb8ac2cfc3d803c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 13 Oct 2010 17:50:02 -0400 Subject: kernel: rounddown helper function The roundup() helper function will round a given value up to a multiple of another given value. aka roundup(11, 7) would give 14 = 7 * 2. This new function does the opposite. It will round a given number down to the nearest multiple of the second number: rounddown(11, 7) would give 7. I need this in some future SELinux code and can carry the macro myself, but figured I would put it in the core kernel so others might find and use it if need be. Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/kernel.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b0a35e6bc6..6d6eea7f7b1 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -59,6 +59,12 @@ extern const char linux_proc_banner[]; #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) +#define rounddown(x, y) ( \ +{ \ + typeof(x) __x = (x); \ + __x - (__x % (y)); \ +} \ +) #define DIV_ROUND_CLOSEST(x, divisor)( \ { \ typeof(divisor) __divisor = divisor; \ -- cgit v1.2.3-18-g5258 From b28efd54d9d5c8005a29cd8782335beb9daaa32d Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 13 Oct 2010 17:50:08 -0400 Subject: kernel: roundup should only reference arguments once Currently the roundup macro references it's arguments more than one time. This patch changes it so it will only use its arguments once. Suggested-by: Andrew Morton Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/kernel.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 6d6eea7f7b1..1759ba5adce 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -58,7 +58,12 @@ extern const char linux_proc_banner[]; #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) -#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) +#define roundup(x, y) ( \ +{ \ + typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) #define rounddown(x, y) ( \ { \ typeof(x) __x = (x); \ -- cgit v1.2.3-18-g5258 From 2aeb66d3036dbafc297ac553a257a40283dadb3e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 21 Oct 2010 00:15:00 -0700 Subject: x86-32, percpu: Correct the ordering of the percpu readmostly section Checkin c957ef2c59e952803766ddc22e89981ab534606f had inconsistent ordering of .data..percpu..page_aligned and .data..percpu..readmostly; the still-broken version affected x86-32 at least. The page aligned version really must be page aligned... Signed-off-by: H. Peter Anvin LKML-Reference: <1287544022.4571.7.camel@sli10-conroe.sh.intel.com> Cc: Shaohua Li Cc: Eric Dumazet --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index d7e7b21511b..1457b81357a 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -706,8 +706,8 @@ VMLINUX_SYMBOL(__per_cpu_start) = .; \ *(.data..percpu..first) \ . = ALIGN(PAGE_SIZE); \ - *(.data..percpu..readmostly) \ *(.data..percpu..page_aligned) \ + *(.data..percpu..readmostly) \ *(.data..percpu) \ *(.data..percpu..shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ -- cgit v1.2.3-18-g5258 From b738127dfb469bb9f595cdace30e7f881e8146b2 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 20 Oct 2010 13:56:02 +0000 Subject: vlan: Rename VLAN_GROUP_ARRAY_LEN to VLAN_N_VID. VLAN_GROUP_ARRAY_LEN is simply the number of possible vlan VIDs. Since vlan groups will soon be more of an implementation detail for vlan devices, rename the constant to be descriptive of its actual purpose. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index a52320751bf..494cce86656 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -68,6 +68,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) #define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */ #define VLAN_TAG_PRESENT VLAN_CFI_MASK #define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ +#define VLAN_N_VID 4096 /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); @@ -76,9 +77,8 @@ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); * depends on completely exhausting the VLAN identifier space. Thus * it gives constant time look-up, but in many cases it wastes memory. */ -#define VLAN_GROUP_ARRAY_LEN 4096 #define VLAN_GROUP_ARRAY_SPLIT_PARTS 8 -#define VLAN_GROUP_ARRAY_PART_LEN (VLAN_GROUP_ARRAY_LEN/VLAN_GROUP_ARRAY_SPLIT_PARTS) +#define VLAN_GROUP_ARRAY_PART_LEN (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS) struct vlan_group { struct net_device *real_dev; /* The ethernet(like) device -- cgit v1.2.3-18-g5258 From 7b9c60903714bf0a19d746b228864bad3497284e Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 20 Oct 2010 13:56:04 +0000 Subject: vlan: Enable software emulation for vlan accleration. Currently users of hardware vlan accleration need to know whether the device supports it before generating packets. However, vlan acceleration will soon be available in a more flexible manner so knowing ahead of time becomes much more difficult. This adds a software fallback path for vlan packets on devices without the necessary offloading support, similar to other types of hardware accleration. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 880d5656582..2861565a27d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2248,9 +2248,17 @@ static inline int skb_gso_ok(struct sk_buff *skb, int features) static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { - return skb_is_gso(skb) && - (!skb_gso_ok(skb, dev->features) || - unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); + if (skb_is_gso(skb)) { + int features = dev->features; + + if (skb->protocol == htons(ETH_P_8021Q) || skb->vlan_tci) + features &= dev->vlan_features; + + return (!skb_gso_ok(skb, features) || + unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); + } + + return 0; } static inline void netif_set_gso_max_size(struct net_device *dev, -- cgit v1.2.3-18-g5258 From 65ac6a5fa658b90f1be700c55e7cd72e4611015d Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 20 Oct 2010 13:56:05 +0000 Subject: vlan: Avoid hash table lookup to find group. A struct net_device always maps to zero or one vlan groups and we always know the device when we are looking up a group. We currently do a hash table lookup on the device to find the group but it is much simpler to just store a pointer. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 19 +++++++++++++++++++ include/linux/netdevice.h | 5 ++++- 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 494cce86656..4047781da72 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -16,6 +16,7 @@ #ifdef __KERNEL__ #include #include +#include #define VLAN_HLEN 4 /* The additional bytes (on top of the Ethernet header) * that VLAN requires. @@ -114,6 +115,18 @@ static inline void vlan_group_set_device(struct vlan_group *vg, #define vlan_tx_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +/* Must be invoked with rcu_read_lock or with RTNL. */ +static inline struct net_device *vlan_find_dev(struct net_device *real_dev, + u16 vlan_id) +{ + struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp); + + if (grp) + return vlan_group_get_device(grp, vlan_id); + + return NULL; +} + extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); extern u16 vlan_dev_vlan_id(const struct net_device *dev); @@ -128,6 +141,12 @@ vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci); #else +static inline struct net_device *vlan_find_dev(struct net_device *real_dev, + u16 vlan_id) +{ + return NULL; +} + static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) { BUG(); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2861565a27d..9c78312ce14 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -942,7 +942,10 @@ struct net_device { /* Protocol specific pointers */ - + +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + struct vlan_group *vlgrp; /* VLAN group */ +#endif #ifdef CONFIG_NET_DSA void *dsa_ptr; /* dsa specific data */ #endif -- cgit v1.2.3-18-g5258 From 3701e51382a026cba10c60b03efabe534fba4ca4 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 20 Oct 2010 13:56:06 +0000 Subject: vlan: Centralize handling of hardware acceleration. Currently each driver that is capable of vlan hardware acceleration must be aware of the vlan groups that are configured and then pass the stripped tag to a specialized receive function. This is different from other types of hardware offload in that it places a significant amount of knowledge in the driver itself rather keeping it in the networking core. This makes vlan offloading function more similarly to other forms of offloading (such as checksum offloading or TSO) by doing the following: * On receive, stripped vlans are passed directly to the network core, without attempting to check for vlan groups or reconstructing the header if no group * vlans are made less special by folding the logic into the main receive routines * On transmit, the device layer will add the vlan header in software if the hardware doesn't support it, instead of spreading that logic out in upper layers, such as bonding. There are a number of advantages to this: * Fixes all bugs with drivers incorrectly dropping vlan headers at once. * Avoids having to disable VLAN acceleration when in promiscuous mode (good for bridging since it always puts devices in promiscuous mode). * Keeps VLAN tag separate until given to ultimate consumer, which avoids needing to do header reconstruction as in tg3 unless absolutely necessary. * Consolidates common code in core networking. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 6 ++++-- include/linux/netdevice.h | 1 - 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 4047781da72..a0d9786c202 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -132,7 +132,7 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); -extern void vlan_hwaccel_do_receive(struct sk_buff *skb); +extern bool vlan_hwaccel_do_receive(struct sk_buff **skb); extern gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb); @@ -166,8 +166,10 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, return NET_XMIT_SUCCESS; } -static inline void vlan_hwaccel_do_receive(struct sk_buff *skb) +static inline bool vlan_hwaccel_do_receive(struct sk_buff **skb) { + BUG(); + return false; } static inline gro_result_t diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9c78312ce14..ed7db7eebbf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1768,7 +1768,6 @@ extern int netdev_rx_handler_register(struct net_device *dev, void *rx_handler_data); extern void netdev_rx_handler_unregister(struct net_device *dev); -extern void netif_nit_deliver(struct sk_buff *skb); extern int dev_valid_name(const char *name); extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); extern int dev_ethtool(struct net *net, struct ifreq *); -- cgit v1.2.3-18-g5258 From d5dbda23804156ae6f35025ade5307a49d1db6d7 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 20 Oct 2010 13:56:07 +0000 Subject: ethtool: Add support for vlan accleration. Now that vlan acceleration is handled consistently regardless of usage, it is possible to enable and disable it at will. This adds support for Ethtool operations that change the offloading status for debugging purposes, similar to other forms of hardware acceleration. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 8a3338ceb43..6628a507fd3 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -309,6 +309,8 @@ struct ethtool_perm_addr { * flag differs from the read-only value. */ enum ethtool_flags { + ETH_FLAG_TXVLAN = (1 << 7), /* TX VLAN offload enabled */ + ETH_FLAG_RXVLAN = (1 << 8), /* RX VLAN offload enabled */ ETH_FLAG_LRO = (1 << 15), /* LRO is enabled */ ETH_FLAG_NTUPLE = (1 << 27), /* N-tuple filters enabled */ ETH_FLAG_RXHASH = (1 << 28), -- cgit v1.2.3-18-g5258 From 11a691bea48887c27425cc40bf291e74c922df25 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 21 Oct 2010 10:32:29 +0200 Subject: block: Turn bvec_k{un,}map_irq() into static inline functions Convert bvec_k{un,}map_irq() from macros to static inline functions if !CONFIG_HIGHMEM, so we can easier detect mistakes like the one fixed in 93055c31045a2d5599ec613a0c6cdcefc481a460 ("ps3disk: passing wrong variable = to bvec_kunmap_irq()") Signed-off-by: Geert Uytterhoeven Signed-off-by: Jens Axboe --- include/linux/bio.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 2c3fd742160..ba679992d39 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -346,8 +346,15 @@ static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) } #else -#define bvec_kmap_irq(bvec, flags) (page_address((bvec)->bv_page) + (bvec)->bv_offset) -#define bvec_kunmap_irq(buf, flags) do { *(flags) = 0; } while (0) +static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) +{ + return page_address(bvec->bv_page) + bvec->bv_offset; +} + +static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) +{ + *flags = 0; +} #endif static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, -- cgit v1.2.3-18-g5258 From 8b27b10f5863a5b63e46304a71aa01463d1efac4 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:17:20 +0300 Subject: ipvs: optimize checksums for apps Avoid full checksum calculation for apps that can provide info whether csum was broken after payload mangling. For now only ip_vs_ftp mangles payload and it updates the csum, so the full recalculation is avoided for all packets. Add CHECKSUM_UNNECESSARY for snat_handler (TCP and UDP). It is needed to support SNAT from local address for the case when csum is fully recalculated. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 6e8a6192e57..adcdba9dd18 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -597,11 +597,19 @@ struct ip_vs_app { __be16 port; /* port number in net order */ atomic_t usecnt; /* usage counter */ - /* output hook: return false if can't linearize. diff set for TCP. */ + /* + * output hook: Process packet in inout direction, diff set for TCP. + * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, + * 2=Mangled but checksum was not updated + */ int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *, struct sk_buff *, int *diff); - /* input hook: return false if can't linearize. diff set for TCP. */ + /* + * input hook: Process packet in outin direction, diff set for TCP. + * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, + * 2=Mangled but checksum was not updated + */ int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *, struct sk_buff *, int *diff); -- cgit v1.2.3-18-g5258 From cf356d69db0afef692cd640917bc70f708c27f14 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:21:07 +0300 Subject: ipvs: switch to notrack mode Change skb->ipvs_property semantic. This is preparation to support ip_vs_out processing in LOCAL_OUT. ipvs_property=1 will be used to avoid expensive lookups for traffic sent by transmitters. Now when conntrack support is not used we call ip_vs_notrack method to avoid problems in OUTPUT and POST_ROUTING hooks instead of exiting POST_ROUTING as before. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index adcdba9dd18..0e4618470ce 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -25,7 +25,7 @@ #include #include /* for struct ipv6hdr */ #include /* for ipv6_addr_copy */ -#ifdef CONFIG_IP_VS_NFCT +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include #endif @@ -1021,6 +1021,24 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) return csum_partial(diff, sizeof(diff), oldsum); } +/* + * Forget current conntrack (unconfirmed) and attach notrack entry + */ +static inline void ip_vs_notrack(struct sk_buff *skb) +{ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); + + if (!ct || !nf_ct_is_untracked(ct)) { + nf_reset(skb); + skb->nfct = &nf_ct_untracked_get()->ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); + } +#endif +} + #ifdef CONFIG_IP_VS_NFCT /* * Netfilter connection tracking -- cgit v1.2.3-18-g5258 From 190ecd27cd7294105e3b26ca71663c7d940acbbb Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:24:37 +0300 Subject: ipvs: do not schedule conns from real servers This patch is needed to avoid scheduling of packets from local real server when we add ip_vs_in in LOCAL_OUT hook to support local client. Currently, when ip_vs_in can not find existing connection it tries to create new one by calling ip_vs_schedule. The default indication from ip_vs_schedule was if connection was scheduled to real server. If real server is not available we try to use the bypass forwarding method or to send ICMP error. But in some cases we do not want to use the bypass feature. So, add flag 'ignored' to indicate if the scheduler ignores this packet. Make sure we do not create new connections from replies. We can hit this problem for persistent services and local real server when ip_vs_in is added to LOCAL_OUT hook to handle local clients. Also, make sure ip_vs_schedule ignores SYN packets for Active FTP DATA from local real server. The FTP DATA connection should be created on SYN+ACK from client to assign correct connection daddr. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 0e4618470ce..9d5c1b96530 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -849,7 +849,8 @@ extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * -ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb); +ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, + struct ip_vs_protocol *pp, int *ignored); extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp); -- cgit v1.2.3-18-g5258 From fc604767613b6d2036cdc35b660bc39451040a47 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:38:15 +0300 Subject: ipvs: changes for local real server This patch deals with local real servers: - Add support for DNAT to local address (different real server port). It needs ip_vs_out hook in LOCAL_OUT for both families because skb->protocol is not set for locally generated packets and can not be used to set 'af'. - Skip packets in ip_vs_in marked with skb->ipvs_property because ip_vs_out processing can be executed in LOCAL_OUT but we still have the conn_out_get check in ip_vs_in. - Ignore packets with inet->nodefrag from local stack - Require skb_dst(skb) != NULL because we use it to get struct net - Add support for changing the route to local IPv4 stack after DNAT depending on the source address type. Local client sets output route and the remote client sets input route. It looks like IPv6 does not need such rerouting because the replies use addresses from initial incoming header, not from skb route. - All transmitters now have strict checks for the destination address type: redirect from non-local address to local real server requires NAT method, local address can not be used as source address when talking to remote real server. - Now LOCALNODE is not set explicitly as forwarding method in real server to allow the connections to provide correct forwarding method to the backup server. Not sure if this breaks tools that expect to see 'Local' real server type. If needed, this can be supported with new flag IP_VS_DEST_F_LOCAL. Now it should be possible connections in backup that lost their fwmark information during sync to be forwarded properly to their daddr, even if it is local address in the backup server. By this way backup could be used as real server for DR or TUN, for NAT there are some restrictions because tuple collisions in conntracks can create problems for the traffic. - Call ip_vs_dst_reset when destination is updated in case some real server IP type is changed between local and remote. [ horms@verge.net.au: removed trailing whitespace ] Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 9d5c1b96530..2f88d594233 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -409,6 +409,7 @@ struct ip_vs_conn { /* packet transmitter for different forwarding methods. If it mangles the packet, it must return NF_DROP or better NF_STOLEN, otherwise this must be changed to a sk_buff **. + NF_ACCEPT can be returned when destination is local. */ int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); -- cgit v1.2.3-18-g5258 From 0d79641a96d612aaa6d57a4d4f521d7ed9c9ccdd Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:46:17 +0300 Subject: ipvs: provide address family for debugging As skb->protocol is not valid in LOCAL_OUT add parameter for address family in packet debugging functions. Even if ports are not present in AH and ESP change them to use ip_vs_tcpudp_debug_packet to show at least valid addresses as before. This patch removes the last user of skb->protocol in IPVS. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 2f88d594233..b7bbd6c28cf 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -136,24 +136,24 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, if (net_ratelimit()) \ printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__); \ } while (0) -#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) \ +#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg) \ do { \ if (level <= ip_vs_get_debug_level()) \ - pp->debug_packet(pp, skb, ofs, msg); \ + pp->debug_packet(af, pp, skb, ofs, msg); \ } while (0) -#define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) \ +#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg) \ do { \ if (level <= ip_vs_get_debug_level() && \ net_ratelimit()) \ - pp->debug_packet(pp, skb, ofs, msg); \ + pp->debug_packet(af, pp, skb, ofs, msg); \ } while (0) #else /* NO DEBUGGING at ALL */ #define IP_VS_DBG_BUF(level, msg...) do {} while (0) #define IP_VS_ERR_BUF(msg...) do {} while (0) #define IP_VS_DBG(level, msg...) do {} while (0) #define IP_VS_DBG_RL(msg...) do {} while (0) -#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) do {} while (0) -#define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) do {} while (0) +#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg) do {} while (0) +#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg) do {} while (0) #endif #define IP_VS_BUG() BUG() @@ -345,7 +345,7 @@ struct ip_vs_protocol { int (*app_conn_bind)(struct ip_vs_conn *cp); - void (*debug_packet)(struct ip_vs_protocol *pp, + void (*debug_packet)(int af, struct ip_vs_protocol *pp, const struct sk_buff *skb, int offset, const char *msg); @@ -828,7 +828,8 @@ extern int ip_vs_set_state_timeout(int *table, int num, const char *const *names, const char *name, int to); extern void -ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, +ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, + const struct sk_buff *skb, int offset, const char *msg); extern struct ip_vs_protocol ip_vs_protocol_tcp; -- cgit v1.2.3-18-g5258 From 11165f1457181e4499e5eada442434a07827ffd8 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 18 Oct 2010 14:27:29 +0000 Subject: socket: localize functions A couple of functions in socket.c are only used there and should be localized. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/socket.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index a8f56e1ec76..5146b50202c 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -326,7 +326,6 @@ extern long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *a extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len); extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata, int offset, int len); -extern int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, int __user *ulen); extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); -- cgit v1.2.3-18-g5258 From 6f747aca5e61778190d1e27bdc469f49149f0230 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 15 Oct 2010 05:15:59 +0000 Subject: xfrm6: make xfrm6_tunnel_free_spi local Function only defined and used in one file. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4f53532d4c2..36fdcb3fab9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1419,7 +1419,6 @@ extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family); extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family); extern __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr); -extern void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr); extern __be32 xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr); extern int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); -- cgit v1.2.3-18-g5258 From 8d8a0b1cc2a8f9794a3f1f747089b6a93774408d Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 15 Oct 2010 05:12:01 +0000 Subject: rtnetlink: remove rtnl_kill_links The function rtnl_kill_links is defined but never used. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index af60fd05084..e013c68bfb0 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -79,7 +79,6 @@ struct rtnl_link_ops { extern int __rtnl_link_register(struct rtnl_link_ops *ops); extern void __rtnl_link_unregister(struct rtnl_link_ops *ops); -extern void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops); extern int rtnl_link_register(struct rtnl_link_ops *ops); extern void rtnl_link_unregister(struct rtnl_link_ops *ops); -- cgit v1.2.3-18-g5258 From 1c4c40c42da468ef02dc04940930c1926c964558 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 15 Oct 2010 05:14:19 +0000 Subject: xfrm: make xfrm_bundle_ok local Only used in one place. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 36fdcb3fab9..f28d7c9b9f8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1465,8 +1465,6 @@ struct xfrm_state *xfrm_find_acq(struct net *net, struct xfrm_mark *mark, xfrm_address_t *saddr, int create, unsigned short family); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); -extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, - struct flowi *fl, int family, int strict); #ifdef CONFIG_XFRM_MIGRATE extern int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, -- cgit v1.2.3-18-g5258 From 3511c9132f8b1e1b5634e41a3331c44b0c13be70 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 16 Oct 2010 13:04:08 +0000 Subject: net_sched: remove the unused parameter of qdisc_create_dflt() The first parameter dev isn't in use in qdisc_create_dflt(). Signed-off-by: Changli Gao Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index eda8808fdac..ea1f8a83160 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -328,8 +328,7 @@ extern void qdisc_destroy(struct Qdisc *qdisc); extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n); extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc_ops *ops); -extern struct Qdisc *qdisc_create_dflt(struct net_device *dev, - struct netdev_queue *dev_queue, +extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, struct Qdisc_ops *ops, u32 parentid); extern void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab); -- cgit v1.2.3-18-g5258 From 106e4c26b1529e559d1aae777f11b4f8f7bafc26 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 12:45:14 +0200 Subject: tproxy: kick out TIME_WAIT sockets in case a new connection comes in with the same tuple Without tproxy redirections an incoming SYN kicks out conflicting TIME_WAIT sockets, in order to handle clients that reuse ports within the TIME_WAIT period. The same mechanism didn't work in case TProxy is involved in finding the proper socket, as the time_wait processing code looked up the listening socket assuming that the listener addr/port matches those of the established connection. This is not the case with TProxy as the listener addr/port is possibly changed with the tproxy rule. Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_tproxy_core.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h index 208b46f4d6d..b3a8942a4e6 100644 --- a/include/net/netfilter/nf_tproxy_core.h +++ b/include/net/netfilter/nf_tproxy_core.h @@ -8,12 +8,16 @@ #include #include +#define NFT_LOOKUP_ANY 0 +#define NFT_LOOKUP_LISTENER 1 +#define NFT_LOOKUP_ESTABLISHED 2 + /* look up and get a reference to a matching socket */ extern struct sock * nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, - const struct net_device *in, bool listening); + const struct net_device *in, int lookup_type); static inline void nf_tproxy_put_sock(struct sock *sk) -- cgit v1.2.3-18-g5258 From 6006db84a91838813cdad8a6622a4e39efe9ea47 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 12:47:34 +0200 Subject: tproxy: add lookup type checks for UDP in nf_tproxy_get_sock_v4() Also, inline this function as the lookup_type is always a literal and inlining removes branches performed at runtime. Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_tproxy_core.h | 116 ++++++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h index b3a8942a4e6..1027d7f901a 100644 --- a/include/net/netfilter/nf_tproxy_core.h +++ b/include/net/netfilter/nf_tproxy_core.h @@ -13,11 +13,123 @@ #define NFT_LOOKUP_ESTABLISHED 2 /* look up and get a reference to a matching socket */ -extern struct sock * + + +/* This function is used by the 'TPROXY' target and the 'socket' + * match. The following lookups are supported: + * + * Explicit TProxy target rule + * =========================== + * + * This is used when the user wants to intercept a connection matching + * an explicit iptables rule. In this case the sockets are assumed + * matching in preference order: + * + * - match: if there's a fully established connection matching the + * _packet_ tuple, it is returned, assuming the redirection + * already took place and we process a packet belonging to an + * established connection + * + * - match: if there's a listening socket matching the redirection + * (e.g. on-port & on-ip of the connection), it is returned, + * regardless if it was bound to 0.0.0.0 or an explicit + * address. The reasoning is that if there's an explicit rule, it + * does not really matter if the listener is bound to an interface + * or to 0. The user already stated that he wants redirection + * (since he added the rule). + * + * "socket" match based redirection (no specific rule) + * =================================================== + * + * There are connections with dynamic endpoints (e.g. FTP data + * connection) that the user is unable to add explicit rules + * for. These are taken care of by a generic "socket" rule. It is + * assumed that the proxy application is trusted to open such + * connections without explicit iptables rule (except of course the + * generic 'socket' rule). In this case the following sockets are + * matched in preference order: + * + * - match: if there's a fully established connection matching the + * _packet_ tuple + * + * - match: if there's a non-zero bound listener (possibly with a + * non-local address) We don't accept zero-bound listeners, since + * then local services could intercept traffic going through the + * box. + * + * Please note that there's an overlap between what a TPROXY target + * and a socket match will match. Normally if you have both rules the + * "socket" match will be the first one, effectively all packets + * belonging to established connections going through that one. + */ +static inline struct sock * nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, - const struct net_device *in, int lookup_type); + const struct net_device *in, int lookup_type) +{ + struct sock *sk; + + /* look up socket */ + switch (protocol) { + case IPPROTO_TCP: + switch (lookup_type) { + case NFT_LOOKUP_ANY: + sk = __inet_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + case NFT_LOOKUP_LISTENER: + sk = inet_lookup_listener(net, &tcp_hashinfo, + daddr, dport, + in->ifindex); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + + break; + case NFT_LOOKUP_ESTABLISHED: + sk = inet_lookup_established(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + default: + WARN_ON(1); + sk = NULL; + break; + } + break; + case IPPROTO_UDP: + sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + if (sk && lookup_type != NFT_LOOKUP_ANY) { + int connected = (sk->sk_state == TCP_ESTABLISHED); + int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) || + (lookup_type == NFT_LOOKUP_LISTENER && connected)) { + sock_put(sk); + sk = NULL; + } + } + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n", + protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk); + + return sk; +} + static inline void nf_tproxy_put_sock(struct sock *sk) -- cgit v1.2.3-18-g5258 From 093d282321daeb19c107e5f1f16d7f68484f3ade Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 13:06:43 +0200 Subject: tproxy: fix hash locking issue when using port redirection in __inet_inherit_port() When __inet_inherit_port() is called on a tproxy connection the wrong locks are held for the inet_bind_bucket it is added to. __inet_inherit_port() made an implicit assumption that the listener's port number (and thus its bind bucket). Unfortunately, if you're using the TPROXY target to redirect skbs to a transparent proxy that assumption is not true anymore and things break. This patch adds code to __inet_inherit_port() so that it can handle this case by looking up or creating a new bind bucket for the child socket and updates callers of __inet_inherit_port() to gracefully handle __inet_inherit_port() failing. Reported by and original patch from Stephen Buck . See http://marc.info/?t=128169268200001&r=1&w=2 for the original discussion. Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/inet_hashtables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 74358d1b3f4..e9c2ed8af86 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -245,7 +245,7 @@ static inline int inet_sk_listen_hashfn(const struct sock *sk) } /* Caller must disable local BH processing. */ -extern void __inet_inherit_port(struct sock *sk, struct sock *child); +extern int __inet_inherit_port(struct sock *sk, struct sock *child); extern void inet_put_port(struct sock *sk); -- cgit v1.2.3-18-g5258 From 8c974438085d2c81b006daeaab8801eedbd19758 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 21 Oct 2010 01:06:15 +0000 Subject: Revert c6537d6742985da1fbf12ae26cde6a096fd35b5c Backout the tipc changes to the flags int he subscription message. These changees, while reasonable on the surface, interefere with user space ABI compatibility which is a no-no. This was part of the changes to fix the endianess issues in the TIPC protocol, which would be really nice to do but we need to do so in a way that is backwards compatible with user space. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- include/linux/tipc.h | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/tipc.h b/include/linux/tipc.h index 181c8d0e6f7..d10614b29d5 100644 --- a/include/linux/tipc.h +++ b/include/linux/tipc.h @@ -127,17 +127,23 @@ static inline unsigned int tipc_node(__u32 addr) * TIPC topology subscription service definitions */ -#define TIPC_SUB_SERVICE 0x00 /* Filter for service availability */ -#define TIPC_SUB_PORTS 0x01 /* Filter for port availability */ -#define TIPC_SUB_CANCEL 0x04 /* Cancel a subscription */ +#define TIPC_SUB_PORTS 0x01 /* filter for port availability */ +#define TIPC_SUB_SERVICE 0x02 /* filter for service availability */ +#define TIPC_SUB_CANCEL 0x04 /* cancel a subscription */ +#if 0 +/* The following filter options are not currently implemented */ +#define TIPC_SUB_NO_BIND_EVTS 0x04 /* filter out "publish" events */ +#define TIPC_SUB_NO_UNBIND_EVTS 0x08 /* filter out "withdraw" events */ +#define TIPC_SUB_SINGLE_EVT 0x10 /* expire after first event */ +#endif #define TIPC_WAIT_FOREVER ~0 /* timeout for permanent subscription */ struct tipc_subscr { - struct tipc_name_seq seq; /* NBO. Name sequence of interest */ - __u32 timeout; /* NBO. Subscription duration (in ms) */ - __u32 filter; /* NBO. Bitmask of filter options */ - char usr_handle[8]; /* Opaque. Available for subscriber use */ + struct tipc_name_seq seq; /* name sequence of interest */ + __u32 timeout; /* subscription duration (in ms) */ + __u32 filter; /* bitmask of filter options */ + char usr_handle[8]; /* available for subscriber use */ }; #define TIPC_PUBLISHED 1 /* publication event */ @@ -145,11 +151,11 @@ struct tipc_subscr { #define TIPC_SUBSCR_TIMEOUT 3 /* subscription timeout event */ struct tipc_event { - __u32 event; /* NBO. Event type, as defined above */ - __u32 found_lower; /* NBO. Matching name seq instances */ - __u32 found_upper; /* " " " " " */ - struct tipc_portid port; /* NBO. Associated port */ - struct tipc_subscr s; /* Original, associated subscription */ + __u32 event; /* event type */ + __u32 found_lower; /* matching name seq instances */ + __u32 found_upper; /* " " " " */ + struct tipc_portid port; /* associated port */ + struct tipc_subscr s; /* associated subscription */ }; /* -- cgit v1.2.3-18-g5258 From d0c2b0d265a0f1f92922a99a31def9da582197ac Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 19 Oct 2010 07:12:10 +0000 Subject: napi: unexport napi_reuse_skb The function napi_reuse_skb is only used inside core. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ed7db7eebbf..fcd3dda8632 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1748,8 +1748,6 @@ extern gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb); extern gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); extern void napi_gro_flush(struct napi_struct *napi); -extern void napi_reuse_skb(struct napi_struct *napi, - struct sk_buff *skb); extern struct sk_buff * napi_get_frags(struct napi_struct *napi); extern gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, -- cgit v1.2.3-18-g5258 From 32a875adcdcf5f470bf967250cfd01722e23844f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 19 Oct 2010 06:48:16 +0000 Subject: 9p: client code cleanup Make p9_client_version static since only used in one file. Remove p9_client_auth because it is defined but never used. Compile tested only. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/9p/client.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/9p/client.h b/include/net/9p/client.h index d1aa2cfb30f..7f63d5ab7b4 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -212,15 +212,12 @@ struct p9_dirent { int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb); int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name); -int p9_client_version(struct p9_client *); struct p9_client *p9_client_create(const char *dev_name, char *options); void p9_client_destroy(struct p9_client *clnt); void p9_client_disconnect(struct p9_client *clnt); void p9_client_begin_disconnect(struct p9_client *clnt); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, char *uname, u32 n_uname, char *aname); -struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, - u32 n_uname, char *aname); struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, int clone); int p9_client_open(struct p9_fid *fid, int mode); -- cgit v1.2.3-18-g5258 From 30e1f7a8122145f44f45c95366e27b6bb0b08428 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 2 Sep 2010 17:26:48 +0200 Subject: EDAC: Export edac sysfs class to users. Move toplevel sysfs class to the stub and make it available to non-modularized code too. Add proper refcounting of its users and move the registration functionality into the reference counting routines. Signed-off-by: Borislav Petkov --- include/linux/edac.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/edac.h b/include/linux/edac.h index 7cf92e8a419..36c66443bdf 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -13,6 +13,7 @@ #define _LINUX_EDAC_H_ #include +#include #define EDAC_OPSTATE_INVAL -1 #define EDAC_OPSTATE_POLL 0 @@ -22,9 +23,12 @@ extern int edac_op_state; extern int edac_err_assert; extern atomic_t edac_handlers; +extern struct sysdev_class edac_class; extern int edac_handler_set(void); extern void edac_atomic_assert_error(void); +extern struct sysdev_class *edac_get_sysfs_class(void); +extern void edac_put_sysfs_class(void); static inline void opstate_init(void) { -- cgit v1.2.3-18-g5258 From 6de5bd128d381ad88ac6d419a5e597048eb468cf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 11 Sep 2010 18:00:57 +0200 Subject: BKL: introduce CONFIG_BKL. With all the patches we have queued in the BKL removal tree, only a few dozen modules are left that actually rely on the BKL, and even there are lots of low-hanging fruit. We need to decide what to do about them, this patch illustrates one of the options: Every user of the BKL is marked as 'depends on BKL' in Kconfig, and the CONFIG_BKL becomes a user-visible option. If it gets disabled, no BKL using module can be built any more and the BKL code itself is compiled out. The one exception is file locking, which is practically always enabled and does a 'select BKL' instead. This effectively forces CONFIG_BKL to be enabled until we have solved the fs/lockd mess and can apply the patch that removes the BKL from fs/locks.c. Signed-off-by: Arnd Bergmann --- include/linux/smp_lock.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index 2ea1dd1ba21..291f721144c 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -54,12 +54,15 @@ static inline void cycle_kernel_lock(void) #else +#ifdef CONFIG_BKL /* provoke build bug if not set */ #define lock_kernel() #define unlock_kernel() -#define release_kernel_lock(task) do { } while(0) #define cycle_kernel_lock() do { } while(0) -#define reacquire_kernel_lock(task) 0 #define kernel_locked() 1 +#endif /* CONFIG_BKL */ + +#define release_kernel_lock(task) do { } while(0) +#define reacquire_kernel_lock(task) 0 #endif /* CONFIG_LOCK_KERNEL */ #endif /* __LINUX_SMPLOCK_H */ -- cgit v1.2.3-18-g5258 From e97c3e278e951501c2f385de70c3ceacdea78c4a Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 16:03:43 +0200 Subject: tproxy: split off ipv6 defragmentation to a separate module Like with IPv4, TProxy needs IPv6 defragmentation but does not require connection tracking. Since defragmentation was coupled with conntrack, I split off the two, creating an nf_defrag_ipv6 module, similar to the already existing nf_defrag_ipv4. Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/ipv6/nf_defrag_ipv6.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/net/netfilter/ipv6/nf_defrag_ipv6.h (limited to 'include') diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h new file mode 100644 index 00000000000..94dd54d76b4 --- /dev/null +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -0,0 +1,6 @@ +#ifndef _NF_DEFRAG_IPV6_H +#define _NF_DEFRAG_IPV6_H + +extern void nf_defrag_ipv6_enable(void); + +#endif /* _NF_DEFRAG_IPV6_H */ -- cgit v1.2.3-18-g5258 From aa976fc011efa1f0e3290c6c9addf7c20757f885 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 16:05:41 +0200 Subject: tproxy: added udp6_lib_lookup function Just like with IPv4, we need access to the UDP hash table to look up local sockets, but instead of exporting the global udp_table, export a lookup function. Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/udp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index a184d3496b1..200b82848c9 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -183,6 +183,9 @@ extern int udp_lib_setsockopt(struct sock *sk, int level, int optname, extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); +extern struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, __be16 dport, + int dif); /* * SNMP statistics for UDP and UDP-Lite -- cgit v1.2.3-18-g5258 From 6c46862280c5f55eda7750391bc65cd7e08c7535 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 16:08:28 +0200 Subject: tproxy: added tproxy sockopt interface in the IPV6 layer Support for IPV6_RECVORIGDSTADDR sockopt for UDP sockets were contributed by Harry Mason. Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/linux/in6.h | 4 ++++ include/linux/ipv6.h | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/in6.h b/include/linux/in6.h index c4bf46f764b..097a34b5556 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -268,6 +268,10 @@ struct in6_flowlabel_req { /* RFC5082: Generalized Ttl Security Mechanism */ #define IPV6_MINHOPCOUNT 73 +#define IPV6_ORIGDSTADDR 74 +#define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR +#define IPV6_TRANSPARENT 75 + /* * Multicast Routing: * see include/linux/mroute6.h. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e62683ba88e..8e429d0e040 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -341,7 +341,9 @@ struct ipv6_pinfo { odstopts:1, rxflow:1, rxtclass:1, - rxpmtu:1; + rxpmtu:1, + rxorigdstaddr:1; + /* 2 bits hole */ } bits; __u16 all; } rxopt; -- cgit v1.2.3-18-g5258 From 3b9afb29917f4ab08decf358ecfd354a72a91ac0 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 16:12:14 +0200 Subject: tproxy: added IPv6 socket lookup function to nf_tproxy_core Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_tproxy_core.h | 72 +++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h index 1027d7f901a..cd85b3bc832 100644 --- a/include/net/netfilter/nf_tproxy_core.h +++ b/include/net/netfilter/nf_tproxy_core.h @@ -5,7 +5,8 @@ #include #include #include -#include +#include +#include #include #define NFT_LOOKUP_ANY 0 @@ -130,6 +131,75 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, return sk; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline struct sock * +nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, + const struct in6_addr *saddr, const struct in6_addr *daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, int lookup_type) +{ + struct sock *sk; + + /* look up socket */ + switch (protocol) { + case IPPROTO_TCP: + switch (lookup_type) { + case NFT_LOOKUP_ANY: + sk = inet6_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + case NFT_LOOKUP_LISTENER: + sk = inet6_lookup_listener(net, &tcp_hashinfo, + daddr, ntohs(dport), + in->ifindex); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + + break; + case NFT_LOOKUP_ESTABLISHED: + sk = __inet6_lookup_established(net, &tcp_hashinfo, + saddr, sport, daddr, ntohs(dport), + in->ifindex); + break; + default: + WARN_ON(1); + sk = NULL; + break; + } + break; + case IPPROTO_UDP: + sk = udp6_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + if (sk && lookup_type != NFT_LOOKUP_ANY) { + int connected = (sk->sk_state == TCP_ESTABLISHED); + int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) || + (lookup_type == NFT_LOOKUP_LISTENER && connected)) { + sock_put(sk); + sk = NULL; + } + } + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n", + protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk); + + return sk; +} +#endif static inline void nf_tproxy_put_sock(struct sock *sk) -- cgit v1.2.3-18-g5258 From 6ad7889327a5ee6ab4220bd34e4428c7d0de0f32 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 16:17:26 +0200 Subject: tproxy: added IPv6 support to the TPROXY target This requires a new revision as the old target structure was IPv4 specific. Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_TPROXY.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/linux/netfilter/xt_TPROXY.h index 152e8f97132..3f3d6936128 100644 --- a/include/linux/netfilter/xt_TPROXY.h +++ b/include/linux/netfilter/xt_TPROXY.h @@ -1,5 +1,5 @@ -#ifndef _XT_TPROXY_H_target -#define _XT_TPROXY_H_target +#ifndef _XT_TPROXY_H +#define _XT_TPROXY_H /* TPROXY target is capable of marking the packet to perform * redirection. We can get rid of that whenever we get support for @@ -11,4 +11,11 @@ struct xt_tproxy_target_info { __be16 lport; }; -#endif /* _XT_TPROXY_H_target */ +struct xt_tproxy_target_info_v1 { + u_int32_t mark_mask; + u_int32_t mark_value; + union nf_inet_addr laddr; + __be16 lport; +}; + +#endif /* _XT_TPROXY_H */ -- cgit v1.2.3-18-g5258 From 7096d0422153ffcc2264eef652fc3a7bca3e6d3c Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 20 Oct 2010 11:45:13 -0600 Subject: of/device: Rework to use common platform_device_alloc() for allocating devices The current code allocates and manages platform_devices created from the device tree manually. It also uses an unsafe shortcut for allocating the platform_device and the resource table at the same time. (which I added in the last rework; sorry). This patch refactors the code to use platform_device_alloc() for allocating new devices. This reduces the amount of custom code implemented by of_platform, eliminates the unsafe alloc trick, and has the side benefit of letting the platform_bus code manage freeing the device data and resources when the device is freed. Signed-off-by: Grant Likely Cc: Benjamin Herrenschmidt Cc: Greg Kroah-Hartman Cc: "David S. Miller" Cc: Michal Simek --- include/linux/of_device.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 835f85ecd2d..975d347079d 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -27,20 +27,19 @@ static inline int of_driver_match_device(const struct device *dev, extern struct platform_device *of_dev_get(struct platform_device *dev); extern void of_dev_put(struct platform_device *dev); +extern int of_device_add(struct platform_device *pdev); extern int of_device_register(struct platform_device *ofdev); extern void of_device_unregister(struct platform_device *ofdev); -extern void of_release_dev(struct device *dev); - -static inline void of_device_free(struct platform_device *dev) -{ - of_release_dev(&dev->dev); -} extern ssize_t of_device_get_modalias(struct device *dev, char *str, ssize_t len); extern int of_device_uevent(struct device *dev, struct kobj_uevent_env *env); +static inline void of_device_node_put(struct device *dev) +{ + of_node_put(dev->of_node); +} #else /* CONFIG_OF_DEVICE */ @@ -56,6 +55,8 @@ static inline int of_device_uevent(struct device *dev, return -ENODEV; } +static inline void of_device_node_put(struct device *dev) { } + #endif /* CONFIG_OF_DEVICE */ #endif /* _LINUX_OF_DEVICE_H */ -- cgit v1.2.3-18-g5258 From 32c97689c46b272302053778f1a6c2facb0e220c Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 20 Oct 2010 11:45:14 -0600 Subject: of/flattree: Eliminate need to provide early_init_dt_scan_chosen_arch This patch refactors the early init parsing of the chosen node so that architectures aren't forced to provide an empty implementation of early_init_dt_scan_chosen_arch. Instead, if an architecture wants to do something different, it can either use a wrapper function around early_init_dt_scan_chosen(), or it can replace it altogether. This patch was written in preparation to adding device tree support to both x86 ad MIPS. Signed-off-by: Grant Likely Tested-by: David Daney --- include/linux/of_fdt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 71e1a916d3f..7bbf5b32843 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -72,7 +72,7 @@ extern void *of_get_flat_dt_prop(unsigned long node, const char *name, unsigned long *size); extern int of_flat_dt_is_compatible(unsigned long node, const char *name); extern unsigned long of_get_flat_dt_root(void); -extern void early_init_dt_scan_chosen_arch(unsigned long node); + extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data); extern void early_init_dt_check_for_initrd(unsigned long node); -- cgit v1.2.3-18-g5258 From 530719b2341fea925f58a5d6be0353fa43a88baf Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 21 Oct 2010 11:34:55 -0600 Subject: of/irq: of_irq.c needs to include linux/irq.h It works on current architectures simply because asm/prom.h includes it, but it broke when x86 turned on CONFIG_OF. Signed-off-by: Grant Likely --- include/linux/of_irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 090cbaa4bd3..109e013b177 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -5,6 +5,7 @@ struct of_irq; #include #include +#include #include #include -- cgit v1.2.3-18-g5258 From d9027470b88631d0956ac37cdadfdeb9cdcf2c99 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 25 May 2010 12:31:38 -0700 Subject: [libata] Add ATA transport class This is a scheleton for libata transport class. All information is read only, exporting information from libata: - ata_port class: one per ATA port - ata_link class: one per ATA port or 15 for SATA Port Multiplier - ata_device class: up to 2 for PATA link, usually one for SATA. Signed-off-by: Gwendal Grignou Reviewed-by: Grant Grundler Signed-off-by: Jeff Garzik --- include/linux/libata.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 45fb2967b66..c50f66d4382 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -604,6 +604,7 @@ struct ata_device { union acpi_object *gtf_cache; unsigned int gtf_filter; #endif + struct device tdev; /* n_sector is CLEAR_BEGIN, read comment above CLEAR_BEGIN */ u64 n_sectors; /* size of device, if ATA */ u64 n_native_sectors; /* native size, if ATA */ @@ -690,6 +691,7 @@ struct ata_link { struct ata_port *ap; int pmp; /* port multiplier port # */ + struct device tdev; unsigned int active_tag; /* active tag on this link */ u32 sactive; /* active NCQ commands */ @@ -707,6 +709,8 @@ struct ata_link { struct ata_device device[ATA_MAX_DEVICES]; }; +#define ATA_LINK_CLEAR_BEGIN offsetof(struct ata_link, active_tag) +#define ATA_LINK_CLEAR_END offsetof(struct ata_link, device[0]) struct ata_port { struct Scsi_Host *scsi_host; /* our co-allocated scsi host */ @@ -752,6 +756,7 @@ struct ata_port { struct ata_port_stats stats; struct ata_host *host; struct device *dev; + struct device tdev; struct mutex scsi_scan_mutex; struct delayed_work hotplug_task; -- cgit v1.2.3-18-g5258 From 295124dce4ddfd40b1f12d3ffd2779673e87c701 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Tue, 17 Aug 2010 10:56:53 -0700 Subject: [libata] support for > 512 byte sectors (e.g. 4K Native) This change enables my x86 machine to recognize and talk to a "Native 4K" SATA device. When I started working on this, I didn't know Matthew Wilcox had posted a similar patch 2 years ago: http://git.kernel.org/?p=linux/kernel/git/willy/ata.git;a=shortlog;h=refs/heads/ata-large-sectors Gwendal Grignou pointed me at the the above code and small portions of this patch include Matthew's work. That's why Mathew is first on the "Signed-off-by:". I've NOT included his use of a bitmap to determine 512 vs Native for ATA command block size - just used a simple table. And bugs are almost certainly mine. Lastly, the patch has been tested with a native 4K 'Engineering Sample' drive provided by Hitachi GST. Signed-off-by: Matthew Wilcox Signed-off-by: Grant Grundler Reviewed-by: Gwendal Grignou Signed-off-by: Jeff Garzik --- include/linux/ata.h | 46 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ata.h b/include/linux/ata.h index fe6e681a9d7..0c4929fa34d 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -89,6 +89,7 @@ enum { ATA_ID_SPG = 98, ATA_ID_LBA_CAPACITY_2 = 100, ATA_ID_SECTOR_SIZE = 106, + ATA_ID_LOGICAL_SECTOR_SIZE = 117, /* and 118 */ ATA_ID_LAST_LUN = 126, ATA_ID_DLF = 128, ATA_ID_CSFO = 129, @@ -640,16 +641,49 @@ static inline int ata_id_flush_ext_enabled(const u16 *id) return (id[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400; } -static inline int ata_id_has_large_logical_sectors(const u16 *id) +static inline u32 ata_id_logical_sector_size(const u16 *id) { - if ((id[ATA_ID_SECTOR_SIZE] & 0xc000) != 0x4000) - return 0; - return id[ATA_ID_SECTOR_SIZE] & (1 << 13); + /* T13/1699-D Revision 6a, Sep 6, 2008. Page 128. + * IDENTIFY DEVICE data, word 117-118. + * 0xd000 ignores bit 13 (logical:physical > 1) + */ + if ((id[ATA_ID_SECTOR_SIZE] & 0xd000) == 0x5000) + return (((id[ATA_ID_LOGICAL_SECTOR_SIZE+1] << 16) + + id[ATA_ID_LOGICAL_SECTOR_SIZE]) * sizeof(u16)) ; + return ATA_SECT_SIZE; +} + +static inline u8 ata_id_log2_per_physical_sector(const u16 *id) +{ + /* T13/1699-D Revision 6a, Sep 6, 2008. Page 128. + * IDENTIFY DEVICE data, word 106. + * 0xe000 ignores bit 12 (logical sector > 512 bytes) + */ + if ((id[ATA_ID_SECTOR_SIZE] & 0xe000) == 0x6000) + return (id[ATA_ID_SECTOR_SIZE] & 0xf); + return 0; } -static inline u16 ata_id_logical_per_physical_sectors(const u16 *id) +/* Offset of logical sectors relative to physical sectors. + * + * If device has more than one logical sector per physical sector + * (aka 512 byte emulation), vendors might offset the "sector 0" address + * so sector 63 is "naturally aligned" - e.g. FAT partition table. + * This avoids Read/Mod/Write penalties when using FAT partition table + * and updating "well aligned" (FS perspective) physical sectors on every + * transaction. + */ +static inline u16 ata_id_logical_sector_offset(const u16 *id, + u8 log2_per_phys) { - return 1 << (id[ATA_ID_SECTOR_SIZE] & 0xf); + u16 word_209 = id[209]; + + if ((log2_per_phys > 1) && (word_209 & 0xc000) == 0x4000) { + u16 first = word_209 & 0x3fff; + if (first > 0) + return (1 << log2_per_phys) - first; + } + return 0; } static inline int ata_id_has_lba48(const u16 *id) -- cgit v1.2.3-18-g5258 From c93b263e0d4fa8ce5fec0142a98196d1a127e845 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 1 Sep 2010 17:50:04 +0200 Subject: libata: clean up lpm related symbols and sysfs show/store functions Link power management related symbols are in confusing state w/ mixed usages of lpm, ipm and pm. This patch cleans up lpm related symbols and sysfs show/store functions as follows. * lpm states - NOT_AVAILABLE, MIN_POWER, MAX_PERFORMANCE and MEDIUM_POWER are renamed to ATA_LPM_UNKNOWN and ATA_LPM_{MIN|MAX|MED}_POWER. * Pre/postfixes are unified to lpm. * sysfs show/store functions for link_power_management_policy were curiously named get/put and unnecessarily complex. Renamed to show/store and simplified. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index c50f66d4382..c5bdc90fd31 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -196,7 +196,7 @@ enum { ATA_FLAG_ACPI_SATA = (1 << 17), /* need native SATA ACPI layout */ ATA_FLAG_AN = (1 << 18), /* controller supports AN */ ATA_FLAG_PMP = (1 << 19), /* controller supports PMP */ - ATA_FLAG_IPM = (1 << 20), /* driver can handle IPM */ + ATA_FLAG_LPM = (1 << 20), /* driver can handle LPM */ ATA_FLAG_EM = (1 << 21), /* driver supports enclosure * management */ ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity @@ -377,7 +377,7 @@ enum { ATA_HORKAGE_BROKEN_HPA = (1 << 4), /* Broken HPA */ ATA_HORKAGE_DISABLE = (1 << 5), /* Disable it */ ATA_HORKAGE_HPA_SIZE = (1 << 6), /* native size off by one */ - ATA_HORKAGE_IPM = (1 << 7), /* Link PM problems */ + ATA_HORKAGE_LPM = (1 << 7), /* Link PM problems */ ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ @@ -464,6 +464,17 @@ enum ata_completion_errors { AC_ERR_NCQ = (1 << 10), /* marker for offending NCQ qc */ }; +/* + * Link power management policy: If you alter this, you also need to + * alter libata-scsi.c (for the ascii descriptions) + */ +enum ata_lpm_policy { + ATA_LPM_UNKNOWN, + ATA_LPM_MAX_POWER, + ATA_LPM_MED_POWER, + ATA_LPM_MIN_POWER, +}; + /* forward declarations */ struct scsi_device; struct ata_port_operations; @@ -478,16 +489,6 @@ typedef int (*ata_reset_fn_t)(struct ata_link *link, unsigned int *classes, unsigned long deadline); typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes); -/* - * host pm policy: If you alter this, you also need to alter libata-scsi.c - * (for the ascii descriptions) - */ -enum link_pm { - NOT_AVAILABLE, - MIN_POWER, - MAX_PERFORMANCE, - MEDIUM_POWER, -}; extern struct device_attribute dev_attr_link_power_management_policy; extern struct device_attribute dev_attr_unload_heads; extern struct device_attribute dev_attr_em_message_type; @@ -772,7 +773,7 @@ struct ata_port { pm_message_t pm_mesg; int *pm_result; - enum link_pm pm_policy; + enum ata_lpm_policy lpm_policy; struct timer_list fastdrain_timer; unsigned long fastdrain_cnt; @@ -838,7 +839,7 @@ struct ata_port_operations { int (*scr_write)(struct ata_link *link, unsigned int sc_reg, u32 val); void (*pmp_attach)(struct ata_port *ap); void (*pmp_detach)(struct ata_port *ap); - int (*enable_pm)(struct ata_port *ap, enum link_pm policy); + int (*enable_pm)(struct ata_port *ap, enum ata_lpm_policy policy); void (*disable_pm)(struct ata_port *ap); /* -- cgit v1.2.3-18-g5258 From 1152b2617a6e1943b6b82e07c962950e56f1000c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 1 Sep 2010 17:50:05 +0200 Subject: libata: implement sata_link_scr_lpm() and make ata_dev_set_feature() global Link power management is about to be reimplemented. Prepare for it. * Implement sata_link_scr_lpm(). * Drop static from ata_dev_set_feature() and make it available to other libata files. * Trivial whitespace adjustments. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index c5bdc90fd31..7770eeb2103 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -952,6 +952,8 @@ extern int sata_link_debounce(struct ata_link *link, const unsigned long *params, unsigned long deadline); extern int sata_link_resume(struct ata_link *link, const unsigned long *params, unsigned long deadline); +extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, + bool spm_wakeup); extern int sata_link_hardreset(struct ata_link *link, const unsigned long *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)); -- cgit v1.2.3-18-g5258 From 6b7ae9545ad9875a289f4191c0216b473e313cb9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 1 Sep 2010 17:50:06 +0200 Subject: libata: reimplement link power management The current LPM implementation has the following issues. * Operation order isn't well thought-out. e.g. HIPM should be configured after IPM in SControl is properly configured. Not the other way around. * Suspend/resume paths call ata_lpm_enable/disable() which must only be called from EH context directly. Also, ata_lpm_enable/disable() were called whether LPM was in use or not. * Implementation is per-port when it should be per-link. As a result, it can't be used for controllers with slave links or PMP. * LPM state isn't managed consistently. After a link reset for whatever reason including suspend/resume the actual LPM state would be reset leaving ap->lpm_policy inconsistent. * Generic/driver-specific logic boundary isn't clear. Currently, libahci has to mangle stuff which libata EH proper should be handling. This makes the implementation unnecessarily complex and fragile. * Tied to ALPM. Doesn't consider DIPM only cases and doesn't check whether the device allows HIPM. * Error handling isn't implemented. Given the extent of mismatch with the rest of libata, I don't think trying to fix it piecewise makes much sense. This patch reimplements LPM support. * The new implementation is per-link. The target policy is still port-wide (ap->target_lpm_policy) but all the mechanisms and states are per-link and integrate well with the rest of link abstraction and can work with slave and PMP links. * Core EH has proper control of LPM state. LPM state is reconfigured when and only when reconfiguration is necessary. It makes sure that LPM state is reset when probing for new device on the link. Controller agnostic logic is now implemented in libata EH proper and driver implementation only has to deal with controller specifics. * Proper error handling. LPM config failure is attributed to the device on the link and LPM is disabled for the link if it fails repeatedly. * ops->enable/disable_pm() are replaced with single ops->set_lpm() which takes @policy and @hints. This simplifies driver specific implementation. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 7770eeb2103..bc4ee218b18 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -172,6 +172,7 @@ enum { ATA_LFLAG_NO_RETRY = (1 << 5), /* don't retry this link */ ATA_LFLAG_DISABLED = (1 << 6), /* link is disabled */ ATA_LFLAG_SW_ACTIVITY = (1 << 7), /* keep activity stats */ + ATA_LFLAG_NO_LPM = (1 << 8), /* disable LPM on this link */ /* struct ata_port flags */ ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ @@ -324,12 +325,11 @@ enum { ATA_EH_HARDRESET = (1 << 2), /* meaningful only in ->prereset */ ATA_EH_RESET = ATA_EH_SOFTRESET | ATA_EH_HARDRESET, ATA_EH_ENABLE_LINK = (1 << 3), - ATA_EH_LPM = (1 << 4), /* link power management action */ ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */ ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK, ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | - ATA_EH_ENABLE_LINK | ATA_EH_LPM, + ATA_EH_ENABLE_LINK, /* ata_eh_info->flags */ ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */ @@ -377,7 +377,6 @@ enum { ATA_HORKAGE_BROKEN_HPA = (1 << 4), /* Broken HPA */ ATA_HORKAGE_DISABLE = (1 << 5), /* Disable it */ ATA_HORKAGE_HPA_SIZE = (1 << 6), /* native size off by one */ - ATA_HORKAGE_LPM = (1 << 7), /* Link PM problems */ ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ @@ -475,6 +474,11 @@ enum ata_lpm_policy { ATA_LPM_MIN_POWER, }; +enum ata_lpm_hints { + ATA_LPM_EMPTY = (1 << 0), /* port empty/probing */ + ATA_LPM_HIPM = (1 << 1), /* may use HIPM */ +}; + /* forward declarations */ struct scsi_device; struct ata_port_operations; @@ -702,6 +706,7 @@ struct ata_link { unsigned int hw_sata_spd_limit; unsigned int sata_spd_limit; unsigned int sata_spd; /* current SATA PHY speed */ + enum ata_lpm_policy lpm_policy; /* record runtime error info, protected by host_set lock */ struct ata_eh_info eh_info; @@ -773,7 +778,7 @@ struct ata_port { pm_message_t pm_mesg; int *pm_result; - enum ata_lpm_policy lpm_policy; + enum ata_lpm_policy target_lpm_policy; struct timer_list fastdrain_timer; unsigned long fastdrain_cnt; @@ -839,8 +844,8 @@ struct ata_port_operations { int (*scr_write)(struct ata_link *link, unsigned int sc_reg, u32 val); void (*pmp_attach)(struct ata_port *ap); void (*pmp_detach)(struct ata_port *ap); - int (*enable_pm)(struct ata_port *ap, enum ata_lpm_policy policy); - void (*disable_pm)(struct ata_port *ap); + int (*set_lpm)(struct ata_link *link, enum ata_lpm_policy policy, + unsigned hints); /* * Start, stop, suspend and resume -- cgit v1.2.3-18-g5258 From 97750cebb3000a9cc08f8ce8dc8c7143be7d7201 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 6 Sep 2010 17:56:29 +0200 Subject: libata: add @ap to ata_wait_register() and introduce ata_msleep() Add optional @ap argument to ata_wait_register() and replace msleep() calls with ata_msleep() which take optional @ap in addition to the duration. These will be used to implement EH exclusion. This patch doesn't cause any behavior difference. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index bc4ee218b18..2fbd22bd68c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1004,8 +1004,9 @@ extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg); extern void ata_host_resume(struct ata_host *host); #endif extern int ata_ratelimit(void); -extern u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val, - unsigned long interval, unsigned long timeout); +extern void ata_msleep(struct ata_port *ap, unsigned int msecs); +extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, + u32 val, unsigned long interval, unsigned long timeout); extern int atapi_cmd_type(u8 opcode); extern void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis); -- cgit v1.2.3-18-g5258 From c0c362b60e259e3480a36ef70280d545818844f0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 6 Sep 2010 17:57:14 +0200 Subject: libata: implement cross-port EH exclusion In libata, the non-EH code paths should always take and release ap->lock explicitly when accessing hardware or shared data structures. However, once EH is active, it's assumed that the port is owned by EH and EH methods don't explicitly take ap->lock unless race from irq handler or other code paths are expected. However, libata EH didn't guarantee exclusion among EHs for ports of the same host. IOW, multiple EHs may execute in parallel on multiple ports of the same controller. In many cases, especially in SATA, the ports are completely independent of each other and this doesn't cause problems; however, there are cases where different ports share the same resource, which lead to obscure timing related bugs such as the one fixed by commit 213373cf (ata_piix: fix locking around SIDPR access). This patch implements exclusion among EHs of the same host. When EH begins, it acquires per-host EH ownership by calling ata_eh_acquire(). When EH finishes, the ownership is released by calling ata_eh_release(). EH ownership is also released whenever the EH thread goes to sleep from ata_msleep() or explicitly and reacquired after waking up. This ensures that while EH is actively accessing the hardware, it has exclusive access to it while allowing EHs to interleave and progress in parallel as they hit waiting stages, which dominate the time spent in EH. This achieves cross-port EH exclusion without pervasive and fragile changes while still allowing parallel EH for the most part. This was first reported by yuanding02@gmail.com more than three years ago in the following bugzilla. :-) https://bugzilla.kernel.org/show_bug.cgi?id=8223 Signed-off-by: Tejun Heo Cc: Alan Cox Reported-by: yuanding02@gmail.com Signed-off-by: Jeff Garzik --- include/linux/libata.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 2fbd22bd68c..52112d39d71 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -37,6 +37,7 @@ #include #include #include +#include /* * Define if arch has non-standard setup. This is a _PCI_ standard @@ -535,6 +536,10 @@ struct ata_host { void *private_data; struct ata_port_operations *ops; unsigned long flags; + + struct mutex eh_mutex; + struct task_struct *eh_owner; + #ifdef CONFIG_ATA_ACPI acpi_handle acpi_handle; #endif -- cgit v1.2.3-18-g5258 From b34e90429ce8a23546b6b927d4e151df4c113644 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Fri, 10 Sep 2010 12:19:43 +0100 Subject: libata: reorder ata_queued_cmd to remove alignment padding on 64 bit builds Reorder structure ata_queued_cmd to remove 8 bytes of alignment padding on 64 bit builds & therefore reduce the size of structure ata_port by 256 bytes. Overall this will have little impact, other than reducing the amount of memory that is cleared when allocating ata_ports. Signed-off-by: Richard Kennedy Signed-off-by: Jeff Garzik --- include/linux/libata.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 52112d39d71..15efec05df6 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -570,13 +570,13 @@ struct ata_queued_cmd { unsigned int extrabytes; unsigned int curbytes; - struct scatterlist *cursg; - unsigned int cursg_ofs; - struct scatterlist sgent; struct scatterlist *sg; + struct scatterlist *cursg; + unsigned int cursg_ofs; + unsigned int err_mask; struct ata_taskfile result_tf; ata_qc_cb_t complete_fn; -- cgit v1.2.3-18-g5258 From 89692c03226a066a017048cf7fbacbaa645f0e79 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sat, 16 Oct 2010 15:19:18 +0200 Subject: include/linux/libata.h: fix typo Signed-off-by: Andrea Gelmini Signed-off-by: Jeff Garzik --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 15efec05df6..15b77b8dc7e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -342,7 +342,7 @@ enum { ATA_EHI_DID_HARDRESET = (1 << 17), /* already soft-reset this port */ ATA_EHI_PRINTINFO = (1 << 18), /* print configuration info */ ATA_EHI_SETMODE = (1 << 19), /* configure transfer mode */ - ATA_EHI_POST_SETMODE = (1 << 20), /* revaildating after setmode */ + ATA_EHI_POST_SETMODE = (1 << 20), /* revalidating after setmode */ ATA_EHI_DID_RESET = ATA_EHI_DID_SOFTRESET | ATA_EHI_DID_HARDRESET, -- cgit v1.2.3-18-g5258 From 0988c4c7fb5881377ec20a6452f739a722e97c6b Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Thu, 21 Oct 2010 11:30:42 +0000 Subject: vlan: Calling vlan_hwaccel_do_receive() is always valid. It is now acceptable to receive vlan tagged packets at any time, even if CONFIG_VLAN_8021Q is not set. This means that calling vlan_hwaccel_do_receive() should not result in BUG() but rather just behave as if there were no vlan devices configured. Reported-by: Vladislav Zolotarov Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index a0d9786c202..c2f3a72712c 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -168,7 +168,8 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, static inline bool vlan_hwaccel_do_receive(struct sk_buff **skb) { - BUG(); + if ((*skb)->vlan_tci & VLAN_VID_MASK) + (*skb)->pkt_type = PACKET_OTHERHOST; return false; } -- cgit v1.2.3-18-g5258 From ff10fca5ceacf7bc59636f5ab808e775d1717167 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 22 Oct 2010 08:46:36 +0200 Subject: pcmcia: IOCARD is also required for using IRQs Dave Hinds pointed out to me that 37979e1546a7 will break b43 and ray_cs, as IOCARD is not -- as the name would suggest -- only needed for cards using IO ports. Instead, as it re-deines several pins, it is also required for using interrupts. Signed-off-by: Dominik Brodowski --- include/pcmcia/ds.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index d830c87ff0a..8479b66c067 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -259,6 +259,8 @@ void pcmcia_disable_device(struct pcmcia_device *p_dev); #define CONF_ENABLE_SPKR 0x0002 #define CONF_ENABLE_PULSE_IRQ 0x0004 #define CONF_ENABLE_ESR 0x0008 +#define CONF_ENABLE_IOCARD 0x0010 /* auto-enabled if IO resources or IRQ + * (CONF_ENABLE_IRQ) in use */ /* flags used by pcmcia_loop_config() autoconfiguration */ #define CONF_AUTO_CHECK_VCC 0x0100 /* check for matching Vcc? */ -- cgit v1.2.3-18-g5258 From c64a0926710153b9d44c979d2942f4a8648fd74e Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Wed, 25 Aug 2010 12:50:00 -0700 Subject: driver core: platform_bus: allow runtime override of dev_pm_ops Currently, the platform_bus allows customization of several of the busses dev_pm_ops methods by using weak symbols so that platform code can override them. The weak-symbol approach is not scalable when wanting to support multiple platforms in a single kernel binary. Instead, provide __init methods for platform code to customize the dev_pm_ops methods at runtime. NOTE: after these dynamic methods are merged, the weak symbols should be removed from drivers/base/platform.c. AFAIK, this will only affect SH and sh-mobile which should be converted to use this runtime approach instead of the weak symbols. After SH & sh-mobile are converted, the weak symobols could be removed. Tested on OMAP3. Cc: Magnus Damm Acked-by: Grant Likely Signed-off-by: Kevin Hilman Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index d7ecad0093b..2e700ec0601 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -138,6 +138,9 @@ extern struct platform_device *platform_create_bundle(struct platform_driver *dr struct resource *res, unsigned int n_res, const void *data, size_t size); +extern const struct dev_pm_ops * platform_bus_get_pm_ops(void); +extern void platform_bus_set_pm_ops(const struct dev_pm_ops *pm); + /* early platform driver interface */ struct early_platform_driver { const char *class_str; -- cgit v1.2.3-18-g5258 From 39aba963d937edb20db7d9d93e6dda5d2adfdcdd Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sat, 4 Sep 2010 22:33:14 -0700 Subject: driver core: remove CONFIG_SYSFS_DEPRECATED_V2 but keep it for block devices This patch removes the old CONFIG_SYSFS_DEPRECATED_V2 config option, but it keeps the logic around to handle block devices in the old manner as some people like to run new kernel versions on old (pre 2007/2008) distros. Signed-off-by: Kay Sievers Cc: Jens Axboe Cc: Stephen Hemminger Cc: "Eric W. Biederman" Cc: Alan Stern Cc: "James E.J. Bottomley" Cc: Andrew Morton Cc: Alexey Kuznetsov Cc: Randy Dunlap Cc: Tejun Heo Cc: "David S. Miller" Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: Ingo Molnar Cc: Peter Zijlstra Cc: David Howells Signed-off-by: Greg Kroah-Hartman --- include/sound/core.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index 89e0ac17f44..df26ebbfa9c 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -133,9 +133,7 @@ struct snd_card { int free_on_last_close; /* free in context of file_release */ wait_queue_head_t shutdown_sleep; struct device *dev; /* device assigned to this card */ -#ifndef CONFIG_SYSFS_DEPRECATED struct device *card_dev; /* cardX object for sysfs */ -#endif #ifdef CONFIG_PM unsigned int power_state; /* power state */ @@ -196,11 +194,7 @@ struct snd_minor { /* return a device pointer linked to each sound device as a parent */ static inline struct device *snd_card_get_device_link(struct snd_card *card) { -#ifdef CONFIG_SYSFS_DEPRECATED - return card ? card->dev : NULL; -#else return card ? card->card_dev : NULL; -#endif } /* sound.c */ -- cgit v1.2.3-18-g5258 From e52eec13cd6b7f30ab19081b387813e03e592ae5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 8 Sep 2010 16:54:17 +0200 Subject: SYSFS: Allow boot time switching between deprecated and modern sysfs layout I have some systems which need legacy sysfs due to old tools that are making assumptions that a directory can never be a symlink to another directory, and it's a big hazzle to compile separate kernels for them. This patch turns CONFIG_SYSFS_DEPRECATED into a run time option that can be switched on/off the kernel command line. This way the same binary can be used in both cases with just a option on the command line. The old CONFIG_SYSFS_DEPRECATED_V2 option is still there to set the default. I kept the weird name to not break existing config files. Also the compat code can be still completely disabled by undefining CONFIG_SYSFS_DEPRECATED_SWITCH -- just the optimizer takes care of this now instead of lots of ifdefs. This makes the code look nicer. v2: This is an updated version on top of Kay's patch to only handle the block devices. I tested it on my old systems and that seems to work. Cc: axboe@kernel.dk Signed-off-by: Andi Kleen Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 516fecacf27..dd489531346 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -751,4 +751,11 @@ do { \ MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor)) #define MODULE_ALIAS_CHARDEV_MAJOR(major) \ MODULE_ALIAS("char-major-" __stringify(major) "-*") + +#ifdef CONFIG_SYSFS_DEPRECATED +extern long sysfs_deprecated; +#else +#define sysfs_deprecated 0 +#endif + #endif /* _DEVICE_H_ */ -- cgit v1.2.3-18-g5258 From 6427a7655afd7f07dfa83736defd1d94656c83e5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 14 Sep 2010 11:37:36 -0700 Subject: uio: Cleanup irq handling. Change the value of UIO_IRQ_NONE -2 to 0. 0 is well defined in the rest of the kernel as the value to indicate an irq has not been assigned. Update the calls to request_irq and free_irq to only ignore UIO_IRQ_NONE and UIO_IRQ_CUSTOM allowing the rest of the kernel's possible irq numbers to be used. Signed-off-by: Eric W. Biederman Reviewed-by: Thomas Gleixner Signed-off-by: Hans J. Koch Signed-off-by: Greg Kroah-Hartman --- include/linux/uio_driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 5dcc9ff72f6..d6188e5a52d 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -108,7 +108,7 @@ extern void uio_event_notify(struct uio_info *info); /* defines for uio_info->irq */ #define UIO_IRQ_CUSTOM -1 -#define UIO_IRQ_NONE -2 +#define UIO_IRQ_NONE 0 /* defines for uio_mem->memtype */ #define UIO_MEM_NONE 0 -- cgit v1.2.3-18-g5258 From c25d1dfbd403209025df41a737f82ce8f43d93f5 Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Wed, 29 Sep 2010 14:00:54 -0500 Subject: kobject: Introduce kset_find_obj_hinted. One call chain getting to kset_find_obj is: link_mem_sections() find_mem_section() kset_find_obj() This is done during boot. The memory sections were added in a linearly increasing order and link_mem_sections tends to utilize them in that same linear order. Introduce a kset_find_obj_hinted which is passed the result of the previous kset_find_obj which it uses for a quick "is the next object our desired object" check before falling back to the old behavior. Signed-off-by: Robin Holt To: Robert P. J. Day Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 7950a37a714..8f6d1215104 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -191,6 +191,8 @@ static inline struct kobj_type *get_ktype(struct kobject *kobj) } extern struct kobject *kset_find_obj(struct kset *, const char *); +extern struct kobject *kset_find_obj_hinted(struct kset *, const char *, + struct kobject *); /* The global /sys/kernel/ kobject for people to chain off of */ extern struct kobject *kernel_kobj; -- cgit v1.2.3-18-g5258 From 98383031ed77c6eb49ab612166fef9c0efe1604a Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Wed, 29 Sep 2010 14:00:55 -0500 Subject: driver core: Introduce find_memory_block_hinted which utilizes kset_find_obj_hinted. Introduce a find_memory_block_hinted() which utilizes the recently added kset_find_obj_hinted(). Signed-off-by: Robin Holt To: Dave Hansen To: Matt Tolentino Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Greg Kroah-Hartman --- include/linux/memory.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/memory.h b/include/linux/memory.h index 85582e1bcee..c4f3127dbd4 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -113,6 +113,8 @@ extern int memory_dev_init(void); extern int remove_memory_block(unsigned long, struct mem_section *, int); extern int memory_notify(unsigned long val, void *v); extern int memory_isolate_notify(unsigned long val, void *v); +extern struct memory_block *find_memory_block_hinted(struct mem_section *, + struct memory_block *); extern struct memory_block *find_memory_block(struct mem_section *); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION< Date: Tue, 19 Oct 2010 12:46:19 -0500 Subject: Driver core: Add section count to memory_block struct Add a section count property to the memory_block struct to track the number of memory sections that have been added/removed from a memory block. This allows us to know when the last memory section of a memory block has been removed so we can remove the memory block. Signed-off-by: Nathan Fontenot Reviewed-by: Robin Holt Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Greg Kroah-Hartman --- include/linux/memory.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/memory.h b/include/linux/memory.h index c4f3127dbd4..06c1fa0a5c7 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -23,6 +23,8 @@ struct memory_block { unsigned long phys_index; unsigned long state; + int section_count; + /* * This serializes all state change requests. It isn't * held during creation because the control files are -- cgit v1.2.3-18-g5258 From 30004ac9c090dcdcca99556b4587b3bad828731a Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Mon, 9 Aug 2010 18:22:49 +0400 Subject: tty: add tty_struct->dev pointer to corresponding device instance Some device drivers (mostly tty line disciplines) would like to have way know a struct device instance corresponding to passed tty_struct. Add a struct device pointer to struct tty_struct and populate it during initialize_tty_struct(). Signed-off-by: Dmitry Eremin-Solenikov Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index 67d64e6efe7..d94eb86266c 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -256,6 +256,7 @@ struct tty_operations; struct tty_struct { int magic; struct kref kref; + struct device *dev; struct tty_driver *driver; const struct tty_operations *ops; int index; -- cgit v1.2.3-18-g5258 From f573bd1764f0f3f47754ca1ae7b2eb2909798a60 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 24 Aug 2010 07:48:34 +0300 Subject: tty: Remove __GFP_NOFAIL from tty_add_file() This patch removes __GFP_NOFAIL use from tty_add_file() and adds proper error handling to the call-sites of the function. Cc: Andrew Morton Cc: Alan Cox Cc: Arnd Bergmann Signed-off-by: Pekka Enberg Acked-by: David Rientjes Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index d94eb86266c..86be0cdeb11 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -466,7 +466,7 @@ extern void proc_clear_tty(struct task_struct *p); extern struct tty_struct *get_current_tty(void); extern void tty_default_fops(struct file_operations *fops); extern struct tty_struct *alloc_tty_struct(void); -extern void tty_add_file(struct tty_struct *tty, struct file *file); +extern int tty_add_file(struct tty_struct *tty, struct file *file); extern void free_tty_struct(struct tty_struct *tty); extern void initialize_tty_struct(struct tty_struct *tty, struct tty_driver *driver, int idx); -- cgit v1.2.3-18-g5258 From 8bc3372d9e57db3c65cf00cea6cf14969875b055 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 20 Aug 2010 17:14:04 -0400 Subject: ioctl: Use asm-generic/ioctls.h on cris (enables termiox) This patch converts cris to use asm-generic/ioctls.h instead of its own version. The differences between the arch-specific version and the generic version are as follows: - CRIS defines two ioctls: TIOCSERSETRS485 and TIOCSERWRRS485, kept in arch-specific portion - CRIS defines a different value for TIOCSRS485, kept via ifndef in generic - The generic version adds support for termiox Cc: Mikael Starvik Cc: Jesper Nilsson Signed-off-by: Jeff Mahoney Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/ioctls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-generic/ioctls.h b/include/asm-generic/ioctls.h index 8554cb6a81b..a3216655d65 100644 --- a/include/asm-generic/ioctls.h +++ b/include/asm-generic/ioctls.h @@ -62,7 +62,9 @@ #define TCSETSW2 _IOW('T', 0x2C, struct termios2) #define TCSETSF2 _IOW('T', 0x2D, struct termios2) #define TIOCGRS485 0x542E +#ifndef TIOCSRS485 #define TIOCSRS485 0x542F +#endif #define TIOCGPTN _IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ #define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */ #define TCGETX 0x5432 /* SYS5 TCGETX compatibility */ -- cgit v1.2.3-18-g5258 From d281da7ff6f70efca0553c288bb883e8605b3862 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 16 Sep 2010 18:21:24 +0100 Subject: tty: Make tiocgicount a handler Dan Rosenberg noted that various drivers return the struct with uncleared fields. Instead of spending forever trying to stomp all the drivers that get it wrong (and every new driver) do the job in one place. This first patch adds the needed operations and hooks them up, including the needed USB midlayer and serial core plumbing. Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_driver.h | 9 +++++++++ include/linux/usb/serial.h | 2 ++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index b0867798252..db2d227694d 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -224,6 +224,12 @@ * unless the tty also has a valid tty->termiox pointer. * * Optional: Called under the termios lock + * + * int (*get_icount)(struct tty_struct *tty, struct serial_icounter *icount); + * + * Called when the device receives a TIOCGICOUNT ioctl. Passed a kernel + * structure to complete. This method is optional and will only be called + * if provided (otherwise EINVAL will be returned). */ #include @@ -232,6 +238,7 @@ struct tty_struct; struct tty_driver; +struct serial_icounter_struct; struct tty_operations { struct tty_struct * (*lookup)(struct tty_driver *driver, @@ -268,6 +275,8 @@ struct tty_operations { unsigned int set, unsigned int clear); int (*resize)(struct tty_struct *tty, struct winsize *ws); int (*set_termiox)(struct tty_struct *tty, struct termiox *tnew); + int (*get_icount)(struct tty_struct *tty, + struct serial_icounter_struct *icount); #ifdef CONFIG_CONSOLE_POLL int (*poll_init)(struct tty_driver *driver, int line, char *options); int (*poll_get_char)(struct tty_driver *driver, int line); diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 55675b1efb2..16d682f4f7c 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -271,6 +271,8 @@ struct usb_serial_driver { int (*tiocmget)(struct tty_struct *tty, struct file *file); int (*tiocmset)(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); + int (*get_icount)(struct tty_struct *tty, + struct serial_icounter_struct *icount); /* Called by the tty layer for port level work. There may or may not be an attached tty at this point */ void (*dtr_rts)(struct usb_serial_port *port, int on); -- cgit v1.2.3-18-g5258 From 432c9ed22aff641039ccd400cdabf983fabc285e Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 1 Oct 2010 00:10:44 -0400 Subject: vcs: invoke the vt update callback when /dev/vcs* is written to A notifier chain is called whenever the vt code modifies a terminal content, except for one case which is when the modification comes through writes to /dev/vcs* devices. Let's add the missing notifier invocation at the end of vcs_write() for that case too. Signed-off-by: Nicolas Pitre Cc: Alan Cox Signed-off-by: Greg Kroah-Hartman --- include/linux/selection.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/selection.h b/include/linux/selection.h index 8cdaa1151d2..85193aa8c1e 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -39,5 +39,6 @@ extern void putconsxy(struct vc_data *vc, unsigned char *p); extern u16 vcs_scr_readw(struct vc_data *vc, const u16 *org); extern void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org); +extern void vcs_scr_updated(struct vc_data *vc); #endif -- cgit v1.2.3-18-g5258 From 54381067ed7873e6173d6fe32818a585ad667723 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Fri, 1 Oct 2010 17:21:25 +0400 Subject: serial: Factor out uart_poll_timeout() from 8250 driver Soon we will use that handy function in the altera_uart driver. Signed-off-by: Anton Vorontsov Cc: Alan Cox Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 563e2340091..ac48082f355 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -411,6 +411,14 @@ unsigned int uart_get_baud_rate(struct uart_port *port, struct ktermios *termios unsigned int max); unsigned int uart_get_divisor(struct uart_port *port, unsigned int baud); +/* Base timer interval for polling */ +static inline int uart_poll_timeout(struct uart_port *port) +{ + int timeout = port->timeout; + + return timeout > 6 ? (timeout / 2 - 2) : 1; +} + /* * Console helpers. */ -- cgit v1.2.3-18-g5258 From 0d426eda7c94d864ead913f7099c623521368443 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Fri, 1 Oct 2010 17:21:54 +0400 Subject: altera_uart: Add support for different address strides Some controllers implement registers with a stride, to support those we must implement the proper IO accessors. Signed-off-by: Anton Vorontsov Acked-by: Tobias Klauser Cc: Alan Cox Signed-off-by: Greg Kroah-Hartman --- include/linux/altera_uart.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/altera_uart.h b/include/linux/altera_uart.h index 8d441064a30..c022c82db7c 100644 --- a/include/linux/altera_uart.h +++ b/include/linux/altera_uart.h @@ -9,6 +9,7 @@ struct altera_uart_platform_uart { unsigned long mapbase; /* Physical address base */ unsigned int irq; /* Interrupt vector */ unsigned int uartclk; /* UART clock rate */ + unsigned int bus_shift; /* Bus shift (address stride) */ }; #endif /* __ALTUART_H */ -- cgit v1.2.3-18-g5258 From 5d89a48acfbaae02e7ecf97d4d8cc570a31964c5 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Fri, 1 Oct 2010 17:22:55 +0400 Subject: altera_uart: Fix missing prototype for registering an early console Simply add an early_altera_uart_setup() prototype declaration, otherwise platform code have to do it in .c files, which is ugly. Signed-off-by: Anton Vorontsov Acked-by: Tobias Klauser Cc: Alan Cox Signed-off-by: Greg Kroah-Hartman --- include/linux/altera_uart.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/altera_uart.h b/include/linux/altera_uart.h index c022c82db7c..a10a9079197 100644 --- a/include/linux/altera_uart.h +++ b/include/linux/altera_uart.h @@ -5,6 +5,8 @@ #ifndef __ALTUART_H #define __ALTUART_H +#include + struct altera_uart_platform_uart { unsigned long mapbase; /* Physical address base */ unsigned int irq; /* Interrupt vector */ @@ -12,4 +14,6 @@ struct altera_uart_platform_uart { unsigned int bus_shift; /* Bus shift (address stride) */ }; +int __init early_altera_uart_setup(struct altera_uart_platform_uart *platp); + #endif /* __ALTUART_H */ -- cgit v1.2.3-18-g5258 From c161afe9759ddcc174d08e7c4f683d08ac9ba86f Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Sat, 25 Sep 2010 15:13:45 +0200 Subject: 8250: allow platforms to override PM hook. Add a hook for platforms to specify custom pm methods. Signed-off-by: Manuel Lauss Cc: Alan Cox Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 4 ++++ include/linux/serial_core.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 7638deaaba6..bf9c2bdb2e0 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -35,6 +35,8 @@ struct plat_serial8250_port { void (*set_termios)(struct uart_port *, struct ktermios *new, struct ktermios *old); + void (*pm)(struct uart_port *, unsigned int state, + unsigned old); }; /* @@ -76,5 +78,7 @@ extern int serial8250_find_port_for_earlycon(void); extern int setup_early_serial8250_console(char *cmdline); extern void serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old); +extern void serial8250_do_pm(struct uart_port *port, unsigned int state, + unsigned int oldstate); #endif diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index ac48082f355..99e5994e6f8 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -289,6 +289,8 @@ struct uart_port { void (*set_termios)(struct uart_port *, struct ktermios *new, struct ktermios *old); + void (*pm)(struct uart_port *, unsigned int state, + unsigned int old); unsigned int irq; /* irq number */ unsigned long irqflags; /* irq flags */ unsigned int uartclk; /* base uart clock */ -- cgit v1.2.3-18-g5258 From af7f3743567e3d5b40e2f9c21541b7f40b99c103 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 18 Oct 2010 11:38:02 -0700 Subject: serial: abstraction for 8250 legacy ports Not every platform that has generic legacy 8250 ports manages to have them clocked the right way or without errata. Provide a generic interface to allow platforms to override the default behaviour in a manner that dumps the complexity in *their* code not the 8250 driver. Signed-off-by: Alan Cox Signed-off-by: Dirk Brandewie Acked-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index bf9c2bdb2e0..97f5b45bbc0 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -81,4 +81,8 @@ extern void serial8250_do_set_termios(struct uart_port *port, extern void serial8250_do_pm(struct uart_port *port, unsigned int state, unsigned int oldstate); +extern void serial8250_set_isa_configurator(void (*v) + (int port, struct uart_port *up, + unsigned short *capabilities)); + #endif -- cgit v1.2.3-18-g5258 From f0ae849df1cd6b597130d890f2107ee31bf02c19 Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Thu, 5 Aug 2010 14:17:28 +0100 Subject: usb: Add Intel Langwell USB OTG Transceiver Driver This adds support for the USB transceiver driver in the Langwell chipset used on the Intel MID platforms. It folds up the original patch set which includes basic support for the device, PHY low power mode (Please notice that there is a limitation, after we drive VBus down, 2ms delay is required from SCU FW to sync up OTGSC register with USBCFG register), software timers (the hardware timers do not work in low power mode), HNP, SRP. Signed-off-by: Hao Wu Signed-off-by: Alek Du Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/langwell_otg.h | 139 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 include/linux/usb/langwell_otg.h (limited to 'include') diff --git a/include/linux/usb/langwell_otg.h b/include/linux/usb/langwell_otg.h new file mode 100644 index 00000000000..a6562f1d4e2 --- /dev/null +++ b/include/linux/usb/langwell_otg.h @@ -0,0 +1,139 @@ +/* + * Intel Langwell USB OTG transceiver driver + * Copyright (C) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#ifndef __LANGWELL_OTG_H +#define __LANGWELL_OTG_H + +#include + +#define CI_USBCMD 0x30 +# define USBCMD_RST BIT(1) +# define USBCMD_RS BIT(0) +#define CI_USBSTS 0x34 +# define USBSTS_SLI BIT(8) +# define USBSTS_URI BIT(6) +# define USBSTS_PCI BIT(2) +#define CI_PORTSC1 0x74 +# define PORTSC_PP BIT(12) +# define PORTSC_LS (BIT(11) | BIT(10)) +# define PORTSC_SUSP BIT(7) +# define PORTSC_CCS BIT(0) +#define CI_HOSTPC1 0xb4 +# define HOSTPC1_PHCD BIT(22) +#define CI_OTGSC 0xf4 +# define OTGSC_DPIE BIT(30) +# define OTGSC_1MSE BIT(29) +# define OTGSC_BSEIE BIT(28) +# define OTGSC_BSVIE BIT(27) +# define OTGSC_ASVIE BIT(26) +# define OTGSC_AVVIE BIT(25) +# define OTGSC_IDIE BIT(24) +# define OTGSC_DPIS BIT(22) +# define OTGSC_1MSS BIT(21) +# define OTGSC_BSEIS BIT(20) +# define OTGSC_BSVIS BIT(19) +# define OTGSC_ASVIS BIT(18) +# define OTGSC_AVVIS BIT(17) +# define OTGSC_IDIS BIT(16) +# define OTGSC_DPS BIT(14) +# define OTGSC_1MST BIT(13) +# define OTGSC_BSE BIT(12) +# define OTGSC_BSV BIT(11) +# define OTGSC_ASV BIT(10) +# define OTGSC_AVV BIT(9) +# define OTGSC_ID BIT(8) +# define OTGSC_HABA BIT(7) +# define OTGSC_HADP BIT(6) +# define OTGSC_IDPU BIT(5) +# define OTGSC_DP BIT(4) +# define OTGSC_OT BIT(3) +# define OTGSC_HAAR BIT(2) +# define OTGSC_VC BIT(1) +# define OTGSC_VD BIT(0) +# define OTGSC_INTEN_MASK (0x7f << 24) +# define OTGSC_INT_MASK (0x5f << 24) +# define OTGSC_INTSTS_MASK (0x7f << 16) +#define CI_USBMODE 0xf8 +# define USBMODE_CM (BIT(1) | BIT(0)) +# define USBMODE_IDLE 0 +# define USBMODE_DEVICE 0x2 +# define USBMODE_HOST 0x3 +#define USBCFG_ADDR 0xff10801c +#define USBCFG_LEN 4 +# define USBCFG_VBUSVAL BIT(14) +# define USBCFG_AVALID BIT(13) +# define USBCFG_BVALID BIT(12) +# define USBCFG_SESEND BIT(11) + +#define INTR_DUMMY_MASK (USBSTS_SLI | USBSTS_URI | USBSTS_PCI) + +enum langwell_otg_timer_type { + TA_WAIT_VRISE_TMR, + TA_WAIT_BCON_TMR, + TA_AIDL_BDIS_TMR, + TB_ASE0_BRST_TMR, + TB_SE0_SRP_TMR, + TB_SRP_INIT_TMR, + TB_SRP_FAIL_TMR, + TB_BUS_SUSPEND_TMR +}; + +#define TA_WAIT_VRISE 100 +#define TA_WAIT_BCON 30000 +#define TA_AIDL_BDIS 15000 +#define TB_ASE0_BRST 5000 +#define TB_SE0_SRP 2 +#define TB_SRP_INIT 100 +#define TB_SRP_FAIL 5500 +#define TB_BUS_SUSPEND 500 + +struct langwell_otg_timer { + unsigned long expires; /* Number of count increase to timeout */ + unsigned long count; /* Tick counter */ + void (*function)(unsigned long); /* Timeout function */ + unsigned long data; /* Data passed to function */ + struct list_head list; +}; + +struct langwell_otg { + struct intel_mid_otg_xceiv iotg; + struct device *dev; + + void __iomem *usbcfg; /* SCCBUSB config Reg */ + + unsigned region; + unsigned cfg_region; + + struct work_struct work; + struct workqueue_struct *qwork; + struct timer_list hsm_timer; + + spinlock_t lock; + spinlock_t wq_lock; + + struct notifier_block iotg_notifier; +}; + +static inline +struct langwell_otg *mid_xceiv_to_lnw(struct intel_mid_otg_xceiv *iotg) +{ + return container_of(iotg, struct langwell_otg, iotg); +} + +#endif /* __LANGWELL_OTG_H__ */ -- cgit v1.2.3-18-g5258 From 37b5801e16d2e192fe2b20f4af33aa8c6e8786f3 Mon Sep 17 00:00:00 2001 From: Parirajan Muthalagu Date: Wed, 25 Aug 2010 16:33:26 +0530 Subject: USB Gadget: Verify VBUS current before setting the device self-powered bit Acked-by: David Brownell Acked-by: Linus Walleij Signed-off-by: Praveena Nadahally Signed-off-by: Parirajan Muthalagu Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ch9.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index da2ed77d3e8..b0f7e9f5717 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -808,4 +808,14 @@ enum usb_device_state { */ }; +/*-------------------------------------------------------------------------*/ + +/* + * As per USB compliance update, a device that is actively drawing + * more than 100mA from USB must report itself as bus-powered in + * the GetStatus(DEVICE) call. + * http://compliance.usb.org/index.asp?UpdateFile=Electrical&Format=Standard#34 + */ +#define USB_SELF_POWER_VBUS_MAX_DRAW 100 + #endif /* __LINUX_USB_CH9_H */ -- cgit v1.2.3-18-g5258 From ad1a8102f957f4d25fc58cdc10736c5ade7557e1 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Thu, 12 Aug 2010 17:43:46 +0200 Subject: USB: gadget: composite: Better string override handling The iManufatcurer, iProduct and iSerialNumber composite module parameters were only used when the gadget driver registers strings for manufacturer, product and serial number. If the gadget never bothered to set corresponding fields in USB device descriptors those module parameters are ignored. This commit makes the parameters work even if the strings ID have not been assigned. It also changes the way IDs are overridden -- what IDs are overridden is now saved in usb_composite_dev structure -- which makes it unnecessary to modify the string tables the way previous code did. The commit also adds a iProduct and iManufatcurer fields to the usb_composite_device structure. If they are set, appropriate strings are reserved and added to device descriptor. This makes it unnecessary for gadget drivers to maintain code for setting those. If iProduct is not set it defaults to usb_composite_device::name; if iManufatcurer is not set a default " with " is used. The last thing is that if needs_serial field of usb_composite_device is set and user failed to provided iSerialNumber parameter a warning is issued. Signed-off-by: Michal Nazarewicz Signed-off-by: Kyungmin Park Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 617068134ae..a78e813d27e 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -237,10 +237,17 @@ int usb_add_config(struct usb_composite_dev *, /** * struct usb_composite_driver - groups configurations into a gadget * @name: For diagnostics, identifies the driver. + * @iProduct: Used as iProduct override if @dev->iProduct is not set. + * If NULL value of @name is taken. + * @iManufacturer: Used as iManufacturer override if @dev->iManufacturer is + * not set. If NULL a default " with " value + * will be used. * @dev: Template descriptor for the device, including default device * identifiers. * @strings: tables of strings, keyed by identifiers assigned during bind() * and language IDs provided in control requests + * @needs_serial: set to 1 if the gadget needs userspace to provide + * a serial number. If one is not provided, warning will be printed. * @bind: (REQUIRED) Used to allocate resources that are shared across the * whole device, such as string IDs, and add its configurations using * @usb_add_config(). This may fail by returning a negative errno @@ -266,8 +273,11 @@ int usb_add_config(struct usb_composite_dev *, */ struct usb_composite_driver { const char *name; + const char *iProduct; + const char *iManufacturer; const struct usb_device_descriptor *dev; struct usb_gadget_strings **strings; + unsigned needs_serial:1; /* REVISIT: bind() functions can be marked __init, which * makes trouble for section mismatch analysis. See if @@ -334,6 +344,9 @@ struct usb_composite_dev { struct list_head configs; struct usb_composite_driver *driver; u8 next_string_id; + u8 manufacturer_override; + u8 product_override; + u8 serial_override; /* the gadget driver won't enable the data pullup * while the deactivation count is nonzero. -- cgit v1.2.3-18-g5258 From b0fca50f5a94a268ed02cfddf44448051ed9343f Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 12 Aug 2010 17:43:53 +0200 Subject: usb gadget: don't save bind callback in struct usb_gadget_driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To accomplish this the function to register a gadget driver takes the bind function as a second argument. To make things clearer rename the function to resemble platform_driver_probe. This fixes many section mismatches like WARNING: drivers/usb/gadget/g_printer.o(.data+0xc): Section mismatch in reference from the variable printer_driver to the function .init.text:printer_bind() The variable printer_driver references the function __init printer_bind() All callers are fixed. Signed-off-by: Uwe Kleine-König [m.nazarewicz@samsung.com: added dbgp] Signed-off-by: Michał Nazarewicz Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gadget.h | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index d3ef42d7d2f..006412ce230 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -705,11 +705,6 @@ static inline int usb_gadget_disconnect(struct usb_gadget *gadget) * struct usb_gadget_driver - driver for usb 'slave' devices * @function: String describing the gadget's function * @speed: Highest speed the driver handles. - * @bind: Invoked when the driver is bound to a gadget, usually - * after registering the driver. - * At that point, ep0 is fully initialized, and ep_list holds - * the currently-available endpoints. - * Called in a context that permits sleeping. * @setup: Invoked for ep0 control requests that aren't handled by * the hardware level driver. Most calls must be handled by * the gadget driver, including descriptor and configuration @@ -774,7 +769,6 @@ static inline int usb_gadget_disconnect(struct usb_gadget *gadget) struct usb_gadget_driver { char *function; enum usb_device_speed speed; - int (*bind)(struct usb_gadget *); void (*unbind)(struct usb_gadget *); int (*setup)(struct usb_gadget *, const struct usb_ctrlrequest *); @@ -798,17 +792,19 @@ struct usb_gadget_driver { */ /** - * usb_gadget_register_driver - register a gadget driver - * @driver:the driver being registered + * usb_gadget_probe_driver - probe a gadget driver + * @driver: the driver being registered + * @bind: the driver's bind callback * Context: can sleep * * Call this in your gadget driver's module initialization function, * to tell the underlying usb controller driver about your driver. - * The driver's bind() function will be called to bind it to a - * gadget before this registration call returns. It's expected that - * the bind() functions will be in init sections. + * The @bind() function will be called to bind it to a gadget before this + * registration call returns. It's expected that the @bind() function will + * be in init sections. */ -int usb_gadget_register_driver(struct usb_gadget_driver *driver); +int usb_gadget_probe_driver(struct usb_gadget_driver *driver, + int (*bind)(struct usb_gadget *)); /** * usb_gadget_unregister_driver - unregister a gadget driver -- cgit v1.2.3-18-g5258 From 07a18bd716ed5dea336429404b132568cfaaef95 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Thu, 12 Aug 2010 17:43:54 +0200 Subject: usb gadget: don't save bind callback in struct usb_composite_driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bind function is most of the time only called at init time so there is no need to save a pointer to it in the composite driver structure. This fixes many section mismatches reported by modpost. Signed-off-by: Michał Nazarewicz Acked-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index a78e813d27e..e28b6626802 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -248,11 +248,7 @@ int usb_add_config(struct usb_composite_dev *, * and language IDs provided in control requests * @needs_serial: set to 1 if the gadget needs userspace to provide * a serial number. If one is not provided, warning will be printed. - * @bind: (REQUIRED) Used to allocate resources that are shared across the - * whole device, such as string IDs, and add its configurations using - * @usb_add_config(). This may fail by returning a negative errno - * value; it should return zero on successful initialization. - * @unbind: Reverses @bind(); called as a side effect of unregistering + * @unbind: Reverses bind; called as a side effect of unregistering * this driver. * @disconnect: optional driver disconnect method * @suspend: Notifies when the host stops sending USB traffic, @@ -263,7 +259,7 @@ int usb_add_config(struct usb_composite_dev *, * Devices default to reporting self powered operation. Devices which rely * on bus powered operation should report this in their @bind() method. * - * Before returning from @bind, various fields in the template descriptor + * Before returning from bind, various fields in the template descriptor * may be overridden. These include the idVendor/idProduct/bcdDevice values * normally to bind the appropriate host side driver, and the three strings * (iManufacturer, iProduct, iSerialNumber) normally used to provide user @@ -279,12 +275,6 @@ struct usb_composite_driver { struct usb_gadget_strings **strings; unsigned needs_serial:1; - /* REVISIT: bind() functions can be marked __init, which - * makes trouble for section mismatch analysis. See if - * we can't restructure things to avoid mismatching... - */ - - int (*bind)(struct usb_composite_dev *); int (*unbind)(struct usb_composite_dev *); void (*disconnect)(struct usb_composite_dev *); @@ -294,8 +284,9 @@ struct usb_composite_driver { void (*resume)(struct usb_composite_dev *); }; -extern int usb_composite_register(struct usb_composite_driver *); -extern void usb_composite_unregister(struct usb_composite_driver *); +extern int usb_composite_probe(struct usb_composite_driver *driver, + int (*bind)(struct usb_composite_dev *cdev)); +extern void usb_composite_unregister(struct usb_composite_driver *driver); /** -- cgit v1.2.3-18-g5258 From c9bfff9c98671ad50e4abbfe1ab606a9957f7539 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 12 Aug 2010 17:43:55 +0200 Subject: usb gadget: don't save bind callback in struct usb_configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bind function is most of the time only called at init time so there is no need to save a pointer to it in the configuration structure. This fixes many section mismatches reported by modpost. Signed-off-by: Uwe Kleine-König [m.nazarewicz@samsung.com: updated for -next] Signed-off-by: Michał Nazarewicz Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index e28b6626802..3d29a7dcac2 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -161,8 +161,6 @@ ep_choose(struct usb_gadget *g, struct usb_endpoint_descriptor *hs, * and by language IDs provided in control requests. * @descriptors: Table of descriptors preceding all function descriptors. * Examples include OTG and vendor-specific descriptors. - * @bind: Called from @usb_add_config() to allocate resources unique to this - * configuration and to call @usb_add_function() for each function used. * @unbind: Reverses @bind; called as a side effect of unregistering the * driver which added this configuration. * @setup: Used to delegate control requests that aren't handled by standard @@ -207,8 +205,7 @@ struct usb_configuration { * we can't restructure things to avoid mismatching... */ - /* configuration management: bind/unbind */ - int (*bind)(struct usb_configuration *); + /* configuration management: unbind/setup */ void (*unbind)(struct usb_configuration *); int (*setup)(struct usb_configuration *, const struct usb_ctrlrequest *); @@ -232,7 +229,8 @@ struct usb_configuration { }; int usb_add_config(struct usb_composite_dev *, - struct usb_configuration *); + struct usb_configuration *, + int (*)(struct usb_configuration *)); /** * struct usb_composite_driver - groups configurations into a gadget -- cgit v1.2.3-18-g5258 From 5ea081785dde6041eb2f4acc2369abbb9099a981 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Thu, 12 Aug 2010 17:43:56 +0200 Subject: init.h: add some more documentation to __ref* tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The __ref* tags may have been confusing for new kernel developers (I was confused by them for sure) so adding a few more sentences to comment to clear things up for people who see those for the first time. Signed-off-by: Michal Nazarewicz Acked-by: Uwe Kleine-König Acked-by: Sam Ravnborg Signed-off-by: Greg Kroah-Hartman --- include/linux/init.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/init.h b/include/linux/init.h index de994304e0b..577671c5515 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -46,16 +46,23 @@ #define __exitdata __section(.exit.data) #define __exit_call __used __section(.exitcall.exit) -/* modpost check for section mismatches during the kernel build. +/* + * modpost check for section mismatches during the kernel build. * A section mismatch happens when there are references from a * code or data section to an init section (both code or data). * The init sections are (for most archs) discarded by the kernel * when early init has completed so all such references are potential bugs. * For exit sections the same issue exists. + * * The following markers are used for the cases where the reference to * the *init / *exit section (code or data) is valid and will teach - * modpost not to issue a warning. - * The markers follow same syntax rules as __init / __initdata. */ + * modpost not to issue a warning. Intended semantics is that a code or + * data tagged __ref* can reference code or data from init section without + * producing a warning (of course, no warning does not mean code is + * correct, so optimally document why the __ref is needed and why it's OK). + * + * The markers follow same syntax rules as __init / __initdata. + */ #define __ref __section(.ref.text) noinline #define __refdata __section(.ref.data) #define __refconst __section(.ref.rodata) -- cgit v1.2.3-18-g5258 From d39a0edad60dc65cf4774ee732aa7a84cf35c27a Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Thu, 9 Sep 2010 22:35:39 +0100 Subject: USB OTG: Add common data structure for Intel MID Platform (Langwell/Penwell) This patch adds one new header file for the common data structure used in Intel Penwell/Langwell MID Platform OTG Transceiver drivers. After switched to the common data structure, Langwell/Penwell OTG Transceiver driver will provide an unified interface to host/client driver. Reported-by: Randy Dunlap Signed-off-by: Hao Wu Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/intel_mid_otg.h | 180 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 include/linux/usb/intel_mid_otg.h (limited to 'include') diff --git a/include/linux/usb/intel_mid_otg.h b/include/linux/usb/intel_mid_otg.h new file mode 100644 index 00000000000..a0ccf795f36 --- /dev/null +++ b/include/linux/usb/intel_mid_otg.h @@ -0,0 +1,180 @@ +/* + * Intel MID (Langwell/Penwell) USB OTG Transceiver driver + * Copyright (C) 2008 - 2010, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#ifndef __INTEL_MID_OTG_H +#define __INTEL_MID_OTG_H + +#include +#include +#include + +struct intel_mid_otg_xceiv; + +/* This is a common data structure for Intel MID platform to + * save values of the OTG state machine */ +struct otg_hsm { + /* Input */ + int a_bus_resume; + int a_bus_suspend; + int a_conn; + int a_sess_vld; + int a_srp_det; + int a_vbus_vld; + int b_bus_resume; + int b_bus_suspend; + int b_conn; + int b_se0_srp; + int b_ssend_srp; + int b_sess_end; + int b_sess_vld; + int id; +/* id values */ +#define ID_B 0x05 +#define ID_A 0x04 +#define ID_ACA_C 0x03 +#define ID_ACA_B 0x02 +#define ID_ACA_A 0x01 + int power_up; + int adp_change; + int test_device; + + /* Internal variables */ + int a_set_b_hnp_en; + int b_srp_done; + int b_hnp_enable; + int hnp_poll_enable; + + /* Timeout indicator for timers */ + int a_wait_vrise_tmout; + int a_wait_bcon_tmout; + int a_aidl_bdis_tmout; + int a_bidl_adis_tmout; + int a_bidl_adis_tmr; + int a_wait_vfall_tmout; + int b_ase0_brst_tmout; + int b_bus_suspend_tmout; + int b_srp_init_tmout; + int b_srp_fail_tmout; + int b_srp_fail_tmr; + int b_adp_sense_tmout; + + /* Informative variables */ + int a_bus_drop; + int a_bus_req; + int a_clr_err; + int b_bus_req; + int a_suspend_req; + int b_bus_suspend_vld; + + /* Output */ + int drv_vbus; + int loc_conn; + int loc_sof; + + /* Others */ + int vbus_srp_up; +}; + +/* must provide ULPI access function to read/write registers implemented in + * ULPI address space */ +struct iotg_ulpi_access_ops { + int (*read)(struct intel_mid_otg_xceiv *iotg, u8 reg, u8 *val); + int (*write)(struct intel_mid_otg_xceiv *iotg, u8 reg, u8 val); +}; + +#define OTG_A_DEVICE 0x0 +#define OTG_B_DEVICE 0x1 + +/* + * the Intel MID (Langwell/Penwell) otg transceiver driver needs to interact + * with device and host drivers to implement the USB OTG related feature. More + * function members are added based on otg_transceiver data structure for this + * purpose. + */ +struct intel_mid_otg_xceiv { + struct otg_transceiver otg; + struct otg_hsm hsm; + + /* base address */ + void __iomem *base; + + /* ops to access ulpi */ + struct iotg_ulpi_access_ops ulpi_ops; + + /* atomic notifier for interrupt context */ + struct atomic_notifier_head iotg_notifier; + + /* start/stop USB Host function */ + int (*start_host)(struct intel_mid_otg_xceiv *iotg); + int (*stop_host)(struct intel_mid_otg_xceiv *iotg); + + /* start/stop USB Peripheral function */ + int (*start_peripheral)(struct intel_mid_otg_xceiv *iotg); + int (*stop_peripheral)(struct intel_mid_otg_xceiv *iotg); + + /* start/stop ADP sense/probe function */ + int (*set_adp_probe)(struct intel_mid_otg_xceiv *iotg, + bool enabled, int dev); + int (*set_adp_sense)(struct intel_mid_otg_xceiv *iotg, + bool enabled); + +#ifdef CONFIG_PM + /* suspend/resume USB host function */ + int (*suspend_host)(struct intel_mid_otg_xceiv *iotg, + pm_message_t message); + int (*resume_host)(struct intel_mid_otg_xceiv *iotg); + + int (*suspend_peripheral)(struct intel_mid_otg_xceiv *iotg, + pm_message_t message); + int (*resume_peripheral)(struct intel_mid_otg_xceiv *iotg); +#endif + +}; +static inline +struct intel_mid_otg_xceiv *otg_to_mid_xceiv(struct otg_transceiver *otg) +{ + return container_of(otg, struct intel_mid_otg_xceiv, otg); +} + +#define MID_OTG_NOTIFY_CONNECT 0x0001 +#define MID_OTG_NOTIFY_DISCONN 0x0002 +#define MID_OTG_NOTIFY_HSUSPEND 0x0003 +#define MID_OTG_NOTIFY_HRESUME 0x0004 +#define MID_OTG_NOTIFY_CSUSPEND 0x0005 +#define MID_OTG_NOTIFY_CRESUME 0x0006 +#define MID_OTG_NOTIFY_HOSTADD 0x0007 +#define MID_OTG_NOTIFY_HOSTREMOVE 0x0008 +#define MID_OTG_NOTIFY_CLIENTADD 0x0009 +#define MID_OTG_NOTIFY_CLIENTREMOVE 0x000a + +static inline int +intel_mid_otg_register_notifier(struct intel_mid_otg_xceiv *iotg, + struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&iotg->iotg_notifier, nb); +} + +static inline void +intel_mid_otg_unregister_notifier(struct intel_mid_otg_xceiv *iotg, + struct notifier_block *nb) +{ + atomic_notifier_chain_unregister(&iotg->iotg_notifier, nb); +} + +#endif /* __INTEL_MID_OTG_H */ -- cgit v1.2.3-18-g5258 From 56e9406ca22968e3c9dc27d6dc0f1825e13bfff9 Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Thu, 9 Sep 2010 22:35:54 +0100 Subject: USB OTG Langwell: Update OTG Kconfig and driver version. This patch updated Kconfig for langwell otg transceiver driver. Add ipc driver(INTEL_SCU_IPC) as a dependency. Driver version is updated too. Signed-off-by: Hao Wu Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/langwell_otg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/langwell_otg.h b/include/linux/usb/langwell_otg.h index a6562f1d4e2..51f17b16d31 100644 --- a/include/linux/usb/langwell_otg.h +++ b/include/linux/usb/langwell_otg.h @@ -1,6 +1,6 @@ /* * Intel Langwell USB OTG transceiver driver - * Copyright (C) 2008, Intel Corporation. + * Copyright (C) 2008 - 2010, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, -- cgit v1.2.3-18-g5258 From 1f53c0e9bbf654ed93f63deee2bf5c9a1816966e Mon Sep 17 00:00:00 2001 From: Yauheni Kaliuta Date: Mon, 20 Sep 2010 15:40:26 +0300 Subject: USB: cdc.h: ncm: typo and style fixes Some typos were in the initial commit, make the spelling according to the spec. Add some more comments. Also change constant names according to the style of the rest of the file Signed-off-by: Yauheni Kaliuta Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/cdc.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h index c117a68d04a..583264abca0 100644 --- a/include/linux/usb/cdc.h +++ b/include/linux/usb/cdc.h @@ -274,13 +274,13 @@ struct usb_cdc_notification { /* * Class Specific structures and constants * - * CDC NCM parameter structure, CDC NCM subclass 6.2.1 + * CDC NCM NTB parameters structure, CDC NCM subclass 6.2.1 * */ -struct usb_cdc_ncm_ntb_parameter { +struct usb_cdc_ncm_ntb_parameters { __le16 wLength; - __le16 bmNtbFormatSupported; + __le16 bmNtbFormatsSupported; __le32 dwNtbInMaxSize; __le16 wNdpInDivisor; __le16 wNdpInPayloadRemainder; @@ -297,8 +297,8 @@ struct usb_cdc_ncm_ntb_parameter { * CDC NCM transfer headers, CDC NCM subclass 3.2 */ -#define NCM_NTH16_SIGN 0x484D434E /* NCMH */ -#define NCM_NTH32_SIGN 0x686D636E /* ncmh */ +#define USB_CDC_NCM_NTH16_SIGN 0x484D434E /* NCMH */ +#define USB_CDC_NCM_NTH32_SIGN 0x686D636E /* ncmh */ struct usb_cdc_ncm_nth16 { __le32 dwSignature; @@ -320,11 +320,12 @@ struct usb_cdc_ncm_nth32 { * CDC NCM datagram pointers, CDC NCM subclass 3.3 */ -#define NCM_NDP16_CRC_SIGN 0x314D434E /* NCM1 */ -#define NCM_NDP16_NOCRC_SIGN 0x304D434E /* NCM0 */ -#define NCM_NDP32_CRC_SIGN 0x316D636E /* ncm1 */ -#define NCM_NDP32_NOCRC_SIGN 0x306D636E /* ncm0 */ +#define USB_CDC_NCM_NDP16_CRC_SIGN 0x314D434E /* NCM1 */ +#define USB_CDC_NCM_NDP16_NOCRC_SIGN 0x304D434E /* NCM0 */ +#define USB_CDC_NCM_NDP32_CRC_SIGN 0x316D636E /* ncm1 */ +#define USB_CDC_NCM_NDP32_NOCRC_SIGN 0x306D636E /* ncm0 */ +/* 16-bit NCM Datagram Pointer Table */ struct usb_cdc_ncm_ndp16 { __le32 dwSignature; __le16 wLength; @@ -332,11 +333,12 @@ struct usb_cdc_ncm_ndp16 { __u8 data[0]; } __attribute__ ((packed)); +/* 32-bit NCM Datagram Pointer Table */ struct usb_cdc_ncm_ndp32 { __le32 dwSignature; __le16 wLength; __le16 wReserved6; - __le32 dwNextFpIndex; + __le32 dwNextNdpIndex; __le32 dwReserved12; __u8 data[0]; } __attribute__ ((packed)); -- cgit v1.2.3-18-g5258 From 7fc09170cedc329ad274433b4f1a653e603600b5 Mon Sep 17 00:00:00 2001 From: Yauheni Kaliuta Date: Mon, 20 Sep 2010 15:40:27 +0300 Subject: Revert "USB: ncm: added ncm.h with auxiliary definitions" This reverts commit 65e0b499105ec8ff3bc4ab7680873dec20127f9d. Since the host and gadget implementations are different, there is no common code for the file, remove for now. Signed-off-by: Yauheni Kaliuta Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ncm.h | 114 ------------------------------------------------ 1 file changed, 114 deletions(-) delete mode 100644 include/linux/usb/ncm.h (limited to 'include') diff --git a/include/linux/usb/ncm.h b/include/linux/usb/ncm.h deleted file mode 100644 index 006d1064c8b..00000000000 --- a/include/linux/usb/ncm.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * USB CDC NCM auxiliary definitions - */ - -#ifndef __LINUX_USB_NCM_H -#define __LINUX_USB_NCM_H - -#include -#include -#include - -#define NCM_NTB_MIN_IN_SIZE 2048 -#define NCM_NTB_MIN_OUT_SIZE 2048 - -#define NCM_CONTROL_TIMEOUT (5 * 1000) - -/* bmNetworkCapabilities */ - -#define NCM_NCAP_ETH_FILTER (1 << 0) -#define NCM_NCAP_NET_ADDRESS (1 << 1) -#define NCM_NCAP_ENCAP_COMM (1 << 2) -#define NCM_NCAP_MAX_DGRAM (1 << 3) -#define NCM_NCAP_CRC_MODE (1 << 4) - -/* - * Here are options for NCM Datagram Pointer table (NDP) parser. - * There are 2 different formats: NDP16 and NDP32 in the spec (ch. 3), - * in NDP16 offsets and sizes fields are 1 16bit word wide, - * in NDP32 -- 2 16bit words wide. Also signatures are different. - * To make the parser code the same, put the differences in the structure, - * and switch pointers to the structures when the format is changed. - */ - -struct ndp_parser_opts { - u32 nth_sign; - u32 ndp_sign; - unsigned nth_size; - unsigned ndp_size; - unsigned ndplen_align; - /* sizes in u16 units */ - unsigned dgram_item_len; /* index or length */ - unsigned block_length; - unsigned fp_index; - unsigned reserved1; - unsigned reserved2; - unsigned next_fp_index; -}; - -#define INIT_NDP16_OPTS { \ - .nth_sign = NCM_NTH16_SIGN, \ - .ndp_sign = NCM_NDP16_NOCRC_SIGN, \ - .nth_size = sizeof(struct usb_cdc_ncm_nth16), \ - .ndp_size = sizeof(struct usb_cdc_ncm_ndp16), \ - .ndplen_align = 4, \ - .dgram_item_len = 1, \ - .block_length = 1, \ - .fp_index = 1, \ - .reserved1 = 0, \ - .reserved2 = 0, \ - .next_fp_index = 1, \ - } - - -#define INIT_NDP32_OPTS { \ - .nth_sign = NCM_NTH32_SIGN, \ - .ndp_sign = NCM_NDP32_NOCRC_SIGN, \ - .nth_size = sizeof(struct usb_cdc_ncm_nth32), \ - .ndp_size = sizeof(struct usb_cdc_ncm_ndp32), \ - .ndplen_align = 8, \ - .dgram_item_len = 2, \ - .block_length = 2, \ - .fp_index = 2, \ - .reserved1 = 1, \ - .reserved2 = 2, \ - .next_fp_index = 2, \ - } - -static inline void put_ncm(__le16 **p, unsigned size, unsigned val) -{ - switch (size) { - case 1: - put_unaligned_le16((u16)val, *p); - break; - case 2: - put_unaligned_le32((u32)val, *p); - - break; - default: - BUG(); - } - - *p += size; -} - -static inline unsigned get_ncm(__le16 **p, unsigned size) -{ - unsigned tmp; - - switch (size) { - case 1: - tmp = get_unaligned_le16(*p); - break; - case 2: - tmp = get_unaligned_le32(*p); - break; - default: - BUG(); - } - - *p += size; - return tmp; -} - -#endif /* __LINUX_USB_NCM_H */ -- cgit v1.2.3-18-g5258 From e5dcd531ac7a040f1b4d35f58914a36ad6174e84 Mon Sep 17 00:00:00 2001 From: Yauheni Kaliuta Date: Mon, 20 Sep 2010 15:40:28 +0300 Subject: USB: cdc.h: ncm: add missed constants and structures Make a dedicated structure for datagram pointer entry. There is no explicit declaration in the spec, but it's used by the host implementation and makes the structure more clear. Add some missed constants from the spec Signed-off-by: Yauheni Kaliuta Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/cdc.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h index 583264abca0..2d5b6f296aa 100644 --- a/include/linux/usb/cdc.h +++ b/include/linux/usb/cdc.h @@ -32,6 +32,8 @@ #define USB_CDC_PROTO_EEM 7 +#define USB_CDC_NCM_PROTO_NTB 1 + /*-------------------------------------------------------------------------*/ /* @@ -325,14 +327,26 @@ struct usb_cdc_ncm_nth32 { #define USB_CDC_NCM_NDP32_CRC_SIGN 0x316D636E /* ncm1 */ #define USB_CDC_NCM_NDP32_NOCRC_SIGN 0x306D636E /* ncm0 */ +/* 16-bit NCM Datagram Pointer Entry */ +struct usb_cdc_ncm_dpe16 { + __le16 wDatagramIndex; + __le16 wDatagramLength; +} __attribute__((__packed__)); + /* 16-bit NCM Datagram Pointer Table */ struct usb_cdc_ncm_ndp16 { __le32 dwSignature; __le16 wLength; __le16 wNextFpIndex; - __u8 data[0]; + struct usb_cdc_ncm_dpe16 dpe16[0]; } __attribute__ ((packed)); +/* 32-bit NCM Datagram Pointer Entry */ +struct usb_cdc_ncm_dpe32 { + __le32 wDatagramIndex; + __le32 wDatagramLength; +} __attribute__((__packed__)); + /* 32-bit NCM Datagram Pointer Table */ struct usb_cdc_ncm_ndp32 { __le32 dwSignature; @@ -340,7 +354,46 @@ struct usb_cdc_ncm_ndp32 { __le16 wReserved6; __le32 dwNextNdpIndex; __le32 dwReserved12; - __u8 data[0]; + struct usb_cdc_ncm_dpe32 dpe32[0]; } __attribute__ ((packed)); +/* CDC NCM subclass 3.2.1 and 3.2.2 */ +#define USB_CDC_NCM_NDP16_INDEX_MIN 0x000C +#define USB_CDC_NCM_NDP32_INDEX_MIN 0x0010 + +/* CDC NCM subclass 3.3.3 Datagram Formatting */ +#define USB_CDC_NCM_DATAGRAM_FORMAT_CRC 0x30 +#define USB_CDC_NCM_DATAGRAM_FORMAT_NOCRC 0X31 + +/* CDC NCM subclass 4.2 NCM Communications Interface Protocol Code */ +#define USB_CDC_NCM_PROTO_CODE_NO_ENCAP_COMMANDS 0x00 +#define USB_CDC_NCM_PROTO_CODE_EXTERN_PROTO 0xFE + +/* CDC NCM subclass 5.2.1 NCM Functional Descriptor, bmNetworkCapabilities */ +#define USB_CDC_NCM_NCAP_ETH_FILTER (1 << 0) +#define USB_CDC_NCM_NCAP_NET_ADDRESS (1 << 1) +#define USB_CDC_NCM_NCAP_ENCAP_COMMAND (1 << 2) +#define USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE (1 << 3) +#define USB_CDC_NCM_NCAP_CRC_MODE (1 << 4) + +/* CDC NCM subclass Table 6-3: NTB Parameter Structure */ +#define USB_CDC_NCM_NTB16_SUPPORTED (1 << 0) +#define USB_CDC_NCM_NTB32_SUPPORTED (1 << 1) + +/* CDC NCM subclass Table 6-3: NTB Parameter Structure */ +#define USB_CDC_NCM_NDP_ALIGN_MIN_SIZE 0x04 +#define USB_CDC_NCM_NTB_MAX_LENGTH 0x1C + +/* CDC NCM subclass 6.2.5 SetNtbFormat */ +#define USB_CDC_NCM_NTB16_FORMAT 0x00 +#define USB_CDC_NCM_NTB32_FORMAT 0x01 + +/* CDC NCM subclass 6.2.7 SetNtbInputSize */ +#define USB_CDC_NCM_NTB_MIN_IN_SIZE 2048 +#define USB_CDC_NCM_NTB_MIN_OUT_SIZE 2048 + +/* CDC NCM subclass 6.2.11 SetCrcMode */ +#define USB_CDC_NCM_CRC_NOT_APPENDED 0x00 +#define USB_CDC_NCM_CRC_APPENDED 0x01 + #endif /* __LINUX_USB_CDC_H */ -- cgit v1.2.3-18-g5258 From 6195e3c6aa84dbbf80a60731168118824bd58bba Mon Sep 17 00:00:00 2001 From: Yauheni Kaliuta Date: Fri, 24 Sep 2010 09:43:27 +0300 Subject: USB: cdc.h: ncm: fix one more typo In usb_cdc_ncm_dpe32 the fields are 32 bit long and according to usb style (hungarian notation) should be called dwDatagramIndex and dwDatagramLength (see CDC NCM subclass spec, 3.3.2). Actually, they were called wDatagramIndex, wDatagramLength. Signed-off-by: Yauheni Kaliuta Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/cdc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h index 2d5b6f296aa..5e86dc771da 100644 --- a/include/linux/usb/cdc.h +++ b/include/linux/usb/cdc.h @@ -343,8 +343,8 @@ struct usb_cdc_ncm_ndp16 { /* 32-bit NCM Datagram Pointer Entry */ struct usb_cdc_ncm_dpe32 { - __le32 wDatagramIndex; - __le32 wDatagramLength; + __le32 dwDatagramIndex; + __le32 dwDatagramLength; } __attribute__((__packed__)); /* 32-bit NCM Datagram Pointer Table */ -- cgit v1.2.3-18-g5258 From 8fa7fd74ef398370383df276ca41082ba35aafd8 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Thu, 7 Oct 2010 13:05:21 +0200 Subject: USB: storage: Use USB_ prefix instead of US_ prefix This commit changes prefix for some of the USB mass storage class related macros (ie. USB_SC_ for subclass and USB_PR_ for class). Signed-off-by: Michal Nazarewicz Cc: Alan Stern Cc: Matthew Wilcox Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 59 ++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index a4b947e470a..f387c436042 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -81,35 +81,36 @@ enum { US_DO_ALL_FLAGS }; /* Sub Classes */ -#define US_SC_RBC 0x01 /* Typically, flash devices */ -#define US_SC_8020 0x02 /* CD-ROM */ -#define US_SC_QIC 0x03 /* QIC-157 Tapes */ -#define US_SC_UFI 0x04 /* Floppy */ -#define US_SC_8070 0x05 /* Removable media */ -#define US_SC_SCSI 0x06 /* Transparent */ -#define US_SC_LOCKABLE 0x07 /* Password-protected */ - -#define US_SC_ISD200 0xf0 /* ISD200 ATA */ -#define US_SC_CYP_ATACB 0xf1 /* Cypress ATACB */ -#define US_SC_DEVICE 0xff /* Use device's value */ - -/* Protocols */ - -#define US_PR_CBI 0x00 /* Control/Bulk/Interrupt */ -#define US_PR_CB 0x01 /* Control/Bulk w/o interrupt */ -#define US_PR_BULK 0x50 /* bulk only */ - -#define US_PR_USBAT 0x80 /* SCM-ATAPI bridge */ -#define US_PR_EUSB_SDDR09 0x81 /* SCM-SCSI bridge for SDDR-09 */ -#define US_PR_SDDR55 0x82 /* SDDR-55 (made up) */ -#define US_PR_DPCM_USB 0xf0 /* Combination CB/SDDR09 */ -#define US_PR_FREECOM 0xf1 /* Freecom */ -#define US_PR_DATAFAB 0xf2 /* Datafab chipsets */ -#define US_PR_JUMPSHOT 0xf3 /* Lexar Jumpshot */ -#define US_PR_ALAUDA 0xf4 /* Alauda chipsets */ -#define US_PR_KARMA 0xf5 /* Rio Karma */ - -#define US_PR_DEVICE 0xff /* Use device's value */ +#define USB_SC_RBC 0x01 /* Typically, flash devices */ +#define USB_SC_8020 0x02 /* CD-ROM */ +#define USB_SC_QIC 0x03 /* QIC-157 Tapes */ +#define USB_SC_UFI 0x04 /* Floppy */ +#define USB_SC_8070 0x05 /* Removable media */ +#define USB_SC_SCSI 0x06 /* Transparent */ +#define USB_SC_LOCKABLE 0x07 /* Password-protected */ + +#define USB_SC_ISD200 0xf0 /* ISD200 ATA */ +#define USB_SC_CYP_ATACB 0xf1 /* Cypress ATACB */ +#define USB_SC_DEVICE 0xff /* Use device's value */ + +/* Storage protocol codes */ + +#define USB_PR_CBI 0x00 /* Control/Bulk/Interrupt */ +#define USB_PR_CB 0x01 /* Control/Bulk w/o interrupt */ +#define USB_PR_BULK 0x50 /* bulk only */ +#define USB_PR_UAS 0x62 /* USB Attached SCSI */ + +#define USB_PR_USBAT 0x80 /* SCM-ATAPI bridge */ +#define USB_PR_EUSB_SDDR09 0x81 /* SCM-SCSI bridge for SDDR-09 */ +#define USB_PR_SDDR55 0x82 /* SDDR-55 (made up) */ +#define USB_PR_DPCM_USB 0xf0 /* Combination CB/SDDR09 */ +#define USB_PR_FREECOM 0xf1 /* Freecom */ +#define USB_PR_DATAFAB 0xf2 /* Datafab chipsets */ +#define USB_PR_JUMPSHOT 0xf3 /* Lexar Jumpshot */ +#define USB_PR_ALAUDA 0xf4 /* Alauda chipsets */ +#define USB_PR_KARMA 0xf5 /* Rio Karma */ + +#define USB_PR_DEVICE 0xff /* Use device's value */ /* */ -- cgit v1.2.3-18-g5258 From ae6d22fe1812ce8d40add3eb74ede9cfd2eae44f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 7 Oct 2010 13:05:22 +0200 Subject: USB: Move USB Storage definitions to their own header file The libusual header file is hard to use from code that isn't part of libusual. As the comment suggests, these definitions are moved to their own header file, paralleling other USB classes. Signed-off-by: Matthew Wilcox Cc: Alan Stern [mina86@mina86.com: updated to use USB_ prefix and added #include guard] Signed-off-by: Michal Nazarewicz Signed-off-by: Greg Kroah-Hartman index 0000000..d7fc910 --- include/linux/usb/storage.h | 48 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb_usual.h | 38 +---------------------------------- 2 files changed, 49 insertions(+), 37 deletions(-) create mode 100644 include/linux/usb/storage.h (limited to 'include') diff --git a/include/linux/usb/storage.h b/include/linux/usb/storage.h new file mode 100644 index 00000000000..d7fc910f1dc --- /dev/null +++ b/include/linux/usb/storage.h @@ -0,0 +1,48 @@ +#ifndef __LINUX_USB_STORAGE_H +#define __LINUX_USB_STORAGE_H + +/* + * linux/usb/storage.h + * + * Copyright Matthew Wilcox for Intel Corp, 2010 + * + * This file contains definitions taken from the + * USB Mass Storage Class Specification Overview + * + * Distributed under the terms of the GNU GPL, version two. + */ + +/* Storage subclass codes */ + +#define USB_SC_RBC 0x01 /* Typically, flash devices */ +#define USB_SC_8020 0x02 /* CD-ROM */ +#define USB_SC_QIC 0x03 /* QIC-157 Tapes */ +#define USB_SC_UFI 0x04 /* Floppy */ +#define USB_SC_8070 0x05 /* Removable media */ +#define USB_SC_SCSI 0x06 /* Transparent */ +#define USB_SC_LOCKABLE 0x07 /* Password-protected */ + +#define USB_SC_ISD200 0xf0 /* ISD200 ATA */ +#define USB_SC_CYP_ATACB 0xf1 /* Cypress ATACB */ +#define USB_SC_DEVICE 0xff /* Use device's value */ + +/* Storage protocol codes */ + +#define USB_PR_CBI 0x00 /* Control/Bulk/Interrupt */ +#define USB_PR_CB 0x01 /* Control/Bulk w/o interrupt */ +#define USB_PR_BULK 0x50 /* bulk only */ +#define USB_PR_UAS 0x62 /* USB Attached SCSI */ + +#define USB_PR_USBAT 0x80 /* SCM-ATAPI bridge */ +#define USB_PR_EUSB_SDDR09 0x81 /* SCM-SCSI bridge for SDDR-09 */ +#define USB_PR_SDDR55 0x82 /* SDDR-55 (made up) */ +#define USB_PR_DPCM_USB 0xf0 /* Combination CB/SDDR09 */ +#define USB_PR_FREECOM 0xf1 /* Freecom */ +#define USB_PR_DATAFAB 0xf2 /* Datafab chipsets */ +#define USB_PR_JUMPSHOT 0xf3 /* Lexar Jumpshot */ +#define USB_PR_ALAUDA 0xf4 /* Alauda chipsets */ +#define USB_PR_KARMA 0xf5 /* Rio Karma */ + +#define USB_PR_DEVICE 0xff /* Use device's value */ + +#endif diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index f387c436042..f091dc6e5a0 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -74,43 +74,7 @@ enum { US_DO_ALL_FLAGS }; #define USB_US_TYPE(flags) (((flags) >> 24) & 0xFF) #define USB_US_ORIG_FLAGS(flags) ((flags) & 0x00FFFFFF) -/* - * This is probably not the best place to keep these constants, conceptually. - * But it's the only header included into all places which need them. - */ - -/* Sub Classes */ - -#define USB_SC_RBC 0x01 /* Typically, flash devices */ -#define USB_SC_8020 0x02 /* CD-ROM */ -#define USB_SC_QIC 0x03 /* QIC-157 Tapes */ -#define USB_SC_UFI 0x04 /* Floppy */ -#define USB_SC_8070 0x05 /* Removable media */ -#define USB_SC_SCSI 0x06 /* Transparent */ -#define USB_SC_LOCKABLE 0x07 /* Password-protected */ - -#define USB_SC_ISD200 0xf0 /* ISD200 ATA */ -#define USB_SC_CYP_ATACB 0xf1 /* Cypress ATACB */ -#define USB_SC_DEVICE 0xff /* Use device's value */ - -/* Storage protocol codes */ - -#define USB_PR_CBI 0x00 /* Control/Bulk/Interrupt */ -#define USB_PR_CB 0x01 /* Control/Bulk w/o interrupt */ -#define USB_PR_BULK 0x50 /* bulk only */ -#define USB_PR_UAS 0x62 /* USB Attached SCSI */ - -#define USB_PR_USBAT 0x80 /* SCM-ATAPI bridge */ -#define USB_PR_EUSB_SDDR09 0x81 /* SCM-SCSI bridge for SDDR-09 */ -#define USB_PR_SDDR55 0x82 /* SDDR-55 (made up) */ -#define USB_PR_DPCM_USB 0xf0 /* Combination CB/SDDR09 */ -#define USB_PR_FREECOM 0xf1 /* Freecom */ -#define USB_PR_DATAFAB 0xf2 /* Datafab chipsets */ -#define USB_PR_JUMPSHOT 0xf3 /* Lexar Jumpshot */ -#define USB_PR_ALAUDA 0xf4 /* Alauda chipsets */ -#define USB_PR_KARMA 0xf5 /* Rio Karma */ - -#define USB_PR_DEVICE 0xff /* Use device's value */ +#include /* */ -- cgit v1.2.3-18-g5258 From 496dda704bca1208e08773ba39b29a69536f5381 Mon Sep 17 00:00:00 2001 From: Maulik Mankad Date: Fri, 24 Sep 2010 13:44:06 +0300 Subject: usb: musb: host: unmap the buffer for PIO data transfers The USB stack maps the buffer for DMA if the controller supports DMA. MUSB controller can perform DMA as well as PIO transfers. The buffer needs to be unmapped before CPU can perform PIO data transfers. Export unmap_urb_for_dma() so that drivers can perform the DMA unmapping in a sane way. Signed-off-by: Maulik Mankad Acked-by: Alan Stern Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 3b571f1ffbb..fe89f7c298a 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -329,6 +329,7 @@ extern int usb_hcd_submit_urb(struct urb *urb, gfp_t mem_flags); extern int usb_hcd_unlink_urb(struct urb *urb, int status); extern void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status); +extern void unmap_urb_for_dma(struct usb_hcd *, struct urb *); extern void usb_hcd_flush_endpoint(struct usb_device *udev, struct usb_host_endpoint *ep); extern void usb_hcd_disable_endpoint(struct usb_device *udev, -- cgit v1.2.3-18-g5258 From 748eee0986f0d51c7bc39f194d515a8d8248ebdd Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Mon, 27 Sep 2010 15:17:18 +0300 Subject: USB: Add more empty functions in otg.h Add empty functions for get/put transceiver functions too, so that drivers that optionally use them can call them without worrying that they might not exist, eliminating ifdefs. Signed-off-by: Grazvydas Ignotas Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/otg.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h index 545cba73cca..0a5b3711e50 100644 --- a/include/linux/usb/otg.h +++ b/include/linux/usb/otg.h @@ -164,8 +164,19 @@ otg_shutdown(struct otg_transceiver *otg) } /* for usb host and peripheral controller drivers */ +#ifdef CONFIG_USB_OTG_UTILS extern struct otg_transceiver *otg_get_transceiver(void); extern void otg_put_transceiver(struct otg_transceiver *); +#else +static inline struct otg_transceiver *otg_get_transceiver(void) +{ + return NULL; +} + +static inline void otg_put_transceiver(struct otg_transceiver *x) +{ +} +#endif /* Context: can sleep */ static inline int -- cgit v1.2.3-18-g5258 From 230f7ede6c2f0e403f29e03e0251a470aa9350dd Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Tue, 28 Sep 2010 20:55:21 +0200 Subject: USB: add USB EHCI support for MPC5121 SoC Extends FSL EHCI platform driver glue layer to support MPC5121 USB controllers. MPC5121 Rev 2.0 silicon EHCI registers are in big endian format. The appropriate flags are set using the information in the platform data structure. MPC83xx system interface registers are not available on MPC512x, so the access to these registers is isolated in MPC512x case. Furthermore the USB controller clocks must be enabled before 512x register accesses which is done by providing platform specific init callback. The MPC512x internal USB PHY doesn't provide supply voltage. For boards using different power switches allow specifying DRVVBUS and PWR_FAULT signal polarity of the MPC5121 internal PHY using "fsl,invert-drvvbus" and "fsl,invert-pwr-fault" properties in the device tree USB nodes. Adds documentation for this new device tree bindings. Signed-off-by: Anatolij Gustschin Cc: Grant Likely Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl_devices.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 28e33fea510..d5f9a7431bd 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -58,11 +58,26 @@ enum fsl_usb2_phy_modes { FSL_USB2_PHY_SERIAL, }; +struct clk; +struct platform_device; + struct fsl_usb2_platform_data { /* board specific information */ enum fsl_usb2_operating_modes operating_mode; enum fsl_usb2_phy_modes phy_mode; unsigned int port_enables; + + int (*init)(struct platform_device *); + void (*exit)(struct platform_device *); + void __iomem *regs; /* ioremap'd register base */ + struct clk *clk; + unsigned big_endian_mmio:1; + unsigned big_endian_desc:1; + unsigned es:1; /* need USBMODE:ES */ + unsigned le_setup_buf:1; + unsigned have_sysif_regs:1; + unsigned invert_drvvbus:1; + unsigned invert_pwr_fault:1; }; /* Flags in fsl_usb2_mph_platform_data */ -- cgit v1.2.3-18-g5258 From 1dae423dd9b247b048eda00cb598c755e5933213 Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Fri, 1 Oct 2010 00:21:55 +0200 Subject: USB: introduce unmap_urb_setup_for_dma() Split unmap_urb_for_dma() to allow just the setup buffer to be unmapped. This allows HCDs to use PIO for the setup buffer if it is not suitable for DMA. Signed-off-by: Martin Fuzzey Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index fe89f7c298a..0b6e751ea0b 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -329,6 +329,7 @@ extern int usb_hcd_submit_urb(struct urb *urb, gfp_t mem_flags); extern int usb_hcd_unlink_urb(struct urb *urb, int status); extern void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status); +extern void unmap_urb_setup_for_dma(struct usb_hcd *, struct urb *); extern void unmap_urb_for_dma(struct usb_hcd *, struct urb *); extern void usb_hcd_flush_endpoint(struct usb_device *udev, struct usb_host_endpoint *ep); -- cgit v1.2.3-18-g5258 From 8e04d8056c1ea0e0aab730994b74756f0526cda8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 1 Oct 2010 14:20:08 -0700 Subject: scsi/sr: add no_read_disc_info scsi_device flag Some USB devices emulate a usb-mass-storage attached (scsi) cdrom device, usually this fake cdrom contains the windows software for the device. While working on supporting Appotech ax3003 based photoframes, which do this I discovered that they will go of into lala land when ever they see a READ_DISC_INFO scsi command. Thus this patch adds a scsi_device flag (which can then be set by the usb-storage driver through an unsual-devs entry), to indicate this, and makes the sr driver honor this flag. I know this sucks, but as discussed on linux-scsi list there is no other way to make this device work properly. Looking at usb traces made under windows, windows never sends a READ_DISC_INFO during normal interactions with a usb cdrom device. So as this cdrom emulation thingie becomes more common we might see more of this problem. Signed-off-by: Hans de Goede Cc: James Bottomley Cc: Alan Stern Cc: Matthew Dharm Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/scsi/scsi_device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 50cb34ffef1..e8c2433ad8a 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -148,6 +148,7 @@ struct scsi_device { unsigned retry_hwerror:1; /* Retry HARDWARE_ERROR */ unsigned last_sector_bug:1; /* do not use multisector accesses on SD_LAST_BUGGY_SECTORS */ + unsigned no_read_disc_info:1; /* Avoid READ_DISC_INFO cmds */ unsigned is_visible:1; /* is the device visible in sysfs */ DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */ -- cgit v1.2.3-18-g5258 From ae38c78a03e1b77ad45248fcf097e4568e740209 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 1 Oct 2010 14:20:10 -0700 Subject: usb-storage: add new no_read_disc_info quirk Appotech ax3003 (the larger brother of the ax203) based devices are even more buggy then the ax203. They will go of into lala land when ever they see a READ_DISC_INFO scsi command. So add a new US_FL which tells the scsi sr driver to not issue any READ_DISC_INFO scsi commands. [akpm@linux-foundation.org: fix build] Signed-off-by: Hans de Goede Cc: James Bottomley Cc: Alan Stern Cc: Matthew Dharm Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index f091dc6e5a0..e62e9fe0888 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -58,7 +58,9 @@ US_FLAG(CAPACITY_OK, 0x00010000) \ /* READ CAPACITY response is correct */ \ US_FLAG(BAD_SENSE, 0x00020000) \ - /* Bad Sense (never more than 18 bytes) */ + /* Bad Sense (never more than 18 bytes) */ \ + US_FLAG(NO_READ_DISC_INFO, 0x00040000) \ + /* cannot handle READ_DISC_INFO */ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3-18-g5258 From 5ce524bdff367b4abda20bcfd4dafd9d30c773df Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 1 Oct 2010 14:20:10 -0700 Subject: scsi/sd: add a no_read_capacity_16 scsi_device flag I seem to have a knack for digging up buggy usb devices which don't work with Linux, and I'm crazy enough to try to make them work. So this time a friend of mine asked me to get an mp4 player (an mp3 player which can play videos on a small screen) to work with Linux. It is based on the well known rockbox chipset for which we already have an unusual devs entries to work around some of its bugs. But this model comes with an additional twist. This model chokes on read_capacity_16 calls. Now normally we don't make those calls, but this model comes with an sdcard slot and when there is no card in there (and shipped from the factory there is none), it reports a size of 0. However this time the programmers actually got the read_capacity_10 response right! So they substract one from the size as stored internally in the mp3 player before reporting it back, resulting in an answer of ... 0xffffffff sectors, causing sd.c to try a read_capacity_16, on which the device crashes. This patch adds a flag to scsi_device to indicate that a a device cannot handle read_capacity_16, and when this flag is set if a device reports an lba of 0xffffffff as answer to a read_capacity_10, assumes it tries to report a size of 0. Signed-off-by: Hans de Goede Cc: James Bottomley Cc: Alan Stern Cc: Matthew Dharm Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/scsi/scsi_device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index e8c2433ad8a..85867dcde33 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -149,6 +149,7 @@ struct scsi_device { unsigned last_sector_bug:1; /* do not use multisector accesses on SD_LAST_BUGGY_SECTORS */ unsigned no_read_disc_info:1; /* Avoid READ_DISC_INFO cmds */ + unsigned no_read_capacity_16:1; /* Avoid READ_CAPACITY_16 cmds */ unsigned is_visible:1; /* is the device visible in sysfs */ DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */ -- cgit v1.2.3-18-g5258 From 00914025cc4e783d4703b4db1d47b41f389e50c8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 1 Oct 2010 14:20:11 -0700 Subject: usb-storage: add new no_read_capacity_16 quirk Some Rockbox based mp4 players will crash when ever they see a read_capacity_16 scsi command. So add a new US_FL which tells the scsi sd driver to not issue any read_capacity_16 scsi commands. Signed-off-by: Hans de Goede Cc: James Bottomley Cc: Alan Stern Cc: Matthew Dharm Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index e62e9fe0888..71693d4a4fe 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -60,7 +60,9 @@ US_FLAG(BAD_SENSE, 0x00020000) \ /* Bad Sense (never more than 18 bytes) */ \ US_FLAG(NO_READ_DISC_INFO, 0x00040000) \ - /* cannot handle READ_DISC_INFO */ + /* cannot handle READ_DISC_INFO */ \ + US_FLAG(NO_READ_CAPACITY_16, 0x00080000) \ + /* cannot handle READ_CAPACITY_16 */ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3-18-g5258 From 0a6a717ceff67f887b16783ce891f5dcf846f1fc Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Thu, 7 Oct 2010 14:46:15 +0200 Subject: USB: gadget: storage: reuse definitions from scsi.h header file This commit changes storage_common.h, file_storage.c and f_mass_storage.c to use definitions of SCSI commands from scsi/scsi.h file instead of redefining the commands in storage_common.c. scsi/scsi.h header file was missing READ_FORMAT_CAPACITIES and READ_HEADER so this commit also add those to the header. Signed-off-by: Michal Nazarewicz Cc: Alan Stern Cc: James Bottomley Signed-off-by: Greg Kroah-Hartman --- include/scsi/scsi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 8fcb6e0e9e7..1418e9010f3 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -67,6 +67,7 @@ struct scsi_cmnd; #define SEND_DIAGNOSTIC 0x1d #define ALLOW_MEDIUM_REMOVAL 0x1e +#define READ_FORMAT_CAPACITIES 0x23 #define SET_WINDOW 0x24 #define READ_CAPACITY 0x25 #define READ_10 0x28 @@ -96,6 +97,7 @@ struct scsi_cmnd; #define WRITE_SAME 0x41 #define UNMAP 0x42 #define READ_TOC 0x43 +#define READ_HEADER 0x44 #define LOG_SELECT 0x4c #define LOG_SENSE 0x4d #define XDWRITEREAD_10 0x53 -- cgit v1.2.3-18-g5258 From 562e7c71c6708353bfe7b615576bcbcf7afd522e Mon Sep 17 00:00:00 2001 From: Tatyana Brokhman Date: Sat, 9 Oct 2010 16:46:12 +0200 Subject: usb: usb3.0 ch9 definitions Adding SuperSpeed usb definitions as defined by ch9 of the USB3.0 spec. This patch is a preparation for adding SuperSpeed support to the gadget framework. Signed-off-by: Tatyana Brokhman Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ch9.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index b0f7e9f5717..f917bbbc890 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -123,8 +123,23 @@ #define USB_DEVICE_A_ALT_HNP_SUPPORT 5 /* (otg) other RH port does */ #define USB_DEVICE_DEBUG_MODE 6 /* (special devices only) */ +/* + * New Feature Selectors as added by USB 3.0 + * See USB 3.0 spec Table 9-6 + */ +#define USB_DEVICE_U1_ENABLE 48 /* dev may initiate U1 transition */ +#define USB_DEVICE_U2_ENABLE 49 /* dev may initiate U2 transition */ +#define USB_DEVICE_LTM_ENABLE 50 /* dev may send LTM */ +#define USB_INTRF_FUNC_SUSPEND 0 /* function suspend */ + +#define USB_INTR_FUNC_SUSPEND_OPT_MASK 0xFF00 + #define USB_ENDPOINT_HALT 0 /* IN/OUT will STALL */ +/* Bit array elements as returned by the USB_REQ_GET_STATUS request. */ +#define USB_DEV_STAT_U1_ENABLED 2 /* transition into U1 state */ +#define USB_DEV_STAT_U2_ENABLED 3 /* transition into U2 state */ +#define USB_DEV_STAT_LTM_ENABLED 4 /* Latency tolerance messages */ /** * struct usb_ctrlrequest - SETUP data for a USB device control request @@ -675,6 +690,7 @@ struct usb_bos_descriptor { __u8 bNumDeviceCaps; } __attribute__((packed)); +#define USB_DT_BOS_SIZE 5 /*-------------------------------------------------------------------------*/ /* USB_DT_DEVICE_CAPABILITY: grouped with BOS */ @@ -712,16 +728,56 @@ struct usb_wireless_cap_descriptor { /* Ultra Wide Band */ __u8 bReserved; } __attribute__((packed)); +/* USB 2.0 Extension descriptor */ #define USB_CAP_TYPE_EXT 2 struct usb_ext_cap_descriptor { /* Link Power Management */ __u8 bLength; __u8 bDescriptorType; __u8 bDevCapabilityType; - __u8 bmAttributes; + __le32 bmAttributes; #define USB_LPM_SUPPORT (1 << 1) /* supports LPM */ } __attribute__((packed)); +#define USB_DT_USB_EXT_CAP_SIZE 7 + +/* + * SuperSpeed USB Capability descriptor: Defines the set of SuperSpeed USB + * specific device level capabilities + */ +#define USB_SS_CAP_TYPE 3 +struct usb_ss_cap_descriptor { /* Link Power Management */ + __u8 bLength; + __u8 bDescriptorType; + __u8 bDevCapabilityType; + __u8 bmAttributes; +#define USB_LTM_SUPPORT (1 << 1) /* supports LTM */ + __le16 wSpeedSupported; +#define USB_LOW_SPEED_OPERATION (1) /* Low speed operation */ +#define USB_FULL_SPEED_OPERATION (1 << 1) /* Full speed operation */ +#define USB_HIGH_SPEED_OPERATION (1 << 2) /* High speed operation */ +#define USB_5GBPS_OPERATION (1 << 3) /* Operation at 5Gbps */ + __u8 bFunctionalitySupport; + __u8 bU1devExitLat; + __le16 bU2DevExitLat; +} __attribute__((packed)); + +#define USB_DT_USB_SS_CAP_SIZE 10 + +/* + * Container ID Capability descriptor: Defines the instance unique ID used to + * identify the instance across all operating modes + */ +#define CONTAINER_ID_TYPE 4 +struct usb_ss_container_id_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDevCapabilityType; + __u8 bReserved; + __u8 ContainerID[16]; /* 128-bit number */ +} __attribute__((packed)); + +#define USB_DT_USB_SS_CONTN_ID_SIZE 20 /*-------------------------------------------------------------------------*/ /* USB_DT_WIRELESS_ENDPOINT_COMP: companion descriptor associated with -- cgit v1.2.3-18-g5258 From 69cb1ec4ce4da4bc4c07bb09c4c98b3e25d99fb1 Mon Sep 17 00:00:00 2001 From: Eric Bénard Date: Fri, 15 Oct 2010 14:30:58 +0200 Subject: mxc_udc: add workaround for ENGcm09152 for i.MX35 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this patch gives the possibility to workaround bug ENGcm09152 on i.MX35 when the hardware workaround is also implemented on the board. It covers the workaround described on page 25 of the following Errata : http://cache.freescale.com/files/dsp/doc/errata/IMX35CE.pdf Signed-off-by: Eric Bénard Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl_devices.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index d5f9a7431bd..4eb56ed75fb 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -66,6 +66,7 @@ struct fsl_usb2_platform_data { enum fsl_usb2_operating_modes operating_mode; enum fsl_usb2_phy_modes phy_mode; unsigned int port_enables; + unsigned int workaround; int (*init)(struct platform_device *); void (*exit)(struct platform_device *); @@ -84,6 +85,8 @@ struct fsl_usb2_platform_data { #define FSL_USB2_PORT0_ENABLED 0x00000001 #define FSL_USB2_PORT1_ENABLED 0x00000002 +#define FLS_USB2_WORKAROUND_ENGCM09152 (1 << 0) + struct spi_device; struct fsl_spi_platform_data { -- cgit v1.2.3-18-g5258 From f7030bbc446430ecd12c9ad02cf0ea94934e5f91 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 11 Oct 2010 10:20:14 -0500 Subject: kdb: Allow kernel loadable modules to add kdb shell functions In order to allow kernel modules to dynamically add a command to the kdb shell the kdb_register, kdb_register_repeat, kdb_unregister, and kdb_printf need to be exported as GPL symbols. Any kernel module that adds a dynamic kdb shell function should only need to include linux/kdb.h. Signed-off-by: Jason Wessel --- include/linux/kdb.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include') diff --git a/include/linux/kdb.h b/include/linux/kdb.h index ea6e5244ed3..deda197ced6 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -28,6 +28,41 @@ extern int kdb_poll_idx; extern int kdb_initial_cpu; extern atomic_t kdb_event; +/* Types and messages used for dynamically added kdb shell commands */ + +#define KDB_MAXARGS 16 /* Maximum number of arguments to a function */ + +typedef enum { + KDB_REPEAT_NONE = 0, /* Do not repeat this command */ + KDB_REPEAT_NO_ARGS, /* Repeat the command without arguments */ + KDB_REPEAT_WITH_ARGS, /* Repeat the command including its arguments */ +} kdb_repeat_t; + +typedef int (*kdb_func_t)(int, const char **); + +/* KDB return codes from a command or internal kdb function */ +#define KDB_NOTFOUND (-1) +#define KDB_ARGCOUNT (-2) +#define KDB_BADWIDTH (-3) +#define KDB_BADRADIX (-4) +#define KDB_NOTENV (-5) +#define KDB_NOENVVALUE (-6) +#define KDB_NOTIMP (-7) +#define KDB_ENVFULL (-8) +#define KDB_ENVBUFFULL (-9) +#define KDB_TOOMANYBPT (-10) +#define KDB_TOOMANYDBREGS (-11) +#define KDB_DUPBPT (-12) +#define KDB_BPTNOTFOUND (-13) +#define KDB_BADMODE (-14) +#define KDB_BADINT (-15) +#define KDB_INVADDRFMT (-16) +#define KDB_BADREG (-17) +#define KDB_BADCPUNUM (-18) +#define KDB_BADLENGTH (-19) +#define KDB_NOBP (-20) +#define KDB_BADADDR (-21) + /* * kdb_diemsg * @@ -105,9 +140,17 @@ int kdb_process_cpu(const struct task_struct *p) /* kdb access to register set for stack dumping */ extern struct pt_regs *kdb_current_regs; +/* Dynamic kdb shell command registration */ +extern int kdb_register(char *, kdb_func_t, char *, char *, short); +extern int kdb_register_repeat(char *, kdb_func_t, char *, char *, + short, kdb_repeat_t); +extern int kdb_unregister(char *); #else /* ! CONFIG_KGDB_KDB */ #define kdb_printf(...) #define kdb_init(x) +#define kdb_register(...) +#define kdb_register_repeat(...) +#define kdb_uregister(x) #endif /* CONFIG_KGDB_KDB */ enum { KDB_NOT_INITIALIZED, -- cgit v1.2.3-18-g5258 From 91b152aa85bbcf076e269565394c31964f940371 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 23 Aug 2010 09:20:14 -0500 Subject: kdb,kgdb: fix sparse fixups Fix the following sparse warnings: kdb_main.c:328:5: warning: symbol 'kdbgetu64arg' was not declared. Should it be static? kgdboc.c:246:12: warning: symbol 'kgdboc_early_init' was not declared. Should it be static? kgdb.c:652:26: warning: incorrect type in argument 1 (different address spaces) kgdb.c:652:26: expected void const *ptr kgdb.c:652:26: got struct perf_event *[noderef] *pev The one in kgdb.c required the (void * __force) because of the return code from register_wide_hw_breakpoint looking like: return (void __percpu __force *)ERR_PTR(err); Signed-off-by: Jason Wessel --- include/linux/kdb.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/kdb.h b/include/linux/kdb.h index deda197ced6..aadff7cc2b8 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -139,6 +139,14 @@ int kdb_process_cpu(const struct task_struct *p) /* kdb access to register set for stack dumping */ extern struct pt_regs *kdb_current_regs; +#ifdef CONFIG_KALLSYMS +extern const char *kdb_walk_kallsyms(loff_t *pos); +#else /* ! CONFIG_KALLSYMS */ +static inline const char *kdb_walk_kallsyms(loff_t *pos) +{ + return NULL; +} +#endif /* ! CONFIG_KALLSYMS */ /* Dynamic kdb shell command registration */ extern int kdb_register(char *, kdb_func_t, char *, char *, short); -- cgit v1.2.3-18-g5258 From 9566a7a851eb7201e3207eab53ee81efd0850fee Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 10 Aug 2010 00:58:41 +0900 Subject: nilfs2: accept future revisions Compatibility of nilfs partitions is now managed with three feature sets. This changes old compatibility check with revision number so that it can accept future revisions. Note that we can stop support of experimental versions of nilfs that doesn't know the feature sets by incrementing NILFS_CURRENT_REV. We don't have to do it soon, but it would be a possible option whenever the need arises. Signed-off-by: Ryusuke Konishi --- include/linux/nilfs2_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index f5487b6f91e..b07f5cdff5e 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -229,6 +229,7 @@ struct nilfs_super_block { */ #define NILFS_CURRENT_REV 2 /* current major revision */ #define NILFS_MINOR_REV 0 /* minor revision */ +#define NILFS_MIN_SUPP_REV 2 /* minimum supported revision */ /* * Feature set definitions -- cgit v1.2.3-18-g5258 From 6c43f41000312fefa482c3bfdd97e7f81d6be0ec Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 20 Aug 2010 20:10:38 +0900 Subject: nilfs2: keep zero value in i_cno except for gc-inodes On-memory inode structures of nilfs have a member "i_cno" which stores a checkpoint number related to the inode. For gc-inodes, this field indicates version of data each gc-inode caches for GC. Log writer temporarily uses "i_cno" to transfer the latest checkpoint number. This stops the latter use and lets only gc-inodes use it. The purpose of this patch is to allow the successive change use "i_cno" for inode lookup. Signed-off-by: Ryusuke Konishi --- include/linux/nilfs2_fs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index b07f5cdff5e..bcdb34c68d0 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -270,6 +270,14 @@ struct nilfs_super_block { #define NILFS_MIN_NRSVSEGS 8 /* Minimum number of reserved segments */ +/* + * We call DAT, cpfile, and sufile root metadata files. Inodes of + * these files are written in super root block instead of ifile, and + * garbage collector doesn't keep any past versions of these files. + */ +#define NILFS_ROOT_METADATA_FILE(ino) \ + ((ino) >= NILFS_DAT_INO && (ino) <= NILFS_SUFILE_INO) + /* * bytes offset of secondary super block */ -- cgit v1.2.3-18-g5258 From 8e656fd518784b49453f60c5f78b78703bc85cb2 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 27 Aug 2010 00:23:02 +0900 Subject: nilfs2: make snapshots in checkpoint tree exportable The previous export operations cannot handle multiple versions of a filesystem if they belong to the same sb instance. This adds a new type of file handle and extends export operations so that they can get the inode specified by a checkpoint number as well as an inode number and a generation number. Signed-off-by: Ryusuke Konishi --- include/linux/exportfs.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index a9cd507f8cd..28028988c86 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -67,6 +67,19 @@ enum fid_type { * 32 bit parent block number, 32 bit parent generation number */ FILEID_UDF_WITH_PARENT = 0x52, + + /* + * 64 bit checkpoint number, 64 bit inode number, + * 32 bit generation number. + */ + FILEID_NILFS_WITHOUT_PARENT = 0x61, + + /* + * 64 bit checkpoint number, 64 bit inode number, + * 32 bit generation number, 32 bit parent generation. + * 64 bit parent inode number. + */ + FILEID_NILFS_WITH_PARENT = 0x62, }; struct fid { -- cgit v1.2.3-18-g5258 From b453c95eb8d6a3b2348e9c7bc28a7d223cb640e3 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 25 Aug 2010 23:52:46 +0900 Subject: nilfs2: get rid of snapshot mount flag This flag is a fake used to distinguish type of super block instance. And, it got obsolete by the unification of sb. Signed-off-by: Ryusuke Konishi --- include/linux/nilfs2_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index bcdb34c68d0..46604671ccd 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -147,7 +147,6 @@ struct nilfs_super_root { #define NILFS_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */ #define NILFS_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */ #define NILFS_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ -#define NILFS_MOUNT_SNAPSHOT 0x0080 /* Snapshot flag */ #define NILFS_MOUNT_BARRIER 0x1000 /* Use block barriers */ #define NILFS_MOUNT_STRICT_ORDER 0x2000 /* Apply strict in-order semantics also for data */ -- cgit v1.2.3-18-g5258 From c486f3895d6dc751f7c0f04f0fa67390ce4d168e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 3 Oct 2010 17:44:03 +0900 Subject: nilfs2: change license of exported header file This allows other projects to carry copies of the header file related to ABI and disk format (i.e. "nilfs2_fs.h") without it or distributors having to worry about effects on the project's overall license terms. It's also desired for switching the license of nilfs library to LGPL. Jiro SEKIBA pointed out these license issues (Message-ID: <87tylo7msw.wl%jir@sekiba.com>), and he suggested switching license of the library and nilfs2_fs.h to GNU Lesser General Public License. We take in his suggestion to avoid the license issues. Signed-off-by: Ryusuke Konishi Cc: Jiro SEKIBA Cc: linux-nilfs --- include/linux/nilfs2_fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 46604671ccd..227e49dd572 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -4,16 +4,16 @@ * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * GNU Lesser General Public License for more details. * - * You should have received a copy of the GNU General Public License + * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * -- cgit v1.2.3-18-g5258 From 2a342ed57756ad5d8af5456959433884367e5ab2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:48 +0200 Subject: KVM: PPC: Implement hypervisor interface To communicate with KVM directly we need to plumb some sort of interface between the guest and KVM. Usually those interfaces use hypercalls. This hypercall implementation is described in the last patch of the series in a special documentation file. Please read that for further information. This patch implements stubs to handle KVM PPC hypercalls on the host and guest side alike. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- include/linux/kvm_para.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h index d73109243fd..3b8080e1843 100644 --- a/include/linux/kvm_para.h +++ b/include/linux/kvm_para.h @@ -17,6 +17,7 @@ #define KVM_HC_VAPIC_POLL_IRQ 1 #define KVM_HC_MMU_OP 2 +#define KVM_HC_FEATURES 3 /* * hypercalls use architecture specific -- cgit v1.2.3-18-g5258 From beb03f14da9ceff76ff08cbb8af064b52dc21f7e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:53 +0200 Subject: KVM: PPC: First magic page steps We will be introducing a method to project the shared page in guest context. As soon as we're talking about this coupling, the shared page is colled magic page. This patch introduces simple defines, so the follow-up patches are easier to read. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- include/linux/kvm_para.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h index 3b8080e1843..ac2015a2501 100644 --- a/include/linux/kvm_para.h +++ b/include/linux/kvm_para.h @@ -18,6 +18,7 @@ #define KVM_HC_VAPIC_POLL_IRQ 1 #define KVM_HC_MMU_OP 2 #define KVM_HC_FEATURES 3 +#define KVM_HC_PPC_MAP_MAGIC_PAGE 4 /* * hypercalls use architecture specific -- cgit v1.2.3-18-g5258 From ba492962363a02c45836be205f339be48093e1be Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:56 +0200 Subject: KVM: Move kvm_guest_init out of generic code Currently x86 is the only architecture that uses kvm_guest_init(). With PowerPC we're getting a second user, but the signature is different there and we don't need to export it, as it uses the normal kernel init framework. So let's move the x86 specific definition of that function over to the x86 specfic header file. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- include/linux/kvm_para.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h index ac2015a2501..47a070b0520 100644 --- a/include/linux/kvm_para.h +++ b/include/linux/kvm_para.h @@ -26,11 +26,6 @@ #include #ifdef __KERNEL__ -#ifdef CONFIG_KVM_GUEST -void __init kvm_guest_init(void); -#else -#define kvm_guest_init() do { } while (0) -#endif static inline int kvm_para_has_feature(unsigned int feature) { -- cgit v1.2.3-18-g5258 From 15711e9c927bfc08e66791cbf0ca7887c0880768 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:08 +0200 Subject: KVM: PPC: Add get_pvinfo interface to query hypercall instructions We need to tell the guest the opcodes that make up a hypercall through interfaces that are controlled by userspace. So we need to add a call for userspace to allow it to query those opcodes so it can pass them on. This is required because the hypercall opcodes can change based on the hypervisor conditions. If we're running in hardware accelerated hypervisor mode, a hypercall looks different from when we're running without hardware acceleration. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- include/linux/kvm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 636fc381c89..37077045970 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -414,6 +414,14 @@ struct kvm_enable_cap { __u8 pad[64]; }; +/* for KVM_PPC_GET_PVINFO */ +struct kvm_ppc_pvinfo { + /* out */ + __u32 flags; + __u32 hcall[4]; + __u8 pad[108]; +}; + #define KVMIO 0xAE /* @@ -530,6 +538,7 @@ struct kvm_enable_cap { #ifdef __KVM_HAVE_XCRS #define KVM_CAP_XCRS 56 #endif +#define KVM_CAP_PPC_GET_PVINFO 57 #ifdef KVM_CAP_IRQ_ROUTING @@ -664,6 +673,8 @@ struct kvm_clock_data { /* Available with KVM_CAP_PIT_STATE2 */ #define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2) #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) +/* Available with KVM_CAP_PPC_GET_PVINFO */ +#define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo) /* * ioctls for vcpu fds -- cgit v1.2.3-18-g5258 From 887c08ac191efb103e33e589aacbc2ce1a3f131e Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sun, 22 Aug 2010 19:10:28 +0800 Subject: KVM: MMU: introduce hva_to_pfn_atomic function Introduce hva_to_pfn_atomic(), it's the fast path and can used in atomic context, the later patch will use it Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c13cc48697a..307d0e2c0f5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -296,6 +296,7 @@ void kvm_release_page_dirty(struct page *page); void kvm_set_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); +pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr); pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); pfn_t gfn_to_pfn_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); @@ -518,6 +519,12 @@ static inline void kvm_guest_exit(void) current->flags &= ~PF_VCPU; } +static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, + gfn_t gfn) +{ + return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; +} + static inline gpa_t gfn_to_gpa(gfn_t gfn) { return (gpa_t)gfn << PAGE_SHIFT; -- cgit v1.2.3-18-g5258 From 48987781eb1d1e8ded41f55cd5806615fda92c6e Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sun, 22 Aug 2010 19:11:43 +0800 Subject: KVM: MMU: introduce gfn_to_page_many_atomic() function Introduce this function to get consecutive gfn's pages, it can reduce gup's overload, used by later patch Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 307d0e2c0f5..b837ec80885 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -289,6 +289,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, void kvm_disable_largepages(void); void kvm_arch_flush_shadow(struct kvm *kvm); +int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, + int nr_pages); + struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); void kvm_release_page_clean(struct page *page); -- cgit v1.2.3-18-g5258 From 365fb3fdf6769d3553999d8eb6cc2a8c56c747c1 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sat, 28 Aug 2010 19:24:13 +0800 Subject: KVM: MMU: rewrite audit_mappings_page() function There is a bugs in this function, we call gfn_to_pfn() and kvm_mmu_gva_to_gpa_read() in atomic context(kvm_mmu_audit() is called under the spinlock(mmu_lock)'s protection). This patch fix it by: - introduce gfn_to_pfn_atomic instead of gfn_to_pfn - get the mapping gfn from kvm_mmu_page_get_gfn() And it adds 'notrap' ptes check in unsync/direct sps Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b837ec80885..f2ecdd52032 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -300,6 +300,7 @@ void kvm_set_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr); +pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); pfn_t gfn_to_pfn_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); -- cgit v1.2.3-18-g5258 From 7b4203e8cb5c5d9bc49da62b7a6fa4ba876a1b3f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 30 Aug 2010 13:50:45 +0200 Subject: KVM: PPC: Expose level based interrupt cap Now that we have all the level interrupt magic in place, let's expose the capability to user space, so it can make use of it! Signed-off-by: Alexander Graf --- include/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 37077045970..919ae53adc5 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -539,6 +539,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_XCRS 56 #endif #define KVM_CAP_PPC_GET_PVINFO 57 +#define KVM_CAP_PPC_IRQ_LEVEL 58 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-18-g5258 From c30a358d33e0e111f06e54a4a4125371e6b6693c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 10 Sep 2010 17:30:48 +0200 Subject: KVM: MMU: Add infrastructure for two-level page walker This patch introduces a mmu-callback to translate gpa addresses in the walk_addr code. This is later used to translate l2_gpa addresses into l1_gpa addresses. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f2ecdd52032..917e68ff5ed 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -534,6 +534,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn) return (gpa_t)gfn << PAGE_SHIFT; } +static inline gfn_t gpa_to_gfn(gpa_t gpa) +{ + return (gfn_t)(gpa >> PAGE_SHIFT); +} + static inline hpa_t pfn_to_hpa(pfn_t pfn) { return (hpa_t)pfn << PAGE_SHIFT; -- cgit v1.2.3-18-g5258 From 3842d135ff246b6543f1df77f5600e12094a6845 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 27 Jul 2010 12:30:24 +0300 Subject: KVM: Check for pending events before attempting injection Instead of blindly attempting to inject an event before each guest entry, check for a possible event first in vcpu->requests. Sites that can trigger event injection are modified to set KVM_REQ_EVENT: - interrupt, nmi window opening - ppr updates - i8259 output changes - local apic irr changes - rflags updates - gif flag set - event set on exit This improves non-injecting entry performance, and sets the stage for non-atomic injection. Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 917e68ff5ed..6022da1490e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -39,6 +39,7 @@ #define KVM_REQ_KVMCLOCK_UPDATE 8 #define KVM_REQ_KICK 9 #define KVM_REQ_DEACTIVATE_FPU 10 +#define KVM_REQ_EVENT 11 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 -- cgit v1.2.3-18-g5258 From 34c238a1d1832d7b1f655641f52782e86396b30a Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Sat, 18 Sep 2010 14:38:14 -1000 Subject: KVM: x86: Rename timer function This just changes some names to better reflect the usage they will be given. Separated out to keep confusion to a minimum. Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6022da1490e..0b89d008db6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -36,7 +36,7 @@ #define KVM_REQ_PENDING_TIMER 5 #define KVM_REQ_UNHALT 6 #define KVM_REQ_MMU_SYNC 7 -#define KVM_REQ_KVMCLOCK_UPDATE 8 +#define KVM_REQ_CLOCK_UPDATE 8 #define KVM_REQ_KICK 9 #define KVM_REQ_DEACTIVATE_FPU 10 #define KVM_REQ_EVENT 11 -- cgit v1.2.3-18-g5258 From d7a79b6c80fdbe4366484805ee07a4735fc427d8 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 14 Oct 2010 13:59:04 +0200 Subject: KVM: Fix signature of kvm_iommu_map_pages stub Breaks otherwise if CONFIG_IOMMU_API is not set. KVM-Stable-Tag. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0b89d008db6..866ed308436 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -483,8 +483,7 @@ int kvm_deassign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev); #else /* CONFIG_IOMMU_API */ static inline int kvm_iommu_map_pages(struct kvm *kvm, - gfn_t base_gfn, - unsigned long npages) + struct kvm_memory_slot *slot) { return 0; } -- cgit v1.2.3-18-g5258 From d582963a027fd63f8dfc97a0bf3654d4380e34ce Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 24 Oct 2010 18:16:57 +0200 Subject: i2c: Simplify i2c_parent_is_i2c_adapter Only i2c devices can have their type set to i2c_adapter_type, so testing the bus type is redundant. Signed-off-by: Jean Delvare Cc: Michael Lawnick --- include/linux/i2c.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 4bae0b72ed3..9391a491501 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -387,7 +387,6 @@ static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data) static inline int i2c_parent_is_i2c_adapter(const struct i2c_adapter *adapter) { return adapter->dev.parent != NULL - && adapter->dev.parent->bus == &i2c_bus_type && adapter->dev.parent->type == &i2c_adapter_type; } -- cgit v1.2.3-18-g5258 From 97cc4d49cfcda1c2dad89c00b62a25b628ce2115 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 24 Oct 2010 18:16:57 +0200 Subject: i2c: Let i2c_parent_is_i2c_adapter return the parent adapter This makes the calling site's code clearer IMHO. Signed-off-by: Jean Delvare Acked-by: Michael Lawnick --- include/linux/i2c.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 9391a491501..1f66fa06a97 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -384,10 +384,15 @@ static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data) dev_set_drvdata(&dev->dev, data); } -static inline int i2c_parent_is_i2c_adapter(const struct i2c_adapter *adapter) +static inline struct i2c_adapter * +i2c_parent_is_i2c_adapter(const struct i2c_adapter *adapter) { - return adapter->dev.parent != NULL - && adapter->dev.parent->type == &i2c_adapter_type; + struct device *parent = adapter->dev.parent; + + if (parent != NULL && parent->type == &i2c_adapter_type) + return to_i2c_adapter(parent); + else + return NULL; } /* Adapter locking functions, exported for shared pin cases */ -- cgit v1.2.3-18-g5258 From f253b86b4ad1b3220544e75880510fd455ebd23f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 24 Oct 2010 22:06:02 +0200 Subject: Revert "block: fix accounting bug on cross partition merges" This reverts commit 7681bfeeccff5efa9eb29bf09249a3c400b15327. Conflicts: include/linux/genhd.h It has numerous issues with the cleanup path and non-elevator devices. Revert it for now so we can come up with a clean version without rushing things. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - include/linux/elevator.h | 2 -- include/linux/genhd.h | 1 - 3 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 009b80e49f5..646b462d04d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -115,7 +115,6 @@ struct request { void *elevator_private3; struct gendisk *rq_disk; - struct hd_struct *part; unsigned long start_time; #ifdef CONFIG_BLK_CGROUP unsigned long long start_time_ns; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 80a0ece8f7e..4fd978e7eb8 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -122,8 +122,6 @@ extern void elv_completed_request(struct request_queue *, struct request *); extern int elv_set_request(struct request_queue *, struct request *, gfp_t); extern void elv_put_request(struct request_queue *, struct request *); extern void elv_drain_elevator(struct request_queue *); -extern void elv_quiesce_start(struct request_queue *); -extern void elv_quiesce_end(struct request_queue *); /* * io scheduler registration diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 557c3927e70..7a7b9c1644e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -140,7 +140,6 @@ struct disk_part_tbl { struct rcu_head rcu_head; int len; struct hd_struct __rcu *last_lookup; - struct gendisk *disk; struct hd_struct __rcu *part[]; }; -- cgit v1.2.3-18-g5258 From b5ce1d83a62fc109d8e815b1fc71dcdb0d26bc49 Mon Sep 17 00:00:00 2001 From: Yoshihisa Abe Date: Mon, 25 Oct 2010 02:03:44 -0400 Subject: Coda: add spin lock to protect accesses to struct coda_inode_info. We mostly need it to protect cached user permissions. The c_flags field is advisory, reading the wrong value is harmless and in the worst case we hit a slow path where we have to make an extra upcall to the userspace cache manager when revalidating a dentry or inode. Signed-off-by: Yoshihisa Abe Signed-off-by: Jan Harkes Signed-off-by: Linus Torvalds --- include/linux/coda_fs_i.h | 13 +++++++++---- include/linux/coda_linux.h | 6 +++++- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/coda_fs_i.h b/include/linux/coda_fs_i.h index b3ef0c46157..e35071b1de0 100644 --- a/include/linux/coda_fs_i.h +++ b/include/linux/coda_fs_i.h @@ -10,19 +10,24 @@ #include #include +#include #include /* * coda fs inode data + * c_lock protects accesses to c_flags, c_mapcount, c_cached_epoch, c_uid and + * c_cached_perm. + * vfs_inode is set only when the inode is created and never changes. + * c_fid is set when the inode is created and should be considered immutable. */ struct coda_inode_info { - struct CodaFid c_fid; /* Coda identifier */ - u_short c_flags; /* flags (see below) */ - struct list_head c_cilist; /* list of all coda inodes */ + struct CodaFid c_fid; /* Coda identifier */ + u_short c_flags; /* flags (see below) */ unsigned int c_mapcount; /* nr of times this inode is mapped */ unsigned int c_cached_epoch; /* epoch for cached permissions */ vuid_t c_uid; /* fsuid for cached permissions */ - unsigned int c_cached_perm; /* cached access permissions */ + unsigned int c_cached_perm; /* cached access permissions */ + spinlock_t c_lock; struct inode vfs_inode; }; diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h index dcc228aa335..2e914d0771b 100644 --- a/include/linux/coda_linux.h +++ b/include/linux/coda_linux.h @@ -89,7 +89,11 @@ static __inline__ char *coda_i2s(struct inode *inode) /* this will not zap the inode away */ static __inline__ void coda_flag_inode(struct inode *inode, int flag) { - ITOC(inode)->c_flags |= flag; + struct coda_inode_info *cii = ITOC(inode); + + spin_lock(&cii->c_lock); + cii->c_flags |= flag; + spin_unlock(&cii->c_lock); } #endif -- cgit v1.2.3-18-g5258 From f7cc02b8715618e179242ba9cc10bdc5146ae565 Mon Sep 17 00:00:00 2001 From: Yoshihisa Abe Date: Mon, 25 Oct 2010 02:03:45 -0400 Subject: Coda: push BKL regions into coda_upcall() Now that shared inode state is locked using the cii->c_lock, the BKL is only used to protect the upcall queues used to communicate with the userspace cache manager. The remaining state is all local and we can push the lock further down into coda_upcall(). Signed-off-by: Yoshihisa Abe Signed-off-by: Jan Harkes Signed-off-by: Linus Torvalds --- include/linux/coda_psdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index 284b520934a..1e60c5a41a5 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -63,7 +63,7 @@ int venus_symlink(struct super_block *sb, struct CodaFid *fid, int venus_access(struct super_block *sb, struct CodaFid *fid, int mask); int venus_pioctl(struct super_block *sb, struct CodaFid *fid, unsigned int cmd, struct PioctlData *data); -int coda_downcall(int opcode, union outputArgs *out, struct super_block *sb); +int coda_downcall(struct venus_comm *vcp, int opcode, union outputArgs *out); int venus_fsync(struct super_block *sb, struct CodaFid *fid); int venus_statfs(struct dentry *dentry, struct kstatfs *sfs); -- cgit v1.2.3-18-g5258 From da47c19e5c746829042933c8f945a71e2b62d6fc Mon Sep 17 00:00:00 2001 From: Yoshihisa Abe Date: Mon, 25 Oct 2010 02:03:46 -0400 Subject: Coda: replace BKL with mutex Replace the BKL with a mutex to protect the venus_comm structure which binds the mountpoint with the character device and holds the upcall queues. Signed-off-by: Yoshihisa Abe Signed-off-by: Jan Harkes Signed-off-by: Linus Torvalds --- include/linux/coda_psdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index 1e60c5a41a5..72f2d2f0af9 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -8,6 +8,7 @@ #ifdef __KERNEL__ #include +#include struct kstatfs; @@ -20,6 +21,7 @@ struct venus_comm { int vc_inuse; struct super_block *vc_sb; struct backing_dev_info bdi; + struct mutex vc_mutex; }; -- cgit v1.2.3-18-g5258