From a26ac2455ffcf3be5c6ef92bc6df7182700f2114 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" <paul.mckenney@linaro.org> Date: Wed, 12 Jan 2011 14:10:23 -0800 Subject: rcu: move TREE_RCU from softirq to kthread If RCU priority boosting is to be meaningful, callback invocation must be boosted in addition to preempted RCU readers. Otherwise, in presence of CPU real-time threads, the grace period ends, but the callbacks don't get invoked. If the callbacks don't get invoked, the associated memory doesn't get freed, so the system is still subject to OOM. But it is not reasonable to priority-boost RCU_SOFTIRQ, so this commit moves the callback invocations to a kthread, which can be boosted easily. Also add comments and properly synchronized all accesses to rcu_cpu_kthread_task, as suggested by Lai Jiangshan. Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Reviewed-by: Josh Triplett <josh@joshtriplett.org> --- include/linux/interrupt.h | 1 - include/trace/events/irq.h | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index bea0ac75071..6c12989839d 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -414,7 +414,6 @@ enum TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, - RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS }; diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 1c09820df58..ae045ca7d35 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -20,8 +20,7 @@ struct softirq_action; softirq_name(BLOCK_IOPOLL), \ softirq_name(TASKLET), \ softirq_name(SCHED), \ - softirq_name(HRTIMER), \ - softirq_name(RCU)) + softirq_name(HRTIMER)) /** * irq_handler_entry - called immediately before the irq action handler -- cgit v1.2.3-18-g5258 From 4a29865689dbb87a02e3b0fff4a4ae5041273173 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" <paul.mckenney@linaro.org> Date: Sun, 3 Apr 2011 21:33:51 -0700 Subject: rcu: make rcutorture version numbers available through debugfs It is not possible to accurately correlate rcutorture output with that of debugfs. This patch therefore adds a debugfs file that prints out the rcutorture version number, permitting easy correlation. Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Reviewed-by: Josh Triplett <josh@joshtriplett.org> --- include/linux/rcupdate.h | 13 ++++++++++++- include/linux/rcutree.h | 3 +++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ff422d2b7f9..9e169c2ba91 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -47,6 +47,18 @@ extern int rcutorture_runnable; /* for sysctl */ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) +extern void rcutorture_record_test_transition(void); +extern void rcutorture_record_progress(unsigned long vernum); +#else +static inline void rcutorture_record_test_transition(void) +{ +} +static inline void rcutorture_record_progress(unsigned long vernum) +{ +} +#endif + #define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) @@ -68,7 +80,6 @@ extern void call_rcu_sched(struct rcu_head *head, extern void synchronize_sched(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); -extern int sched_expedited_torture_stats(char *page); static inline void __rcu_read_lock_bh(void) { diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 3a933482734..284dad10c55 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -58,9 +58,12 @@ static inline void synchronize_rcu_bh_expedited(void) extern void rcu_barrier(void); +extern unsigned long rcutorture_testseq; +extern unsigned long rcutorture_vernum; extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); extern long rcu_batches_completed_sched(void); + extern void rcu_force_quiescent_state(void); extern void rcu_bh_force_quiescent_state(void); extern void rcu_sched_force_quiescent_state(void); -- cgit v1.2.3-18-g5258 From b0c9d7ff2793502650ad987c3f237d5fe5587a1e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Date: Tue, 29 Mar 2011 12:56:56 -0700 Subject: rcu: add DEBUG_OBJECTS_RCU_HEAD check for alignment Verify that rcu_head structures are aligned to a four-byte boundary. This check is enabled by CONFIG_DEBUG_OBJECTS_RCU_HEAD. Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Reviewed-by: Josh Triplett <josh@joshtriplett.org> --- include/linux/rcupdate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9e169c2ba91..c7aeacf7fc9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -785,6 +785,7 @@ extern struct debug_obj_descr rcuhead_debug_descr; static inline void debug_rcu_head_queue(struct rcu_head *head) { + WARN_ON_ONCE((unsigned long)head & 0x3); debug_object_activate(head, &rcuhead_debug_descr); debug_object_active_state(head, &rcuhead_debug_descr, STATE_RCU_HEAD_READY, -- cgit v1.2.3-18-g5258 From 9ab1544eb4196ca8d05c433b2eb56f74496b1ee3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan <laijs@cn.fujitsu.com> Date: Fri, 18 Mar 2011 11:15:47 +0800 Subject: rcu: introduce kfree_rcu() Many rcu callbacks functions just call kfree() on the base structure. These functions are trivial, but their size adds up, and furthermore when they are used in a kernel module, that module must invoke the high-latency rcu_barrier() function at module-unload time. The kfree_rcu() function introduced by this commit addresses this issue. Rather than encoding a function address in the embedded rcu_head structure, kfree_rcu() instead encodes the offset of the rcu_head structure within the base structure. Because the functions are not allowed in the low-order 4096 bytes of kernel virtual memory, offsets up to 4095 bytes can be accommodated. If the offset is larger than 4095 bytes, a compile-time error will be generated in __kfree_rcu(). If this error is triggered, you can either fall back to use of call_rcu() or rearrange the structure to position the rcu_head structure into the first 4096 bytes. Note that the allowable offset might decrease in the future, for example, to allow something like kmem_cache_free_rcu(). The new kfree_rcu() function can replace code as follows: call_rcu(&p->rcu, simple_kfree_callback); where "simple_kfree_callback()" might be defined as follows: void simple_kfree_callback(struct rcu_head *p) { struct foo *q = container_of(p, struct foo, rcu); kfree(q); } with the following: kfree_rcu(&p->rcu, rcu); Note that the "rcu" is the name of a field in the structure being freed. The reason for using this rather than passing in a pointer to the base structure is that the above approach allows better type checking. This commit is based on earlier work by Lai Jiangshan and Manfred Spraul: Lai's V1 patch: http://lkml.org/lkml/2008/9/18/1 Manfred's patch: http://lkml.org/lkml/2009/1/2/115 Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com> Signed-off-by: Manfred Spraul <manfred@colorfullife.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Reviewed-by: David Howells <dhowells@redhat.com> Reviewed-by: Josh Triplett <josh@joshtriplett.org> --- include/linux/rcupdate.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index c7aeacf7fc9..99f9aa7c280 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -809,4 +809,60 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) +{ + return offset < 4096; +} + +static __always_inline +void __kfree_rcu(struct rcu_head *head, unsigned long offset) +{ + typedef void (*rcu_callback)(struct rcu_head *); + + BUILD_BUG_ON(!__builtin_constant_p(offset)); + + /* See the kfree_rcu() header comment. */ + BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); + + call_rcu(head, (rcu_callback)offset); +} + +extern void kfree(const void *); + +static inline void __rcu_reclaim(struct rcu_head *head) +{ + unsigned long offset = (unsigned long)head->func; + + if (__is_kfree_rcu_offset(offset)) + kfree((void *)head - offset); + else + head->func(head); +} + +/** + * kfree_rcu() - kfree an object after a grace period. + * @ptr: pointer to kfree + * @rcu_head: the name of the struct rcu_head within the type of @ptr. + * + * Many rcu callbacks functions just call kfree() on the base structure. + * These functions are trivial, but their size adds up, and furthermore + * when they are used in a kernel module, that module must invoke the + * high-latency rcu_barrier() function at module-unload time. + * + * The kfree_rcu() function handles this issue. Rather than encoding a + * function address in the embedded rcu_head structure, kfree_rcu() instead + * encodes the offset of the rcu_head structure within the base structure. + * Because the functions are not allowed in the low-order 4096 bytes of + * kernel virtual memory, offsets up to 4095 bytes can be accommodated. + * If the offset is larger than 4095 bytes, a compile-time error will + * be generated in __kfree_rcu(). If this error is triggered, you can + * either fall back to use of call_rcu() or rearrange the structure to + * position the rcu_head structure into the first 4096 bytes. + * + * Note that the allowable offset might decrease in the future, for example, + * to allow something like kmem_cache_free_rcu(). + */ +#define kfree_rcu(ptr, rcu_head) \ + __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) + #endif /* __LINUX_RCUPDATE_H */ -- cgit v1.2.3-18-g5258 From 29ce831000081dd757d3116bf774aafffc4b6b20 Mon Sep 17 00:00:00 2001 From: Gleb Natapov <gleb@redhat.com> Date: Wed, 4 May 2011 16:31:03 +0300 Subject: rcu: provide rcu_virt_note_context_switch() function. Provide rcu_virt_note_context_switch() for vitalization use to note quiescent state during guest entry. Signed-off-by: Gleb Natapov <gleb@redhat.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> --- include/linux/rcutiny.h | 8 ++++++++ include/linux/rcutree.h | 10 ++++++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 30ebd7c8d87..52b3e0281fd 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -99,6 +99,14 @@ static inline void rcu_note_context_switch(int cpu) rcu_preempt_note_context_switch(); } +/* + * Take advantage of the fact that there is only one CPU, which + * allows us to ignore virtualization-based context switches. + */ +static inline void rcu_virt_note_context_switch(int cpu) +{ +} + /* * Return the number of grace periods. */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 284dad10c55..e65d06634dd 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -35,6 +35,16 @@ extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); extern void rcu_cpu_stall_reset(void); +/* + * Note a virtualization-based context switch. This is simply a + * wrapper around rcu_note_context_switch(), which allows TINY_RCU + * to save a few bytes. + */ +static inline void rcu_virt_note_context_switch(int cpu) +{ + rcu_note_context_switch(cpu); +} + #ifdef CONFIG_TREE_PREEMPT_RCU extern void exit_rcu(void); -- cgit v1.2.3-18-g5258