Diffstat (limited to 'arch/powerpc/kernel/time.c')
-rw-r--r--	arch/powerpc/kernel/time.c	213
1 file changed, 129 insertions(+), 84 deletions(-)
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index be171ee73bf..9fff9cdcc51 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -42,6 +42,7 @@
#include <linux/timex.h>
#include <linux/kernel_stat.h>
#include <linux/time.h>
+#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/profile.h>
#include <linux/cpu.h>
@@ -73,7 +74,7 @@
/* powerpc clocksource/clockevent code */
#include <linux/clockchips.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
static cycle_t rtc_read(struct clocksource *);
static struct clocksource clocksource_rtc = {
@@ -106,7 +107,7 @@ struct clock_event_device decrementer_clockevent = {
.irq = 0,
.set_next_event = decrementer_set_next_event,
.set_mode = decrementer_set_mode,
- .features = CLOCK_EVT_FEAT_ONESHOT,
+ .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP,
};
EXPORT_SYMBOL(decrementer_clockevent);
@@ -143,7 +144,7 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
unsigned long ppc_tb_freq;
EXPORT_SYMBOL_GPL(ppc_tb_freq);
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Factors for converting from cputime_t (timebase ticks) to
* jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
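The conversion factors this comment refers to are precomputed by calc_cputime_factors() so that timebase ticks can be converted without a 64-bit division on every call. As a plain illustration of the conversion being optimised (a sketch only, not the kernel's multiply-high fast path; tb_to_us is a hypothetical name):

    #include <stdint.h>

    /* Convert timebase ticks to microseconds, given the timebase
     * frequency in ticks per second (ppc_tb_freq in this file).
     * Ignores overflow of ticks * 1000000, which the kernel's
     * precomputed scaled multipliers avoid. */
    static uint64_t tb_to_us(uint64_t ticks, uint64_t tb_freq)
    {
            return ticks * 1000000ULL / tb_freq;
    }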
@@ -210,23 +211,23 @@ static u64 scan_dispatch_log(u64 stop_tb)
if (!dtl)
return 0;
- if (i == vpa->dtl_idx)
+ if (i == be64_to_cpu(vpa->dtl_idx))
return 0;
- while (i < vpa->dtl_idx) {
- if (dtl_consumer)
- dtl_consumer(dtl, i);
- dtb = dtl->timebase;
- tb_delta = dtl->enqueue_to_dispatch_time +
- dtl->ready_to_enqueue_time;
+ while (i < be64_to_cpu(vpa->dtl_idx)) {
+ dtb = be64_to_cpu(dtl->timebase);
+ tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
+ be32_to_cpu(dtl->ready_to_enqueue_time);
barrier();
- if (i + N_DISPATCH_LOG < vpa->dtl_idx) {
+ if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
/* buffer has overflowed */
- i = vpa->dtl_idx - N_DISPATCH_LOG;
+ i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
continue;
}
if (dtb > stop_tb)
break;
+ if (dtl_consumer)
+ dtl_consumer(dtl, i);
stolen += tb_delta;
++i;
++dtl;
@@ -269,7 +270,7 @@ static inline u64 calculate_stolen_time(u64 stop_tb)
{
u64 stolen = 0;
- if (get_paca()->dtl_ridx != get_paca()->lppaca_ptr->dtl_idx) {
+ if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) {
stolen = scan_dispatch_log(stop_tb);
get_paca()->system_time -= stolen;
}
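The be64_to_cpu()/be32_to_cpu() conversions added above all serve one pattern: snapshot the hypervisor-owned producer index, consume entries behind it, and resynchronise when the producer has lapped the N_DISPATCH_LOG-entry ring. A userspace sketch of that scan, with a hypothetical struct dtl_entry and be64toh()/be32toh() standing in for the kernel's byte-swap helpers:

    #include <stdint.h>
    #include <endian.h>  /* be64toh()/be32toh() */

    #define N_LOG 16     /* ring size; the kernel's is N_DISPATCH_LOG */

    struct dtl_entry {   /* hypothetical subset of a dispatch log entry */
            uint64_t timebase;                  /* stored big-endian */
            uint32_t enqueue_to_dispatch_time;  /* stored big-endian */
            uint32_t ready_to_enqueue_time;     /* stored big-endian */
    };

    /* Sum dispatch latencies for entries up to stop_tb, in the style of
     * scan_dispatch_log().  Unlike the kernel, this takes one snapshot of
     * the producer index; the real code re-reads vpa->dtl_idx around a
     * barrier() because the hypervisor appends entries concurrently. */
    static uint64_t scan_log(const struct dtl_entry *log, uint64_t *read_idx,
                             const uint64_t *write_idx_be, uint64_t stop_tb)
    {
            uint64_t i = *read_idx;
            uint64_t widx = be64toh(*write_idx_be);
            uint64_t stolen = 0;

            while (i < widx) {
                    const struct dtl_entry *e = &log[i % N_LOG];

                    if (i + N_LOG < widx) {      /* producer lapped us */
                            i = widx - N_LOG;    /* resync to oldest entry */
                            continue;
                    }
                    if (be64toh(e->timebase) > stop_tb)
                            break;
                    stolen += be32toh(e->enqueue_to_dispatch_time) +
                              be32toh(e->ready_to_enqueue_time);
                    ++i;
            }
            *read_idx = i;
            return stolen;
    }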
@@ -291,13 +292,14 @@ static inline u64 calculate_stolen_time(u64 stop_tb)
* Account time for a transition between system, hard irq
* or soft irq state.
*/
-void account_system_vtime(struct task_struct *tsk)
+static u64 vtime_delta(struct task_struct *tsk,
+ u64 *sys_scaled, u64 *stolen)
{
- u64 now, nowscaled, delta, deltascaled;
- unsigned long flags;
- u64 stolen, udelta, sys_scaled, user_scaled;
+ u64 now, nowscaled, deltascaled;
+ u64 udelta, delta, user_scaled;
+
+ WARN_ON_ONCE(!irqs_disabled());
- local_irq_save(flags);
now = mftb();
nowscaled = read_spurr(now);
get_paca()->system_time += now - get_paca()->starttime;
@@ -305,7 +307,7 @@ void account_system_vtime(struct task_struct *tsk)
deltascaled = nowscaled - get_paca()->startspurr;
get_paca()->startspurr = nowscaled;
- stolen = calculate_stolen_time(now);
+ *stolen = calculate_stolen_time(now);
delta = get_paca()->system_time;
get_paca()->system_time = 0;
@@ -322,39 +324,50 @@ void account_system_vtime(struct task_struct *tsk)
* the user ticks get saved up in paca->user_time_scaled to be
* used by account_process_tick.
*/
- sys_scaled = delta;
+ *sys_scaled = delta;
user_scaled = udelta;
if (deltascaled != delta + udelta) {
if (udelta) {
- sys_scaled = deltascaled * delta / (delta + udelta);
- user_scaled = deltascaled - sys_scaled;
+ *sys_scaled = deltascaled * delta / (delta + udelta);
+ user_scaled = deltascaled - *sys_scaled;
} else {
- sys_scaled = deltascaled;
+ *sys_scaled = deltascaled;
}
}
get_paca()->user_time_scaled += user_scaled;
- if (in_interrupt() || idle_task(smp_processor_id()) != tsk) {
- account_system_time(tsk, 0, delta, sys_scaled);
- if (stolen)
- account_steal_time(stolen);
- } else {
- account_idle_time(delta + stolen);
- }
- local_irq_restore(flags);
+ return delta;
+}
+
+void vtime_account_system(struct task_struct *tsk)
+{
+ u64 delta, sys_scaled, stolen;
+
+ delta = vtime_delta(tsk, &sys_scaled, &stolen);
+ account_system_time(tsk, 0, delta, sys_scaled);
+ if (stolen)
+ account_steal_time(stolen);
+}
+EXPORT_SYMBOL_GPL(vtime_account_system);
+
+void vtime_account_idle(struct task_struct *tsk)
+{
+ u64 delta, sys_scaled, stolen;
+
+ delta = vtime_delta(tsk, &sys_scaled, &stolen);
+ account_idle_time(delta + stolen);
}
-EXPORT_SYMBOL_GPL(account_system_vtime);
/*
- * Transfer the user and system times accumulated in the paca
- * by the exception entry and exit code to the generic process
- * user and system time records.
+ * Transfer the user time accumulated in the paca
+ * by the exception entry and exit code to the generic
+ * process user time records.
* Must be called with interrupts disabled.
- * Assumes that account_system_vtime() has been called recently
- * (i.e. since the last entry from usermode) so that
+ * Assumes that vtime_account_system/idle() has been called
+ * recently (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
-void account_process_tick(struct task_struct *tsk, int user_tick)
+void vtime_account_user(struct task_struct *tsk)
{
cputime_t utime, utimescaled;
@@ -366,7 +379,7 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
account_user_time(tsk, utime, utimescaled);
}
-#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#define calc_cputime_factors()
#endif
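The refactored vtime_delta() keeps the old apportioning logic: when the SPURR-scaled delta differs from the sum of the raw timebase deltas, it is split between system and user time pro rata. That arithmetic in isolation (a sketch; split_scaled is a hypothetical name):

    #include <stdint.h>

    /* Split a scaled (SPURR) delta between system and user time in
     * proportion to the raw timebase deltas, as vtime_delta() does. */
    static void split_scaled(uint64_t deltascaled, uint64_t delta,
                             uint64_t udelta,
                             uint64_t *sys_scaled, uint64_t *user_scaled)
    {
            *sys_scaled = delta;    /* fast path: SPURR ticked with the timebase */
            *user_scaled = udelta;
            if (deltascaled != delta + udelta) {
                    if (udelta) {
                            *sys_scaled = deltascaled * delta / (delta + udelta);
                            *user_scaled = deltascaled - *sys_scaled;
                    } else {
                            *sys_scaled = deltascaled;
                    }
            }
    }

For instance, delta = 300, udelta = 100 and deltascaled = 200 give sys_scaled = 150 and user_scaled = 50.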
@@ -466,45 +479,15 @@ void arch_irq_work_raise(void)
#endif /* CONFIG_IRQ_WORK */
-/*
- * timer_interrupt - gets called when the decrementer overflows,
- * with interrupts disabled.
- */
-void timer_interrupt(struct pt_regs * regs)
+void __timer_interrupt(void)
{
- struct pt_regs *old_regs;
+ struct pt_regs *regs = get_irq_regs();
u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
struct clock_event_device *evt = &__get_cpu_var(decrementers);
u64 now;
- /* Ensure a positive value is written to the decrementer, or else
- * some CPUs will continue to take decrementer exceptions.
- */
- set_dec(DECREMENTER_MAX);
-
- /* Some implementations of hotplug will get timer interrupts while
- * offline, just ignore these
- */
- if (!cpu_online(smp_processor_id()))
- return;
-
- /* Conditionally hard-enable interrupts now that the DEC has been
- * bumped to its maximum value
- */
- may_hard_irq_enable();
-
trace_timer_interrupt_entry(regs);
- __get_cpu_var(irq_stat).timer_irqs++;
-
-#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
- if (atomic_read(&ppc_n_lost_interrupts) != 0)
- do_IRQ(regs);
-#endif
-
- old_regs = set_irq_regs(regs);
- irq_enter();
-
if (test_irq_work_pending()) {
clear_irq_work_pending();
irq_work_run();
@@ -515,10 +498,15 @@ void timer_interrupt(struct pt_regs * regs)
*next_tb = ~(u64)0;
if (evt->event_handler)
evt->event_handler(evt);
+ __get_cpu_var(irq_stat).timer_irqs_event++;
} else {
now = *next_tb - now;
if (now <= DECREMENTER_MAX)
set_dec((int)now);
+ /* We may have raced with new irq work */
+ if (test_irq_work_pending())
+ set_dec(1);
+ __get_cpu_var(irq_stat).timer_irqs_others++;
}
#ifdef CONFIG_PPC64
@@ -529,10 +517,60 @@ void timer_interrupt(struct pt_regs * regs)
}
#endif
+ trace_timer_interrupt_exit(regs);
+}
+
+/*
+ * timer_interrupt - gets called when the decrementer overflows,
+ * with interrupts disabled.
+ */
+void timer_interrupt(struct pt_regs * regs)
+{
+ struct pt_regs *old_regs;
+ u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+
+ /* Ensure a positive value is written to the decrementer, or else
+ * some CPUs will continue to take decrementer exceptions.
+ */
+ set_dec(DECREMENTER_MAX);
+
+ /* Some implementations of hotplug will get timer interrupts while
+ * offline; just ignore these.  We also need to set
+ * decrementers_next_tb to the maximum so that __check_irq_replay
+ * doesn't replay the timer interrupt on return, otherwise we'd trap
+ * here infinitely :(
+ */
+ if (!cpu_online(smp_processor_id())) {
+ *next_tb = ~(u64)0;
+ return;
+ }
+
+ /* Conditionally hard-enable interrupts now that the DEC has been
+ * bumped to its maximum value
+ */
+ may_hard_irq_enable();
+
+#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
+ if (atomic_read(&ppc_n_lost_interrupts) != 0)
+ do_IRQ(regs);
+#endif
+
+ old_regs = set_irq_regs(regs);
+ irq_enter();
+
+ __timer_interrupt();
irq_exit();
set_irq_regs(old_regs);
+}
- trace_timer_interrupt_exit(regs);
+/*
+ * Hypervisor decrementer interrupts shouldn't occur but are sometimes
+ * left pending on exit from a KVM guest. We don't need to do anything
+ * to clear them, as they are edge-triggered.
+ */
+void hdec_interrupt(struct pt_regs *regs)
+{
}
#ifdef CONFIG_SUSPEND
@@ -586,7 +624,7 @@ unsigned long long sched_clock(void)
static int __init get_freq(char *name, int cells, unsigned long *val)
{
struct device_node *cpu;
- const unsigned int *fp;
+ const __be32 *fp;
int found = 0;
/* The cpu node should have timebase and clock frequency properties */
@@ -605,7 +643,6 @@ static int __init get_freq(char *name, int cells, unsigned long *val)
return found;
}
-/* should become __cpuinit when secondary_cpu_time_init also is */
void start_cpu_decrementer(void)
{
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
@@ -643,7 +680,7 @@ int update_persistent_clock(struct timespec now)
struct rtc_time tm;
if (!ppc_md.set_rtc_time)
- return 0;
+ return -ENODEV;
to_tm(now.tv_sec + 1 + timezone_offset, &tm);
tm.tm_year -= 1900;
@@ -703,7 +740,7 @@ static cycle_t timebase_read(struct clocksource *cs)
return (cycle_t)get_tb();
}
-void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
struct clocksource *clock, u32 mult)
{
u64 new_tb_to_xs, new_stamp_xsec;
@@ -750,13 +787,8 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
void update_vsyscall_tz(void)
{
- /* Make userspace gettimeofday spin until we're done. */
- ++vdso_data->tb_update_count;
- smp_mb();
vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
vdso_data->tz_dsttime = sys_tz.tz_dsttime;
- smp_mb();
- ++vdso_data->tb_update_count;
}
static void __init clocksource_init(void)
@@ -783,6 +815,11 @@ static int decrementer_set_next_event(unsigned long evt,
{
__get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt;
set_dec(evt);
+
+ /* We may have raced with new irq work */
+ if (test_irq_work_pending())
+ set_dec(1);
+
return 0;
}
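Both set_dec(1) re-checks added by this patch (here and in __timer_interrupt()) close the same window: irq work raised after the decrementer has been programmed would otherwise wait out the full programmed period. Schematically (a sketch with stand-in helpers, not the kernel's per-CPU test_irq_work_pending() flag):

    #include <stdbool.h>

    static volatile bool irq_work_pending_flag;  /* stand-in for the per-CPU flag */

    static void set_dec(int ticks)               /* stub: program the decrementer */
    {
            (void)ticks;
    }

    /* Arm the decrementer for the next clock event.  Without step (2),
     * irq work raised between (1) and the return would sit until the
     * programmed expiry fires. */
    static void arm_next_event(int evt)
    {
            set_dec(evt);                        /* (1) program a far-off expiry */
            if (irq_work_pending_flag)           /* (2) re-check after arming */
                    set_dec(1);                  /* force a near-immediate interrupt */
    }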
@@ -793,6 +830,15 @@ static void decrementer_set_mode(enum clock_event_mode mode,
decrementer_set_next_event(DECREMENTER_MAX, dev);
}
+/* Interrupt handler for the timer broadcast IPI */
+void tick_broadcast_ipi_handler(void)
+{
+ u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+
+ *next_tb = get_tb_or_rtc();
+ __timer_interrupt();
+}
+
static void register_decrementer_clockevent(int cpu)
{
struct clock_event_device *dec = &per_cpu(decrementers, cpu);
@@ -896,6 +942,7 @@ void __init time_init(void)
clocksource_init();
init_decrementer_clockevent();
+ tick_setup_hrtimer_broadcast();
}
@@ -1028,10 +1075,8 @@ static int __init rtc_init(void)
return -ENODEV;
pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
- if (IS_ERR(pdev))
- return PTR_ERR(pdev);
- return 0;
+ return PTR_ERR_OR_ZERO(pdev);
}
module_init(rtc_init);
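PTR_ERR_OR_ZERO() used in rtc_init() above comes from include/linux/err.h and folds the removed IS_ERR()/PTR_ERR() branch into one expression; its definition is essentially:

    static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
    {
            if (IS_ERR(ptr))
                    return PTR_ERR(ptr);
            else
                    return 0;
    }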