diff options
author | Paul Mackerras <paulus@samba.org> | 2010-08-26 19:56:43 +0000 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2010-09-02 14:07:31 +1000 |
commit | cf9efce0ce3136fa076f53e53154e98455229514 (patch) | |
tree | 0e110018b160aff4813b81e0e8c3a43a364edd48 /arch/powerpc/platforms | |
parent | 93c22703efa72c7527dbd586d1951c1f4a85fd70 (diff) |
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the
PURR register for measuring the user and system time used by
processes, as well as other related times such as hardirq and
softirq times. This turns out to be quite confusing for users
because it means that a program will often be measured as taking
less time when run on a multi-threaded processor (SMT2 or SMT4 mode)
than it does when run on a single-threaded processor (ST mode), even
though the program takes longer to finish. The discrepancy is
accounted for as stolen time, which is also confusing, particularly
when there are no other partitions running.
This changes the accounting to use the timebase instead, meaning that
the reported user and system times are the actual number of real-time
seconds that the program was executing on the processor thread,
regardless of which SMT mode the processor is in. Thus a program will
generally show greater user and system times when run on a
multi-threaded processor than on a single-threaded processor.
On pSeries systems on POWER5 or later processors, we measure the
stolen time (time when this partition wasn't running) using the
hypervisor dispatch trace log. We check for new entries in the
log on every entry from user mode and on every transition from
kernel process context to soft or hard IRQ context (i.e. when
account_system_vtime() gets called). So that we can correctly
distinguish time stolen from user time and time stolen from system
time, without having to check the log on every exit to user mode,
we store separate timestamps for exit to user mode and entry from
user mode.
On systems that have a SPURR (POWER6 and POWER7), we read the SPURR
in account_system_vtime() (as before), and then apportion the SPURR
ticks since the last time we read it between scaled user time and
scaled system time according to the relative proportions of user
time and system time over the same interval. This avoids having to
read the SPURR on every kernel entry and exit. On systems that have
PURR but not SPURR (i.e., POWER5), we do the same using the PURR
rather than the SPURR.
This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl
for now since it conflicts with the use of the dispatch trace log
by the time accounting code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r-- | arch/powerpc/platforms/pseries/dtl.c | 24 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/lpar.c | 21 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/setup.c | 52 |
3 files changed, 79 insertions, 18 deletions
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index adfd5441b61..0357655db49 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -27,27 +27,10 @@ #include <asm/system.h> #include <asm/uaccess.h> #include <asm/firmware.h> +#include <asm/lppaca.h> #include "plpar_wrappers.h" -/* - * Layout of entries in the hypervisor's DTL buffer. Although we don't - * actually access the internals of an entry (we only need to know the size), - * we might as well define it here for reference. - */ -struct dtl_entry { - u8 dispatch_reason; - u8 preempt_reason; - u16 processor_id; - u32 enqueue_to_dispatch_time; - u32 ready_to_enqueue_time; - u32 waiting_to_ready_time; - u64 timebase; - u64 fault_addr; - u64 srr0; - u64 srr1; -}; - struct dtl { struct dtl_entry *buf; struct dentry *file; @@ -237,6 +220,11 @@ static int dtl_init(void) struct dentry *event_mask_file, *buf_entries_file; int rc, i; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + /* disable this for now */ + return -ENODEV; +#endif + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return -ENODEV; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index a17fe4a9059..f129040d974 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -248,6 +248,8 @@ void vpa_init(int cpu) int hwcpu = get_hard_smp_processor_id(cpu); unsigned long addr; long ret; + struct paca_struct *pp; + struct dtl_entry *dtl; if (cpu_has_feature(CPU_FTR_ALTIVEC)) lppaca_of(cpu).vmxregs_in_use = 1; @@ -274,6 +276,25 @@ void vpa_init(int cpu) "registration for cpu %d (hw %d) of area %lx " "returns %ld\n", cpu, hwcpu, addr, ret); } + + /* + * Register dispatch trace log, if one has been allocated. + */ + pp = &paca[cpu]; + dtl = pp->dispatch_log; + if (dtl) { + pp->dtl_ridx = 0; + pp->dtl_curr = dtl; + lppaca_of(cpu).dtl_idx = 0; + + /* hypervisor reads buffer length from this field */ + dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES; + ret = register_dtl(hwcpu, __pa(dtl)); + if (ret) + pr_warn("DTL registration failed for cpu %d (%ld)\n", + cpu, ret); + lppaca_of(cpu).dtl_enable_mask = 2; + } } static long pSeries_lpar_hpte_insert(unsigned long hpte_group, diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index a6d19e3a505..d345bfd56bb 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -273,6 +273,58 @@ static struct notifier_block pci_dn_reconfig_nb = { .notifier_call = pci_dn_reconfig_notifier, }; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +/* + * Allocate space for the dispatch trace log for all possible cpus + * and register the buffers with the hypervisor. This is used for + * computing time stolen by the hypervisor. + */ +static int alloc_dispatch_logs(void) +{ + int cpu, ret; + struct paca_struct *pp; + struct dtl_entry *dtl; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return 0; + + for_each_possible_cpu(cpu) { + pp = &paca[cpu]; + dtl = kmalloc_node(DISPATCH_LOG_BYTES, GFP_KERNEL, + cpu_to_node(cpu)); + if (!dtl) { + pr_warn("Failed to allocate dispatch trace log for cpu %d\n", + cpu); + pr_warn("Stolen time statistics will be unreliable\n"); + break; + } + + pp->dtl_ridx = 0; + pp->dispatch_log = dtl; + pp->dispatch_log_end = dtl + N_DISPATCH_LOG; + pp->dtl_curr = dtl; + } + + /* Register the DTL for the current (boot) cpu */ + dtl = get_paca()->dispatch_log; + get_paca()->dtl_ridx = 0; + get_paca()->dtl_curr = dtl; + get_paca()->lppaca_ptr->dtl_idx = 0; + + /* hypervisor reads buffer length from this field */ + dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES; + ret = register_dtl(hard_smp_processor_id(), __pa(dtl)); + if (ret) + pr_warn("DTL registration failed for boot cpu %d (%d)\n", + smp_processor_id(), ret); + get_paca()->lppaca_ptr->dtl_enable_mask = 2; + + return 0; +} + +early_initcall(alloc_dispatch_logs); +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ + static void __init pSeries_setup_arch(void) { /* Discover PIC type and setup ppc_md accordingly */ |