aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Neuling <mikey@neuling.org>2010-05-10 20:28:26 +0000
committerWilly Tarreau <w@1wt.eu>2011-04-30 16:53:13 +0200
commit3ab0ecbb3ff75f2730e6b333472f25a318eca7ea (patch)
treeb613a6713a656ab0a0508051c3cfa0cbf12660ca
parent6c7f1a4b53d648f818490155d4ee13e85a9ccd31 (diff)
powerpc/kexec: Speedup kexec hash PTE tear down
commit d504bed676caad29a3dba3d3727298c560628f5c upstream. Currently for kexec the PTE tear down on 1TB segment systems normally requires 3 hcalls for each PTE removal. On a machine with 32GB of memory it can take around a minute to remove all the PTEs. This optimises the path so that we only remove PTEs that are valid. It also uses the read 4 PTEs at once HCALL. For the common case where a PTEs is invalid in a 1TB segment, this turns the 3 HCALLs per PTE down to 1 HCALL per 4 PTEs. This gives an > 10x speedup in kexec times on PHYP, taking a 32GB machine from around 1 minute down to a few seconds. Signed-off-by: Michael Neuling <mikey@neuling.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Kamalesh babulal <kamalesh@linux.vnet.ibm.com> cc: Anton Blanchard <anton@samba.org> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c35
1 files changed, 21 insertions, 14 deletions
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 52a80e5840e..2f1ff34460a 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -366,21 +366,28 @@ static void pSeries_lpar_hptab_clear(void)
{
unsigned long size_bytes = 1UL << ppc64_pft_size;
unsigned long hpte_count = size_bytes >> 4;
- unsigned long dummy1, dummy2, dword0;
+ struct {
+ unsigned long pteh;
+ unsigned long ptel;
+ } ptes[4];
long lpar_rc;
- int i;
-
- /* TODO: Use bulk call */
- for (i = 0; i < hpte_count; i++) {
- /* dont remove HPTEs with VRMA mappings */
- lpar_rc = plpar_pte_remove_raw(H_ANDCOND, i, HPTE_V_1TB_SEG,
- &dummy1, &dummy2);
- if (lpar_rc == H_NOT_FOUND) {
- lpar_rc = plpar_pte_read_raw(0, i, &dword0, &dummy1);
- if (!lpar_rc && ((dword0 & HPTE_V_VRMA_MASK)
- != HPTE_V_VRMA_MASK))
- /* Can be hpte for 1TB Seg. So remove it */
- plpar_pte_remove_raw(0, i, 0, &dummy1, &dummy2);
+ int i, j;
+
+ /* Read in batches of 4,
+ * invalidate only valid entries not in the VRMA
+ * hpte_count will be a multiple of 4
+ */
+ for (i = 0; i < hpte_count; i += 4) {
+ lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes);
+ if (lpar_rc != H_SUCCESS)
+ continue;
+ for (j = 0; j < 4; j++){
+ if ((ptes[j].pteh & HPTE_V_VRMA_MASK) ==
+ HPTE_V_VRMA_MASK)
+ continue;
+ if (ptes[j].pteh & HPTE_V_VALID)
+ plpar_pte_remove_raw(0, i + j, 0,
+ &(ptes[j].pteh), &(ptes[j].ptel));
}
}
}