aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp155
-rw-r--r--test/CodeGen/PowerPC/mcm-10.ll25
-rw-r--r--test/CodeGen/PowerPC/mcm-11.ll27
-rw-r--r--test/CodeGen/PowerPC/mcm-12.ll18
-rw-r--r--test/CodeGen/PowerPC/mcm-obj-2.ll77
-rw-r--r--test/CodeGen/PowerPC/tls-2.ll15
-rw-r--r--test/CodeGen/PowerPC/tls-ld-2.ll24
-rw-r--r--test/CodeGen/PowerPC/tls.ll2
8 files changed, 342 insertions, 1 deletions
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 14535065d5..8663dd41e5 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -67,6 +67,8 @@ namespace {
return true;
}
+ virtual void PostprocessISelDAG();
+
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(unsigned Imm) {
@@ -1398,6 +1400,159 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return SelectCode(N);
}
+/// PostProcessISelDAG - Perform some late peephole optimizations
+/// on the DAG representation.
+void PPCDAGToDAGISel::PostprocessISelDAG() {
+
+ // Skip peepholes at -O0.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // These optimizations are currently supported only for 64-bit SVR4.
+ if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64())
+ return;
+
+ SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
+ ++Position;
+
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = --Position;
+ // Skip dead nodes and any non-machine opcodes.
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
+
+ unsigned FirstOp;
+ unsigned StorageOpcode = N->getMachineOpcode();
+
+ switch (StorageOpcode) {
+ default: continue;
+
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LD:
+ case PPC::LFD:
+ case PPC::LFS:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWA:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ FirstOp = 0;
+ break;
+
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STD:
+ case PPC::STFD:
+ case PPC::STFS:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ FirstOp = 1;
+ break;
+ }
+
+ // If this is a load or store with a zero offset, we may be able to
+ // fold an add-immediate into the memory operation.
+ if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) ||
+ N->getConstantOperandVal(FirstOp) != 0)
+ continue;
+
+ SDValue Base = N->getOperand(FirstOp + 1);
+ if (!Base.isMachineOpcode())
+ continue;
+
+ unsigned Flags = 0;
+ bool ReplaceFlags = true;
+
+ // When the feeding operation is an add-immediate of some sort,
+ // determine whether we need to add relocation information to the
+ // target flags on the immediate operand when we fold it into the
+ // load instruction.
+ //
+ // For something like ADDItocL, the relocation information is
+ // inferred from the opcode; when we process it in the AsmPrinter,
+ // we add the necessary relocation there. A load, though, can receive
+ // relocation from various flavors of ADDIxxx, so we need to carry
+ // the relocation information in the target flags.
+ switch (Base.getMachineOpcode()) {
+ default: continue;
+
+ case PPC::ADDI8:
+ case PPC::ADDI8L:
+ case PPC::ADDIL:
+ // In some cases (such as TLS) the relocation information
+ // is already in place on the operand, so copying the operand
+ // is sufficient.
+ ReplaceFlags = false;
+ // For these cases, the immediate may not be divisible by 4, in
+ // which case the fold is illegal for DS-form instructions. (The
+ // other cases provide aligned addresses and are always safe.)
+ if ((StorageOpcode == PPC::LWA ||
+ StorageOpcode == PPC::LD ||
+ StorageOpcode == PPC::STD) &&
+ (!isa<ConstantSDNode>(Base.getOperand(1)) ||
+ Base.getConstantOperandVal(1) % 4 != 0))
+ continue;
+ break;
+ case PPC::ADDIdtprelL:
+ Flags = PPCII::MO_DTPREL16_LO;
+ break;
+ case PPC::ADDItlsldL:
+ Flags = PPCII::MO_TLSLD16_LO;
+ break;
+ case PPC::ADDItocL:
+ Flags = PPCII::MO_TOC16_LO;
+ break;
+ }
+
+ // We found an opportunity. Reverse the operands from the add
+ // immediate and substitute them into the load or store. If
+ // needed, update the target flags for the immediate operand to
+ // reflect the necessary relocation information.
+ DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
+ DEBUG(Base->dump(CurDAG));
+ DEBUG(dbgs() << "\nN: ");
+ DEBUG(N->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ SDValue ImmOpnd = Base.getOperand(1);
+
+ // If the relocation information isn't already present on the
+ // immediate operand, add it now.
+ if (ReplaceFlags) {
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
+
+ if (GA) {
+ DebugLoc dl = GA->getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+ ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
+ } else {
+ ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(ImmOpnd);
+ if (CP) {
+ const Constant *C = CP->getConstVal();
+ ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
+ CP->getAlignment(),
+ 0, Flags);
+ }
+ }
+ }
+
+ if (FirstOp == 1) // Store
+ (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
+ Base.getOperand(0), N->getOperand(3));
+ else // Load
+ (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
+ N->getOperand(2));
+
+ // The add-immediate may now be dead, in which case remove it.
+ if (Base.getNode()->use_empty())
+ CurDAG->RemoveDeadNode(Base.getNode());
+ }
+}
/// createPPCISelDag - This pass converts a legalized DAG into a
diff --git a/test/CodeGen/PowerPC/mcm-10.ll b/test/CodeGen/PowerPC/mcm-10.ll
new file mode 100644
index 0000000000..4bec3e16fa
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-10.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
+
+; Test peephole optimization for medium code model (32-bit TOC offsets)
+; for loading and storing a static variable scoped to a function.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@test_fn_static.si = internal global i32 0, align 4
+
+define signext i32 @test_fn_static() nounwind {
+entry:
+ %0 = load i32* @test_fn_static.si, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @test_fn_static.si, align 4
+ ret i32 %0
+}
+
+; CHECK: test_fn_static:
+; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: .type [[VAR]],@object
+; CHECK: .local [[VAR]]
+; CHECK: .comm [[VAR]],4,4
diff --git a/test/CodeGen/PowerPC/mcm-11.ll b/test/CodeGen/PowerPC/mcm-11.ll
new file mode 100644
index 0000000000..f2bc4c9cb7
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-11.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
+
+; Test peephole optimization for medium code model (32-bit TOC offsets)
+; for loading and storing a file-scope static variable.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@gi = global i32 5, align 4
+
+define signext i32 @test_file_static() nounwind {
+entry:
+ %0 = load i32* @gi, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @gi, align 4
+ ret i32 %0
+}
+
+; CHECK: test_file_static:
+; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: .type [[VAR]],@object
+; CHECK: .data
+; CHECK: .globl [[VAR]]
+; CHECK: [[VAR]]:
+; CHECK: .long 5
diff --git a/test/CodeGen/PowerPC/mcm-12.ll b/test/CodeGen/PowerPC/mcm-12.ll
new file mode 100644
index 0000000000..911305d435
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-12.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
+
+; Test peephole optimization for medium code model (32-bit TOC offsets)
+; for loading a value from the constant pool (TOC-relative).
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @test_double_const() nounwind {
+entry:
+ ret double 0x3F4FD4920B498CF0
+}
+
+; CHECK: [[VAR:[a-z0-9A-Z_.]+]]:
+; CHECK: .quad 4562098671269285104
+; CHECK: test_double_const:
+; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
diff --git a/test/CodeGen/PowerPC/mcm-obj-2.ll b/test/CodeGen/PowerPC/mcm-obj-2.ll
new file mode 100644
index 0000000000..2dd1718ba7
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-obj-2.ll
@@ -0,0 +1,77 @@
+; RUN: llc -O1 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+; FIXME: When asm-parse is available, could make this an assembly test.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@test_fn_static.si = internal global i32 0, align 4
+
+define signext i32 @test_fn_static() nounwind {
+entry:
+ %0 = load i32* @test_fn_static.si, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @test_fn_static.si, align 4
+ ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing function-scoped variable si.
+;
+; CHECK: Relocation 0
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM2:[0-9]+]]
+; CHECK-NEXT: 'r_type', 0x00000032
+; CHECK: Relocation 1
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM2]]
+; CHECK-NEXT: 'r_type', 0x00000030
+; CHECK: Relocation 2
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM2]]
+; CHECK-NEXT: 'r_type', 0x00000030
+
+@gi = global i32 5, align 4
+
+define signext i32 @test_file_static() nounwind {
+entry:
+ %0 = load i32* @gi, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @gi, align 4
+ ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing file-scope variable gi.
+;
+; CHECK: Relocation 3
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM3:[0-9]+]]
+; CHECK-NEXT: 'r_type', 0x00000032
+; CHECK: Relocation 4
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM3]]
+; CHECK-NEXT: 'r_type', 0x00000030
+; CHECK: Relocation 5
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM3]]
+; CHECK-NEXT: 'r_type', 0x00000030
+
+define double @test_double_const() nounwind {
+entry:
+ ret double 0x3F4FD4920B498CF0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing a constant.
+;
+; CHECK: Relocation 6
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM4:[0-9]+]]
+; CHECK-NEXT: 'r_type', 0x00000032
+; CHECK: Relocation 7
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM4]]
+; CHECK-NEXT: 'r_type', 0x00000030
+
diff --git a/test/CodeGen/PowerPC/tls-2.ll b/test/CodeGen/PowerPC/tls-2.ll
new file mode 100644
index 0000000000..20d8fe46ea
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-2.ll
@@ -0,0 +1,15 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-freebsd10.0"
+; RUN: llc -O1 < %s -march=ppc64 | FileCheck %s
+
+@a = thread_local global i32 0, align 4
+
+;CHECK: localexec:
+define i32 @localexec() nounwind {
+entry:
+;CHECK: addis [[REG1:[0-9]+]], 13, a@tprel@ha
+;CHECK-NEXT: li [[REG2:[0-9]+]], 42
+;CHECK-NEXT: stw [[REG2]], a@tprel@l([[REG1]])
+ store i32 42, i32* @a, align 4
+ ret i32 0
+}
diff --git a/test/CodeGen/PowerPC/tls-ld-2.ll b/test/CodeGen/PowerPC/tls-ld-2.ll
new file mode 100644
index 0000000000..4954afeb24
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-ld-2.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck %s
+
+; Test peephole optimization for thread-local storage using the
+; local dynamic model.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = hidden thread_local global i32 0, align 4
+
+define signext i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @a, align 4
+ ret i32 %0
+}
+
+; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha
+; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l
+; CHECK-NEXT: bl __tls_get_addr(a@tlsld)
+; CHECK-NEXT: nop
+; CHECK-NEXT: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
+; CHECK-NEXT: lwa {{[0-9]+}}, a@dtprel@l([[REG2]])
diff --git a/test/CodeGen/PowerPC/tls.ll b/test/CodeGen/PowerPC/tls.ll
index 713893bf58..9021f03b9d 100644
--- a/test/CodeGen/PowerPC/tls.ll
+++ b/test/CodeGen/PowerPC/tls.ll
@@ -1,6 +1,6 @@
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-freebsd10.0"
-; RUN: llc < %s -march=ppc64 | FileCheck %s
+; RUN: llc -O0 < %s -march=ppc64 | FileCheck %s
@a = thread_local global i32 0, align 4