diff options
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/X86/README-SSE.txt | 54 |
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 24 |
2 files changed, 5 insertions, 73 deletions
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 1a5d9045b0..34b949a601 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -428,60 +428,6 @@ entry: //===---------------------------------------------------------------------===// -Consider (PR2108): - -#include <xmmintrin.h> -__m128i doload64(unsigned long long x) { return _mm_loadl_epi64(&x);} -__m128i doload64_2(unsigned long long *x) { return _mm_loadl_epi64(x);} - -These are very similar routines, but we generate significantly worse code for -the first one on x86-32: - -_doload64: - subl $12, %esp - movl 20(%esp), %eax - movl %eax, 4(%esp) - movl 16(%esp), %eax - movl %eax, (%esp) - movsd (%esp), %xmm0 - addl $12, %esp - ret -_doload64_2: - movl 4(%esp), %eax - movsd (%eax), %xmm0 - ret - -The problem is that the argument lowering logic splits the i64 argument into -2x i32 loads early, the f64 insert doesn't match. Here's a reduced testcase: - -define fastcc double @doload64(i64 %x) nounwind { -entry: - %tmp717 = bitcast i64 %x to double ; <double> [#uses=1] - ret double %tmp717 -} - -compiles to: - -_doload64: - subl $12, %esp - movl 20(%esp), %eax - movl %eax, 4(%esp) - movl 16(%esp), %eax - movl %eax, (%esp) - movsd (%esp), %xmm0 - addl $12, %esp - ret - -instead of movsd from the stack. This is actually not too bad to implement. The -best way to do this is to implement a dag combine that turns -bitconvert(build_pair(load a, load b)) into one load of the right type. The -only trick to this is writing the predicate that determines that a/b are at the -right offset from each other. For the enterprising hacker, InferAlignment is a -helpful place to start poking if interested. 
- - -//===---------------------------------------------------------------------===// - __m128d test1( __m128d A, __m128d B) { return _mm_shuffle_pd(A, B, 0x3); } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5d50e36535..806b626456 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6285,13 +6285,7 @@ static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, LD->getAlignment()); } -static SDNode *getBuildPairElt(SDNode *N, unsigned i) { - SDOperand Elt = N->getOperand(i); - if (Elt.getOpcode() != ISD::MERGE_VALUES) - return Elt.Val; - return Elt.getOperand(Elt.ResNo).Val; -} - +/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd. static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget, const TargetLowering &TLI) { @@ -6312,25 +6306,17 @@ static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG, return SDOperand(); // Value must be a load. - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDNode *Base = N->getOperand(0).Val; if (!isa<LoadSDNode>(Base)) { - if (Base->getOpcode() == ISD::BIT_CONVERT) - Base = Base->getOperand(0).Val; - if (Base->getOpcode() != ISD::BUILD_PAIR) + if (Base->getOpcode() != ISD::BIT_CONVERT) return SDOperand(); - SDNode *Pair = Base; - Base = getBuildPairElt(Pair, 0); - if (!ISD::isNON_EXTLoad(Base)) - return SDOperand(); - SDNode *NextLD = getBuildPairElt(Pair, 1); - if (!ISD::isNON_EXTLoad(NextLD) || - !TLI.isConsecutiveLoad(NextLD, Base, 4/*32 bits*/, 1, MFI)) + Base = Base->getOperand(0).Val; + if (!isa<LoadSDNode>(Base)) return SDOperand(); } - LoadSDNode *LD = cast<LoadSDNode>(Base); // Transform it into VZEXT_LOAD addr. + LoadSDNode *LD = cast<LoadSDNode>(Base); return DAG.getNode(X86ISD::VZEXT_LOAD, VT, LD->getChain(), LD->getBasePtr()); } |