On AVX, we can load a v8i32 at a time. The bug happens when a widened load is split into two loads of unequal width.

When we load the v12i32 type, the GenWidenVectorLoads method generates two loads, v8i32 and v4i32, and attempts to use CONCAT_VECTORS to join them. This is invalid because CONCAT_VECTORS requires all of its operands to have the same type. In this fix I concatenate undef values onto the smaller load to widen it to the size of the larger one before the loads are joined. The test "widen_load-2.ll" also exposes this bug on AVX.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147964 91177308-0d34-0410-b5e6-96231b3b80d8
author: Nadav Rotem <nadav.rotem@intel.com> 2012-01-11 20:19:17 +0000
committer: Nadav Rotem <nadav.rotem@intel.com> 2012-01-11 20:19:17 +0000
commit: c8d12eee12bbd0dca3def72d52e410eaf4e61b2d (patch)
tree: 22e697b4cbced6d5b2e38b075b687e3358758fae
parent: 3bf052b76c9fe2842edf69c3f6577b7db58037f8 (diff)
2 files changed, 35 insertions, 5 deletions
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 6aecca994a..41b4221ef9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2329,19 +2329,37 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                           DAG.getIntPtrConstant(Increment));
 
+    SDValue L;
     if (LdWidth < NewVTWidth) {
       // Our current type we are using is too large, find a better size
       NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
       NewVTWidth = NewVT.getSizeInBits();
-    }
-
-    SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+      L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
                                LD->getPointerInfo().getWithOffset(Offset),
                                isVolatile,
                                isNonTemporal, isInvariant,
                                MinAlign(Align, Increment));
-    LdChain.push_back(LdOp.getValue(1));
-    LdOps.push_back(LdOp);
+      LdChain.push_back(L.getValue(1));
+      if (L->getValueType(0).isVector()) {
+        SmallVector<SDValue, 16> Loads;
+        Loads.push_back(L);
+        unsigned size = L->getValueSizeInBits(0);
+        while (size < LdOp->getValueSizeInBits(0)) {
+          Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
+          size += L->getValueSizeInBits(0);
+        }
+        L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0),
+                        &Loads[0], Loads.size());
+      }
+    } else {
+      L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+                      LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+                      isNonTemporal, isInvariant, MinAlign(Align, Increment));
+      LdChain.push_back(L.getValue(1));
+    }
+
+    LdOps.push_back(L);
+
 
     LdWidth -= NewVTWidth;
   }
diff --git a/test/CodeGen/X86/2012-01-11-split-cv.ll b/test/CodeGen/X86/2012-01-11-split-cv.ll
new file mode 100644
index 0000000000..6b90072919
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-11-split-cv.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+;CHECK: add18i16
+define void @add18i16(<18 x i16>* nocapture sret %ret, <18 x i16>* %bp) nounwind {
+;CHECK: vmovups
+  %b = load <18 x i16>* %bp, align 16
+  %x = add <18 x i16> zeroinitializer, %b
+  store <18 x i16> %x, <18 x i16>* %ret, align 16
+;CHECK: ret
+  ret void
+}
+
author	Nadav Rotem <nadav.rotem@intel.com>	2012-01-11 20:19:17 +0000
committer	Nadav Rotem <nadav.rotem@intel.com>	2012-01-11 20:19:17 +0000
commit	c8d12eee12bbd0dca3def72d52e410eaf4e61b2d (patch)
tree	22e697b4cbced6d5b2e38b075b687e3358758fae
parent	3bf052b76c9fe2842edf69c3f6577b7db58037f8 (diff)