about summary refs log tree commit diff
diff options
context:
space:
mode:
author Nadav Rotem <nadav.rotem@intel.com> 2011-02-20 12:37:50 +0000
committer Nadav Rotem <nadav.rotem@intel.com> 2011-02-20 12:37:50 +0000
commit ed9b934f65d82324506f03e2db2834682c7a8914 (patch)
tree f9eb5310a278d45abf817090ac930efe6f9c42f2
parent 1a4021a2be4a59e9f9010776cb6f72107241aeb5 (diff)
Fix PR9267; add vector zext support.
The DAGCombiner folds the zext into complex load instructions. This patch prevents this optimization on vectors since none of the supported targets knows how to perform load+vector_zext in one instruction.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126080 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- docs/LangRef.html 10
-rw-r--r-- lib/CodeGen/SelectionDAG/DAGCombiner.cpp 4
-rw-r--r-- test/CodeGen/X86/vec_zext.ll 69
3 files changed, 78 insertions, 5 deletions
diff --git a/docs/LangRef.html b/docs/LangRef.html
index fa6a078465..81fa8cc6e8 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -4614,10 +4614,11 @@ entry:
<h5>Arguments:</h5>
-<p>The '<tt>zext</tt>' instruction takes a value to cast, which must be of
- <a href="#t_integer">integer</a> type, and a type to cast it to, which must
- also be of <a href="#t_integer">integer</a> type. The bit size of the
- <tt>value</tt> must be smaller than the bit size of the destination type,
+<p>The '<tt>zext</tt>' instruction takes a value to cast, and a type to cast it to.
+ Both types must be of <a href="#t_integer">integer</a> types, or vectors
+ of the same number of integers.
+ The bit size of the <tt>value</tt> must be smaller than
+ the bit size of the destination type,
<tt>ty2</tt>.</p>
<h5>Semantics:</h5>
@@ -4630,6 +4631,7 @@ entry:
<pre>
%X = zext i32 257 to i64 <i>; yields i64:257</i>
%Y = zext i1 true to i32 <i>; yields i32:1</i>
+ %Z = zext &lt;2 x i16&gt; &lt;i16 8, i16 7&gt; to &lt;2 x i32&gt; <i>; yields &lt;i32 8, i32 7&gt;</i>
</pre>
</div>
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 90356021f6..c5f0324ac4 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3887,7 +3887,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
// fold (zext (load x)) -> (zext (truncate (zextload x)))
- if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ // None of the supported targets knows how to perform load and vector_zext
+ // in one instruction. We only perform this transformation on scalar zext.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
bool DoXform = true;
diff --git a/test/CodeGen/X86/vec_zext.ll b/test/CodeGen/X86/vec_zext.ll
new file mode 100644
index 0000000000..615a50b7af
--- /dev/null
+++ b/test/CodeGen/X86/vec_zext.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -march=x86-64
+; PR 9267
+
+define<4 x i32> @func_16_32() {
+ %F = load <4 x i16>* undef
+ %G = zext <4 x i16> %F to <4 x i32>
+ %H = load <4 x i16>* undef
+ %Y = zext <4 x i16> %H to <4 x i32>
+ %T = add <4 x i32> %Y, %G
+ store <4 x i32>%T , <4 x i32>* undef
+ ret <4 x i32> %T
+}
+
+define<4 x i64> @func_16_64() {
+ %F = load <4 x i16>* undef
+ %G = zext <4 x i16> %F to <4 x i64>
+ %H = load <4 x i16>* undef
+ %Y = zext <4 x i16> %H to <4 x i64>
+ %T = xor <4 x i64> %Y, %G
+ store <4 x i64>%T , <4 x i64>* undef
+ ret <4 x i64> %T
+}
+
+define<4 x i64> @func_32_64() {
+ %F = load <4 x i32>* undef
+ %G = zext <4 x i32> %F to <4 x i64>
+ %H = load <4 x i32>* undef
+ %Y = zext <4 x i32> %H to <4 x i64>
+ %T = or <4 x i64> %Y, %G
+ ret <4 x i64> %T
+}
+
+define<4 x i16> @func_8_16() {
+ %F = load <4 x i8>* undef
+ %G = zext <4 x i8> %F to <4 x i16>
+ %H = load <4 x i8>* undef
+ %Y = zext <4 x i8> %H to <4 x i16>
+ %T = add <4 x i16> %Y, %G
+ ret <4 x i16> %T
+}
+
+define<4 x i32> @func_8_32() {
+ %F = load <4 x i8>* undef
+ %G = zext <4 x i8> %F to <4 x i32>
+ %H = load <4 x i8>* undef
+ %Y = zext <4 x i8> %H to <4 x i32>
+ %T = sub <4 x i32> %Y, %G
+ ret <4 x i32> %T
+}
+
+define<4 x i64> @func_8_64() {
+ %F = load <4 x i8>* undef
+ %G = zext <4 x i8> %F to <4 x i64>
+ %H = load <4 x i8>* undef
+ %Y = zext <4 x i8> %H to <4 x i64>
+ %T = add <4 x i64> %Y, %G
+ ret <4 x i64> %T
+}
+
+define<4 x i32> @const_16_32() {
+ %G = zext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i32>
+ ret <4 x i32> %G
+}
+
+define<4 x i64> @const_16_64() {
+ %G = zext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i64>
+ ret <4 x i64> %G
+}
+