aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChe-Liang Chiou <clchiou@gmail.com>2011-04-20 09:28:20 +0000
committerChe-Liang Chiou <clchiou@gmail.com>2011-04-20 09:28:20 +0000
commit1e93249199d25d3a68bdc51d7c0dd682e2e894be (patch)
treea4276401058516e1a2d14f7fb489ac494f5fcc39
parent2a2dbd03a4476d38919c58432d1a07718709720e (diff)
ptx: add floating-point comparison to setp
Patched by Dan Bailey git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129847 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/PTX/PTXInstrInfo.td248
1 files changed, 234 insertions, 14 deletions
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index c075512e8a..25e7ae1b6e 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -323,9 +323,9 @@ multiclass INT3ntnc<string opcstr, SDNode opnode> {
[(set RRegu64:$d, (opnode imm:$a, RRegu64:$b))]>;
}
-multiclass PTX_SETP<RegisterClass RC, string regclsname, Operand immcls,
+multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
CondCode cmp, string cmpstr> {
- // TODO 1. support floating-point 2. support 5-operand format: p|q, a, b, c
+ // TODO support 5-operand format: p|q, a, b, c
def rr
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
@@ -387,6 +387,74 @@ multiclass PTX_SETP<RegisterClass RC, string regclsname, Operand immcls,
[(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
}
+// PTX_SETP_FP - register-register floating-point setp patterns.
+//   RC         - register class of the compared operands $a and $b.
+//   regclsname - type suffix placed in the mnemonic (e.g. "f32").
+//   ucmp, ocmp - condition codes matched by the "_u" and "_o" defs.
+//   cmpstr     - comparison mnemonic; the "_u" defs append "u" to it.
+// Every def writes its result to the predicate $p. Besides the plain
+// comparison, variants combine the setcc with a predicate $c via and/or/xor,
+// with $c either used directly or negated (printed as "!$c").
+multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
+ CondCode ucmp, CondCode ocmp, string cmpstr> {
+ // TODO support 5-operand format: p|q, a, b, c
+
+ // Plain comparison: p = setcc(a, b)
+ def rr_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
+ [(set Preds:$p, (setcc RC:$a, RC:$b, ucmp))]>;
+ def rr_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set Preds:$p, (setcc RC:$a, RC:$b, ocmp))]>;
+
+ // p = setcc(a, b) and c
+ def rr_and_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+ def rr_and_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+
+ // p = setcc(a, b) or c
+ def rr_or_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+ def rr_or_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+
+ // p = setcc(a, b) xor c
+ def rr_xor_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+ def rr_xor_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+
+ // p = setcc(a, b) and (not c)
+ def rr_and_not_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+ def rr_and_not_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+
+ // p = setcc(a, b) or (not c)
+ def rr_or_not_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+ def rr_or_not_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+
+ // p = setcc(a, b) xor (not c)
+ def rr_xor_not_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+ def rr_xor_not_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+}
+
multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
def rr32 : InstPTX<(outs RC:$d),
(ins MEMri32:$a),
@@ -557,18 +625,50 @@ def FCOS64 : InstPTX<(outs RRegf64:$d),
///===- Comparison and Selection Instructions -----------------------------===//
-defm SETPEQu32 : PTX_SETP<RRegu32, "u32", i32imm, SETEQ, "eq">;
-defm SETPNEu32 : PTX_SETP<RRegu32, "u32", i32imm, SETNE, "ne">;
-defm SETPLTu32 : PTX_SETP<RRegu32, "u32", i32imm, SETULT, "lt">;
-defm SETPLEu32 : PTX_SETP<RRegu32, "u32", i32imm, SETULE, "le">;
-defm SETPGTu32 : PTX_SETP<RRegu32, "u32", i32imm, SETUGT, "gt">;
-defm SETPGEu32 : PTX_SETP<RRegu32, "u32", i32imm, SETUGE, "ge">;
-defm SETPEQu64 : PTX_SETP<RRegu64, "u64", i64imm, SETEQ, "eq">;
-defm SETPNEu64 : PTX_SETP<RRegu64, "u64", i64imm, SETNE, "ne">;
-defm SETPLTu64 : PTX_SETP<RRegu64, "u64", i64imm, SETULT, "lt">;
-defm SETPLEu64 : PTX_SETP<RRegu64, "u64", i64imm, SETULE, "le">;
-defm SETPGTu64 : PTX_SETP<RRegu64, "u64", i64imm, SETUGT, "gt">;
-defm SETPGEu64 : PTX_SETP<RRegu64, "u64", i64imm, SETUGE, "ge">;
+// Integer setp instantiations. PTX_SETP_I arguments, in order: register
+// class, type suffix, immediate operand class, condition code, comparison
+// mnemonic. The lt/le/gt/ge entries use the unsigned condition codes
+// (SETULT, SETULE, SETUGT, SETUGE).
+
+// Compare u16
+
+defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ, "eq">;
+defm SETPNEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETNE, "ne">;
+defm SETPLTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULT, "lt">;
+defm SETPLEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULE, "le">;
+defm SETPGTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGT, "gt">;
+defm SETPGEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGE, "ge">;
+
+// Compare u32
+
+defm SETPEQu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETEQ, "eq">;
+defm SETPNEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETNE, "ne">;
+defm SETPLTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULT, "lt">;
+defm SETPLEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULE, "le">;
+defm SETPGTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGT, "gt">;
+defm SETPGEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGE, "ge">;
+
+// Compare u64
+
+defm SETPEQu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETEQ, "eq">;
+defm SETPNEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETNE, "ne">;
+defm SETPLTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULT, "lt">;
+defm SETPLEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULE, "le">;
+defm SETPGTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGT, "gt">;
+defm SETPGEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGE, "ge">;
+
+// Floating-point setp instantiations. PTX_SETP_FP arguments, in order:
+// register class, type suffix, unordered condition code, ordered condition
+// code, comparison mnemonic.
+
+// Compare f32
+
+defm SETPEQf32 : PTX_SETP_FP<RRegf32, "f32", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf32 : PTX_SETP_FP<RRegf32, "f32", SETUNE, SETONE, "ne">;
+defm SETPLTf32 : PTX_SETP_FP<RRegf32, "f32", SETULT, SETOLT, "lt">;
+defm SETPLEf32 : PTX_SETP_FP<RRegf32, "f32", SETULE, SETOLE, "le">;
+defm SETPGTf32 : PTX_SETP_FP<RRegf32, "f32", SETUGT, SETOGT, "gt">;
+defm SETPGEf32 : PTX_SETP_FP<RRegf32, "f32", SETUGE, SETOGE, "ge">;
+
+// Compare f64
+
+defm SETPEQf64 : PTX_SETP_FP<RRegf64, "f64", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf64 : PTX_SETP_FP<RRegf64, "f64", SETUNE, SETONE, "ne">;
+defm SETPLTf64 : PTX_SETP_FP<RRegf64, "f64", SETULT, SETOLT, "lt">;
+defm SETPLEf64 : PTX_SETP_FP<RRegf64, "f64", SETULE, SETOLE, "le">;
+defm SETPGTf64 : PTX_SETP_FP<RRegf64, "f64", SETUGT, SETOGT, "gt">;
+defm SETPGEf64 : PTX_SETP_FP<RRegf64, "f64", SETUGE, SETOGE, "ge">;
///===- Logic and Shift Instructions --------------------------------------===//
@@ -654,18 +754,138 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>;
// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
// TODO: Do something with st.param if/when it is needed.
+// Conversion to pred
+// Integer sources are matched with trunc, floating-point sources with
+// fp_to_uint.
+
+def CVT_pred_u16
+ : InstPTX<(outs Preds:$d), (ins RRegu16:$a), "cvt.pred.u16\t$d, $a",
+ [(set Preds:$d, (trunc RRegu16:$a))]>;
+
def CVT_pred_u32
: InstPTX<(outs Preds:$d), (ins RRegu32:$a), "cvt.pred.u32\t$d, $a",
[(set Preds:$d, (trunc RRegu32:$a))]>;
+def CVT_pred_u64
+ : InstPTX<(outs Preds:$d), (ins RRegu64:$a), "cvt.pred.u64\t$d, $a",
+ [(set Preds:$d, (trunc RRegu64:$a))]>;
+
+def CVT_pred_f32
+ : InstPTX<(outs Preds:$d), (ins RRegf32:$a), "cvt.pred.f32\t$d, $a",
+ [(set Preds:$d, (fp_to_uint RRegf32:$a))]>;
+
+def CVT_pred_f64
+ : InstPTX<(outs Preds:$d), (ins RRegf64:$a), "cvt.pred.f64\t$d, $a",
+ [(set Preds:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to u16
+// pred is matched with zext, the wider integers (u32, u64) with trunc, and
+// floating-point sources with fp_to_uint.
+
+def CVT_u16_pred
+ : InstPTX<(outs RRegu16:$d), (ins Preds:$a), "cvt.u16.pred\t$d, $a",
+ [(set RRegu16:$d, (zext Preds:$a))]>;
+
+def CVT_u16_u32
+ : InstPTX<(outs RRegu16:$d), (ins RRegu32:$a), "cvt.u16.u32\t$d, $a",
+ [(set RRegu16:$d, (trunc RRegu32:$a))]>;
+
+def CVT_u16_u64
+ : InstPTX<(outs RRegu16:$d), (ins RRegu64:$a), "cvt.u16.u64\t$d, $a",
+ [(set RRegu16:$d, (trunc RRegu64:$a))]>;
+
+def CVT_u16_f32
+ : InstPTX<(outs RRegu16:$d), (ins RRegf32:$a), "cvt.u16.f32\t$d, $a",
+ [(set RRegu16:$d, (fp_to_uint RRegf32:$a))]>;
+
+def CVT_u16_f64
+ : InstPTX<(outs RRegu16:$d), (ins RRegf64:$a), "cvt.u16.f64\t$d, $a",
+ [(set RRegu16:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to u32
+// pred and u16 are matched with zext, u64 with trunc, and floating-point
+// sources with fp_to_uint.
+
def CVT_u32_pred
: InstPTX<(outs RRegu32:$d), (ins Preds:$a), "cvt.u32.pred\t$d, $a",
[(set RRegu32:$d, (zext Preds:$a))]>;
+def CVT_u32_u16
+ : InstPTX<(outs RRegu32:$d), (ins RRegu16:$a), "cvt.u32.u16\t$d, $a",
+ [(set RRegu32:$d, (zext RRegu16:$a))]>;
+
+def CVT_u32_u64
+ : InstPTX<(outs RRegu32:$d), (ins RRegu64:$a), "cvt.u32.u64\t$d, $a",
+ [(set RRegu32:$d, (trunc RRegu64:$a))]>;
+
+def CVT_u32_f32
+ : InstPTX<(outs RRegu32:$d), (ins RRegf32:$a), "cvt.u32.f32\t$d, $a",
+ [(set RRegu32:$d, (fp_to_uint RRegf32:$a))]>;
+
+def CVT_u32_f64
+ : InstPTX<(outs RRegu32:$d), (ins RRegf64:$a), "cvt.u32.f64\t$d, $a",
+ [(set RRegu32:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to u64
+// pred and the narrower integers (u16, u32) are matched with zext;
+// floating-point sources are matched with fp_to_uint.
+
+def CVT_u64_pred
+ : InstPTX<(outs RRegu64:$d), (ins Preds:$a), "cvt.u64.pred\t$d, $a",
+ [(set RRegu64:$d, (zext Preds:$a))]>;
+
+def CVT_u64_u16
+ : InstPTX<(outs RRegu64:$d), (ins RRegu16:$a), "cvt.u64.u16\t$d, $a",
+ [(set RRegu64:$d, (zext RRegu16:$a))]>;
+
def CVT_u64_u32
: InstPTX<(outs RRegu64:$d), (ins RRegu32:$a), "cvt.u64.u32\t$d, $a",
[(set RRegu64:$d, (zext RRegu32:$a))]>;
+def CVT_u64_f32
+ : InstPTX<(outs RRegu64:$d), (ins RRegf32:$a), "cvt.u64.f32\t$d, $a",
+ [(set RRegu64:$d, (fp_to_uint RRegf32:$a))]>;
+
+// f64 -> u64 conversion. The source-type suffix in the mnemonic must be
+// ".f64" so that it agrees with the RRegf64 input operand.
+def CVT_u64_f64
+ : InstPTX<(outs RRegu64:$d), (ins RRegf64:$a), "cvt.u64.f64\t$d, $a",
+ [(set RRegu64:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to f32
+// pred and integer sources are matched with uint_to_fp; the f64 source is
+// matched with fround.
+
+def CVT_f32_pred
+ : InstPTX<(outs RRegf32:$d), (ins Preds:$a), "cvt.f32.pred\t$d, $a",
+ [(set RRegf32:$d, (uint_to_fp Preds:$a))]>;
+
+def CVT_f32_u16
+ : InstPTX<(outs RRegf32:$d), (ins RRegu16:$a), "cvt.f32.u16\t$d, $a",
+ [(set RRegf32:$d, (uint_to_fp RRegu16:$a))]>;
+
+def CVT_f32_u32
+ : InstPTX<(outs RRegf32:$d), (ins RRegu32:$a), "cvt.f32.u32\t$d, $a",
+ [(set RRegf32:$d, (uint_to_fp RRegu32:$a))]>;
+
+def CVT_f32_u64
+ : InstPTX<(outs RRegf32:$d), (ins RRegu64:$a), "cvt.f32.u64\t$d, $a",
+ [(set RRegf32:$d, (uint_to_fp RRegu64:$a))]>;
+
+def CVT_f32_f64
+ : InstPTX<(outs RRegf32:$d), (ins RRegf64:$a), "cvt.f32.f64\t$d, $a",
+ [(set RRegf32:$d, (fround RRegf64:$a))]>;
+
+// Conversion to f64
+// pred and integer sources are matched with uint_to_fp; the f32 source is
+// matched with fextend.
+
+def CVT_f64_pred
+ : InstPTX<(outs RRegf64:$d), (ins Preds:$a), "cvt.f64.pred\t$d, $a",
+ [(set RRegf64:$d, (uint_to_fp Preds:$a))]>;
+
+def CVT_f64_u16
+ : InstPTX<(outs RRegf64:$d), (ins RRegu16:$a), "cvt.f64.u16\t$d, $a",
+ [(set RRegf64:$d, (uint_to_fp RRegu16:$a))]>;
+
+def CVT_f64_u32
+ : InstPTX<(outs RRegf64:$d), (ins RRegu32:$a), "cvt.f64.u32\t$d, $a",
+ [(set RRegf64:$d, (uint_to_fp RRegu32:$a))]>;
+
+def CVT_f64_u64
+ : InstPTX<(outs RRegf64:$d), (ins RRegu64:$a), "cvt.f64.u64\t$d, $a",
+ [(set RRegf64:$d, (uint_to_fp RRegu64:$a))]>;
+
+def CVT_f64_f32
+ : InstPTX<(outs RRegf64:$d), (ins RRegf32:$a), "cvt.f64.f32\t$d, $a",
+ [(set RRegf64:$d, (fextend RRegf32:$a))]>;
+
///===- Control Flow Instructions -----------------------------------------===//
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {