diff options
author | Che-Liang Chiou <clchiou@gmail.com> | 2011-04-20 09:28:20 +0000 |
---|---|---|
committer | Che-Liang Chiou <clchiou@gmail.com> | 2011-04-20 09:28:20 +0000 |
commit | 1e93249199d25d3a68bdc51d7c0dd682e2e894be (patch) | |
tree | a4276401058516e1a2d14f7fb489ac494f5fcc39 | |
parent | 2a2dbd03a4476d38919c58432d1a07718709720e (diff) |
ptx: add floating-point comparison to setp
Patched by Dan Bailey
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129847 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/PTX/PTXInstrInfo.td | 248 |
1 files changed, 234 insertions, 14 deletions
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index c075512e8a..25e7ae1b6e 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -323,9 +323,9 @@ multiclass INT3ntnc<string opcstr, SDNode opnode> { [(set RRegu64:$d, (opnode imm:$a, RRegu64:$b))]>; } -multiclass PTX_SETP<RegisterClass RC, string regclsname, Operand immcls, +multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls, CondCode cmp, string cmpstr> { - // TODO 1. support floating-point 2. support 5-operand format: p|q, a, b, c + // TODO support 5-operand format: p|q, a, b, c def rr : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), @@ -387,6 +387,74 @@ multiclass PTX_SETP<RegisterClass RC, string regclsname, Operand immcls, [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>; } +multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, + CondCode ucmp, CondCode ocmp, string cmpstr> { + // TODO support 5-operand format: p|q, a, b, c + + def rr_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"), + [(set Preds:$p, (setcc RC:$a, RC:$b, ucmp))]>; + def rr_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), + [(set Preds:$p, (setcc RC:$a, RC:$b, ocmp))]>; + + def rr_and_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + def rr_and_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + + def rr_or_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + def 
rr_or_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + + def rr_xor_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + def rr_xor_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + + def rr_and_not_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + def rr_and_not_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; + + def rr_or_not_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + def rr_or_not_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; + + def rr_xor_not_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + def rr_xor_not_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; +} + multiclass 
PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> { def rr32 : InstPTX<(outs RC:$d), (ins MEMri32:$a), @@ -557,18 +625,50 @@ def FCOS64 : InstPTX<(outs RRegf64:$d), ///===- Comparison and Selection Instructions -----------------------------===// -defm SETPEQu32 : PTX_SETP<RRegu32, "u32", i32imm, SETEQ, "eq">; -defm SETPNEu32 : PTX_SETP<RRegu32, "u32", i32imm, SETNE, "ne">; -defm SETPLTu32 : PTX_SETP<RRegu32, "u32", i32imm, SETULT, "lt">; -defm SETPLEu32 : PTX_SETP<RRegu32, "u32", i32imm, SETULE, "le">; -defm SETPGTu32 : PTX_SETP<RRegu32, "u32", i32imm, SETUGT, "gt">; -defm SETPGEu32 : PTX_SETP<RRegu32, "u32", i32imm, SETUGE, "ge">; -defm SETPEQu64 : PTX_SETP<RRegu64, "u64", i64imm, SETEQ, "eq">; -defm SETPNEu64 : PTX_SETP<RRegu64, "u64", i64imm, SETNE, "ne">; -defm SETPLTu64 : PTX_SETP<RRegu64, "u64", i64imm, SETULT, "lt">; -defm SETPLEu64 : PTX_SETP<RRegu64, "u64", i64imm, SETULE, "le">; -defm SETPGTu64 : PTX_SETP<RRegu64, "u64", i64imm, SETUGT, "gt">; -defm SETPGEu64 : PTX_SETP<RRegu64, "u64", i64imm, SETUGE, "ge">; +// Compare u16 + +defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ, "eq">; +defm SETPNEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETNE, "ne">; +defm SETPLTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULT, "lt">; +defm SETPLEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULE, "le">; +defm SETPGTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGT, "gt">; +defm SETPGEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGE, "ge">; + +// Compare u32 + +defm SETPEQu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETEQ, "eq">; +defm SETPNEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETNE, "ne">; +defm SETPLTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULT, "lt">; +defm SETPLEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULE, "le">; +defm SETPGTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGT, "gt">; +defm SETPGEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGE, "ge">; + +// Compare u64 + +defm SETPEQu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETEQ, "eq">; +defm 
SETPNEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETNE, "ne">; +defm SETPLTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULT, "lt">; +defm SETPLEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULE, "le">; +defm SETPGTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGT, "gt">; +defm SETPGEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGE, "ge">; + +// Compare f32 + +defm SETPEQf32 : PTX_SETP_FP<RRegf32, "f32", SETUEQ, SETOEQ, "eq">; +defm SETPNEf32 : PTX_SETP_FP<RRegf32, "f32", SETUNE, SETONE, "ne">; +defm SETPLTf32 : PTX_SETP_FP<RRegf32, "f32", SETULT, SETOLT, "lt">; +defm SETPLEf32 : PTX_SETP_FP<RRegf32, "f32", SETULE, SETOLE, "le">; +defm SETPGTf32 : PTX_SETP_FP<RRegf32, "f32", SETUGT, SETOGT, "gt">; +defm SETPGEf32 : PTX_SETP_FP<RRegf32, "f32", SETUGE, SETOGE, "ge">; + +// Compare f64 + +defm SETPEQf64 : PTX_SETP_FP<RRegf64, "f64", SETUEQ, SETOEQ, "eq">; +defm SETPNEf64 : PTX_SETP_FP<RRegf64, "f64", SETUNE, SETONE, "ne">; +defm SETPLTf64 : PTX_SETP_FP<RRegf64, "f64", SETULT, SETOLT, "lt">; +defm SETPLEf64 : PTX_SETP_FP<RRegf64, "f64", SETULE, SETOLE, "le">; +defm SETPGTf64 : PTX_SETP_FP<RRegf64, "f64", SETUGT, SETOGT, "gt">; +defm SETPGEf64 : PTX_SETP_FP<RRegf64, "f64", SETUGE, SETOGE, "ge">; ///===- Logic and Shift Instructions --------------------------------------===// @@ -654,18 +754,138 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>; // defm LDp : PTX_LD_ALL<"ld.param", load_parameter>; // TODO: Do something with st.param if/when it is needed. 
+// Conversion to pred + +def CVT_pred_u16 + : InstPTX<(outs Preds:$d), (ins RRegu16:$a), "cvt.pred.u16\t$d, $a", + [(set Preds:$d, (trunc RRegu16:$a))]>; + def CVT_pred_u32 : InstPTX<(outs Preds:$d), (ins RRegu32:$a), "cvt.pred.u32\t$d, $a", [(set Preds:$d, (trunc RRegu32:$a))]>; +def CVT_pred_u64 + : InstPTX<(outs Preds:$d), (ins RRegu64:$a), "cvt.pred.u64\t$d, $a", + [(set Preds:$d, (trunc RRegu64:$a))]>; + +def CVT_pred_f32 + : InstPTX<(outs Preds:$d), (ins RRegf32:$a), "cvt.pred.f32\t$d, $a", + [(set Preds:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_pred_f64 + : InstPTX<(outs Preds:$d), (ins RRegf64:$a), "cvt.pred.f64\t$d, $a", + [(set Preds:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to u16 + +def CVT_u16_pred + : InstPTX<(outs RRegu16:$d), (ins Preds:$a), "cvt.u16.pred\t$d, $a", + [(set RRegu16:$d, (zext Preds:$a))]>; + +def CVT_u16_u32 + : InstPTX<(outs RRegu16:$d), (ins RRegu32:$a), "cvt.u16.u32\t$d, $a", + [(set RRegu16:$d, (trunc RRegu32:$a))]>; + +def CVT_u16_u64 + : InstPTX<(outs RRegu16:$d), (ins RRegu64:$a), "cvt.u16.u64\t$d, $a", + [(set RRegu16:$d, (trunc RRegu64:$a))]>; + +def CVT_u16_f32 + : InstPTX<(outs RRegu16:$d), (ins RRegf32:$a), "cvt.u16.f32\t$d, $a", + [(set RRegu16:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_u16_f64 + : InstPTX<(outs RRegu16:$d), (ins RRegf64:$a), "cvt.u16.f64\t$d, $a", + [(set RRegu16:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to u32 + def CVT_u32_pred : InstPTX<(outs RRegu32:$d), (ins Preds:$a), "cvt.u32.pred\t$d, $a", [(set RRegu32:$d, (zext Preds:$a))]>; +def CVT_u32_u16 + : InstPTX<(outs RRegu32:$d), (ins RRegu16:$a), "cvt.u32.u16\t$d, $a", + [(set RRegu32:$d, (zext RRegu16:$a))]>; + +def CVT_u32_u64 + : InstPTX<(outs RRegu32:$d), (ins RRegu64:$a), "cvt.u32.u64\t$d, $a", + [(set RRegu32:$d, (trunc RRegu64:$a))]>; + +def CVT_u32_f32 + : InstPTX<(outs RRegu32:$d), (ins RRegf32:$a), "cvt.u32.f32\t$d, $a", + [(set RRegu32:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_u32_f64 + : InstPTX<(outs RRegu32:$d), (ins 
RRegf64:$a), "cvt.u32.f64\t$d, $a", + [(set RRegu32:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to u64 + +def CVT_u64_pred + : InstPTX<(outs RRegu64:$d), (ins Preds:$a), "cvt.u64.pred\t$d, $a", + [(set RRegu64:$d, (zext Preds:$a))]>; + +def CVT_u64_u16 + : InstPTX<(outs RRegu64:$d), (ins RRegu16:$a), "cvt.u64.u16\t$d, $a", + [(set RRegu64:$d, (zext RRegu16:$a))]>; + def CVT_u64_u32 : InstPTX<(outs RRegu64:$d), (ins RRegu32:$a), "cvt.u64.u32\t$d, $a", [(set RRegu64:$d, (zext RRegu32:$a))]>; +def CVT_u64_f32 + : InstPTX<(outs RRegu64:$d), (ins RRegf32:$a), "cvt.u64.f32\t$d, $a", + [(set RRegu64:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_u64_f64 + : InstPTX<(outs RRegu64:$d), (ins RRegf64:$a), "cvt.u64.f64\t$d, $a", + [(set RRegu64:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to f32 + +def CVT_f32_pred + : InstPTX<(outs RRegf32:$d), (ins Preds:$a), "cvt.f32.pred\t$d, $a", + [(set RRegf32:$d, (uint_to_fp Preds:$a))]>; + +def CVT_f32_u16 + : InstPTX<(outs RRegf32:$d), (ins RRegu16:$a), "cvt.f32.u16\t$d, $a", + [(set RRegf32:$d, (uint_to_fp RRegu16:$a))]>; + +def CVT_f32_u32 + : InstPTX<(outs RRegf32:$d), (ins RRegu32:$a), "cvt.f32.u32\t$d, $a", + [(set RRegf32:$d, (uint_to_fp RRegu32:$a))]>; + +def CVT_f32_u64 + : InstPTX<(outs RRegf32:$d), (ins RRegu64:$a), "cvt.f32.u64\t$d, $a", + [(set RRegf32:$d, (uint_to_fp RRegu64:$a))]>; + +def CVT_f32_f64 + : InstPTX<(outs RRegf32:$d), (ins RRegf64:$a), "cvt.f32.f64\t$d, $a", + [(set RRegf32:$d, (fround RRegf64:$a))]>; + +// Conversion to f64 + +def CVT_f64_pred + : InstPTX<(outs RRegf64:$d), (ins Preds:$a), "cvt.f64.pred\t$d, $a", + [(set RRegf64:$d, (uint_to_fp Preds:$a))]>; + +def CVT_f64_u16 + : InstPTX<(outs RRegf64:$d), (ins RRegu16:$a), "cvt.f64.u16\t$d, $a", + [(set RRegf64:$d, (uint_to_fp RRegu16:$a))]>; + +def CVT_f64_u32 + : InstPTX<(outs RRegf64:$d), (ins RRegu32:$a), "cvt.f64.u32\t$d, $a", + [(set RRegf64:$d, (uint_to_fp RRegu32:$a))]>; + +def CVT_f64_u64 + : InstPTX<(outs RRegf64:$d), (ins RRegu64:$a), 
"cvt.f64.u64\t$d, $a", + [(set RRegf64:$d, (uint_to_fp RRegu64:$a))]>; + +def CVT_f64_f32 + : InstPTX<(outs RRegf64:$d), (ins RRegf32:$a), "cvt.f64.f32\t$d, $a", + [(set RRegf64:$d, (fextend RRegf32:$a))]>; + ///===- Control Flow Instructions -----------------------------------------===// let isBranch = 1, isTerminator = 1, isBarrier = 1 in { |