diff options
-rw-r--r-- | lib/Target/X86/X86.td | 10 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFormats.td | 15 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 1 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 117 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.cpp | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.h | 4 |
6 files changed, 101 insertions, 48 deletions
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 6b62795a6e..89cc84ffb3 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -74,6 +74,8 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", "HasVectorUAMem", "true", "Allow unaligned memory operands on vector/SIMD instructions">; +def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", + "Enable AES instructions">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -101,11 +103,17 @@ def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; +// "Arrandale" along with corei3 and corei5 def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, - FeatureFastUAMem]>; + FeatureFastUAMem, FeatureAES]>; def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, FeatureFastUAMem]>; +// Westmere is a similar machine to nehalem with some additional features. +// Westmere is the corei3/i5/i7 path from nehalem to sandybridge +def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, + FeatureFastUAMem, FeatureAES]>; // Sandy Bridge does not have FMA +// FIXME: Wikipedia says Sandy Bridge does have FMA (unverified); it should have AES as well.
def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>; def : Proc<"k6", [FeatureMMX]>; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index d25ec26049..cbe4c82562 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -311,6 +311,21 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, Requires<[HasSSE42]>; +// AES Instruction Templates: +// +// AES8I +// FIXME: Verify these, they appear to use the same encoding as the SSE4.2 T8 +// and TA encodings. +class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag>pattern> + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + Requires<[HasAES]>; + +class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + Requires<[HasAES]>; + // X86-64 Instruction templates... // diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 8fccc8a37a..65b7ec023d 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -330,6 +330,7 @@ def OptForSize : Predicate<"OptForSize">; def OptForSpeed : Predicate<"!OptForSize">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; +def HasAES : Predicate<"Subtarget->hasAES()">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index dadc2a663b..08105c5588 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3848,53 +3848,6 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), (PCMPGTQrm VR128:$src1, addr:$src2)>; -// TODO: These should be AES as a feature set. 
-defm AESIMC : SS42I_binop_rm_int<0xDB, "aesimc", - int_x86_aesni_aesimc>; -defm AESENC : SS42I_binop_rm_int<0xDC, "aesenc", - int_x86_aesni_aesenc>; -defm AESENCLAST : SS42I_binop_rm_int<0xDD, "aesenclast", - int_x86_aesni_aesenclast>; -defm AESDEC : SS42I_binop_rm_int<0xDE, "aesdec", - int_x86_aesni_aesdec>; -defm AESDECLAST : SS42I_binop_rm_int<0xDF, "aesdeclast", - int_x86_aesni_aesdeclast>; - -def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, VR128:$src2)), - (AESIMCrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, (memop addr:$src2))), - (AESIMCrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), - (AESENCrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), - (AESENCrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)), - (AESENCLASTrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))), - (AESENCLASTrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)), - (AESDECrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))), - (AESDECrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), - (AESDECLASTrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), - (AESDECLASTrm VR128:$src1, addr:$src2)>; - -def AESKEYGENASSIST128rr : SS42AI<0xDF, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, - OpSize; -def AESKEYGENASSIST128rm : SS42AI<0xDF, MRMSrcMem, (outs VR128:$dst), - (ins i128mem:$src1, i32i8imm:$src2), - "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, - 
(int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), - imm:$src2))]>, - OpSize; - // crc intrinsic instruction // This set of instructions are only rm, the only difference is the size // of r and m. @@ -4056,3 +4009,73 @@ defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>; defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>; defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>; defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>; + +//===----------------------------------------------------------------------===// +// AES-NI Instructions +//===----------------------------------------------------------------------===// + +let Constraints = "$src1 = $dst" in { + multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, + Intrinsic IntId128, bit Commutable = 0> { + def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, + OpSize { + let isCommutable = Commutable; + } + def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR128:$dst, + (IntId128 VR128:$src1, + (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; + } +} + +defm AESIMC : AESI_binop_rm_int<0xDB, "aesimc", + int_x86_aesni_aesimc>; +defm AESENC : AESI_binop_rm_int<0xDC, "aesenc", + int_x86_aesni_aesenc>; +defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast", + int_x86_aesni_aesenclast>; +defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec", + int_x86_aesni_aesdec>; +defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast", + int_x86_aesni_aesdeclast>; + +def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, VR128:$src2)), + (AESIMCrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, (memop addr:$src2))), + (AESIMCrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesenc 
VR128:$src1, VR128:$src2)), + (AESENCrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), + (AESENCrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)), + (AESENCLASTrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))), + (AESENCLASTrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)), + (AESDECrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))), + (AESDECrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), + (AESDECLASTrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), + (AESDECLASTrm VR128:$src1, addr:$src2)>; + +def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, + OpSize; +def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), + (ins i128mem:$src1, i32i8imm:$src2), + "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), + imm:$src2))]>, + OpSize; + diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 8a0cde49ae..09a26858eb 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -259,6 +259,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); HasAVX = ((ECX >> 28) & 0x1); + HasAES = IsIntel && ((ECX >> 25) & 0x1); if (IsIntel || IsAMD) { // Determine if bit test memory instructions are slow. 
@@ -286,6 +287,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , HasX86_64(false) , HasSSE4A(false) , HasAVX(false) + , HasAES(false) , HasFMA3(false) , HasFMA4(false) , IsBTMemSlow(false) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index bf30154625..8a873f04df 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -69,6 +69,9 @@ protected: /// HasAVX - Target has AVX instructions bool HasAVX; + /// HasAES - Target has AES instructions + bool HasAES; + /// HasFMA3 - Target has 3-operand fused multiply-add bool HasFMA3; @@ -148,6 +151,7 @@ public: bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasAVX() const { return HasAVX; } + bool hasAES() const { return HasAES; } bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } bool isBTMemSlow() const { return IsBTMemSlow; } |