about summary refs log tree commit diff
path: root/lib/CodeGen
diff options
context:
space:
mode:
author Tanya Lattner <tonic@nondot.org> 2012-08-16 00:10:13 +0000
committer Tanya Lattner <tonic@nondot.org> 2012-08-16 00:10:13 +0000
commit c58dcdc8facb646d88675bb6fbcb5c787166c4be (patch)
tree 5c00fdcc75e64d345b0d2fa8eff5d431436c7176 /lib/CodeGen
parent 058ab170a5b8597f32c3f343a1e9de2cd7b05848 (diff)
Convert loads and stores of vec3 to vec4 to achieve better code generation. Add test case.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@162002 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- lib/CodeGen/CGExpr.cpp | 80
1 file changed, 80 insertions(+), 0 deletions(-)
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index ecee7b4931..1fe4c18bad 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -938,6 +938,50 @@ llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) {
llvm::Value *CodeGenFunction::EmitLoadOfScalar(llvm::Value *Addr, bool Volatile,
unsigned Alignment, QualType Ty,
llvm::MDNode *TBAAInfo) {
+
+ // For better performance, handle vector loads differently.
+ if (Ty->isVectorType()) {
+ llvm::Value *V;
+ const llvm::Type *EltTy =
+ cast<llvm::PointerType>(Addr->getType())->getElementType();
+
+ const llvm::VectorType *VTy = cast<llvm::VectorType>(EltTy);
+
+ // Handle vectors of size 3, like size 4 for better performance.
+ if (VTy->getNumElements() == 3) {
+
+ // Bitcast to vec4 type.
+ llvm::VectorType *vec4Ty = llvm::VectorType::get(VTy->getElementType(),
+ 4);
+ llvm::PointerType *ptVec4Ty =
+ llvm::PointerType::get(vec4Ty,
+ (cast<llvm::PointerType>(
+ Addr->getType()))->getAddressSpace());
+ llvm::Value *Cast = Builder.CreateBitCast(Addr, ptVec4Ty,
+ "castToVec4");
+ // Now load value.
+ llvm::Value *LoadVal = Builder.CreateLoad(Cast, Volatile, "loadVec4");
+
+ // Shuffle vector to get vec3.
+ llvm::SmallVector<llvm::Constant*, 3> Mask;
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(getLLVMContext()),
+ 0));
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(getLLVMContext()),
+ 1));
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(getLLVMContext()),
+ 2));
+
+ llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
+ V = Builder.CreateShuffleVector(LoadVal,
+ llvm::UndefValue::get(vec4Ty),
+ MaskV, "extractVec");
+ return EmitFromMemory(V, Ty);
+ }
+ }
+
llvm::LoadInst *Load = Builder.CreateLoad(Addr);
if (Volatile)
Load->setVolatile(true);
@@ -984,6 +1028,42 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, llvm::Value *Addr,
QualType Ty,
llvm::MDNode *TBAAInfo,
bool isInit) {
+
+ // Handle vectors differently to get better performance.
+ if (Ty->isVectorType()) {
+ llvm::Type *SrcTy = Value->getType();
+ llvm::VectorType *VecTy = cast<llvm::VectorType>(SrcTy);
+ // Handle vec3 special.
+ if (VecTy->getNumElements() == 3) {
+ llvm::LLVMContext &VMContext = getLLVMContext();
+
+ // Our source is a vec3, do a shuffle vector to make it a vec4.
+ llvm::SmallVector<llvm::Constant*, 4> Mask;
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(VMContext),
+ 0));
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(VMContext),
+ 1));
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(VMContext),
+ 2));
+ Mask.push_back(llvm::UndefValue::get(llvm::Type::getInt32Ty(VMContext)));
+
+ llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
+ Value = Builder.CreateShuffleVector(Value,
+ llvm::UndefValue::get(VecTy),
+ MaskV, "extractVec");
+ SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
+ }
+ llvm::PointerType *DstPtr = cast<llvm::PointerType>(Addr->getType());
+ if (DstPtr->getElementType() != SrcTy) {
+ llvm::Type *MemTy =
+ llvm::PointerType::get(SrcTy, DstPtr->getAddressSpace());
+ Addr = Builder.CreateBitCast(Addr, MemTy, "storetmp");
+ }
+ }
+
Value = EmitToMemory(Value, Ty);
llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile);