aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/CodeGen/CGCall.cpp10
-rw-r--r--test/CodeGenOpenCL/addr-space-struct-arg.cl23
2 files changed, 31 insertions, 2 deletions
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index b6ec67d83c..fa5ce7fdca 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -2199,17 +2199,23 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
checkArgMatches(AI, IRArgNo, IRFuncTy);
} else {
// We want to avoid creating an unnecessary temporary+copy here;
- // however, we need one in two cases:
+ // however, we need one in three cases:
// 1. If the argument is not byval, and we are required to copy the
// source. (This case doesn't occur on any common architecture.)
// 2. If the argument is byval, RV is not sufficiently aligned, and
// we cannot force it to be sufficiently aligned.
+ // 3. If the argument is byval, but RV is located in an address space
+ // different than that of the argument (0).
llvm::Value *Addr = RV.getAggregateAddr();
unsigned Align = ArgInfo.getIndirectAlign();
const llvm::DataLayout *TD = &CGM.getDataLayout();
+ const unsigned RVAddrSpace = Addr->getType()->getPointerAddressSpace();
+ const unsigned ArgAddrSpace = (IRArgNo < IRFuncTy->getNumParams() ?
+ IRFuncTy->getParamType(IRArgNo)->getPointerAddressSpace() : 0);
if ((!ArgInfo.getIndirectByVal() && I->NeedsCopy) ||
(ArgInfo.getIndirectByVal() && TypeAlign.getQuantity() < Align &&
- llvm::getOrEnforceKnownAlignment(Addr, Align, TD) < Align)) {
+ llvm::getOrEnforceKnownAlignment(Addr, Align, TD) < Align) ||
+ (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
// Create an aligned temporary, and copy to it.
llvm::AllocaInst *AI = CreateMemTemp(I->Ty);
if (Align > AI->getAlignment())
diff --git a/test/CodeGenOpenCL/addr-space-struct-arg.cl b/test/CodeGenOpenCL/addr-space-struct-arg.cl
new file mode 100644
index 0000000000..f04923d39c
--- /dev/null
+++ b/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck %s
+
+typedef struct {
+ int cells[9];
+} Mat3X3;
+
+typedef struct {
+ int cells[16];
+} Mat4X4;
+
+Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
+ Mat4X4 out;
+ return out;
+}
+
+kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
+ out[0] = foo(in[1]);
+}
+
+// Expect two mem copies: one for the argument "in", and one for
+// the return value.
+// CHECK: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
+// CHECK: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*