ldc-developers · kinke · Jul 3, 2020 · May 9, 2020 · May 9, 2020 · May 9, 2020
diff --git a/gen/dcompute/target.cpp b/gen/dcompute/target.cpp
@@ -19,9 +19,16 @@
 #include "gen/dcompute/target.h"
 #include "gen/llvmhelpers.h"
 #include "gen/runtime.h"
-#include <string>
+#include "ir/irtypestruct.h"
+
 
 void DComputeTarget::doCodeGen(Module *m) {
+  // Reset any generated type info for dcompute types.
+  // The ll types get generated when the host code gets
+  // gen'd which means the address space info is not
+  // properly set.
+  IrTypeStruct::resetDComputeTypes();
+
   // process module members
   for (unsigned k = 0; k < m->members->length; k++) {
     Dsymbol *dsym = (*m->members)[k];

diff --git a/ir/irtypestruct.cpp b/ir/irtypestruct.cpp
@@ -30,6 +30,21 @@ IrTypeStruct::IrTypeStruct(StructDeclaration *sd)
 
 //////////////////////////////////////////////////////////////////////////////
 
+std::vector<IrTypeStruct*> IrTypeStruct::dcomputeTypes;
+
+/// Resets special DCompute structs so they get re-created
+/// with the proper address space when generating device code.
+void IrTypeStruct::resetDComputeTypes() {
+  for (auto *irTypeStruct : dcomputeTypes) {
+    auto *const dtype = irTypeStruct->dtype; // fetch first: the delete below
+    delete dtype->ctype;                     // may free irTypeStruct itself
+    dtype->ctype = nullptr;
+  }
+  dcomputeTypes.clear();
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
 IrTypeStruct *IrTypeStruct::get(StructDeclaration *sd) {
   auto t = new IrTypeStruct(sd);
   sd->type->ctype = t;
@@ -45,9 +60,14 @@ IrTypeStruct *IrTypeStruct::get(StructDeclaration *sd) {
 
   t->packed = isPacked(sd);
 
+  if(isFromLDC_DCompute(sd)) {
+    dcomputeTypes.push_back(t);
+  }
+
   // For ldc.dcomptetypes.Pointer!(uint n,T),
   // emit { T addrspace(gIR->dcomputetarget->mapping[n])* }
-    llvm::Optional<DcomputePointer> p;
+  llvm::Optional<DcomputePointer> p;
+
   if (gIR->dcomputetarget && (p = toDcomputePointer(sd))) {
 
     // Translate the virtual dcompute address space into the real one for

diff --git a/ir/irtypestruct.h b/ir/irtypestruct.h
@@ -23,10 +23,16 @@ class IrTypeStruct : public IrTypeAggr {
   ///
   IrTypeStruct *isStruct() override { return this; }
 
+  /// Deletes the IR types recorded in dcomputeTypes so they get re-created.
+  static void resetDComputeTypes();
+
 protected:
   ///
   explicit IrTypeStruct(StructDeclaration *sd);
 
+  /// Types created by get() for isFromLDC_DCompute() structs, pending reset.
+  static std::vector<IrTypeStruct*> dcomputeTypes;
+
   /// StructDeclaration this type represents.
   StructDeclaration *sd = nullptr;
 

diff --git a/tests/codegen/dcompute_host_and_device.d b/tests/codegen/dcompute_host_and_device.d
@@ -1,8 +1,12 @@
 // Check that we can generate code for both the host and device in one compiler invocation
 // REQUIRES: target_NVPTX
-// RUN: %ldc -mdcompute-targets=cuda-350 -mdcompute-file-prefix=host_and_device -Iinputs %s %S/inputs/kernel.d
+// RUN: %ldc -c -mdcompute-targets=cuda-350 -m64 -output-ll -mdcompute-file-prefix=host_and_device -Iinputs -output-o %s %S/inputs/kernel.d
+// RUN: FileCheck %s --check-prefix=PTX < host_and_device_cuda350_64.ptx
+// RUN: FileCheck %s --check-prefix=LL < dcompute_host_and_device.ll
 
-import inputs.kernel : foo;
+import inputs.kernel : k_foo;
+
+import ldc.dcompute;
 
 int tlGlobal;
 __gshared int gGlobal;
@@ -12,4 +16,53 @@ void main(string[] args)
     tlGlobal = 0;
     gGlobal  = 0;
     string s = foo.mangleof;
+    string k_s = k_foo.mangleof;
+
+    GlobalPointer!float global_x;
+    foo(global_x);
+}
+
+void foo(GlobalPointer!float x_in) { // host-side twin of inputs.kernel.k_foo
+    // LL-LABEL: foo
+    SharedPointer!float shared_x;
+	PrivatePointer!float private_x;
+	ConstantPointer!float const_x;
+    // Host codegen: every check below expects a plain float* (no addrspace).
+    // LL: [[s_load_reg:%[0-9]*]] = load float*, float** {{%[0-9]*}}
+    // LL: [[s_addr_reg:%[0-9]*]] = load float*, float** {{%[0-9]*}}
+    // LL: [[s_store_reg:%[0-9]*]] = load float, float* [[s_addr_reg]]
+    // LL: store float [[s_store_reg]], float* [[s_load_reg]]
+	*shared_x = *x_in; // read global, write shared
+
+    // LL: [[p_load_reg:%[0-9]*]] = load float*, float** {{%[0-9]*}}
+    // LL: [[p_addr_reg:%[0-9]*]] = load float*, float** {{%[0-9]*}}
+    // LL: [[p_store_reg:%[0-9]*]] = load float, float* [[p_addr_reg]]
+    // LL: store float [[p_store_reg]], float* [[p_load_reg]]
+	*private_x = *x_in; // read global, write private
+
+    // LL: [[c_load_reg:%[0-9]*]] = load float*, float** {{%[0-9]*}}
+    // LL: [[c_addr_reg:%[0-9]*]] = load float*, float** {{%[0-9]*}}
+    // LL: [[c_store_reg:%[0-9]*]] = load float, float* [[c_addr_reg]]
+    // LL: store float [[c_store_reg]], float* [[c_load_reg]]
+	*x_in = *const_x; // read constant, write global
+
+    // LL: [[g1_load_reg:%[0-9]*]] = load float*, float** {{%[0-9]*}}
+    // LL: [[g1_addr_reg:%[0-9]*]] = load float*, float** {{%[0-9]*}}
+    // LL: [[g1_store_reg:%[0-9]*]] = load float, float* [[g1_addr_reg]]
+    // LL: store float [[g1_store_reg]], float* [[g1_load_reg]]
+    *x_in = *shared_x; // read shared, write global
+
+    // LL: [[g2_load_reg:%[0-9]*]] = load float*, float** {{%[0-9]*}}
+    // LL: [[g2_addr_reg:%[0-9]*]] = load float*, float** {{%[0-9]*}}
+    // LL: [[g2_store_reg:%[0-9]*]] = load float, float* [[g2_addr_reg]]
+    // LL: store float [[g2_store_reg]], float* [[g2_load_reg]]
+	*x_in = *private_x; // read private, write global
 }
+
+// PTX-LABEL: k_foo
+// PTX: ld.global.f32
+// PTX: st.shared.f32
+// PTX: st.local.f32
+// PTX: ld.const.f32
+// PTX: ld.shared.f32
+// PTX: ld.local.f32
diff --git a/tests/codegen/inputs/kernel.d b/tests/codegen/inputs/kernel.d
@@ -2,4 +2,15 @@
 module inputs.kernel;
 
 import ldc.dcompute;
-@kernel void foo() {}
+@kernel void k_foo(GlobalPointer!float x_in) // uses each dcompute pointer kind
+{
+	SharedPointer!float shared_x;
+	PrivatePointer!float private_x;
+	ConstantPointer!float const_x;
+	*shared_x = *x_in; // read global, write shared
+	*private_x = *x_in; // read global, write private
+	*x_in = *const_x; // read constant, write global
+	// Read back through the two pointer kinds that were only written above.
+	*x_in = *shared_x; // read shared, write global
+	*x_in = *private_x; // read private, write global
+}