From 9208888c578b70c654c56d1ae12aa9cca9088ff9 Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Date: Tue, 21 Apr 2015 11:11:12 +0200
Subject: [PATCH] scrypt: assign specific compute models to the kernels

---
 Makefile.am     | 29 +++++++++++++++++++++++------
 ccminer.vcxproj | 30 +++++++++++++++++++-----------
 2 files changed, 42 insertions(+), 17 deletions(-)
diff --git a/Makefile.am b/Makefile.am
index 6b4fd01..fd23379 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -114,20 +114,37 @@ x17/cuda_x17_sha512.o: x17/cuda_x17_sha512.cu
 	$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<
 
 quark/cuda_quark_blake512.o: quark/cuda_quark_blake512.cu
-	$(NVCC) $(nvcc_FLAGS) -I cudpp-2.1/include --maxrregcount=80 -o $@ -c $<
+	$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<
 
 quark/cuda_quark_keccak512.o: quark/cuda_quark_keccak512.cu
-	$(NVCC) $(nvcc_FLAGS) -I cudpp-2.1/include --maxrregcount=88 -o $@ -c $<
+	$(NVCC) $(nvcc_FLAGS) --maxrregcount=88 -o $@ -c $<
 
 quark/cuda_quark_compactionTest.o: quark/cuda_quark_compactionTest.cu
-	$(NVCC) $(nvcc_FLAGS) -I cudpp-2.1/include --maxrregcount=80 -o $@ -c $<
+	$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<
 
 JHA/cuda_jha_compactionTest.o: JHA/cuda_jha_compactionTest.cu
-	$(NVCC) $(nvcc_FLAGS) -I cudpp-2.1/include --maxrregcount=80 -o $@ -c $<
+	$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<
 
-# This kernel need also an older SM to be able to autotune kernels
+# This object does not use CUDA device code but calls the different kernels (autotune)
 scrypt/salsa_kernel.o: scrypt/salsa_kernel.cu
-	$(NVCC) -I . -gencode=arch=compute_20,code=\"sm_21,compute_20\" -o $@ -c $<
+	$(NVCC) -I. @CUDA_INCLUDES@ @CUDA_CFLAGS@ -gencode=arch=compute_20,code=\"sm_21,compute_20\" -o $@ -c $<
+
+# These kernels are each built for one specific (older) device compute model (SM)
+
+scrypt/test_kernel.o: scrypt/test_kernel.cu
+	$(NVCC) -I. @CUDA_INCLUDES@ @CUDA_CFLAGS@ -gencode=arch=compute_20,code=\"sm_20,compute_20\" -o $@ -c $<
+
+scrypt/fermi_kernel.o: scrypt/fermi_kernel.cu
+	$(NVCC) -I. @CUDA_INCLUDES@ @CUDA_CFLAGS@ -gencode=arch=compute_20,code=\"sm_21,compute_20\" -o $@ -c $<
+
+scrypt/kepler_kernel.o: scrypt/kepler_kernel.cu
+	$(NVCC) -I. @CUDA_INCLUDES@ @CUDA_CFLAGS@ -gencode=arch=compute_30,code=\"sm_30,compute_30\" -o $@ -c $<
+
+scrypt/nv_kernel.o: scrypt/nv_kernel.cu
+	$(NVCC) -I. @CUDA_INCLUDES@ @CUDA_CFLAGS@ -gencode=arch=compute_30,code=\"sm_30,compute_30\" -o $@ -c $<
+
+scrypt/titan_kernel.o: scrypt/titan_kernel.cu
+	$(NVCC) -I. @CUDA_INCLUDES@ @CUDA_CFLAGS@ -gencode=arch=compute_35,code=\"sm_35,compute_35\" -o $@ -c $<
 
 skein.o: skein.cu
 	$(NVCC) $(nvcc_FLAGS) --maxrregcount=64 -o $@ -c $<
diff --git a/ccminer.vcxproj b/ccminer.vcxproj
index f8b3a6c..a65b345 100644
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@@ -352,21 +352,29 @@
     <CudaCompile Include="cuda_nist5.cu">
     </CudaCompile>
     <CudaCompile Include="scrypt\blake.cu" />
+    <CudaCompile Include="scrypt\keccak.cu" />
+    <CudaCompile Include="scrypt\sha256.cu" />
+    <CudaCompile Include="scrypt\salsa_kernel.cu">
+      <CodeGeneration>compute_20,sm_21</CodeGeneration>
+    </CudaCompile>
     <CudaCompile Include="scrypt\fermi_kernel.cu">
-      <CodeGeneration Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">compute_20,sm_21;compute_30,sm_30;compute_35,sm_35;compute_50,sm_50;compute_52,sm_52</CodeGeneration>
+      <CodeGeneration>compute_20,sm_21</CodeGeneration>
+    </CudaCompile>
+    <CudaCompile Include="scrypt\kepler_kernel.cu">
+      <CodeGeneration>compute_30,sm_30</CodeGeneration>
+    </CudaCompile>
+    <CudaCompile Include="scrypt\nv_kernel.cu">
+      <CodeGeneration>compute_30,sm_30</CodeGeneration>
     </CudaCompile>
-    <CudaCompile Include="scrypt\keccak.cu" />
-    <CudaCompile Include="scrypt\kepler_kernel.cu" />
-    <CudaCompile Include="scrypt\nv_kernel.cu" />
     <CudaCompile Include="scrypt\nv_kernel2.cu">
-      <CodeGeneration Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">compute_35,sm_35;compute_50,sm_50;compute_52,sm_52</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_50,sm_50;compute_52,sm_52</CodeGeneration>
     </CudaCompile>
-    <CudaCompile Include="scrypt\salsa_kernel.cu">
-      <CodeGeneration Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">compute_20,sm_21</CodeGeneration>
+    <CudaCompile Include="scrypt\test_kernel.cu">
+      <CodeGeneration>compute_20,sm_21</CodeGeneration>
+    </CudaCompile>
+    <CudaCompile Include="scrypt\titan_kernel.cu">
+      <CodeGeneration>compute_35,sm_35</CodeGeneration>
     </CudaCompile>
-    <CudaCompile Include="scrypt\sha256.cu" />
-    <CudaCompile Include="scrypt\test_kernel.cu" />
-    <CudaCompile Include="scrypt\titan_kernel.cu" />
     <CudaCompile Include="zr5.cu" />
     <CudaCompile Include="heavy\cuda_blake512.cu">
     </CudaCompile>
@@ -525,4 +533,4 @@
   <Target Name="AfterClean">
     <Delete Files="@(FilesToCopy->'$(OutDir)%(Filename)%(Extension)')" TreatErrorsAsWarnings="true" />
   </Target>
-</Project>
\ No newline at end of file
+</Project>