sources: switch to UTF-8

10 years ago · b4e690b486
17 changed files with 52 additions and 52 deletions
--- a/JHA/cuda_jha_compactionTest.cu
+++ b/JHA/cuda_jha_compactionTest.cu
@ -14,7 +14,7 @@ static uint32_t *d_tempBranch1Nonces[8];
 static uint32_t *d_numValid[8];
 static uint32_t *h_numValid[8];
-static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen
+static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen
 // aus heavy.cu
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
@ -93,7 +93,7 @@ __global__ void jackpot_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t
 				inpHash = &inpHashes[id<<4];
 			}else
 			{
-				// Nonce-Liste verfügbar
+				// Nonce-Liste verfügbar
 				int nonce = d_validNonceTable[id] - startNounce;
 				inpHash = &inpHashes[nonce<<4];
 			}			
@ -210,7 +210,7 @@ __global__ void jackpot_compactTest_gpu_SCATTER(uint32_t *sum, uint32_t *outp, c
 			inpHash = &inpHashes[id<<4];
 		}else
 		{
-			// Nonce-Liste verfügbar
+			// Nonce-Liste verfügbar
 			int nonce = d_validNonceTable[id] - startNounce;
 			actNounce = nonce;
 			inpHash = &inpHashes[nonce<<4];
@ -345,7 +345,7 @@ __host__ void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t
 											int order)
 {
 	// Wenn validNonceTable genutzt wird, dann werden auch nur die Nonces betrachtet, die dort enthalten sind
-	// "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen!
+	// "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen!
 	jackpot_compactTest_cpu_dualCompaction(thr_id, threads,
 		h_numValid[thr_id], d_nonces1, d_nonces2,
--- a/JHA/cuda_jha_keccak512.cu
+++ b/JHA/cuda_jha_keccak512.cu
@ -127,7 +127,7 @@ __global__ void jackpot_keccak512_gpu_hash(int threads, uint32_t startNounce, ui
        for (int i=0; i<25; i++)
            keccak_gpu_state[i] = c_State[i];
-        // den Block einmal gut durchschütteln
+        // den Block einmal gut durchschütteln
        keccak_block(keccak_gpu_state, message, c_keccak_round_constants);
        // das Hash erzeugen
@ -533,7 +533,7 @@ __host__ void jackpot_keccak512_cpu_hash(int thr_id, int threads, uint32_t start
    dim3 grid((threads + threadsperblock-1)/threadsperblock);
    dim3 block(threadsperblock);
-    // Größe des dynamischen Shared Memory Bereichs
+    // Größe des dynamischen Shared Memory Bereichs
    size_t shared_size = 0;
    jackpot_keccak512_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash);
--- a/JHA/jackpotcoin.cu
+++ b/JHA/jackpotcoin.cu
@ -12,7 +12,7 @@ extern "C"
 // aus cpu-miner.c
 extern int device_map[8];
-// Speicher für Input/Output der verketteten Hashfunktionen
+// Speicher für Input/Output der verketteten Hashfunktionen
 static uint32_t *d_hash[8];
 extern void jackpot_keccak512_cpu_init(int thr_id, int threads);
@ -41,7 +41,7 @@ extern void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t st
 											uint32_t *d_nonces2, size_t *nrm2,
 											int order);
-// Speicher zur Generierung der Noncevektoren für die bedingten Hashes
+// Speicher zur Generierung der Noncevektoren für die bedingten Hashes
 static uint32_t *d_jackpotNonces[8];
 static uint32_t *d_branch1Nonces[8];
 static uint32_t *d_branch2Nonces[8];
@ -142,7 +142,7 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
 		size_t nrm1, nrm2, nrm3;
-		// Runde 1 (ohne Gröstl)
+		// Runde 1 (ohne Gröstl)
 		jackpot_compactTest_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], NULL,
 				d_branch1Nonces[thr_id], &nrm1,
@ -165,7 +165,7 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
 		// Runde 3 (komplett)
-		// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
+		// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
 		jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
 			d_branch1Nonces[thr_id], &nrm1,
 			d_branch2Nonces[thr_id], &nrm2,
@ -176,7 +176,7 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
 			quark_skein512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
 		}
-		// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
+		// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
 		jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
 			d_branch1Nonces[thr_id], &nrm1,
 			d_branch2Nonces[thr_id], &nrm2,
@ -189,7 +189,7 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
 		// Runde 3 (komplett)
-		// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
+		// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
 		jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
 			d_branch1Nonces[thr_id], &nrm1,
 			d_branch2Nonces[thr_id], &nrm2,
@ -200,7 +200,7 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
 			quark_skein512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
 		}
-		// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
+		// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
 		jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
 			d_branch1Nonces[thr_id], &nrm1,
 			d_branch2Nonces[thr_id], &nrm2,
@ -219,7 +219,7 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
 			uint32_t vhash64[8];
 			be32enc(&endiandata[19], foundNonce);
-			// diese jackpothash Funktion gibt die Zahl der Runden zurück
+			// diese jackpothash Funktion gibt die Zahl der Runden zurück
 			rounds = jackpothash(vhash64, endiandata);
 			if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) {
--- a/cuda_fugue256.cu
+++ b/cuda_fugue256.cu
@ -734,7 +734,7 @@ void fugue256_cpu_init(int thr_id, int threads)
 	texDef(mixTab2Tex, mixTab2m, mixtab2_cpu, sizeof(uint32_t)*256);
 	texDef(mixTab3Tex, mixTab3m, mixtab3_cpu, sizeof(uint32_t)*256);
-	// Speicher für alle Ergebnisse belegen
+	// Speicher für alle Ergebnisse belegen
 	cudaMalloc(&d_fugue256_hashoutput[thr_id], 8 * sizeof(uint32_t) * threads);
 	cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t)); 
 }
@ -760,15 +760,15 @@ __host__ void fugue256_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
 __host__ void fugue256_cpu_hash(int thr_id, int threads, int startNounce, void *outputHashes, uint32_t *nounce)
 {
 #if USE_SHARED
-	const int threadsperblock = 256; // Alignment mit mixtab Grösse. NICHT ÄNDERN
+	const int threadsperblock = 256; // Alignment mit mixtab Grösse. NICHT ÄNDERN
 #else
-	const int threadsperblock = 512; // so einstellen wie gewünscht ;-)
+	const int threadsperblock = 512; // so einstellen wie gewünscht ;-)
 #endif
 	// berechne wie viele Thread Blocks wir brauchen
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
 	dim3 block(threadsperblock);
-	// Größe des dynamischen Shared Memory Bereichs
+	// Größe des dynamischen Shared Memory Bereichs
 #if USE_SHARED
 	size_t shared_size = 4 * 256 * sizeof(uint32_t);
 #else
--- a/cuda_groestlcoin.cu
+++ b/cuda_groestlcoin.cu
@ -15,13 +15,13 @@ extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int t
 // diese Struktur wird in der Init Funktion angefordert
 static cudaDeviceProp props[8];
-// globaler Speicher für alle HeftyHashes aller Threads
+// globaler Speicher für alle HeftyHashes aller Threads
 __constant__ uint32_t pTarget[8]; // Single GPU
 extern uint32_t *d_resultNonce[8];
 __constant__ uint32_t groestlcoin_gpu_msg[32];
-// 64 Register Variante für Compute 3.0
+// 64 Register Variante für Compute 3.0
 #include "groestl_functions_quad.cu"
 #include "bitslice_transformations_quad.cu"
@ -104,7 +104,7 @@ __host__ void groestlcoin_cpu_init(int thr_id, int threads)
    cudaGetDeviceProperties(&props[thr_id], device_map[thr_id]);
-    // Speicher für Gewinner-Nonce belegen
+    // Speicher für Gewinner-Nonce belegen
    cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t)); 
 }
@ -121,8 +121,8 @@ __host__ void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
    msgBlock[20] = 0x80;
    msgBlock[31] = 0x01000000;
-    // groestl512 braucht hierfür keinen CPU-Code (die einzige Runde wird
+    // groestl512 braucht hierfür keinen CPU-Code (die einzige Runde wird
-    // auf der GPU ausgeführt)
+    // auf der GPU ausgeführt)
    // Blockheader setzen (korrekte Nonce und Hefty Hash fehlen da drin noch)
    cudaMemcpyToSymbol( groestlcoin_gpu_msg,
@ -147,7 +147,7 @@ __host__ void groestlcoin_cpu_hash(int thr_id, int threads, uint32_t startNounce
    dim3 grid(factor*((threads + threadsperblock-1)/threadsperblock));
    dim3 block(threadsperblock);
-    // Größe des dynamischen Shared Memory Bereichs
+    // Größe des dynamischen Shared Memory Bereichs
    size_t shared_size = 0;
    cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
--- a/cuda_myriadgroestl.cu
+++ b/cuda_myriadgroestl.cu
@ -14,7 +14,7 @@ extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int t
 // diese Struktur wird in der Init Funktion angefordert
 static cudaDeviceProp props[8];
-// globaler Speicher für alle HeftyHashes aller Threads
+// globaler Speicher für alle HeftyHashes aller Threads
 __constant__ uint32_t pTarget[8]; // Single GPU
 uint32_t *d_outputHashes[8];
 extern uint32_t *d_resultNonce[8];
@ -49,7 +49,7 @@ uint32_t myr_sha256_cpu_w2Table[] = {
    0x69bc7ac4, 0xbd11375b, 0xe3ba71e5, 0x3b209ff2, 0x18feee17, 0xe25ad9e7, 0x13375046, 0x0515089d,
    0x4f0d0f04, 0x2627484e, 0x310128d2, 0xc668b434, 0x420841cc, 0x62d311b8, 0xe59ba771, 0x85a7a484 };
-// 64 Register Variante für Compute 3.0
+// 64 Register Variante für Compute 3.0
 #include "groestl_functions_quad.cu"
 #include "bitslice_transformations_quad.cu"
@ -326,10 +326,10 @@ __host__ void myriadgroestl_cpu_init(int thr_id, int threads)
    cudaGetDeviceProperties(&props[thr_id], device_map[thr_id]);
-    // Speicher für Gewinner-Nonce belegen
+    // Speicher für Gewinner-Nonce belegen
    cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t)); 
-    // Speicher für temporäreHashes
+    // Speicher für temporäreHashes
    cudaMalloc(&d_outputHashes[thr_id], 16*sizeof(uint32_t)*threads); 
 }
@ -346,8 +346,8 @@ __host__ void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn
    msgBlock[20] = 0x80;
    msgBlock[31] = 0x01000000;
-    // groestl512 braucht hierfür keinen CPU-Code (die einzige Runde wird
+    // groestl512 braucht hierfür keinen CPU-Code (die einzige Runde wird
-    // auf der GPU ausgeführt)
+    // auf der GPU ausgeführt)
    // Blockheader setzen (korrekte Nonce und Hefty Hash fehlen da drin noch)
    cudaMemcpyToSymbol( myriadgroestl_gpu_msg,
@ -368,7 +368,7 @@ __host__ void myriadgroestl_cpu_hash(int thr_id, int threads, uint32_t startNoun
    // mit den Quad Funktionen brauchen wir jetzt 4 threads pro Hash, daher Faktor 4 bei der Blockzahl
    const int factor=4;
-    // Größe des dynamischen Shared Memory Bereichs
+    // Größe des dynamischen Shared Memory Bereichs
    size_t shared_size = 0;
    cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
--- a/cuda_nist5.cu
+++ b/cuda_nist5.cu
@ -12,7 +12,7 @@ extern "C"
 // aus cpu-miner.c
 extern int device_map[8];
-// Speicher für Input/Output der verketteten Hashfunktionen
+// Speicher für Input/Output der verketteten Hashfunktionen
 static uint32_t *d_hash[8];
 extern void quark_blake512_cpu_init(int thr_id, int threads);
--- a/x11/cuda_x11_cubehash512.cu
+++ b/x11/cuda_x11_cubehash512.cu
@ -259,10 +259,10 @@ __global__ void x11_cubehash512_gpu_hash_64(int threads, uint32_t startNounce, u
        uint32_t x[2][2][2][2][2];
        Init(x);
-        // erste Hälfte des Hashes (32 bytes)
+        // erste Hälfte des Hashes (32 bytes)
        Update32(x, (const BitSequence*)Hash);
-        // zweite Hälfte des Hashes (32 bytes)
+        // zweite Hälfte des Hashes (32 bytes)
        Update32(x, (const BitSequence*)(Hash+8));
        // Padding Block
@ -290,7 +290,7 @@ __host__ void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t star
    dim3 grid((threads + threadsperblock-1)/threadsperblock);
    dim3 block(threadsperblock);
-    // Größe des dynamischen Shared Memory Bereichs
+    // Größe des dynamischen Shared Memory Bereichs
    size_t shared_size = 0;
    x11_cubehash512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
--- a/x11/cuda_x11_echo.cu
+++ b/x11/cuda_x11_echo.cu
@ -33,7 +33,7 @@ __device__ __forceinline__ void cuda_echo_round(
 	uint32_t &k0, uint32_t &k1, uint32_t &k2, uint32_t &k3,
 	uint32_t *W, int round)
 {
-	// W hat 16*4 als Abmaße
+	// W hat 16*4 als Abmaße
 	// Big Sub Words
 #pragma unroll 16
@ -76,10 +76,10 @@ __device__ __forceinline__ void cuda_echo_round(
 	// Mix Columns
 #pragma unroll 4
-	for(int i=0;i<4;i++) // Schleife über je 2*uint32_t
+	for(int i=0;i<4;i++) // Schleife über je 2*uint32_t
 	{
 #pragma unroll 4
-		for(int j=0;j<4;j++) // Schleife über die elemnte
+		for(int j=0;j<4;j++) // Schleife über die elemnte
 		{
 			int idx = j<<2; // j*4
@ -138,7 +138,7 @@ __global__ void x11_echo512_gpu_hash_64(int threads, uint32_t startNounce, uint6
 			W[i + 3] = 0;
 		}
-		// kopiere 32-byte großen hash
+		// kopiere 32-byte großen hash
 #pragma unroll 16
 		for(int i=0;i<16;i++)
 			W[i+32] = Hash[i];
@ -198,7 +198,7 @@ __host__ void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNou
    dim3 grid((threads + threadsperblock-1)/threadsperblock);
    dim3 block(threadsperblock);
-    // Größe des dynamischen Shared Memory Bereichs
+    // Größe des dynamischen Shared Memory Bereichs
    size_t shared_size = 0;
 //    fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
--- a/x11/cuda_x11_luffa512.cu
+++ b/x11/cuda_x11_luffa512.cu
@ -368,7 +368,7 @@ __host__ void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNo
    dim3 grid((threads + threadsperblock-1)/threadsperblock);
    dim3 block(threadsperblock);
-    // Größe des dynamischen Shared Memory Bereichs
+    // Größe des dynamischen Shared Memory Bereichs
    size_t shared_size = 0;
    x11_luffa512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
--- a/x11/cuda_x11_shavite512.cu
+++ b/x11/cuda_x11_shavite512.cu
@ -1319,7 +1319,7 @@ __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, ui
 		// nachricht laden
 		uint32_t msg[32];
-		// fülle die Nachricht mit 64-byte (vorheriger Hash)
+		// fülle die Nachricht mit 64-byte (vorheriger Hash)
 		#pragma unroll 16
 		for(int i=0;i<16;i++)
 			msg[i] = Hash[i];
--- a/x11/cuda_x11_simd512.cu
+++ b/x11/cuda_x11_simd512.cu
@ -1,6 +1,6 @@
 // Parallelisierung:
 //
-// FFT_8  wird 2 mal 8-fach parallel ausgeführt (in FFT_64)
+// FFT_8  wird 2 mal 8-fach parallel ausgeführt (in FFT_64)
 //        und 1 mal 16-fach parallel (in FFT_128_full)
 //
 // STEP8_IF und STEP8_MAJ beinhalten je zwei 8-fach parallele Operationen
@ -573,7 +573,7 @@ x11_simd512_gpu_expand_64(int threads, uint32_t startNounce, uint64_t *g_hash, u
        for (int i=0; i<2; i++)
            Hash[i] = inpHash[8*i+(threadIdx.x&7)];
-        // Puffer für expandierte Nachricht
+        // Puffer für expandierte Nachricht
        uint4 *temp4 = &g_temp4[64 * hashPosition];
        Expansion(Hash, temp4);
@ -630,7 +630,7 @@ __host__ void x11_simd512_cpu_init(int thr_id, int threads)
    cudaMalloc( &d_state[thr_id], 32*sizeof(int)*threads );
    cudaMalloc( &d_temp4[thr_id], 64*sizeof(uint4)*threads );
-    // Textur für 128 Bit Zugriffe
+    // Textur für 128 Bit Zugriffe
    cudaChannelFormatDesc channelDesc128 = cudaCreateChannelDesc<uint4>();
    texRef1D_128.normalized = 0;
    texRef1D_128.filterMode = cudaFilterModePoint;
@ -651,7 +651,7 @@ __host__ void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNou
 {
    const int threadsperblock = TPB;
-    // Größe des dynamischen Shared Memory Bereichs
+    // Größe des dynamischen Shared Memory Bereichs
    size_t shared_size = 0;
    // berechne wie viele Thread Blocks wir brauchen
@ -662,7 +662,7 @@ __host__ void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNou
    dim3 grid((threads + threadsperblock-1)/threadsperblock);
-    // künstlich die Occupancy limitieren, um das totale Erschöpfen des Texture Cache zu vermeiden
+    // künstlich die Occupancy limitieren, um das totale Erschöpfen des Texture Cache zu vermeiden
    x11_simd512_gpu_compress1_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector, d_temp4[thr_id], d_state[thr_id]);
    x11_simd512_gpu_compress2_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector, d_temp4[thr_id], d_state[thr_id]);
--- a/x11/x11.cu
+++ b/x11/x11.cu
@ -24,7 +24,7 @@ extern "C"
 // aus cpu-miner.c
 extern int device_map[8];
-// Speicher für Input/Output der verketteten Hashfunktionen
+// Speicher für Input/Output der verketteten Hashfunktionen
 static uint32_t *d_hash[8];
 extern void quark_blake512_cpu_init(int thr_id, int threads);
--- a/x13/cuda_x13_fugue512.cu
+++ b/x13/cuda_x13_fugue512.cu
@ -686,7 +686,7 @@ __host__ void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNo
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
 	dim3 block(threadsperblock);
-	// Größe des dynamischen Shared Memory Bereichs
+	// Größe des dynamischen Shared Memory Bereichs
 	size_t shared_size = 4 * 256 * sizeof(uint32_t);
 	// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
--- a/x13/cuda_x13_hamsi512.cu
+++ b/x13/cuda_x13_hamsi512.cu
@ -733,7 +733,7 @@ __host__ void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNo
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
 	dim3 block(threadsperblock);
-	// Größe des dynamischen Shared Memory Bereichs
+	// Größe des dynamischen Shared Memory Bereichs
 	size_t shared_size = 0;
 	// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
--- a/x13/x13.cu
+++ b/x13/x13.cu
@ -27,7 +27,7 @@ extern "C"
 // aus cpu-miner.c
 extern int device_map[8];
-// Speicher für Input/Output der verketteten Hashfunktionen
+// Speicher für Input/Output der verketteten Hashfunktionen
 static uint32_t *d_hash[8];
 extern void quark_blake512_cpu_init(int thr_id, int threads);
--- a/x15/whirlcoin.cu
+++ b/x15/whirlcoin.cu
@ -11,7 +11,7 @@ extern "C"
 extern int device_map[8];
 extern bool opt_benchmark;
-// Speicher für Input/Output der verketteten Hashfunktionen
+// Speicher für Input/Output der verketteten Hashfunktionen
 static uint32_t *d_hash[8];
 extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode);