cubeluffa: Fix indent and add some static prefixes

Use git "show -w <commithash>" to see the changes. Duplicated functions in the merged Cube+Luffa file could be cross-linked without [...]
10 years ago · 94c9945fe6
2 changed files with 243 additions and 241 deletions
--- a/x11/cuda_x11_echo.cu
+++ b/x11/cuda_x11_echo.cu
@@ -61,8 +61,8 @@ void cuda_echo_round(

 	k0 = 512 + 8;

-	#pragma unroll
-	for (int idx = 0; idx < 16; idx+= 4)
+	#pragma unroll 4
+	for (int idx = 0; idx < 16; idx += 4)
 	{
 		AES_2ROUND(sharedMemory,
 			h[idx + 0], h[idx + 1], h[idx + 2], h[idx + 3], k0);
@@ -144,7 +144,7 @@ void cuda_echo_round(
 		W[32 + i + 12] = abx ^ bcx ^ cdx ^ ab ^ c;

 		a = P[36 + i ];
-		b = P[36 + i +4 ];
+		b = P[36 + i + 4];
 		c = P[36 + i + 8];
 		d = h[i + 12];

@@ -221,7 +221,7 @@ void cuda_echo_round(
 		#pragma unroll 4
 		for (int i = 0; i < 4; i++) // Schleife über je 2*uint32_t
 		{
-			#pragma unroll 64
+			#pragma unroll 4
 			for (int idx = 0; idx < 64; idx += 16) // Schleife über die elemnte
 			{
 				uint32_t a = W[idx + i];
@@ -251,7 +251,7 @@ void cuda_echo_round(
 	}

 	#pragma unroll
-	for (int i = 0; i<16; i += 4)
+	for (int i = 0; i < 16; i += 4)
 	{
 		W[i] ^= W[32 + i] ^ 512;
 		W[i + 1] ^= W[32 + i + 1];
@@ -260,7 +260,7 @@ void cuda_echo_round(
 	}

 	#pragma unroll
-	for (int i = 0; i<16; i++)
+	for (int i = 0; i < 16; i++)
 		hash[i] ^= W[i];
 }

--- a/x11/cuda_x11_luffa512_Cubehash.cu
+++ b/x11/cuda_x11_luffa512_Cubehash.cu
@@ -93,8 +93,8 @@ typedef struct {
 	b0 ^= c1;

 /* initial values of chaining variables */
-__device__ __constant__ uint32_t c_IV[40];
-const uint32_t h_IV[40] = {
+__device__ static __constant__ uint32_t c_IV[40];
+static const uint32_t h_IV[40] = {
 	0x6d251e69,0x44b051e0,0x4eaa6fb4,0xdbf78465,
 	0x6e292011,0x90152df4,0xee058139,0xdef610bb,
 	0xc3b44b95,0xd9d2f256,0x70eee9a0,0xde099fa3,
@@ -106,8 +106,8 @@ const uint32_t h_IV[40] = {
 	0x6c68e9be,0x5ec41e22,0xc825b7c7,0xaffb4363,
 	0xf5df3999,0x0fc688f1,0xb07224cc,0x03e86cea};

-__device__ __constant__ uint32_t c_CNS[80];
-const uint32_t h_CNS[80] = {
+__device__ static __constant__ uint32_t c_CNS[80];
+static const uint32_t h_CNS[80] = {
 	0x303994a6,0xe0337818,0xc0e65299,0x441ba90d,
 	0x6cc33a12,0x7f34d442,0xdc56983e,0x9389217f,
 	0x1e00108f,0xe5a8bce6,0x7800423d,0x5274baf4,
@@ -132,7 +132,7 @@ const uint32_t h_CNS[80] = {

 /***************************************************/
 __device__ __forceinline__
-void rnd512(hashState *state)
+static void rnd512(hashState *state)
 {
 	int i,j;
 	uint32_t t[40];
@@ -279,7 +279,7 @@ void rnd512(hashState *state)


 __device__ __forceinline__
-void Update512(hashState *state, const BitSequence *data)
+static void Update512(hashState *state, const BitSequence *data)
 {
 #pragma unroll 8
 	for(int i=0;i<8;i++) state->buffer[i] = cuda_swab32(((uint32_t*)data)[i]);
@@ -293,7 +293,7 @@ void Update512(hashState *state, const BitSequence *data)

 /***************************************************/
 __device__ __forceinline__
-void finalization512(hashState *state, uint32_t *b)
+static void finalization512(hashState *state, uint32_t *b)
 {
 	int i,j;

@@ -332,7 +332,7 @@ void finalization512(hashState *state, uint32_t *b)
 	}
 }

-typedef unsigned char BitSequence;
+//typedef unsigned char BitSequence;

 #define CUBEHASH_ROUNDS 16 /* this is r for CubeHashr/b */
 #define CUBEHASH_BLOCKBYTES 32 /* this is b for CubeHashr/b */
@@ -480,7 +480,8 @@ static __device__ __forceinline__ void rrounds(uint32_t x[2][2][2][2][2])
 }


-static __device__ __forceinline__ void block_tox(uint32_t *in, uint32_t x[2][2][2][2][2])
+__device__ __forceinline__
+static void block_tox(uint32_t *in, uint32_t x[2][2][2][2][2])
 {
 	int k;
 	int l;
@@ -496,7 +497,8 @@ static __device__ __forceinline__ void block_tox(uint32_t *in, uint32_t x[2][2][
 				x[0][0][k][l][m] ^= *in++;
 }

-static __device__ __forceinline__ void hash_fromx(uint32_t *out, uint32_t x[2][2][2][2][2])
+__device__ __forceinline__
+static void hash_fromx(uint32_t *out, uint32_t x[2][2][2][2][2])
 {
 	int j;
 	int k;
@@ -556,7 +558,8 @@ void __device__ __forceinline__ Init(uint32_t x[2][2][2][2][2])
 #endif
 }

-void __device__ __forceinline__ Update32(uint32_t x[2][2][2][2][2], const BitSequence *data)
+__device__ __forceinline__
+static void Update32(uint32_t x[2][2][2][2][2], const BitSequence *data)
 {
 	/* "xor the block into the first b bytes of the state" */
 	/* "and then transform the state invertibly through r identical rounds" */
@@ -564,7 +567,8 @@ void __device__ __forceinline__ Update32(uint32_t x[2][2][2][2][2], const BitSeq
 	rrounds(x);
 }

-void __device__ __forceinline__ Final(uint32_t x[2][2][2][2][2], BitSequence *hashval)
+__device__ __forceinline__
+static void Final(uint32_t x[2][2][2][2][2], BitSequence *hashval)
 {
 	int i;

@@ -581,7 +585,7 @@ void __device__ __forceinline__ Final(uint32_t x[2][2][2][2][2], BitSequence *ha


 /***************************************************/
-// Die Hash-Funktion
+// Hash Function
 __global__
 void x11_luffaCubehash512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
 {
@@ -619,25 +623,23 @@ void x11_luffaCubehash512_gpu_hash_64(int threads, uint32_t startNounce, uint64_
 }


-// Setup-Funktionen
-__host__ void x11_luffaCubehash512_cpu_init(int thr_id, int threads)
+// Setup
+__host__
+void x11_luffaCubehash512_cpu_init(int thr_id, int threads)
 {
 	cudaMemcpyToSymbol(c_IV, h_IV, sizeof(h_IV), 0, cudaMemcpyHostToDevice);
 	cudaMemcpyToSymbol(c_CNS, h_CNS, sizeof(h_CNS), 0, cudaMemcpyHostToDevice);
 }

-__host__ void x11_luffaCubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
+__host__
+void x11_luffaCubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
 {
 	const int threadsperblock = 256;

-    // berechne wie viele Thread Blocks wir brauchen
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
 	dim3 block(threadsperblock);

-    // Größe des dynamischen Shared Memory Bereichs
-    size_t shared_size = 0;
-
-	x11_luffaCubehash512_gpu_hash_64 << <grid, block, shared_size >> >(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
+	x11_luffaCubehash512_gpu_hash_64 <<< grid, block >>> (threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
 	MyStreamSynchronize(NULL, order, thr_id);
 }