/*
 * tiger-192 djm34
 */
/*
 * tiger-192 kernel implementation.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2014 djm34
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* * ===========================(LICENSE END)============================= * * @author phm */ //#include #include #include "cuda_helper.h" #define HIWORD _HIWORD #define LOWORD _LOWORD #if 0 #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) { if (code != cudaSuccess) { fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); if (abort) exit(code); } } #endif extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); __device__ __forceinline__ void bigmul(uint64_t *w, uint64_t* am, uint64_t* bm, int sizea, int sizeb, int thread) { int threads = 256*256*8*2; #pragma unroll for (int i=0;i>>(threads,len1,len2,Hash1,Hash2,finalHash); // MyStreamSynchronize(NULL, order, thr_id); // gpuErrchk(cudaDeviceSynchronize()); // gpuErrchk(cudaThreadSynchronize()); } __host__ void m7_bigmul_unroll1_cpu(int thr_id, int threads,uint64_t* Hash1, uint64_t* Hash2,uint64_t *finalHash,int order) { const int threadsperblock = 256; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); size_t shared_size =0; m7_bigmul_unroll1_gpu<<>>(threads,Hash1,Hash2,finalHash); } __host__ void m7_bigmul_unroll2_cpu(int thr_id, int threads,uint64_t* Hash1, uint64_t* Hash2,uint64_t *finalHash,int order) { const int threadsperblock = 256; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); size_t shared_size =0; m7_bigmul_unroll2_gpu<<>>(threads,Hash1,Hash2,finalHash); } __host__ void m7_bigmul_init(int thr_id, int threads) { // why I am here ? }