mirror of
https://github.com/GOSTSec/sgminer
synced 2025-02-07 04:24:22 +00:00
convert tabs to spaces in most commonly modified files
This commit is contained in:
parent
2ea8f5d018
commit
25d7d426fd
2078
driver-opencl.c
2078
driver-opencl.c
File diff suppressed because it is too large
Load Diff
254
findnonce.c
254
findnonce.c
@ -18,115 +18,115 @@
|
||||
#include "algorithm/scrypt.h"
|
||||
|
||||
/* The 64 SHA-256 round constants K[0..63] (FIPS 180-4, section 4.2.2).
 * Indexed by round number; precalc_hash() below reads individual entries. */
const uint32_t SHA256_K[64] = {
  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
|
||||
|
||||
/* Rotate x left by y bits. The width is taken from sizeof(x), so x must
 * already have the intended unsigned type; y must be > 0 and < that width,
 * otherwise one of the shifts is undefined. Arguments are evaluated more than
 * once, so pass side-effect-free expressions. */
#define rotate(x, y) (((x) << (y)) | ((x) >> (sizeof(x) * 8 - (y))))

/* Rotate x right by y bits; same constraints as rotate(). */
#define rotr(x, y) (((x) >> (y)) | ((x) << (sizeof(x) * 8 - (y))))

/* One SHA-256-style round on the working variables: updates h and d in place
 * using message word w and round constant k. Left rotations by 26/21/7 and
 * 30/19/10 are the same as right rotations by 6/11/25 and 2/13/22 on 32-bit
 * operands. Wrapped in do/while(0) so that `R(...);` is a single statement. */
#define R(a, b, c, d, e, f, g, h, w, k) \
  do { \
    (h) = (h) + (rotate((e), 26) ^ rotate((e), 21) ^ rotate((e), 7)) + ((g) ^ ((e) & ((f) ^ (g)))) + (k) + (w); \
    (d) = (d) + (h); \
    (h) = (h) + (rotate((a), 30) ^ rotate((a), 19) ^ rotate((a), 10)) + (((a) & (b)) | ((c) & ((a) | (b)))); \
  } while (0)
|
||||
|
||||
/*
 * Fill the per-work kernel context `blk` with values that can be derived
 * from the hash state and the first three data words alone.
 *
 * blk   - output; every field assigned below is overwritten.
 * state - eight 32-bit words, read as state[0..7].
 * data  - at least three 32-bit words, read as data[0..2].
 *
 * Statement order matters: several later assignments read blk fields
 * (W16, W17, fW0, fW1, PreVal4, T1, PreVal0, ctx_*, cty_*) stored earlier
 * in this function.
 * NOTE(review): the literal constants (0xb956c25b, 0x01100000, 0x11002000,
 * 0x00000280, 0x98c7e2a2, ...) are presumably folded round constants and
 * fixed padding words for the target block layout - confirm against the
 * OpenCL kernels that consume these fields.
 */
void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data)
{
  cl_uint A, B, C, D, E, F, G, H;

  A = state[0];
  B = state[1];
  C = state[2];
  D = state[3];
  E = state[4];
  F = state[5];
  G = state[6];
  H = state[7];

  /* Three rounds over data[0..2]; the variable roles shift by one position
   * each round instead of moving the values around. */
  R(A, B, C, D, E, F, G, H, data[0], SHA256_K[0]);
  R(H, A, B, C, D, E, F, G, data[1], SHA256_K[1]);
  R(G, H, A, B, C, D, E, F, data[2], SHA256_K[2]);

  /* Working variables after the three rounds. */
  blk->cty_a = A;
  blk->cty_b = B;
  blk->cty_c = C;
  blk->cty_d = D;

  blk->D1A = D + 0xb956c25b;

  blk->cty_e = E;
  blk->cty_f = F;
  blk->cty_g = G;
  blk->cty_h = H;

  /* Untouched input state. */
  blk->ctx_a = state[0];
  blk->ctx_b = state[1];
  blk->ctx_c = state[2];
  blk->ctx_d = state[3];
  blk->ctx_e = state[4];
  blk->ctx_f = state[5];
  blk->ctx_g = state[6];
  blk->ctx_h = state[7];

  blk->merkle = data[0];
  blk->ntime = data[1];
  blk->nbits = data[2];

  blk->W16 = blk->fW0 = data[0] + (rotr(data[1], 7) ^ rotr(data[1], 18) ^ (data[1] >> 3));
  blk->W17 = blk->fW1 = data[1] + (rotr(data[2], 7) ^ rotr(data[2], 18) ^ (data[2] >> 3)) + 0x01100000;
  blk->PreVal4 = blk->fcty_e = blk->ctx_e + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + 0xe9b5dba5;
  blk->T1 = blk->fcty_e2 = (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
  blk->PreVal4_2 = blk->PreVal4 + blk->T1;
  blk->PreVal0 = blk->PreVal4 + blk->ctx_a;
  blk->PreW31 = 0x00000280 + (rotr(blk->W16, 7) ^ rotr(blk->W16, 18) ^ (blk->W16 >> 3));
  blk->PreW32 = blk->W16 + (rotr(blk->W17, 7) ^ rotr(blk->W17, 18) ^ (blk->W17 >> 3));
  blk->PreW18 = data[2] + (rotr(blk->W16, 17) ^ rotr(blk->W16, 19) ^ (blk->W16 >> 10));
  blk->PreW19 = 0x11002000 + (rotr(blk->W17, 17) ^ rotr(blk->W17, 19) ^ (blk->W17 >> 10));


  blk->W2 = data[2];

  blk->W2A = blk->W2 + (rotr(blk->W16, 19) ^ rotr(blk->W16, 17) ^ (blk->W16 >> 10));
  blk->W17_2 = 0x11002000 + (rotr(blk->W17, 19) ^ rotr(blk->W17, 17) ^ (blk->W17 >> 10));

  blk->fW2 = data[2] + (rotr(blk->fW0, 17) ^ rotr(blk->fW0, 19) ^ (blk->fW0 >> 10));
  blk->fW3 = 0x11002000 + (rotr(blk->fW1, 17) ^ rotr(blk->fW1, 19) ^ (blk->fW1 >> 10));
  blk->fW15 = 0x00000280 + (rotr(blk->fW0, 7) ^ rotr(blk->fW0, 18) ^ (blk->fW0 >> 3));
  blk->fW01r = blk->fW0 + (rotr(blk->fW1, 7) ^ rotr(blk->fW1, 18) ^ (blk->fW1 >> 3));


  blk->PreVal4addT1 = blk->PreVal4 + blk->T1;
  blk->T1substate0 = blk->ctx_a - blk->T1;

  blk->C1addK5 = blk->cty_c + SHA256_K[5];
  blk->B1addK6 = blk->cty_b + SHA256_K[6];
  blk->PreVal0addK7 = blk->PreVal0 + SHA256_K[7];
  blk->W16addK16 = blk->W16 + SHA256_K[16];
  blk->W17addK17 = blk->W17 + SHA256_K[17];

  blk->zeroA = blk->ctx_a + 0x98c7e2a2;
  blk->zeroB = blk->ctx_a + 0xfc08884d;
  blk->oneA = blk->ctx_b + 0x90bb1e3c;
  blk->twoA = blk->ctx_c + 0x50c6645b;
  blk->threeA = blk->ctx_d + 0x3ac42e24;
  blk->fourA = blk->ctx_e + SHA256_K[4];
  blk->fiveA = blk->ctx_f + SHA256_K[5];
  blk->sixA = blk->ctx_g + SHA256_K[6];
  blk->sevenA = blk->ctx_h + SHA256_K[7];
}
|
||||
|
||||
#if 0 // not used any more
|
||||
@ -170,67 +170,67 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data)
|
||||
#endif
|
||||
|
||||
struct pc_data {
|
||||
struct thr_info *thr;
|
||||
struct work *work;
|
||||
uint32_t res[MAXBUFFERS];
|
||||
pthread_t pth;
|
||||
int found;
|
||||
struct thr_info *thr;
|
||||
struct work *work;
|
||||
uint32_t res[MAXBUFFERS];
|
||||
pthread_t pth;
|
||||
int found;
|
||||
};
|
||||
|
||||
static void *postcalc_hash(void *userdata)
|
||||
{
|
||||
struct pc_data *pcd = (struct pc_data *)userdata;
|
||||
struct thr_info *thr = pcd->thr;
|
||||
unsigned int entry = 0;
|
||||
struct pc_data *pcd = (struct pc_data *)userdata;
|
||||
struct thr_info *thr = pcd->thr;
|
||||
unsigned int entry = 0;
|
||||
|
||||
int found = thr->cgpu->algorithm.found_idx;
|
||||
int found = thr->cgpu->algorithm.found_idx;
|
||||
|
||||
pthread_detach(pthread_self());
|
||||
pthread_detach(pthread_self());
|
||||
|
||||
/* To prevent corrupt values in FOUND from trying to read beyond the
|
||||
* end of the res[] array */
|
||||
if (unlikely(pcd->res[found] & ~found)) {
|
||||
applog(LOG_WARNING, "%s%d: invalid nonce count - HW error",
|
||||
thr->cgpu->drv->name, thr->cgpu->device_id);
|
||||
hw_errors++;
|
||||
thr->cgpu->hw_errors++;
|
||||
pcd->res[found] &= found;
|
||||
}
|
||||
/* To prevent corrupt values in FOUND from trying to read beyond the
|
||||
* end of the res[] array */
|
||||
if (unlikely(pcd->res[found] & ~found)) {
|
||||
applog(LOG_WARNING, "%s%d: invalid nonce count - HW error",
|
||||
thr->cgpu->drv->name, thr->cgpu->device_id);
|
||||
hw_errors++;
|
||||
thr->cgpu->hw_errors++;
|
||||
pcd->res[found] &= found;
|
||||
}
|
||||
|
||||
for (entry = 0; entry < pcd->res[found]; entry++) {
|
||||
uint32_t nonce = pcd->res[entry];
|
||||
if (found == 0x0F)
|
||||
for (entry = 0; entry < pcd->res[found]; entry++) {
|
||||
uint32_t nonce = pcd->res[entry];
|
||||
if (found == 0x0F)
|
||||
nonce = swab32(nonce);
|
||||
|
||||
applog(LOG_DEBUG, "OCL NONCE %u found in slot %d", nonce, entry);
|
||||
submit_nonce(thr, pcd->work, nonce);
|
||||
}
|
||||
applog(LOG_DEBUG, "OCL NONCE %u found in slot %d", nonce, entry);
|
||||
submit_nonce(thr, pcd->work, nonce);
|
||||
}
|
||||
|
||||
discard_work(pcd->work);
|
||||
free(pcd);
|
||||
discard_work(pcd->work);
|
||||
free(pcd);
|
||||
|
||||
return NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res)
|
||||
{
|
||||
struct pc_data *pcd = (struct pc_data *)malloc(sizeof(struct pc_data));
|
||||
int buffersize;
|
||||
struct pc_data *pcd = (struct pc_data *)malloc(sizeof(struct pc_data));
|
||||
int buffersize;
|
||||
|
||||
if (unlikely(!pcd)) {
|
||||
applog(LOG_ERR, "Failed to malloc pc_data in postcalc_hash_async");
|
||||
return;
|
||||
}
|
||||
if (unlikely(!pcd)) {
|
||||
applog(LOG_ERR, "Failed to malloc pc_data in postcalc_hash_async");
|
||||
return;
|
||||
}
|
||||
|
||||
pcd->thr = thr;
|
||||
pcd->work = copy_work(work);
|
||||
buffersize = BUFFERSIZE;
|
||||
pcd->thr = thr;
|
||||
pcd->work = copy_work(work);
|
||||
buffersize = BUFFERSIZE;
|
||||
|
||||
memcpy(&pcd->res, res, buffersize);
|
||||
memcpy(&pcd->res, res, buffersize);
|
||||
|
||||
if (pthread_create(&pcd->pth, NULL, postcalc_hash, (void *)pcd)) {
|
||||
applog(LOG_ERR, "Failed to create postcalc_hash thread");
|
||||
discard_work(pcd->work);
|
||||
free(pcd);
|
||||
}
|
||||
if (pthread_create(&pcd->pth, NULL, postcalc_hash, (void *)pcd)) {
|
||||
applog(LOG_ERR, "Failed to create postcalc_hash thread");
|
||||
discard_work(pcd->work);
|
||||
free(pcd);
|
||||
}
|
||||
}
|
||||
|
554
ocl.c
554
ocl.c
@ -17,11 +17,11 @@
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifdef WIN32
|
||||
#include <winsock2.h>
|
||||
#include <winsock2.h>
|
||||
#else
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netdb.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netdb.h>
|
||||
#endif
|
||||
|
||||
#include <time.h>
|
||||
@ -44,114 +44,114 @@
|
||||
/* Index of the OpenCL platform to use. A negative value (the default) makes
 * get_opencl_platform() pick the first platform in the list it obtains. */
int opt_platform_id = -1;
|
||||
|
||||
/*
 * Select an OpenCL platform.
 *
 * preferred_platform_id - index into the platform list; a negative value
 *                         selects the first platform.
 * platform              - out: receives the selected platform on success;
 *                         left untouched on failure.
 *
 * Returns true on success. Returns false (after logging) when the platform
 * query fails, no platform exists, the requested index is out of range, or
 * the temporary platform list cannot be allocated.
 */
bool get_opencl_platform(int preferred_platform_id, cl_platform_id *platform) {
  cl_int status;
  cl_uint numPlatforms;
  cl_platform_id *platforms = NULL;
  unsigned int i;
  bool ret = false;

  status = clGetPlatformIDs(0, NULL, &numPlatforms);
  /* If this fails, assume no GPUs. */
  if (status != CL_SUCCESS) {
    applog(LOG_ERR, "Error %d: clGetPlatformsIDs failed (no OpenCL SDK installed?)", status);
    goto out;
  }

  if (numPlatforms == 0) {
    applog(LOG_ERR, "clGetPlatformsIDs returned no platforms (no OpenCL SDK installed?)");
    goto out;
  }

  if (preferred_platform_id >= (int)numPlatforms) {
    applog(LOG_ERR, "Specified platform that does not exist");
    goto out;
  }

  platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id));
  if (!platforms) {
    applog(LOG_ERR, "Failed to malloc platform list in get_opencl_platform");
    goto out;
  }

  status = clGetPlatformIDs(numPlatforms, platforms, NULL);
  if (status != CL_SUCCESS) {
    applog(LOG_ERR, "Error %d: Getting Platform Ids. (clGetPlatformsIDs)", status);
    goto out;
  }

  for (i = 0; i < numPlatforms; i++) {
    /* With no preference (negative id) the first entry is taken. */
    if (preferred_platform_id >= 0 && (int)i != preferred_platform_id)
      continue;

    *platform = platforms[i];
    ret = true;
    break;
  }
out:
  free(platforms); /* free(NULL) is a no-op */
  return ret;
}
|
||||
|
||||
|
||||
int clDevicesNum(void) {
|
||||
cl_int status;
|
||||
char pbuff[256];
|
||||
cl_uint numDevices;
|
||||
cl_platform_id platform = NULL;
|
||||
int ret = -1;
|
||||
cl_int status;
|
||||
char pbuff[256];
|
||||
cl_uint numDevices;
|
||||
cl_platform_id platform = NULL;
|
||||
int ret = -1;
|
||||
|
||||
if (!get_opencl_platform(opt_platform_id, &platform)) {
|
||||
goto out;
|
||||
}
|
||||
if (!get_opencl_platform(opt_platform_id, &platform)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
status = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Getting Platform Info. (clGetPlatformInfo)", status);
|
||||
goto out;
|
||||
}
|
||||
status = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Getting Platform Info. (clGetPlatformInfo)", status);
|
||||
goto out;
|
||||
}
|
||||
|
||||
applog(LOG_INFO, "CL Platform vendor: %s", pbuff);
|
||||
status = clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(pbuff), pbuff, NULL);
|
||||
if (status == CL_SUCCESS)
|
||||
applog(LOG_INFO, "CL Platform name: %s", pbuff);
|
||||
status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(pbuff), pbuff, NULL);
|
||||
if (status == CL_SUCCESS)
|
||||
applog(LOG_INFO, "CL Platform version: %s", pbuff);
|
||||
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_INFO, "Error %d: Getting Device IDs (num)", status);
|
||||
goto out;
|
||||
}
|
||||
applog(LOG_INFO, "Platform devices: %d", numDevices);
|
||||
if (numDevices) {
|
||||
unsigned int j;
|
||||
cl_device_id *devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id));
|
||||
applog(LOG_INFO, "CL Platform vendor: %s", pbuff);
|
||||
status = clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(pbuff), pbuff, NULL);
|
||||
if (status == CL_SUCCESS)
|
||||
applog(LOG_INFO, "CL Platform name: %s", pbuff);
|
||||
status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(pbuff), pbuff, NULL);
|
||||
if (status == CL_SUCCESS)
|
||||
applog(LOG_INFO, "CL Platform version: %s", pbuff);
|
||||
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_INFO, "Error %d: Getting Device IDs (num)", status);
|
||||
goto out;
|
||||
}
|
||||
applog(LOG_INFO, "Platform devices: %d", numDevices);
|
||||
if (numDevices) {
|
||||
unsigned int j;
|
||||
cl_device_id *devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id));
|
||||
|
||||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
|
||||
for (j = 0; j < numDevices; j++) {
|
||||
clGetDeviceInfo(devices[j], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
|
||||
applog(LOG_INFO, "\t%i\t%s", j, pbuff);
|
||||
}
|
||||
free(devices);
|
||||
}
|
||||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
|
||||
for (j = 0; j < numDevices; j++) {
|
||||
clGetDeviceInfo(devices[j], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
|
||||
applog(LOG_INFO, "\t%i\t%s", j, pbuff);
|
||||
}
|
||||
free(devices);
|
||||
}
|
||||
|
||||
ret = numDevices;
|
||||
ret = numDevices;
|
||||
out:
|
||||
return ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Create an OpenCL context for the GPU devices of `*platform`.
 *
 * context  - out: receives whatever clCreateContextFromType() returns.
 * platform - platform to bind the context to.
 *
 * Returns the status code written by clCreateContextFromType(); the caller
 * must compare it with CL_SUCCESS before using *context.
 */
static cl_int create_opencl_context(cl_context *context, cl_platform_id *platform)
{
  /* Zero-terminated property list: { name, value, 0 }. */
  cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)*platform, 0 };
  cl_int status;

  *context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
  return status;
}
|
||||
|
||||
/*
 * Create a command queue on `*device` within `*context`.
 *
 * command_queue - out: receives the queue from the last creation attempt.
 * cq_properties - requested queue properties; if creation with them fails,
 *                 one more attempt is made with no properties set.
 *
 * Returns the status of the last clCreateCommandQueue() call.
 */
static cl_int create_opencl_command_queue(cl_command_queue *command_queue, cl_context *context, cl_device_id *device, cl_command_queue_properties cq_properties)
{
  cl_int status;

  *command_queue = clCreateCommandQueue(*context, *device,
    cq_properties, &status);
  if (status != CL_SUCCESS) /* Try again without OOE enable */
    *command_queue = clCreateCommandQueue(*context, *device, 0, &status);
  return status;
}
|
||||
|
||||
static float get_opencl_version(cl_device_id device)
|
||||
@ -160,7 +160,7 @@ static float get_opencl_version(cl_device_id device)
|
||||
char devoclver[1024];
|
||||
char *find;
|
||||
float version = 1.0;
|
||||
cl_int status;
|
||||
cl_int status;
|
||||
|
||||
status = clGetDeviceInfo(device, CL_DEVICE_VERSION, 1024, (void *)devoclver, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
@ -178,199 +178,199 @@ static float get_opencl_version(cl_device_id device)
|
||||
|
||||
/*
 * Report whether the device's extension string mentions "cl_amd_media_ops".
 *
 * The extension string is read into a buffer sized from the length the
 * driver reports, so long extension lists are no longer treated as
 * "not supported" because a fixed-size buffer was too small.
 *
 * Returns false when the query or the allocation fails. The match is a
 * plain substring search, so any extension name containing
 * "cl_amd_media_ops" also counts.
 */
static bool get_opencl_bit_align_support(cl_device_id *device)
{
  const char * camo = "cl_amd_media_ops";
  char *extensions;
  size_t ext_size = 0;
  bool supported = false;
  cl_int status;

  /* First call: ask only for the size of the extension string. */
  status = clGetDeviceInfo(*device, CL_DEVICE_EXTENSIONS, 0, NULL, &ext_size);
  if (status != CL_SUCCESS || ext_size == 0) {
    return false;
  }

  /* One spare byte so the buffer is terminated whatever the driver wrote. */
  extensions = (char *)malloc(ext_size + 1);
  if (!extensions) {
    return false;
  }

  status = clGetDeviceInfo(*device, CL_DEVICE_EXTENSIONS, ext_size, (void *)extensions, NULL);
  if (status == CL_SUCCESS) {
    extensions[ext_size] = '\0';
    supported = strstr(extensions, camo) != NULL;
  }

  free(extensions);
  return supported;
}
|
||||
|
||||
_clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *algorithm)
|
||||
{
|
||||
_clState *clState = (_clState *)calloc(1, sizeof(_clState));
|
||||
struct cgpu_info *cgpu = &gpus[gpu];
|
||||
cl_platform_id platform = NULL;
|
||||
char pbuff[256];
|
||||
build_kernel_data *build_data = (build_kernel_data *) alloca(sizeof(struct _build_kernel_data));
|
||||
cl_uint preferred_vwidth;
|
||||
cl_device_id *devices;
|
||||
cl_uint numDevices;
|
||||
cl_int status;
|
||||
_clState *clState = (_clState *)calloc(1, sizeof(_clState));
|
||||
struct cgpu_info *cgpu = &gpus[gpu];
|
||||
cl_platform_id platform = NULL;
|
||||
char pbuff[256];
|
||||
build_kernel_data *build_data = (build_kernel_data *) alloca(sizeof(struct _build_kernel_data));
|
||||
cl_uint preferred_vwidth;
|
||||
cl_device_id *devices;
|
||||
cl_uint numDevices;
|
||||
cl_int status;
|
||||
|
||||
if (!get_opencl_platform(opt_platform_id, &platform)) {
|
||||
return NULL;
|
||||
}
|
||||
if (!get_opencl_platform(opt_platform_id, &platform)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
numDevices = clDevicesNum();
|
||||
numDevices = clDevicesNum();
|
||||
|
||||
if (numDevices <= 0 ) return NULL;
|
||||
if (numDevices <= 0 ) return NULL;
|
||||
|
||||
devices = (cl_device_id *)alloca(numDevices*sizeof(cl_device_id));
|
||||
devices = (cl_device_id *)alloca(numDevices*sizeof(cl_device_id));
|
||||
|
||||
/* Now, get the device list data */
|
||||
/* Now, get the device list data */
|
||||
|
||||
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Getting Device IDs (list)", status);
|
||||
return NULL;
|
||||
}
|
||||
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Getting Device IDs (list)", status);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
applog(LOG_INFO, "List of devices:");
|
||||
applog(LOG_INFO, "List of devices:");
|
||||
|
||||
unsigned int i;
|
||||
for (i = 0; i < numDevices; i++) {
|
||||
status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Getting Device Info", status);
|
||||
return NULL;
|
||||
}
|
||||
unsigned int i;
|
||||
for (i = 0; i < numDevices; i++) {
|
||||
status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Getting Device Info", status);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
applog(LOG_INFO, "\t%i\t%s", i, pbuff);
|
||||
applog(LOG_INFO, "\t%i\t%s", i, pbuff);
|
||||
|
||||
if (i == gpu) {
|
||||
applog(LOG_INFO, "Selected %i: %s", gpu, pbuff);
|
||||
strncpy(name, pbuff, nameSize);
|
||||
}
|
||||
}
|
||||
if (i == gpu) {
|
||||
applog(LOG_INFO, "Selected %i: %s", gpu, pbuff);
|
||||
strncpy(name, pbuff, nameSize);
|
||||
}
|
||||
}
|
||||
|
||||
if (gpu >= numDevices) {
|
||||
applog(LOG_ERR, "Invalid GPU %i", gpu);
|
||||
return NULL;
|
||||
}
|
||||
if (gpu >= numDevices) {
|
||||
applog(LOG_ERR, "Invalid GPU %i", gpu);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
status = create_opencl_context(&clState->context, &platform);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Creating Context. (clCreateContextFromType)", status);
|
||||
return NULL;
|
||||
}
|
||||
status = create_opencl_context(&clState->context, &platform);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Creating Context. (clCreateContextFromType)", status);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
status = create_opencl_command_queue(&clState->commandQueue, &clState->context, &devices[gpu], cgpu->algorithm.cq_properties);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Creating Command Queue. (clCreateCommandQueue)", status);
|
||||
return NULL;
|
||||
}
|
||||
status = create_opencl_command_queue(&clState->commandQueue, &clState->context, &devices[gpu], cgpu->algorithm.cq_properties);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Creating Command Queue. (clCreateCommandQueue)", status);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
clState->hasBitAlign = get_opencl_bit_align_support(&devices[gpu]);
|
||||
clState->hasBitAlign = get_opencl_bit_align_support(&devices[gpu]);
|
||||
|
||||
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT", status);
|
||||
return NULL;
|
||||
}
|
||||
applog(LOG_DEBUG, "Preferred vector width reported %d", preferred_vwidth);
|
||||
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT", status);
|
||||
return NULL;
|
||||
}
|
||||
applog(LOG_DEBUG, "Preferred vector width reported %d", preferred_vwidth);
|
||||
|
||||
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE", status);
|
||||
return NULL;
|
||||
}
|
||||
applog(LOG_DEBUG, "Max work group size reported %d", (int)(clState->max_work_size));
|
||||
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE", status);
|
||||
return NULL;
|
||||
}
|
||||
applog(LOG_DEBUG, "Max work group size reported %d", (int)(clState->max_work_size));
|
||||
|
||||
size_t compute_units = 0;
|
||||
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(size_t), (void *)&compute_units, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_COMPUTE_UNITS", status);
|
||||
return NULL;
|
||||
}
|
||||
// AMD architechture got 64 compute shaders per compute unit.
|
||||
// Source: http://www.amd.com/us/Documents/GCN_Architecture_whitepaper.pdf
|
||||
clState->compute_shaders = compute_units * 64;
|
||||
applog(LOG_DEBUG, "Max shaders calculated %d", (int)(clState->compute_shaders));
|
||||
size_t compute_units = 0;
|
||||
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(size_t), (void *)&compute_units, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_COMPUTE_UNITS", status);
|
||||
return NULL;
|
||||
}
|
||||
// AMD architechture got 64 compute shaders per compute unit.
|
||||
// Source: http://www.amd.com/us/Documents/GCN_Architecture_whitepaper.pdf
|
||||
clState->compute_shaders = compute_units * 64;
|
||||
applog(LOG_DEBUG, "Max shaders calculated %d", (int)(clState->compute_shaders));
|
||||
|
||||
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_MEM_ALLOC_SIZE", status);
|
||||
return NULL;
|
||||
}
|
||||
applog(LOG_DEBUG, "Max mem alloc size is %lu", (long unsigned int)(cgpu->max_alloc));
|
||||
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_MEM_ALLOC_SIZE", status);
|
||||
return NULL;
|
||||
}
|
||||
applog(LOG_DEBUG, "Max mem alloc size is %lu", (long unsigned int)(cgpu->max_alloc));
|
||||
|
||||
/* Create binary filename based on parameters passed to opencl
|
||||
* compiler to ensure we only load a binary that matches what
|
||||
* would have otherwise created. The filename is:
|
||||
* name + g + lg + lookup_gap + tc + thread_concurrency + nf + nfactor + w + work_size + l + sizeof(long) + .bin
|
||||
*/
|
||||
char filename[255];
|
||||
char strbuf[32];
|
||||
/* Create binary filename based on parameters passed to opencl
|
||||
* compiler to ensure we only load a binary that matches what
|
||||
* would have otherwise created. The filename is:
|
||||
* name + g + lg + lookup_gap + tc + thread_concurrency + nf + nfactor + w + work_size + l + sizeof(long) + .bin
|
||||
*/
|
||||
char filename[255];
|
||||
char strbuf[32];
|
||||
|
||||
sprintf(strbuf, "%s.cl", cgpu->algorithm.name);
|
||||
strcpy(filename, strbuf);
|
||||
sprintf(strbuf, "%s.cl", cgpu->algorithm.name);
|
||||
strcpy(filename, strbuf);
|
||||
|
||||
/* For some reason 2 vectors is still better even if the card says
|
||||
* otherwise, and many cards lie about their max so use 256 as max
|
||||
* unless explicitly set on the command line. Tahiti prefers 1 */
|
||||
if (strstr(name, "Tahiti"))
|
||||
preferred_vwidth = 1;
|
||||
else if (preferred_vwidth > 2)
|
||||
preferred_vwidth = 2;
|
||||
/* For some reason 2 vectors is still better even if the card says
|
||||
* otherwise, and many cards lie about their max so use 256 as max
|
||||
* unless explicitly set on the command line. Tahiti prefers 1 */
|
||||
if (strstr(name, "Tahiti"))
|
||||
preferred_vwidth = 1;
|
||||
else if (preferred_vwidth > 2)
|
||||
preferred_vwidth = 2;
|
||||
|
||||
/* All available kernels only support vector 1 */
|
||||
cgpu->vwidth = 1;
|
||||
/* All available kernels only support vector 1 */
|
||||
cgpu->vwidth = 1;
|
||||
|
||||
/* Vectors are hard-set to 1 above. */
|
||||
if (likely(cgpu->vwidth))
|
||||
clState->vwidth = cgpu->vwidth;
|
||||
else {
|
||||
clState->vwidth = preferred_vwidth;
|
||||
cgpu->vwidth = preferred_vwidth;
|
||||
}
|
||||
/* Vectors are hard-set to 1 above. */
|
||||
if (likely(cgpu->vwidth))
|
||||
clState->vwidth = cgpu->vwidth;
|
||||
else {
|
||||
clState->vwidth = preferred_vwidth;
|
||||
cgpu->vwidth = preferred_vwidth;
|
||||
}
|
||||
|
||||
clState->goffset = true;
|
||||
clState->goffset = true;
|
||||
|
||||
if (cgpu->work_size && cgpu->work_size <= clState->max_work_size)
|
||||
clState->wsize = cgpu->work_size;
|
||||
else
|
||||
clState->wsize = 256;
|
||||
if (cgpu->work_size && cgpu->work_size <= clState->max_work_size)
|
||||
clState->wsize = cgpu->work_size;
|
||||
else
|
||||
clState->wsize = 256;
|
||||
|
||||
if (!cgpu->opt_lg) {
|
||||
applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
|
||||
cgpu->lookup_gap = 2;
|
||||
} else
|
||||
cgpu->lookup_gap = cgpu->opt_lg;
|
||||
if (!cgpu->opt_lg) {
|
||||
applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
|
||||
cgpu->lookup_gap = 2;
|
||||
} else
|
||||
cgpu->lookup_gap = cgpu->opt_lg;
|
||||
|
||||
if ((strcmp(cgpu->algorithm.name, "zuikkis") == 0) && (cgpu->lookup_gap != 2)) {
|
||||
applog(LOG_WARNING, "Kernel zuikkis only supports lookup-gap = 2 (currently %d), forcing.", cgpu->lookup_gap);
|
||||
cgpu->lookup_gap = 2;
|
||||
}
|
||||
if ((strcmp(cgpu->algorithm.name, "zuikkis") == 0) && (cgpu->lookup_gap != 2)) {
|
||||
applog(LOG_WARNING, "Kernel zuikkis only supports lookup-gap = 2 (currently %d), forcing.", cgpu->lookup_gap);
|
||||
cgpu->lookup_gap = 2;
|
||||
}
|
||||
|
||||
if ((strcmp(cgpu->algorithm.name, "bufius") == 0) && ((cgpu->lookup_gap != 2) && (cgpu->lookup_gap != 4) && (cgpu->lookup_gap != 8))) {
|
||||
applog(LOG_WARNING, "Kernel bufius only supports lookup-gap of 2, 4 or 8 (currently %d), forcing to 2", cgpu->lookup_gap);
|
||||
cgpu->lookup_gap = 2;
|
||||
}
|
||||
if ((strcmp(cgpu->algorithm.name, "bufius") == 0) && ((cgpu->lookup_gap != 2) && (cgpu->lookup_gap != 4) && (cgpu->lookup_gap != 8))) {
|
||||
applog(LOG_WARNING, "Kernel bufius only supports lookup-gap of 2, 4 or 8 (currently %d), forcing to 2", cgpu->lookup_gap);
|
||||
cgpu->lookup_gap = 2;
|
||||
}
|
||||
|
||||
if (!cgpu->opt_tc) {
|
||||
unsigned int sixtyfours;
|
||||
if (!cgpu->opt_tc) {
|
||||
unsigned int sixtyfours;
|
||||
|
||||
sixtyfours = cgpu->max_alloc / 131072 / 64 / (algorithm->n/1024) - 1;
|
||||
cgpu->thread_concurrency = sixtyfours * 64;
|
||||
if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders) {
|
||||
cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
|
||||
if (cgpu->thread_concurrency > cgpu->shaders * 5)
|
||||
cgpu->thread_concurrency = cgpu->shaders * 5;
|
||||
}
|
||||
applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %d", gpu, (int)(cgpu->thread_concurrency));
|
||||
} else
|
||||
cgpu->thread_concurrency = cgpu->opt_tc;
|
||||
sixtyfours = cgpu->max_alloc / 131072 / 64 / (algorithm->n/1024) - 1;
|
||||
cgpu->thread_concurrency = sixtyfours * 64;
|
||||
if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders) {
|
||||
cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
|
||||
if (cgpu->thread_concurrency > cgpu->shaders * 5)
|
||||
cgpu->thread_concurrency = cgpu->shaders * 5;
|
||||
}
|
||||
applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %d", gpu, (int)(cgpu->thread_concurrency));
|
||||
} else
|
||||
cgpu->thread_concurrency = cgpu->opt_tc;
|
||||
|
||||
|
||||
cl_uint slot, cpnd;
|
||||
cl_uint slot, cpnd;
|
||||
|
||||
slot = cpnd = 0;
|
||||
slot = cpnd = 0;
|
||||
|
||||
build_data->context = clState->context;
|
||||
build_data->device = &devices[gpu];
|
||||
build_data->context = clState->context;
|
||||
build_data->device = &devices[gpu];
|
||||
|
||||
// Build information
|
||||
// Build information
|
||||
strcpy(build_data->source_filename, filename);
|
||||
strcpy(build_data->platform, name);
|
||||
strcpy(build_data->sgminer_path, sgminer_path);
|
||||
if (opt_kernel_path && *opt_kernel_path)
|
||||
build_data->kernel_path = opt_kernel_path;
|
||||
build_data->kernel_path = opt_kernel_path;
|
||||
|
||||
build_data->work_size = clState->wsize;
|
||||
build_data->has_bit_align = clState->hasBitAlign;
|
||||
@ -378,47 +378,47 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
build_data->opencl_version = get_opencl_version(devices[gpu]);
|
||||
build_data->patch_bfi = needs_bfi_patch(build_data);
|
||||
|
||||
strcpy(build_data->binary_filename, cgpu->algorithm.name);
|
||||
strcat(build_data->binary_filename, name);
|
||||
if (clState->goffset)
|
||||
strcat(build_data->binary_filename, "g");
|
||||
strcpy(build_data->binary_filename, cgpu->algorithm.name);
|
||||
strcat(build_data->binary_filename, name);
|
||||
if (clState->goffset)
|
||||
strcat(build_data->binary_filename, "g");
|
||||
|
||||
set_base_compiler_options(build_data);
|
||||
if (algorithm->set_compile_options)
|
||||
algorithm->set_compile_options(build_data, cgpu, algorithm);
|
||||
algorithm->set_compile_options(build_data, cgpu, algorithm);
|
||||
|
||||
strcat(build_data->binary_filename, ".bin");
|
||||
strcat(build_data->binary_filename, ".bin");
|
||||
|
||||
// Load program from file or build it if it doesn't exist
|
||||
if (!(clState->program = load_opencl_binary_kernel(build_data))) {
|
||||
applog(LOG_NOTICE, "Building binary %s", build_data->binary_filename);
|
||||
// Load program from file or build it if it doesn't exist
|
||||
if (!(clState->program = load_opencl_binary_kernel(build_data))) {
|
||||
applog(LOG_NOTICE, "Building binary %s", build_data->binary_filename);
|
||||
|
||||
if (!(clState->program = build_opencl_kernel(build_data, filename)))
|
||||
return NULL;
|
||||
if (!(clState->program = build_opencl_kernel(build_data, filename)))
|
||||
return NULL;
|
||||
|
||||
if (save_opencl_kernel(build_data, clState->program)) {
|
||||
/* Program needs to be rebuilt, because the binary was patched */
|
||||
if (build_data->patch_bfi) {
|
||||
clReleaseProgram(clState->program);
|
||||
clState->program = load_opencl_binary_kernel(build_data);
|
||||
}
|
||||
} else {
|
||||
if (save_opencl_kernel(build_data, clState->program)) {
|
||||
/* Program needs to be rebuilt, because the binary was patched */
|
||||
if (build_data->patch_bfi) {
|
||||
clReleaseProgram(clState->program);
|
||||
clState->program = load_opencl_binary_kernel(build_data);
|
||||
}
|
||||
} else {
|
||||
if (build_data->patch_bfi)
|
||||
quit(1, "Could not save kernel to file, but it is necessary to apply BFI patch");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Load kernels
|
||||
applog(LOG_NOTICE, "Initialising kernel %s with%s bitalign, %spatched BFI, nfactor %d, n %d",
|
||||
filename, clState->hasBitAlign ? "" : "out", build_data->patch_bfi ? "" : "un",
|
||||
algorithm->nfactor, algorithm->n);
|
||||
// Load kernels
|
||||
applog(LOG_NOTICE, "Initialising kernel %s with%s bitalign, %spatched BFI, nfactor %d, n %d",
|
||||
filename, clState->hasBitAlign ? "" : "out", build_data->patch_bfi ? "" : "un",
|
||||
algorithm->nfactor, algorithm->n);
|
||||
|
||||
/* get a kernel object handle for a kernel with the given name */
|
||||
clState->kernel = clCreateKernel(clState->program, "search", &status);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Creating Kernel from program. (clCreateKernel)", status);
|
||||
return NULL;
|
||||
}
|
||||
/* get a kernel object handle for a kernel with the given name */
|
||||
clState->kernel = clCreateKernel(clState->program, "search", &status);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Creating Kernel from program. (clCreateKernel)", status);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
clState->n_extra_kernels = algorithm->n_extra_kernels;
|
||||
@ -438,7 +438,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
}
|
||||
}
|
||||
|
||||
size_t bufsize;
|
||||
size_t bufsize;
|
||||
|
||||
if (algorithm->rw_buffer_size < 0) {
|
||||
size_t ipt = (algorithm->n / cgpu->lookup_gap +
|
||||
@ -469,18 +469,18 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
}
|
||||
}
|
||||
|
||||
clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: clCreateBuffer (CLbuffer0)", status);
|
||||
return NULL;
|
||||
}
|
||||
clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, BUFFERSIZE, NULL, &status);
|
||||
clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: clCreateBuffer (CLbuffer0)", status);
|
||||
return NULL;
|
||||
}
|
||||
clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, BUFFERSIZE, NULL, &status);
|
||||
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: clCreateBuffer (outputBuffer)", status);
|
||||
return NULL;
|
||||
}
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: clCreateBuffer (outputBuffer)", status);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return clState;
|
||||
return clState;
|
||||
}
|
||||
|
||||
|
32
ocl.h
32
ocl.h
@ -13,22 +13,22 @@
|
||||
#include "miner.h"
|
||||
|
||||
typedef struct __clState {
|
||||
cl_context context;
|
||||
cl_kernel kernel;
|
||||
cl_kernel *extra_kernels;
|
||||
size_t n_extra_kernels;
|
||||
cl_command_queue commandQueue;
|
||||
cl_program program;
|
||||
cl_mem outputBuffer;
|
||||
cl_mem CLbuffer0;
|
||||
cl_mem padbuffer8;
|
||||
unsigned char cldata[80];
|
||||
bool hasBitAlign;
|
||||
bool goffset;
|
||||
cl_uint vwidth;
|
||||
size_t max_work_size;
|
||||
size_t wsize;
|
||||
size_t compute_shaders;
|
||||
cl_context context;
|
||||
cl_kernel kernel;
|
||||
cl_kernel *extra_kernels;
|
||||
size_t n_extra_kernels;
|
||||
cl_command_queue commandQueue;
|
||||
cl_program program;
|
||||
cl_mem outputBuffer;
|
||||
cl_mem CLbuffer0;
|
||||
cl_mem padbuffer8;
|
||||
unsigned char cldata[80];
|
||||
bool hasBitAlign;
|
||||
bool goffset;
|
||||
cl_uint vwidth;
|
||||
size_t max_work_size;
|
||||
size_t wsize;
|
||||
size_t compute_shaders;
|
||||
} _clState;
|
||||
|
||||
extern int clDevicesNum(void);
|
||||
|
112
util.h
112
util.h
@ -4,64 +4,64 @@
|
||||
#include <semaphore.h>
|
||||
|
||||
#if defined(unix) || defined(__APPLE__)
|
||||
#include <errno.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <errno.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
|
||||
#define SOCKETTYPE long
|
||||
#define SOCKETFAIL(a) ((a) < 0)
|
||||
#define INVSOCK -1
|
||||
#define INVINETADDR -1
|
||||
#define CLOSESOCKET close
|
||||
#define SOCKETTYPE long
|
||||
#define SOCKETFAIL(a) ((a) < 0)
|
||||
#define INVSOCK -1
|
||||
#define INVINETADDR -1
|
||||
#define CLOSESOCKET close
|
||||
|
||||
#define SOCKERRMSG strerror(errno)
|
||||
static inline bool sock_blocks(void)
|
||||
{
|
||||
return (errno == EAGAIN || errno == EWOULDBLOCK);
|
||||
}
|
||||
static inline bool sock_timeout(void)
|
||||
{
|
||||
return (errno == ETIMEDOUT);
|
||||
}
|
||||
static inline bool interrupted(void)
|
||||
{
|
||||
return (errno == EINTR);
|
||||
}
|
||||
#define SOCKERRMSG strerror(errno)
|
||||
static inline bool sock_blocks(void)
|
||||
{
|
||||
return (errno == EAGAIN || errno == EWOULDBLOCK);
|
||||
}
|
||||
static inline bool sock_timeout(void)
|
||||
{
|
||||
return (errno == ETIMEDOUT);
|
||||
}
|
||||
static inline bool interrupted(void)
|
||||
{
|
||||
return (errno == EINTR);
|
||||
}
|
||||
#elif defined WIN32
|
||||
#include <ws2tcpip.h>
|
||||
#include <winsock2.h>
|
||||
#include <ws2tcpip.h>
|
||||
#include <winsock2.h>
|
||||
|
||||
#define SOCKETTYPE SOCKET
|
||||
#define SOCKETFAIL(a) ((int)(a) == SOCKET_ERROR)
|
||||
#define INVSOCK INVALID_SOCKET
|
||||
#define INVINETADDR INADDR_NONE
|
||||
#define CLOSESOCKET closesocket
|
||||
#define SOCKETTYPE SOCKET
|
||||
#define SOCKETFAIL(a) ((int)(a) == SOCKET_ERROR)
|
||||
#define INVSOCK INVALID_SOCKET
|
||||
#define INVINETADDR INADDR_NONE
|
||||
#define CLOSESOCKET closesocket
|
||||
|
||||
extern char *WSAErrorMsg(void);
|
||||
#define SOCKERRMSG WSAErrorMsg()
|
||||
extern char *WSAErrorMsg(void);
|
||||
#define SOCKERRMSG WSAErrorMsg()
|
||||
|
||||
/* Check for windows variants of the errors as well as when mingw
|
||||
* decides to wrap the error into the errno equivalent. */
|
||||
static inline bool sock_blocks(void)
|
||||
{
|
||||
return (WSAGetLastError() == WSAEWOULDBLOCK || errno == EAGAIN);
|
||||
}
|
||||
static inline bool sock_timeout(void)
|
||||
{
|
||||
return (WSAGetLastError() == WSAETIMEDOUT || errno == ETIMEDOUT);
|
||||
}
|
||||
static inline bool interrupted(void)
|
||||
{
|
||||
return (WSAGetLastError() == WSAEINTR || errno == EINTR);
|
||||
}
|
||||
#ifndef SHUT_RDWR
|
||||
#define SHUT_RDWR SD_BOTH
|
||||
#endif
|
||||
/* Check for windows variants of the errors as well as when mingw
|
||||
* decides to wrap the error into the errno equivalent. */
|
||||
static inline bool sock_blocks(void)
|
||||
{
|
||||
return (WSAGetLastError() == WSAEWOULDBLOCK || errno == EAGAIN);
|
||||
}
|
||||
static inline bool sock_timeout(void)
|
||||
{
|
||||
return (WSAGetLastError() == WSAETIMEDOUT || errno == ETIMEDOUT);
|
||||
}
|
||||
static inline bool interrupted(void)
|
||||
{
|
||||
return (WSAGetLastError() == WSAEINTR || errno == EINTR);
|
||||
}
|
||||
#ifndef SHUT_RDWR
|
||||
#define SHUT_RDWR SD_BOTH
|
||||
#endif
|
||||
|
||||
#ifndef in_addr_t
|
||||
#define in_addr_t uint32_t
|
||||
#endif
|
||||
#ifndef in_addr_t
|
||||
#define in_addr_t uint32_t
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if JANSSON_MAJOR_VERSION >= 2
|
||||
@ -79,8 +79,8 @@ typedef int proxytypes_t;
|
||||
|
||||
/* sgminer locks, a write biased variant of rwlocks */
|
||||
struct cglock {
|
||||
pthread_mutex_t mutex;
|
||||
pthread_rwlock_t rwlock;
|
||||
pthread_mutex_t mutex;
|
||||
pthread_rwlock_t rwlock;
|
||||
};
|
||||
|
||||
typedef struct cglock cglock_t;
|
||||
@ -89,7 +89,7 @@ typedef struct cglock cglock_t;
|
||||
* implementing them. */
|
||||
#ifdef __APPLE__
|
||||
struct cgsem {
|
||||
int pipefd[2];
|
||||
int pipefd[2];
|
||||
};
|
||||
|
||||
typedef struct cgsem cgsem_t;
|
||||
@ -160,8 +160,8 @@ bool cg_completion_timeout(void *fn, void *fnarg, int timeout);
|
||||
/* Align a size_t to 4 byte boundaries for fussy arches */
|
||||
static inline void align_len(size_t *len)
|
||||
{
|
||||
if (*len % 4)
|
||||
*len += 4 - (*len % 4);
|
||||
if (*len % 4)
|
||||
*len += 4 - (*len % 4);
|
||||
}
|
||||
|
||||
#endif /* UTIL_H */
|
||||
|
Loading…
x
Reference in New Issue
Block a user