|
|
@ -48,6 +48,7 @@ const int debug = 0; |
|
|
|
#define MAX_ARG 6 |
|
|
|
#define MAX_ARG 6 |
|
|
|
#define MAX_KERNEL 3 |
|
|
|
#define MAX_KERNEL 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define is_pow2(v) (!((v) & ((v)-1))) |
|
|
|
#define round_up_pow2(x, a) (((x) + ((a)-1)) & ~((a)-1)) |
|
|
|
#define round_up_pow2(x, a) (((x) + ((a)-1)) & ~((a)-1)) |
|
|
|
|
|
|
|
|
|
|
|
/* OpenCL address searching mode */ |
|
|
|
/* OpenCL address searching mode */ |
|
|
@ -74,6 +75,8 @@ typedef struct _vg_ocl_context_s { |
|
|
|
int voc_pattern_rewrite; |
|
|
|
int voc_pattern_rewrite; |
|
|
|
int voc_pattern_alloc; |
|
|
|
int voc_pattern_alloc; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vg_ocl_check_t voc_verify_func[MAX_KERNEL]; |
|
|
|
|
|
|
|
|
|
|
|
pthread_t voc_ocl_thread; |
|
|
|
pthread_t voc_ocl_thread; |
|
|
|
pthread_mutex_t voc_lock; |
|
|
|
pthread_mutex_t voc_lock; |
|
|
|
pthread_cond_t voc_wait; |
|
|
|
pthread_cond_t voc_wait; |
|
|
@ -1164,6 +1167,9 @@ vg_ocl_kernel_start(vg_ocl_context_t *vocp, int slot, int ncol, int nrow, |
|
|
|
|
|
|
|
|
|
|
|
assert(!vocp->voc_oclkrnwait[slot]); |
|
|
|
assert(!vocp->voc_oclkrnwait[slot]); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* heap_invert() preconditions */ |
|
|
|
|
|
|
|
assert(is_pow2(invsize) && (invsize > 1)); |
|
|
|
|
|
|
|
|
|
|
|
val = invsize; |
|
|
|
val = invsize; |
|
|
|
ret = clSetKernelArg(vocp->voc_oclkernel[slot][1], |
|
|
|
ret = clSetKernelArg(vocp->voc_oclkernel[slot][1], |
|
|
|
1, |
|
|
|
1, |
|
|
@ -1191,6 +1197,12 @@ vg_ocl_kernel_start(vg_ocl_context_t *vocp, int slot, int ncol, int nrow, |
|
|
|
return 0; |
|
|
|
return 0; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (vocp->voc_verify_func[0] && |
|
|
|
|
|
|
|
!(vocp->voc_verify_func[0])(vocp, slot)) { |
|
|
|
|
|
|
|
printf("ERROR: Kernel 0 failed verification test\n"); |
|
|
|
|
|
|
|
return 0; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
ret = clEnqueueNDRangeKernel(vocp->voc_oclcmdq, |
|
|
|
ret = clEnqueueNDRangeKernel(vocp->voc_oclcmdq, |
|
|
|
vocp->voc_oclkernel[slot][1], |
|
|
|
vocp->voc_oclkernel[slot][1], |
|
|
|
1, |
|
|
|
1, |
|
|
@ -1209,6 +1221,12 @@ vg_ocl_kernel_start(vg_ocl_context_t *vocp, int slot, int ncol, int nrow, |
|
|
|
return 0; |
|
|
|
return 0; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (vocp->voc_verify_func[1] && |
|
|
|
|
|
|
|
!(vocp->voc_verify_func[1])(vocp, slot)) { |
|
|
|
|
|
|
|
printf("ERROR: Kernel 1 failed verification test\n"); |
|
|
|
|
|
|
|
return 0; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
ret = clEnqueueNDRangeKernel(vocp->voc_oclcmdq, |
|
|
|
ret = clEnqueueNDRangeKernel(vocp->voc_oclcmdq, |
|
|
|
vocp->voc_oclkernel[slot][2], |
|
|
|
vocp->voc_oclkernel[slot][2], |
|
|
|
2, |
|
|
|
2, |
|
|
@ -1244,6 +1262,45 @@ vg_ocl_kernel_wait(vg_ocl_context_t *vocp, int slot) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
INLINE void |
|
|
|
|
|
|
|
vg_ocl_get_bignum_raw(BIGNUM *bn, const unsigned char *buf) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
bn_expand(bn, 256); |
|
|
|
|
|
|
|
memcpy(bn->d, buf, 32); |
|
|
|
|
|
|
|
bn->top = (32 / sizeof(BN_ULONG)); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
INLINE void |
|
|
|
|
|
|
|
vg_ocl_put_bignum_raw(unsigned char *buf, const BIGNUM *bn) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
int bnlen = (bn->top * sizeof(BN_ULONG)); |
|
|
|
|
|
|
|
if (bnlen >= 32) { |
|
|
|
|
|
|
|
memcpy(buf, bn->d, 32); |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
memcpy(buf, bn->d, bnlen); |
|
|
|
|
|
|
|
memset(buf + bnlen, 0, 32 - bnlen); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define ACCESS_BUNDLE 1024 |
|
|
|
|
|
|
|
#define ACCESS_STRIDE (ACCESS_BUNDLE/8) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void |
|
|
|
|
|
|
|
vg_ocl_get_bignum_tpa(BIGNUM *bn, const unsigned char *buf, int cell) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
unsigned char bnbuf[32]; |
|
|
|
|
|
|
|
int start, i; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
start = (((cell / ACCESS_STRIDE) * ACCESS_BUNDLE) + |
|
|
|
|
|
|
|
(cell % ACCESS_STRIDE)); |
|
|
|
|
|
|
|
for (i = 0; i < 8; i++) |
|
|
|
|
|
|
|
memcpy(bnbuf+(i*4), |
|
|
|
|
|
|
|
buf + 4*(start + i*ACCESS_STRIDE), |
|
|
|
|
|
|
|
4); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vg_ocl_get_bignum_raw(bn, bnbuf); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Absolutely disgusting. |
|
|
|
* Absolutely disgusting. |
|
|
|
* We want points in Montgomery form, and it's a lot easier to read the |
|
|
|
* We want points in Montgomery form, and it's a lot easier to read the |
|
|
@ -1259,20 +1316,29 @@ struct ec_point_st { |
|
|
|
}; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
INLINE void |
|
|
|
INLINE void |
|
|
|
vg_ocl_put_point(unsigned char *buf, EC_POINT *ppnt) |
|
|
|
vg_ocl_get_point(EC_POINT *ppnt, const unsigned char *buf) |
|
|
|
{ |
|
|
|
{ |
|
|
|
assert(ppnt->Z_is_one); |
|
|
|
static const unsigned char mont_one[] = { 0x01,0x00,0x00,0x03,0xd1 }; |
|
|
|
memcpy(buf, ppnt->X.d, 32); |
|
|
|
vg_ocl_get_bignum_raw(&ppnt->X, buf); |
|
|
|
memcpy(buf + 32, ppnt->Y.d, 32); |
|
|
|
vg_ocl_get_bignum_raw(&ppnt->Y, buf + 32); |
|
|
|
|
|
|
|
if (!ppnt->Z_is_one) { |
|
|
|
|
|
|
|
ppnt->Z_is_one = 1; |
|
|
|
|
|
|
|
BN_bin2bn(mont_one, sizeof(mont_one), &ppnt->Z); |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#define ACCESS_BUNDLE 1024 |
|
|
|
|
|
|
|
#define ACCESS_STRIDE (ACCESS_BUNDLE/8) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
INLINE void |
|
|
|
INLINE void |
|
|
|
vg_ocl_put_point_tpa(unsigned char *buf, int cell, EC_POINT *ppnt) |
|
|
|
vg_ocl_put_point(unsigned char *buf, const EC_POINT *ppnt) |
|
|
|
{ |
|
|
|
{ |
|
|
|
uint8_t pntbuf[64]; |
|
|
|
assert(ppnt->Z_is_one); |
|
|
|
|
|
|
|
vg_ocl_put_bignum_raw(buf, &ppnt->X); |
|
|
|
|
|
|
|
vg_ocl_put_bignum_raw(buf + 32, &ppnt->Y); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void |
|
|
|
|
|
|
|
vg_ocl_put_point_tpa(unsigned char *buf, int cell, const EC_POINT *ppnt) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
unsigned char pntbuf[64]; |
|
|
|
int start, i; |
|
|
|
int start, i; |
|
|
|
|
|
|
|
|
|
|
|
vg_ocl_put_point(pntbuf, ppnt); |
|
|
|
vg_ocl_put_point(pntbuf, ppnt); |
|
|
@ -1289,6 +1355,26 @@ vg_ocl_put_point_tpa(unsigned char *buf, int cell, EC_POINT *ppnt) |
|
|
|
4); |
|
|
|
4); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void |
|
|
|
|
|
|
|
vg_ocl_get_point_tpa(EC_POINT *ppnt, const unsigned char *buf, int cell) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
unsigned char pntbuf[64]; |
|
|
|
|
|
|
|
int start, i; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
start = ((((2 * cell) / ACCESS_STRIDE) * ACCESS_BUNDLE) + |
|
|
|
|
|
|
|
(cell % (ACCESS_STRIDE/2))); |
|
|
|
|
|
|
|
for (i = 0; i < 8; i++) |
|
|
|
|
|
|
|
memcpy(pntbuf+(i*4), |
|
|
|
|
|
|
|
buf + 4*(start + i*ACCESS_STRIDE), |
|
|
|
|
|
|
|
4); |
|
|
|
|
|
|
|
for (i = 0; i < 8; i++) |
|
|
|
|
|
|
|
memcpy(pntbuf+32+(i*4), |
|
|
|
|
|
|
|
buf + 4*(start + (ACCESS_STRIDE/2) + (i*ACCESS_STRIDE)), |
|
|
|
|
|
|
|
4); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vg_ocl_get_point(ppnt, pntbuf); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
void |
|
|
|
void |
|
|
|
show_elapsed(struct timeval *tv, const char *place) |
|
|
|
show_elapsed(struct timeval *tv, const char *place) |
|
|
|
{ |
|
|
|
{ |
|
|
@ -1425,7 +1511,14 @@ vg_ocl_prefix_check(vg_ocl_context_t *vocp, int slot) |
|
|
|
orig_delta = vxcp->vxc_delta; |
|
|
|
orig_delta = vxcp->vxc_delta; |
|
|
|
vxcp->vxc_delta += found_delta; |
|
|
|
vxcp->vxc_delta += found_delta; |
|
|
|
vg_exec_context_calc_address(vxcp); |
|
|
|
vg_exec_context_calc_address(vxcp); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Make sure the GPU produced the expected hash */ |
|
|
|
|
|
|
|
res = 0; |
|
|
|
|
|
|
|
if (!memcmp(vxcp->vxc_binres + 1, |
|
|
|
|
|
|
|
ocl_found_out + 2, |
|
|
|
|
|
|
|
20)) { |
|
|
|
res = test_func(vxcp); |
|
|
|
res = test_func(vxcp); |
|
|
|
|
|
|
|
} |
|
|
|
if (res == 0) { |
|
|
|
if (res == 0) { |
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* The match was not found in |
|
|
|
* The match was not found in |
|
|
@ -1491,6 +1584,172 @@ vg_ocl_config_pattern(vg_ocl_context_t *vocp) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
|
|
* Temporary buffer content verification functions |
|
|
|
|
|
|
|
* This provides a simple test of the kernel, the OpenCL compiler, |
|
|
|
|
|
|
|
* and the hardware. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
int |
|
|
|
|
|
|
|
vg_ocl_verify_temporary(vg_ocl_context_t *vocp, int slot, int z_inverted) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
vg_exec_context_t *vxcp = &vocp->base; |
|
|
|
|
|
|
|
unsigned char *point_tmp = NULL, *z_heap = NULL; |
|
|
|
|
|
|
|
unsigned char *ocl_points_in = NULL, *ocl_strides_in = NULL; |
|
|
|
|
|
|
|
const EC_GROUP *pgroup; |
|
|
|
|
|
|
|
EC_POINT *ppr = NULL, *ppc = NULL, *pps = NULL, *ppt = NULL; |
|
|
|
|
|
|
|
BIGNUM bnz, bnez, bnm, *bnzc; |
|
|
|
|
|
|
|
BN_CTX *bnctx = NULL; |
|
|
|
|
|
|
|
BN_MONT_CTX *bnmont; |
|
|
|
|
|
|
|
int ret = 0; |
|
|
|
|
|
|
|
int mismatches = 0, mm_r; |
|
|
|
|
|
|
|
int x, y, bx; |
|
|
|
|
|
|
|
static const unsigned char raw_modulus[] = { |
|
|
|
|
|
|
|
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, |
|
|
|
|
|
|
|
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, |
|
|
|
|
|
|
|
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, |
|
|
|
|
|
|
|
0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F |
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
BN_init(&bnz); |
|
|
|
|
|
|
|
BN_init(&bnez); |
|
|
|
|
|
|
|
BN_init(&bnm); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bnctx = BN_CTX_new(); |
|
|
|
|
|
|
|
bnmont = BN_MONT_CTX_new(); |
|
|
|
|
|
|
|
pgroup = EC_KEY_get0_group(vxcp->vxc_key); |
|
|
|
|
|
|
|
ppr = EC_POINT_new(pgroup); |
|
|
|
|
|
|
|
ppc = EC_POINT_new(pgroup); |
|
|
|
|
|
|
|
pps = EC_POINT_new(pgroup); |
|
|
|
|
|
|
|
ppt = EC_POINT_new(pgroup); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (!bnctx || !bnmont || !ppr || !ppc || !pps || !ppt) { |
|
|
|
|
|
|
|
printf("ERROR: out of memory\n"); |
|
|
|
|
|
|
|
goto out; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
BN_bin2bn(raw_modulus, sizeof(raw_modulus), &bnm); |
|
|
|
|
|
|
|
BN_MONT_CTX_set(bnmont, &bnm, bnctx); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (z_inverted) { |
|
|
|
|
|
|
|
bnzc = &bnez; |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
bnzc = &pps->Z; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
z_heap = (unsigned char *) |
|
|
|
|
|
|
|
vg_ocl_map_arg_buffer(vocp, slot, 1, 0); |
|
|
|
|
|
|
|
point_tmp = (unsigned char *) |
|
|
|
|
|
|
|
vg_ocl_map_arg_buffer(vocp, slot, 2, 0); |
|
|
|
|
|
|
|
ocl_points_in = (unsigned char *) |
|
|
|
|
|
|
|
vg_ocl_map_arg_buffer(vocp, slot, 3, 0); |
|
|
|
|
|
|
|
ocl_strides_in = (unsigned char *) |
|
|
|
|
|
|
|
vg_ocl_map_arg_buffer(vocp, slot, 4, 0); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (!z_heap || !point_tmp || !ocl_points_in || !ocl_strides_in) { |
|
|
|
|
|
|
|
printf("ERROR: could not map OpenCL point buffers\n"); |
|
|
|
|
|
|
|
goto out; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (y = 0; y < vocp->voc_ocl_rows; y++) { |
|
|
|
|
|
|
|
vg_ocl_get_point(ppr, ocl_strides_in + (64*y)); |
|
|
|
|
|
|
|
bx = y * vocp->voc_ocl_cols; |
|
|
|
|
|
|
|
mm_r = 0; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (x = 0; x < vocp->voc_ocl_cols; x++) { |
|
|
|
|
|
|
|
vg_ocl_get_point_tpa(ppc, ocl_points_in, x); |
|
|
|
|
|
|
|
assert(ppr->Z_is_one && ppc->Z_is_one); |
|
|
|
|
|
|
|
EC_POINT_add(pgroup, pps, ppc, ppr, bnctx); |
|
|
|
|
|
|
|
assert(!pps->Z_is_one); |
|
|
|
|
|
|
|
vg_ocl_get_point_tpa(ppt, point_tmp, bx + x); |
|
|
|
|
|
|
|
vg_ocl_get_bignum_tpa(&bnz, z_heap, bx + x); |
|
|
|
|
|
|
|
if (z_inverted) { |
|
|
|
|
|
|
|
BN_mod_inverse(&bnez, &pps->Z, &bnm, bnctx); |
|
|
|
|
|
|
|
BN_to_montgomery(&bnez, &bnez, bnmont, bnctx); |
|
|
|
|
|
|
|
BN_to_montgomery(&bnez, &bnez, bnmont, bnctx); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (BN_cmp(&ppt->X, &pps->X) || |
|
|
|
|
|
|
|
BN_cmp(&ppt->Y, &pps->Y) || |
|
|
|
|
|
|
|
BN_cmp(&bnz, bnzc)) { |
|
|
|
|
|
|
|
mismatches++; |
|
|
|
|
|
|
|
printf("Mismatch for kernel %d, " |
|
|
|
|
|
|
|
"offset %d (%d,%d)\n", |
|
|
|
|
|
|
|
z_inverted, bx + x, y, x); |
|
|
|
|
|
|
|
if (!mm_r) { |
|
|
|
|
|
|
|
mm_r = 1; |
|
|
|
|
|
|
|
printf("Row X : "); |
|
|
|
|
|
|
|
dumpbn(&ppr->X); |
|
|
|
|
|
|
|
printf("Row Y : "); |
|
|
|
|
|
|
|
dumpbn(&ppr->Y); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
printf("Column X: "); |
|
|
|
|
|
|
|
dumpbn(&ppc->X); |
|
|
|
|
|
|
|
printf("Column Y: "); |
|
|
|
|
|
|
|
dumpbn(&ppc->Y); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (BN_cmp(&ppt->X, &pps->X)) { |
|
|
|
|
|
|
|
printf("Expect X: "); |
|
|
|
|
|
|
|
dumpbn(&pps->X); |
|
|
|
|
|
|
|
printf("Device X: "); |
|
|
|
|
|
|
|
dumpbn(&ppt->X); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (BN_cmp(&ppt->Y, &pps->Y)) { |
|
|
|
|
|
|
|
printf("Expect Y: "); |
|
|
|
|
|
|
|
dumpbn(&pps->Y); |
|
|
|
|
|
|
|
printf("Device Y: "); |
|
|
|
|
|
|
|
dumpbn(&ppt->Y); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (BN_cmp(&bnz, bnzc)) { |
|
|
|
|
|
|
|
printf("Expect Z: "); |
|
|
|
|
|
|
|
dumpbn(bnzc); |
|
|
|
|
|
|
|
printf("Device Z: "); |
|
|
|
|
|
|
|
dumpbn(&bnz); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ret = !mismatches; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
out: |
|
|
|
|
|
|
|
if (z_heap) |
|
|
|
|
|
|
|
vg_ocl_unmap_arg_buffer(vocp, slot, 1, z_heap); |
|
|
|
|
|
|
|
if (point_tmp) |
|
|
|
|
|
|
|
vg_ocl_unmap_arg_buffer(vocp, slot, 2, point_tmp); |
|
|
|
|
|
|
|
if (ocl_points_in) |
|
|
|
|
|
|
|
vg_ocl_unmap_arg_buffer(vocp, slot, 3, ocl_points_in); |
|
|
|
|
|
|
|
if (ocl_strides_in) |
|
|
|
|
|
|
|
vg_ocl_unmap_arg_buffer(vocp, slot, 4, ocl_strides_in); |
|
|
|
|
|
|
|
if (ppr) |
|
|
|
|
|
|
|
EC_POINT_free(ppr); |
|
|
|
|
|
|
|
if (ppc) |
|
|
|
|
|
|
|
EC_POINT_free(ppc); |
|
|
|
|
|
|
|
if (pps) |
|
|
|
|
|
|
|
EC_POINT_free(pps); |
|
|
|
|
|
|
|
if (ppt) |
|
|
|
|
|
|
|
EC_POINT_free(ppt); |
|
|
|
|
|
|
|
BN_clear_free(&bnz); |
|
|
|
|
|
|
|
BN_clear_free(&bnez); |
|
|
|
|
|
|
|
BN_clear_free(&bnm); |
|
|
|
|
|
|
|
if (bnmont) |
|
|
|
|
|
|
|
BN_MONT_CTX_free(bnmont); |
|
|
|
|
|
|
|
if (bnctx) |
|
|
|
|
|
|
|
BN_CTX_free(bnctx); |
|
|
|
|
|
|
|
return ret; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int |
|
|
|
|
|
|
|
vg_ocl_verify_k0(vg_ocl_context_t *vocp, int slot) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
return vg_ocl_verify_temporary(vocp, slot, 0); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int |
|
|
|
|
|
|
|
vg_ocl_verify_k1(vg_ocl_context_t *vocp, int slot) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
return vg_ocl_verify_temporary(vocp, slot, 1); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
void * |
|
|
|
void * |
|
|
|
vg_opencl_thread(void *arg) |
|
|
|
vg_opencl_thread(void *arg) |
|
|
|
{ |
|
|
|
{ |
|
|
@ -1578,7 +1837,7 @@ out: |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
void * |
|
|
|
void * |
|
|
|
vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode, |
|
|
|
vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode, int verify, |
|
|
|
int worksize, int nthreads, int nrows, int ncols, int invsize) |
|
|
|
int worksize, int nthreads, int nrows, int ncols, int invsize) |
|
|
|
{ |
|
|
|
{ |
|
|
|
int i; |
|
|
|
int i; |
|
|
@ -1609,6 +1868,16 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode, |
|
|
|
if (!vg_ocl_init(vcp, &ctx, did, safe_mode)) |
|
|
|
if (!vg_ocl_init(vcp, &ctx, did, safe_mode)) |
|
|
|
return NULL; |
|
|
|
return NULL; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (verify) { |
|
|
|
|
|
|
|
if (vcp->vc_verbose > 0) { |
|
|
|
|
|
|
|
printf("WARNING: Hardware verification mode enabled\n"); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (!worksize) |
|
|
|
|
|
|
|
worksize = 1; |
|
|
|
|
|
|
|
vocp->voc_verify_func[0] = vg_ocl_verify_k0; |
|
|
|
|
|
|
|
vocp->voc_verify_func[1] = vg_ocl_verify_k1; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
pkey = vxcp->vxc_key; |
|
|
|
pkey = vxcp->vxc_key; |
|
|
|
pgroup = EC_KEY_get0_group(pkey); |
|
|
|
pgroup = EC_KEY_get0_group(pkey); |
|
|
|
pgen = EC_GROUP_get0_generator(pgroup); |
|
|
|
pgen = EC_GROUP_get0_generator(pgroup); |
|
|
@ -1700,7 +1969,7 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode, |
|
|
|
round = nrows * ncols; |
|
|
|
round = nrows * ncols; |
|
|
|
|
|
|
|
|
|
|
|
if (!invsize) { |
|
|
|
if (!invsize) { |
|
|
|
invsize = 1; |
|
|
|
invsize = 2; |
|
|
|
while (!(round % (invsize << 1)) && |
|
|
|
while (!(round % (invsize << 1)) && |
|
|
|
((round / invsize) > full_threads)) |
|
|
|
((round / invsize) > full_threads)) |
|
|
|
invsize <<= 1; |
|
|
|
invsize <<= 1; |
|
|
@ -1712,7 +1981,7 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode, |
|
|
|
round/invsize, invsize); |
|
|
|
round/invsize, invsize); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if ((round % invsize) || (invsize & (invsize-1))) { |
|
|
|
if ((round % invsize) || !is_pow2(invsize) || (invsize < 2)) { |
|
|
|
if (vcp->vc_verbose <= 1) { |
|
|
|
if (vcp->vc_verbose <= 1) { |
|
|
|
printf("Grid size: %dx%d\n", ncols, nrows); |
|
|
|
printf("Grid size: %dx%d\n", ncols, nrows); |
|
|
|
printf("Modular inverse: %d threads, %d ops each\n", |
|
|
|
printf("Modular inverse: %d threads, %d ops each\n", |
|
|
@ -2257,6 +2526,7 @@ usage(const char *name) |
|
|
|
"-t <threads> Set target thread count per multiprocessor\n" |
|
|
|
"-t <threads> Set target thread count per multiprocessor\n" |
|
|
|
"-g <x>x<y> Set grid size\n" |
|
|
|
"-g <x>x<y> Set grid size\n" |
|
|
|
"-b <invsize> Set modular inverse ops per thread\n" |
|
|
|
"-b <invsize> Set modular inverse ops per thread\n" |
|
|
|
|
|
|
|
"-V Enable kernel/OpenCL/hardware verification (SLOW)\n" |
|
|
|
"-f <file> File containing list of patterns, one per line\n" |
|
|
|
"-f <file> File containing list of patterns, one per line\n" |
|
|
|
" (Use \"-\" as the file name for stdin)\n" |
|
|
|
" (Use \"-\" as the file name for stdin)\n" |
|
|
|
"-o <file> Write pattern matches to <file>\n" |
|
|
|
"-o <file> Write pattern matches to <file>\n" |
|
|
@ -2285,6 +2555,7 @@ main(int argc, char **argv) |
|
|
|
int nrows = 0, ncols = 0; |
|
|
|
int nrows = 0, ncols = 0; |
|
|
|
int invsize = 0; |
|
|
|
int invsize = 0; |
|
|
|
int remove_on_match = 1; |
|
|
|
int remove_on_match = 1; |
|
|
|
|
|
|
|
int verify_mode = 0; |
|
|
|
int safe_mode = 0; |
|
|
|
int safe_mode = 0; |
|
|
|
vg_context_t *vcp = NULL; |
|
|
|
vg_context_t *vcp = NULL; |
|
|
|
cl_device_id did; |
|
|
|
cl_device_id did; |
|
|
@ -2292,7 +2563,7 @@ main(int argc, char **argv) |
|
|
|
const char *key_password = NULL; |
|
|
|
const char *key_password = NULL; |
|
|
|
|
|
|
|
|
|
|
|
while ((opt = getopt(argc, argv, |
|
|
|
while ((opt = getopt(argc, argv, |
|
|
|
"vqrikNTX:eE:p:d:w:t:g:b:Sh?f:o:s:")) != -1) { |
|
|
|
"vqrikNTX:eE:p:d:w:t:g:b:VSh?f:o:s:")) != -1) { |
|
|
|
switch (opt) { |
|
|
|
switch (opt) { |
|
|
|
case 'v': |
|
|
|
case 'v': |
|
|
|
verbose = 2; |
|
|
|
verbose = 2; |
|
|
@ -2371,6 +2642,9 @@ main(int argc, char **argv) |
|
|
|
return 1; |
|
|
|
return 1; |
|
|
|
} |
|
|
|
} |
|
|
|
break; |
|
|
|
break; |
|
|
|
|
|
|
|
case 'V': |
|
|
|
|
|
|
|
verify_mode = 1; |
|
|
|
|
|
|
|
break; |
|
|
|
case 'S': |
|
|
|
case 'S': |
|
|
|
safe_mode = 1; |
|
|
|
safe_mode = 1; |
|
|
|
break; |
|
|
|
break; |
|
|
@ -2498,7 +2772,7 @@ main(int argc, char **argv) |
|
|
|
return 1; |
|
|
|
return 1; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
vg_opencl_loop(vcp, did, safe_mode, |
|
|
|
vg_opencl_loop(vcp, did, safe_mode, verify_mode, |
|
|
|
worksize, nthreads, nrows, ncols, invsize); |
|
|
|
worksize, nthreads, nrows, ncols, invsize); |
|
|
|
return 0; |
|
|
|
return 0; |
|
|
|
} |
|
|
|
} |
|
|
|