Browse Source

Add verification mode for tracking down OpenCL/hardware problems.

Require invsize > 1 so that heap_invert behaves correctly.
master
samr7 13 years ago
parent
commit
458bb28add
  1. 304
      oclvanitygen.c

304
oclvanitygen.c

@ -48,6 +48,7 @@ const int debug = 0; @@ -48,6 +48,7 @@ const int debug = 0;
#define MAX_ARG 6
#define MAX_KERNEL 3
/*
 * Evaluates to 1 when v is a power of two (1, 2, 4, ...), otherwise 0.
 * Zero is rejected explicitly: it has no bit set, but the bare
 * (v & (v-1)) test would otherwise let it through.
 * v is evaluated more than once, so pass an expression without side effects.
 */
#define is_pow2(v) (((v) != 0) && !((v) & ((v)-1)))
/*
 * Round x up to the next multiple of a.  The result is produced by masking
 * with ~(a-1), which only yields a multiple of a when a is a power of two.
 * a is evaluated twice, so pass an expression without side effects.
 */
#define round_up_pow2(x, a) (((x) + ((a)-1)) & ~((a)-1))
/* OpenCL address searching mode */
@ -74,6 +75,8 @@ typedef struct _vg_ocl_context_s { @@ -74,6 +75,8 @@ typedef struct _vg_ocl_context_s {
int voc_pattern_rewrite;
int voc_pattern_alloc;
vg_ocl_check_t voc_verify_func[MAX_KERNEL];
pthread_t voc_ocl_thread;
pthread_mutex_t voc_lock;
pthread_cond_t voc_wait;
@ -1164,6 +1167,9 @@ vg_ocl_kernel_start(vg_ocl_context_t *vocp, int slot, int ncol, int nrow, @@ -1164,6 +1167,9 @@ vg_ocl_kernel_start(vg_ocl_context_t *vocp, int slot, int ncol, int nrow,
assert(!vocp->voc_oclkrnwait[slot]);
/* heap_invert() preconditions */
assert(is_pow2(invsize) && (invsize > 1));
val = invsize;
ret = clSetKernelArg(vocp->voc_oclkernel[slot][1],
1,
@ -1191,6 +1197,12 @@ vg_ocl_kernel_start(vg_ocl_context_t *vocp, int slot, int ncol, int nrow, @@ -1191,6 +1197,12 @@ vg_ocl_kernel_start(vg_ocl_context_t *vocp, int slot, int ncol, int nrow,
return 0;
}
if (vocp->voc_verify_func[0] &&
!(vocp->voc_verify_func[0])(vocp, slot)) {
printf("ERROR: Kernel 0 failed verification test\n");
return 0;
}
ret = clEnqueueNDRangeKernel(vocp->voc_oclcmdq,
vocp->voc_oclkernel[slot][1],
1,
@ -1209,6 +1221,12 @@ vg_ocl_kernel_start(vg_ocl_context_t *vocp, int slot, int ncol, int nrow, @@ -1209,6 +1221,12 @@ vg_ocl_kernel_start(vg_ocl_context_t *vocp, int slot, int ncol, int nrow,
return 0;
}
if (vocp->voc_verify_func[1] &&
!(vocp->voc_verify_func[1])(vocp, slot)) {
printf("ERROR: Kernel 1 failed verification test\n");
return 0;
}
ret = clEnqueueNDRangeKernel(vocp->voc_oclcmdq,
vocp->voc_oclkernel[slot][2],
2,
@ -1244,6 +1262,45 @@ vg_ocl_kernel_wait(vg_ocl_context_t *vocp, int slot) @@ -1244,6 +1262,45 @@ vg_ocl_kernel_wait(vg_ocl_context_t *vocp, int slot)
}
/*
 * Load a 256-bit value from a raw 32-byte buffer into an OpenSSL BIGNUM.
 *
 * The 32 bytes are copied verbatim into the BIGNUM's word array, so buf
 * must already be laid out the way bn->d is (this is the inverse of
 * vg_ocl_put_bignum_raw()).
 *
 * After the copy the word count is normalized: BN_cmp() and the rest of
 * the BIGNUM library compare bn->top first, so a value whose high words
 * are zero must not claim the full 256-bit width or it will compare
 * unequal to the same value produced by OpenSSL itself.
 *
 * If the BIGNUM cannot be grown to hold 256 bits, bn is left as a
 * well-formed zero instead of copying into a too-small or NULL array.
 */
INLINE void
vg_ocl_get_bignum_raw(BIGNUM *bn, const unsigned char *buf)
{
	if (!bn_expand(bn, 256)) {
		/* Allocation failed: do not touch bn->d, report zero */
		bn->top = 0;
		bn->neg = 0;
		return;
	}

	memcpy(bn->d, buf, 32);
	bn->top = (32 / sizeof(BN_ULONG));
	/* The raw buffer carries no sign; drop any stale one */
	bn->neg = 0;

	/* Strip zero high words so that bn->top is canonical */
	while ((bn->top > 0) && (bn->d[bn->top - 1] == 0))
		bn->top--;
}
/*
 * Store a BIGNUM into a raw 32-byte buffer.
 *
 * The BIGNUM's word array is copied verbatim.  At most 32 bytes are
 * written from it; when the BIGNUM occupies fewer than 32 bytes the
 * remainder of buf is filled with zeros, so exactly 32 bytes of buf are
 * always written.
 */
INLINE void
vg_ocl_put_bignum_raw(unsigned char *buf, const BIGNUM *bn)
{
	int nbytes = (bn->top * sizeof(BN_ULONG));

	if (nbytes < 32) {
		/* Short value: copy what exists, zero the tail */
		memcpy(buf, bn->d, nbytes);
		memset(buf + nbytes, 0, 32 - nbytes);
		return;
	}

	/* Full-width (or wider) value: keep the first 32 bytes only */
	memcpy(buf, bn->d, 32);
}
#define ACCESS_BUNDLE 1024
#define ACCESS_STRIDE (ACCESS_BUNDLE/8)
/*
 * Read the 256-bit value stored for `cell` out of a strided buffer.
 *
 * The value is not contiguous in buf: its eight 4-byte words sit
 * ACCESS_STRIDE words apart, starting at word index
 * (cell / ACCESS_STRIDE) * ACCESS_BUNDLE + (cell % ACCESS_STRIDE).
 * The words are gathered into a contiguous 32-byte scratch buffer and
 * handed to vg_ocl_get_bignum_raw().
 */
void
vg_ocl_get_bignum_tpa(BIGNUM *bn, const unsigned char *buf, int cell)
{
	unsigned char gathered[32];
	int bundle_base, lane, first, w;

	bundle_base = (cell / ACCESS_STRIDE) * ACCESS_BUNDLE;
	lane = cell % ACCESS_STRIDE;
	first = bundle_base + lane;

	for (w = 0; w < 8; w++) {
		memcpy(gathered + (w*4),
		       buf + 4*(first + w*ACCESS_STRIDE),
		       4);
	}

	vg_ocl_get_bignum_raw(bn, gathered);
}
/*
* Absolutely disgusting.
* We want points in Montgomery form, and it's a lot easier to read the
@ -1259,20 +1316,29 @@ struct ec_point_st { @@ -1259,20 +1316,29 @@ struct ec_point_st {
};
INLINE void
vg_ocl_put_point(unsigned char *buf, EC_POINT *ppnt)
vg_ocl_get_point(EC_POINT *ppnt, const unsigned char *buf)
{
assert(ppnt->Z_is_one);
memcpy(buf, ppnt->X.d, 32);
memcpy(buf + 32, ppnt->Y.d, 32);
static const unsigned char mont_one[] = { 0x01,0x00,0x00,0x03,0xd1 };
vg_ocl_get_bignum_raw(&ppnt->X, buf);
vg_ocl_get_bignum_raw(&ppnt->Y, buf + 32);
if (!ppnt->Z_is_one) {
ppnt->Z_is_one = 1;
BN_bin2bn(mont_one, sizeof(mont_one), &ppnt->Z);
}
}
#define ACCESS_BUNDLE 1024
#define ACCESS_STRIDE (ACCESS_BUNDLE/8)
INLINE void
vg_ocl_put_point_tpa(unsigned char *buf, int cell, EC_POINT *ppnt)
vg_ocl_put_point(unsigned char *buf, const EC_POINT *ppnt)
{
uint8_t pntbuf[64];
assert(ppnt->Z_is_one);
vg_ocl_put_bignum_raw(buf, &ppnt->X);
vg_ocl_put_bignum_raw(buf + 32, &ppnt->Y);
}
void
vg_ocl_put_point_tpa(unsigned char *buf, int cell, const EC_POINT *ppnt)
{
unsigned char pntbuf[64];
int start, i;
vg_ocl_put_point(pntbuf, ppnt);
@ -1289,6 +1355,26 @@ vg_ocl_put_point_tpa(unsigned char *buf, int cell, EC_POINT *ppnt) @@ -1289,6 +1355,26 @@ vg_ocl_put_point_tpa(unsigned char *buf, int cell, EC_POINT *ppnt)
4);
}
/*
 * Read the point stored for `cell` out of a strided buffer.
 *
 * A point occupies two interleaved groups of eight 4-byte words, each
 * group spaced ACCESS_STRIDE words apart.  The first group starts at
 * word index ((2 * cell) / ACCESS_STRIDE) * ACCESS_BUNDLE +
 * (cell % (ACCESS_STRIDE/2)); the second group starts ACCESS_STRIDE/2
 * words after the first.  The groups are gathered into the first and
 * second 32 bytes of a 64-byte record, which vg_ocl_get_point() then
 * decodes.
 */
void
vg_ocl_get_point_tpa(EC_POINT *ppnt, const unsigned char *buf, int cell)
{
	unsigned char xy[64];
	int base, word;

	base = ((((2 * cell) / ACCESS_STRIDE) * ACCESS_BUNDLE) +
		(cell % (ACCESS_STRIDE/2)));

	for (word = 0; word < 8; word++) {
		/* First half of the record */
		memcpy(xy+(word*4),
		       buf + 4*(base + word*ACCESS_STRIDE),
		       4);
		/* Second half of the record, offset by half a stride */
		memcpy(xy+32+(word*4),
		       buf + 4*(base + (ACCESS_STRIDE/2) + (word*ACCESS_STRIDE)),
		       4);
	}

	vg_ocl_get_point(ppnt, xy);
}
void
show_elapsed(struct timeval *tv, const char *place)
{
@ -1425,7 +1511,14 @@ vg_ocl_prefix_check(vg_ocl_context_t *vocp, int slot) @@ -1425,7 +1511,14 @@ vg_ocl_prefix_check(vg_ocl_context_t *vocp, int slot)
orig_delta = vxcp->vxc_delta;
vxcp->vxc_delta += found_delta;
vg_exec_context_calc_address(vxcp);
res = test_func(vxcp);
/* Make sure the GPU produced the expected hash */
res = 0;
if (!memcmp(vxcp->vxc_binres + 1,
ocl_found_out + 2,
20)) {
res = test_func(vxcp);
}
if (res == 0) {
/*
* The match was not found in
@ -1491,6 +1584,172 @@ vg_ocl_config_pattern(vg_ocl_context_t *vocp) @@ -1491,6 +1584,172 @@ vg_ocl_config_pattern(vg_ocl_context_t *vocp)
}
/*
* Temporary buffer content verification functions
* This provides a simple test of the kernel, the OpenCL compiler,
* and the hardware.
*/
/*
 * Cross-check the device's temporary buffers against values recomputed
 * on the host.
 *
 * For every row y and column x, the row point (read from kernel
 * argument 4) and the column point (read from kernel argument 3) are
 * added with EC_POINT_add().  The X, Y and Z of that sum are compared
 * with what the device left in kernel argument 2 (points) and kernel
 * argument 1 (Z values) at cell (y * voc_ocl_cols + x).
 *
 * z_inverted selects what the device Z value is compared against:
 *   zero     - the Z of the host-computed sum, as is
 *   nonzero  - the modular inverse of that Z (see the loop body)
 * It is also printed as the kernel number in mismatch reports.
 *
 * Each mismatching cell is reported on stdout.
 *
 * Returns 1 when every cell matched, 0 when at least one cell
 * mismatched or when allocation / buffer mapping failed.
 */
int
vg_ocl_verify_temporary(vg_ocl_context_t *vocp, int slot, int z_inverted)
{
vg_exec_context_t *vxcp = &vocp->base;
unsigned char *point_tmp = NULL, *z_heap = NULL;
unsigned char *ocl_points_in = NULL, *ocl_strides_in = NULL;
const EC_GROUP *pgroup;
EC_POINT *ppr = NULL, *ppc = NULL, *pps = NULL, *ppt = NULL;
BIGNUM bnz, bnez, bnm, *bnzc;
BN_CTX *bnctx = NULL;
BN_MONT_CTX *bnmont;
int ret = 0;
int mismatches = 0, mm_r;
int x, y, bx;
/* Big-endian bytes of the secp256k1 field prime, 2^256 - 2^32 - 977 */
static const unsigned char raw_modulus[] = {
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F
};
/*
 * bnz  - Z value read back from the device
 * bnez - expected Z when z_inverted is set
 * bnm  - field modulus
 */
BN_init(&bnz);
BN_init(&bnez);
BN_init(&bnm);
bnctx = BN_CTX_new();
bnmont = BN_MONT_CTX_new();
pgroup = EC_KEY_get0_group(vxcp->vxc_key);
/*
 * ppr - row point, ppc - column point,
 * pps - host-computed sum, ppt - point read back from the device
 */
ppr = EC_POINT_new(pgroup);
ppc = EC_POINT_new(pgroup);
pps = EC_POINT_new(pgroup);
ppt = EC_POINT_new(pgroup);
if (!bnctx || !bnmont || !ppr || !ppc || !pps || !ppt) {
printf("ERROR: out of memory\n");
goto out;
}
/* NOTE(review): the results of the two setup calls below are not checked */
BN_bin2bn(raw_modulus, sizeof(raw_modulus), &bnm);
BN_MONT_CTX_set(bnmont, &bnm, bnctx);
/* bnzc points at whichever value the device Z is expected to equal */
if (z_inverted) {
bnzc = &bnez;
} else {
bnzc = &pps->Z;
}
/*
 * Map kernel arguments 1-4 into host memory.
 * NOTE(review): the meaning of the final 0 argument is not visible
 * here - presumably it requests a read-only mapping; confirm against
 * vg_ocl_map_arg_buffer().
 */
z_heap = (unsigned char *)
vg_ocl_map_arg_buffer(vocp, slot, 1, 0);
point_tmp = (unsigned char *)
vg_ocl_map_arg_buffer(vocp, slot, 2, 0);
ocl_points_in = (unsigned char *)
vg_ocl_map_arg_buffer(vocp, slot, 3, 0);
ocl_strides_in = (unsigned char *)
vg_ocl_map_arg_buffer(vocp, slot, 4, 0);
if (!z_heap || !point_tmp || !ocl_points_in || !ocl_strides_in) {
printf("ERROR: could not map OpenCL point buffers\n");
goto out;
}
for (y = 0; y < vocp->voc_ocl_rows; y++) {
/* Row points are stored contiguously, 64 bytes each */
vg_ocl_get_point(ppr, ocl_strides_in + (64*y));
/* Index of the first cell of this row in the temporary buffers */
bx = y * vocp->voc_ocl_cols;
/* mm_r: set once the row point has been printed for this row */
mm_r = 0;
for (x = 0; x < vocp->voc_ocl_cols; x++) {
/* Column points use the strided layout, indexed by column only */
vg_ocl_get_point_tpa(ppc, ocl_points_in, x);
assert(ppr->Z_is_one && ppc->Z_is_one);
/* Host reference: sum of column and row points */
EC_POINT_add(pgroup, pps, ppc, ppr, bnctx);
assert(!pps->Z_is_one);
/* Device results for the same cell */
vg_ocl_get_point_tpa(ppt, point_tmp, bx + x);
vg_ocl_get_bignum_tpa(&bnz, z_heap, bx + x);
if (z_inverted) {
/*
 * Expected Z is the inverse of the host sum's Z modulo bnm.
 * NOTE(review): BN_to_montgomery() is applied twice -
 * presumably because pps->Z is itself held in Montgomery
 * form, so its plain inverse carries an extra inverse
 * Montgomery factor and needs two conversions to reach the
 * Montgomery form of the inverse; confirm before changing.
 */
BN_mod_inverse(&bnez, &pps->Z, &bnm, bnctx);
BN_to_montgomery(&bnez, &bnez, bnmont, bnctx);
BN_to_montgomery(&bnez, &bnez, bnmont, bnctx);
}
if (BN_cmp(&ppt->X, &pps->X) ||
BN_cmp(&ppt->Y, &pps->Y) ||
BN_cmp(&bnz, bnzc)) {
mismatches++;
printf("Mismatch for kernel %d, "
"offset %d (%d,%d)\n",
z_inverted, bx + x, y, x);
/* Print the row point only once per row */
if (!mm_r) {
mm_r = 1;
printf("Row X : ");
dumpbn(&ppr->X);
printf("Row Y : ");
dumpbn(&ppr->Y);
}
printf("Column X: ");
dumpbn(&ppc->X);
printf("Column Y: ");
dumpbn(&ppc->Y);
/* Show expected/device pairs only for the components that differ */
if (BN_cmp(&ppt->X, &pps->X)) {
printf("Expect X: ");
dumpbn(&pps->X);
printf("Device X: ");
dumpbn(&ppt->X);
}
if (BN_cmp(&ppt->Y, &pps->Y)) {
printf("Expect Y: ");
dumpbn(&pps->Y);
printf("Device Y: ");
dumpbn(&ppt->Y);
}
if (BN_cmp(&bnz, bnzc)) {
printf("Expect Z: ");
dumpbn(bnzc);
printf("Device Z: ");
dumpbn(&bnz);
}
}
}
}
/* Success only if the whole grid was walked without a mismatch */
ret = !mismatches;
out:
/* Release only what was actually mapped or allocated */
if (z_heap)
vg_ocl_unmap_arg_buffer(vocp, slot, 1, z_heap);
if (point_tmp)
vg_ocl_unmap_arg_buffer(vocp, slot, 2, point_tmp);
if (ocl_points_in)
vg_ocl_unmap_arg_buffer(vocp, slot, 3, ocl_points_in);
if (ocl_strides_in)
vg_ocl_unmap_arg_buffer(vocp, slot, 4, ocl_strides_in);
if (ppr)
EC_POINT_free(ppr);
if (ppc)
EC_POINT_free(ppc);
if (pps)
EC_POINT_free(pps);
if (ppt)
EC_POINT_free(ppt);
BN_clear_free(&bnz);
BN_clear_free(&bnez);
BN_clear_free(&bnm);
if (bnmont)
BN_MONT_CTX_free(bnmont);
if (bnctx)
BN_CTX_free(bnctx);
return ret;
}
/*
 * Verification hook for kernel 0: checks the temporary buffers with the
 * device Z values compared as-is (not inverted).
 * Returns whatever vg_ocl_verify_temporary() returns.
 */
int
vg_ocl_verify_k0(vg_ocl_context_t *vocp, int slot)
{
	const int z_inverted = 0;

	return vg_ocl_verify_temporary(vocp, slot, z_inverted);
}
/*
 * Verification hook for kernel 1: checks the temporary buffers with the
 * device Z values compared against the inverted expected Z.
 * Returns whatever vg_ocl_verify_temporary() returns.
 */
int
vg_ocl_verify_k1(vg_ocl_context_t *vocp, int slot)
{
	const int z_inverted = 1;

	return vg_ocl_verify_temporary(vocp, slot, z_inverted);
}
void *
vg_opencl_thread(void *arg)
{
@ -1578,7 +1837,7 @@ out: @@ -1578,7 +1837,7 @@ out:
*/
void *
vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode,
vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode, int verify,
int worksize, int nthreads, int nrows, int ncols, int invsize)
{
int i;
@ -1609,6 +1868,16 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode, @@ -1609,6 +1868,16 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode,
if (!vg_ocl_init(vcp, &ctx, did, safe_mode))
return NULL;
if (verify) {
if (vcp->vc_verbose > 0) {
printf("WARNING: Hardware verification mode enabled\n");
}
if (!worksize)
worksize = 1;
vocp->voc_verify_func[0] = vg_ocl_verify_k0;
vocp->voc_verify_func[1] = vg_ocl_verify_k1;
}
pkey = vxcp->vxc_key;
pgroup = EC_KEY_get0_group(pkey);
pgen = EC_GROUP_get0_generator(pgroup);
@ -1700,7 +1969,7 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode, @@ -1700,7 +1969,7 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode,
round = nrows * ncols;
if (!invsize) {
invsize = 1;
invsize = 2;
while (!(round % (invsize << 1)) &&
((round / invsize) > full_threads))
invsize <<= 1;
@ -1712,7 +1981,7 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode, @@ -1712,7 +1981,7 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode,
round/invsize, invsize);
}
if ((round % invsize) || (invsize & (invsize-1))) {
if ((round % invsize) || !is_pow2(invsize) || (invsize < 2)) {
if (vcp->vc_verbose <= 1) {
printf("Grid size: %dx%d\n", ncols, nrows);
printf("Modular inverse: %d threads, %d ops each\n",
@ -2257,6 +2526,7 @@ usage(const char *name) @@ -2257,6 +2526,7 @@ usage(const char *name)
"-t <threads> Set target thread count per multiprocessor\n"
"-g <x>x<y> Set grid size\n"
"-b <invsize> Set modular inverse ops per thread\n"
"-V Enable kernel/OpenCL/hardware verification (SLOW)\n"
"-f <file> File containing list of patterns, one per line\n"
" (Use \"-\" as the file name for stdin)\n"
"-o <file> Write pattern matches to <file>\n"
@ -2285,6 +2555,7 @@ main(int argc, char **argv) @@ -2285,6 +2555,7 @@ main(int argc, char **argv)
int nrows = 0, ncols = 0;
int invsize = 0;
int remove_on_match = 1;
int verify_mode = 0;
int safe_mode = 0;
vg_context_t *vcp = NULL;
cl_device_id did;
@ -2292,7 +2563,7 @@ main(int argc, char **argv) @@ -2292,7 +2563,7 @@ main(int argc, char **argv)
const char *key_password = NULL;
while ((opt = getopt(argc, argv,
"vqrikNTX:eE:p:d:w:t:g:b:Sh?f:o:s:")) != -1) {
"vqrikNTX:eE:p:d:w:t:g:b:VSh?f:o:s:")) != -1) {
switch (opt) {
case 'v':
verbose = 2;
@ -2371,6 +2642,9 @@ main(int argc, char **argv) @@ -2371,6 +2642,9 @@ main(int argc, char **argv)
return 1;
}
break;
case 'V':
verify_mode = 1;
break;
case 'S':
safe_mode = 1;
break;
@ -2498,7 +2772,7 @@ main(int argc, char **argv) @@ -2498,7 +2772,7 @@ main(int argc, char **argv)
return 1;
}
vg_opencl_loop(vcp, did, safe_mode,
vg_opencl_loop(vcp, did, safe_mode, verify_mode,
worksize, nthreads, nrows, ncols, invsize);
return 0;
}

Loading…
Cancel
Save