mirror of
https://github.com/GOSTSec/vanitygen
synced 2025-03-13 05:41:35 +00:00
Add verification mode for tracking down OpenCL/hardware problems.
Ensure that invsize > 1 to ensure correct behavior from heap_invert.
This commit is contained in:
parent
cb3e60750a
commit
458bb28add
304
oclvanitygen.c
304
oclvanitygen.c
@ -48,6 +48,7 @@ const int debug = 0;
|
||||
#define MAX_ARG 6
|
||||
#define MAX_KERNEL 3
|
||||
|
||||
#define is_pow2(v) (!((v) & ((v)-1)))
|
||||
#define round_up_pow2(x, a) (((x) + ((a)-1)) & ~((a)-1))
|
||||
|
||||
/* OpenCL address searching mode */
|
||||
@ -74,6 +75,8 @@ typedef struct _vg_ocl_context_s {
|
||||
int voc_pattern_rewrite;
|
||||
int voc_pattern_alloc;
|
||||
|
||||
vg_ocl_check_t voc_verify_func[MAX_KERNEL];
|
||||
|
||||
pthread_t voc_ocl_thread;
|
||||
pthread_mutex_t voc_lock;
|
||||
pthread_cond_t voc_wait;
|
||||
@ -1164,6 +1167,9 @@ vg_ocl_kernel_start(vg_ocl_context_t *vocp, int slot, int ncol, int nrow,
|
||||
|
||||
assert(!vocp->voc_oclkrnwait[slot]);
|
||||
|
||||
/* heap_invert() preconditions */
|
||||
assert(is_pow2(invsize) && (invsize > 1));
|
||||
|
||||
val = invsize;
|
||||
ret = clSetKernelArg(vocp->voc_oclkernel[slot][1],
|
||||
1,
|
||||
@ -1191,6 +1197,12 @@ vg_ocl_kernel_start(vg_ocl_context_t *vocp, int slot, int ncol, int nrow,
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (vocp->voc_verify_func[0] &&
|
||||
!(vocp->voc_verify_func[0])(vocp, slot)) {
|
||||
printf("ERROR: Kernel 0 failed verification test\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = clEnqueueNDRangeKernel(vocp->voc_oclcmdq,
|
||||
vocp->voc_oclkernel[slot][1],
|
||||
1,
|
||||
@ -1209,6 +1221,12 @@ vg_ocl_kernel_start(vg_ocl_context_t *vocp, int slot, int ncol, int nrow,
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (vocp->voc_verify_func[1] &&
|
||||
!(vocp->voc_verify_func[1])(vocp, slot)) {
|
||||
printf("ERROR: Kernel 1 failed verification test\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = clEnqueueNDRangeKernel(vocp->voc_oclcmdq,
|
||||
vocp->voc_oclkernel[slot][2],
|
||||
2,
|
||||
@ -1244,6 +1262,45 @@ vg_ocl_kernel_wait(vg_ocl_context_t *vocp, int slot)
|
||||
}
|
||||
|
||||
|
||||
INLINE void
|
||||
vg_ocl_get_bignum_raw(BIGNUM *bn, const unsigned char *buf)
|
||||
{
|
||||
bn_expand(bn, 256);
|
||||
memcpy(bn->d, buf, 32);
|
||||
bn->top = (32 / sizeof(BN_ULONG));
|
||||
}
|
||||
|
||||
INLINE void
|
||||
vg_ocl_put_bignum_raw(unsigned char *buf, const BIGNUM *bn)
|
||||
{
|
||||
int bnlen = (bn->top * sizeof(BN_ULONG));
|
||||
if (bnlen >= 32) {
|
||||
memcpy(buf, bn->d, 32);
|
||||
} else {
|
||||
memcpy(buf, bn->d, bnlen);
|
||||
memset(buf + bnlen, 0, 32 - bnlen);
|
||||
}
|
||||
}
|
||||
|
||||
#define ACCESS_BUNDLE 1024
|
||||
#define ACCESS_STRIDE (ACCESS_BUNDLE/8)
|
||||
|
||||
void
|
||||
vg_ocl_get_bignum_tpa(BIGNUM *bn, const unsigned char *buf, int cell)
|
||||
{
|
||||
unsigned char bnbuf[32];
|
||||
int start, i;
|
||||
|
||||
start = (((cell / ACCESS_STRIDE) * ACCESS_BUNDLE) +
|
||||
(cell % ACCESS_STRIDE));
|
||||
for (i = 0; i < 8; i++)
|
||||
memcpy(bnbuf+(i*4),
|
||||
buf + 4*(start + i*ACCESS_STRIDE),
|
||||
4);
|
||||
|
||||
vg_ocl_get_bignum_raw(bn, bnbuf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Absolutely disgusting.
|
||||
* We want points in Montgomery form, and it's a lot easier to read the
|
||||
@ -1259,20 +1316,29 @@ struct ec_point_st {
|
||||
};
|
||||
|
||||
INLINE void
|
||||
vg_ocl_put_point(unsigned char *buf, EC_POINT *ppnt)
|
||||
vg_ocl_get_point(EC_POINT *ppnt, const unsigned char *buf)
|
||||
{
|
||||
assert(ppnt->Z_is_one);
|
||||
memcpy(buf, ppnt->X.d, 32);
|
||||
memcpy(buf + 32, ppnt->Y.d, 32);
|
||||
static const unsigned char mont_one[] = { 0x01,0x00,0x00,0x03,0xd1 };
|
||||
vg_ocl_get_bignum_raw(&ppnt->X, buf);
|
||||
vg_ocl_get_bignum_raw(&ppnt->Y, buf + 32);
|
||||
if (!ppnt->Z_is_one) {
|
||||
ppnt->Z_is_one = 1;
|
||||
BN_bin2bn(mont_one, sizeof(mont_one), &ppnt->Z);
|
||||
}
|
||||
}
|
||||
|
||||
#define ACCESS_BUNDLE 1024
|
||||
#define ACCESS_STRIDE (ACCESS_BUNDLE/8)
|
||||
|
||||
INLINE void
|
||||
vg_ocl_put_point_tpa(unsigned char *buf, int cell, EC_POINT *ppnt)
|
||||
vg_ocl_put_point(unsigned char *buf, const EC_POINT *ppnt)
|
||||
{
|
||||
uint8_t pntbuf[64];
|
||||
assert(ppnt->Z_is_one);
|
||||
vg_ocl_put_bignum_raw(buf, &ppnt->X);
|
||||
vg_ocl_put_bignum_raw(buf + 32, &ppnt->Y);
|
||||
}
|
||||
|
||||
void
|
||||
vg_ocl_put_point_tpa(unsigned char *buf, int cell, const EC_POINT *ppnt)
|
||||
{
|
||||
unsigned char pntbuf[64];
|
||||
int start, i;
|
||||
|
||||
vg_ocl_put_point(pntbuf, ppnt);
|
||||
@ -1289,6 +1355,26 @@ vg_ocl_put_point_tpa(unsigned char *buf, int cell, EC_POINT *ppnt)
|
||||
4);
|
||||
}
|
||||
|
||||
void
|
||||
vg_ocl_get_point_tpa(EC_POINT *ppnt, const unsigned char *buf, int cell)
|
||||
{
|
||||
unsigned char pntbuf[64];
|
||||
int start, i;
|
||||
|
||||
start = ((((2 * cell) / ACCESS_STRIDE) * ACCESS_BUNDLE) +
|
||||
(cell % (ACCESS_STRIDE/2)));
|
||||
for (i = 0; i < 8; i++)
|
||||
memcpy(pntbuf+(i*4),
|
||||
buf + 4*(start + i*ACCESS_STRIDE),
|
||||
4);
|
||||
for (i = 0; i < 8; i++)
|
||||
memcpy(pntbuf+32+(i*4),
|
||||
buf + 4*(start + (ACCESS_STRIDE/2) + (i*ACCESS_STRIDE)),
|
||||
4);
|
||||
|
||||
vg_ocl_get_point(ppnt, pntbuf);
|
||||
}
|
||||
|
||||
void
|
||||
show_elapsed(struct timeval *tv, const char *place)
|
||||
{
|
||||
@ -1425,7 +1511,14 @@ vg_ocl_prefix_check(vg_ocl_context_t *vocp, int slot)
|
||||
orig_delta = vxcp->vxc_delta;
|
||||
vxcp->vxc_delta += found_delta;
|
||||
vg_exec_context_calc_address(vxcp);
|
||||
res = test_func(vxcp);
|
||||
|
||||
/* Make sure the GPU produced the expected hash */
|
||||
res = 0;
|
||||
if (!memcmp(vxcp->vxc_binres + 1,
|
||||
ocl_found_out + 2,
|
||||
20)) {
|
||||
res = test_func(vxcp);
|
||||
}
|
||||
if (res == 0) {
|
||||
/*
|
||||
* The match was not found in
|
||||
@ -1491,6 +1584,172 @@ vg_ocl_config_pattern(vg_ocl_context_t *vocp)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Temporary buffer content verification functions
|
||||
* This provides a simple test of the kernel, the OpenCL compiler,
|
||||
* and the hardware.
|
||||
*/
|
||||
int
|
||||
vg_ocl_verify_temporary(vg_ocl_context_t *vocp, int slot, int z_inverted)
|
||||
{
|
||||
vg_exec_context_t *vxcp = &vocp->base;
|
||||
unsigned char *point_tmp = NULL, *z_heap = NULL;
|
||||
unsigned char *ocl_points_in = NULL, *ocl_strides_in = NULL;
|
||||
const EC_GROUP *pgroup;
|
||||
EC_POINT *ppr = NULL, *ppc = NULL, *pps = NULL, *ppt = NULL;
|
||||
BIGNUM bnz, bnez, bnm, *bnzc;
|
||||
BN_CTX *bnctx = NULL;
|
||||
BN_MONT_CTX *bnmont;
|
||||
int ret = 0;
|
||||
int mismatches = 0, mm_r;
|
||||
int x, y, bx;
|
||||
static const unsigned char raw_modulus[] = {
|
||||
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
|
||||
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
|
||||
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
|
||||
0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F
|
||||
};
|
||||
|
||||
BN_init(&bnz);
|
||||
BN_init(&bnez);
|
||||
BN_init(&bnm);
|
||||
|
||||
bnctx = BN_CTX_new();
|
||||
bnmont = BN_MONT_CTX_new();
|
||||
pgroup = EC_KEY_get0_group(vxcp->vxc_key);
|
||||
ppr = EC_POINT_new(pgroup);
|
||||
ppc = EC_POINT_new(pgroup);
|
||||
pps = EC_POINT_new(pgroup);
|
||||
ppt = EC_POINT_new(pgroup);
|
||||
|
||||
if (!bnctx || !bnmont || !ppr || !ppc || !pps || !ppt) {
|
||||
printf("ERROR: out of memory\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
BN_bin2bn(raw_modulus, sizeof(raw_modulus), &bnm);
|
||||
BN_MONT_CTX_set(bnmont, &bnm, bnctx);
|
||||
|
||||
if (z_inverted) {
|
||||
bnzc = &bnez;
|
||||
} else {
|
||||
bnzc = &pps->Z;
|
||||
}
|
||||
|
||||
z_heap = (unsigned char *)
|
||||
vg_ocl_map_arg_buffer(vocp, slot, 1, 0);
|
||||
point_tmp = (unsigned char *)
|
||||
vg_ocl_map_arg_buffer(vocp, slot, 2, 0);
|
||||
ocl_points_in = (unsigned char *)
|
||||
vg_ocl_map_arg_buffer(vocp, slot, 3, 0);
|
||||
ocl_strides_in = (unsigned char *)
|
||||
vg_ocl_map_arg_buffer(vocp, slot, 4, 0);
|
||||
|
||||
if (!z_heap || !point_tmp || !ocl_points_in || !ocl_strides_in) {
|
||||
printf("ERROR: could not map OpenCL point buffers\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (y = 0; y < vocp->voc_ocl_rows; y++) {
|
||||
vg_ocl_get_point(ppr, ocl_strides_in + (64*y));
|
||||
bx = y * vocp->voc_ocl_cols;
|
||||
mm_r = 0;
|
||||
|
||||
for (x = 0; x < vocp->voc_ocl_cols; x++) {
|
||||
vg_ocl_get_point_tpa(ppc, ocl_points_in, x);
|
||||
assert(ppr->Z_is_one && ppc->Z_is_one);
|
||||
EC_POINT_add(pgroup, pps, ppc, ppr, bnctx);
|
||||
assert(!pps->Z_is_one);
|
||||
vg_ocl_get_point_tpa(ppt, point_tmp, bx + x);
|
||||
vg_ocl_get_bignum_tpa(&bnz, z_heap, bx + x);
|
||||
if (z_inverted) {
|
||||
BN_mod_inverse(&bnez, &pps->Z, &bnm, bnctx);
|
||||
BN_to_montgomery(&bnez, &bnez, bnmont, bnctx);
|
||||
BN_to_montgomery(&bnez, &bnez, bnmont, bnctx);
|
||||
}
|
||||
if (BN_cmp(&ppt->X, &pps->X) ||
|
||||
BN_cmp(&ppt->Y, &pps->Y) ||
|
||||
BN_cmp(&bnz, bnzc)) {
|
||||
mismatches++;
|
||||
printf("Mismatch for kernel %d, "
|
||||
"offset %d (%d,%d)\n",
|
||||
z_inverted, bx + x, y, x);
|
||||
if (!mm_r) {
|
||||
mm_r = 1;
|
||||
printf("Row X : ");
|
||||
dumpbn(&ppr->X);
|
||||
printf("Row Y : ");
|
||||
dumpbn(&ppr->Y);
|
||||
}
|
||||
|
||||
printf("Column X: ");
|
||||
dumpbn(&ppc->X);
|
||||
printf("Column Y: ");
|
||||
dumpbn(&ppc->Y);
|
||||
|
||||
if (BN_cmp(&ppt->X, &pps->X)) {
|
||||
printf("Expect X: ");
|
||||
dumpbn(&pps->X);
|
||||
printf("Device X: ");
|
||||
dumpbn(&ppt->X);
|
||||
}
|
||||
if (BN_cmp(&ppt->Y, &pps->Y)) {
|
||||
printf("Expect Y: ");
|
||||
dumpbn(&pps->Y);
|
||||
printf("Device Y: ");
|
||||
dumpbn(&ppt->Y);
|
||||
}
|
||||
if (BN_cmp(&bnz, bnzc)) {
|
||||
printf("Expect Z: ");
|
||||
dumpbn(bnzc);
|
||||
printf("Device Z: ");
|
||||
dumpbn(&bnz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ret = !mismatches;
|
||||
|
||||
out:
|
||||
if (z_heap)
|
||||
vg_ocl_unmap_arg_buffer(vocp, slot, 1, z_heap);
|
||||
if (point_tmp)
|
||||
vg_ocl_unmap_arg_buffer(vocp, slot, 2, point_tmp);
|
||||
if (ocl_points_in)
|
||||
vg_ocl_unmap_arg_buffer(vocp, slot, 3, ocl_points_in);
|
||||
if (ocl_strides_in)
|
||||
vg_ocl_unmap_arg_buffer(vocp, slot, 4, ocl_strides_in);
|
||||
if (ppr)
|
||||
EC_POINT_free(ppr);
|
||||
if (ppc)
|
||||
EC_POINT_free(ppc);
|
||||
if (pps)
|
||||
EC_POINT_free(pps);
|
||||
if (ppt)
|
||||
EC_POINT_free(ppt);
|
||||
BN_clear_free(&bnz);
|
||||
BN_clear_free(&bnez);
|
||||
BN_clear_free(&bnm);
|
||||
if (bnmont)
|
||||
BN_MONT_CTX_free(bnmont);
|
||||
if (bnctx)
|
||||
BN_CTX_free(bnctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
vg_ocl_verify_k0(vg_ocl_context_t *vocp, int slot)
|
||||
{
|
||||
return vg_ocl_verify_temporary(vocp, slot, 0);
|
||||
}
|
||||
|
||||
int
|
||||
vg_ocl_verify_k1(vg_ocl_context_t *vocp, int slot)
|
||||
{
|
||||
return vg_ocl_verify_temporary(vocp, slot, 1);
|
||||
}
|
||||
|
||||
void *
|
||||
vg_opencl_thread(void *arg)
|
||||
{
|
||||
@ -1578,7 +1837,7 @@ out:
|
||||
*/
|
||||
|
||||
void *
|
||||
vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode,
|
||||
vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode, int verify,
|
||||
int worksize, int nthreads, int nrows, int ncols, int invsize)
|
||||
{
|
||||
int i;
|
||||
@ -1609,6 +1868,16 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode,
|
||||
if (!vg_ocl_init(vcp, &ctx, did, safe_mode))
|
||||
return NULL;
|
||||
|
||||
if (verify) {
|
||||
if (vcp->vc_verbose > 0) {
|
||||
printf("WARNING: Hardware verification mode enabled\n");
|
||||
}
|
||||
if (!worksize)
|
||||
worksize = 1;
|
||||
vocp->voc_verify_func[0] = vg_ocl_verify_k0;
|
||||
vocp->voc_verify_func[1] = vg_ocl_verify_k1;
|
||||
}
|
||||
|
||||
pkey = vxcp->vxc_key;
|
||||
pgroup = EC_KEY_get0_group(pkey);
|
||||
pgen = EC_GROUP_get0_generator(pgroup);
|
||||
@ -1700,7 +1969,7 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode,
|
||||
round = nrows * ncols;
|
||||
|
||||
if (!invsize) {
|
||||
invsize = 1;
|
||||
invsize = 2;
|
||||
while (!(round % (invsize << 1)) &&
|
||||
((round / invsize) > full_threads))
|
||||
invsize <<= 1;
|
||||
@ -1712,7 +1981,7 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int safe_mode,
|
||||
round/invsize, invsize);
|
||||
}
|
||||
|
||||
if ((round % invsize) || (invsize & (invsize-1))) {
|
||||
if ((round % invsize) || !is_pow2(invsize) || (invsize < 2)) {
|
||||
if (vcp->vc_verbose <= 1) {
|
||||
printf("Grid size: %dx%d\n", ncols, nrows);
|
||||
printf("Modular inverse: %d threads, %d ops each\n",
|
||||
@ -2257,6 +2526,7 @@ usage(const char *name)
|
||||
"-t <threads> Set target thread count per multiprocessor\n"
|
||||
"-g <x>x<y> Set grid size\n"
|
||||
"-b <invsize> Set modular inverse ops per thread\n"
|
||||
"-V Enable kernel/OpenCL/hardware verification (SLOW)\n"
|
||||
"-f <file> File containing list of patterns, one per line\n"
|
||||
" (Use \"-\" as the file name for stdin)\n"
|
||||
"-o <file> Write pattern matches to <file>\n"
|
||||
@ -2285,6 +2555,7 @@ main(int argc, char **argv)
|
||||
int nrows = 0, ncols = 0;
|
||||
int invsize = 0;
|
||||
int remove_on_match = 1;
|
||||
int verify_mode = 0;
|
||||
int safe_mode = 0;
|
||||
vg_context_t *vcp = NULL;
|
||||
cl_device_id did;
|
||||
@ -2292,7 +2563,7 @@ main(int argc, char **argv)
|
||||
const char *key_password = NULL;
|
||||
|
||||
while ((opt = getopt(argc, argv,
|
||||
"vqrikNTX:eE:p:d:w:t:g:b:Sh?f:o:s:")) != -1) {
|
||||
"vqrikNTX:eE:p:d:w:t:g:b:VSh?f:o:s:")) != -1) {
|
||||
switch (opt) {
|
||||
case 'v':
|
||||
verbose = 2;
|
||||
@ -2371,6 +2642,9 @@ main(int argc, char **argv)
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case 'V':
|
||||
verify_mode = 1;
|
||||
break;
|
||||
case 'S':
|
||||
safe_mode = 1;
|
||||
break;
|
||||
@ -2498,7 +2772,7 @@ main(int argc, char **argv)
|
||||
return 1;
|
||||
}
|
||||
|
||||
vg_opencl_loop(vcp, did, safe_mode,
|
||||
vg_opencl_loop(vcp, did, safe_mode, verify_mode,
|
||||
worksize, nthreads, nrows, ncols, invsize);
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user