mirror of
https://github.com/GOSTSec/vanitygen
synced 2025-02-07 12:24:20 +00:00
Checkpoint development of oclvanitygen.
This commit is contained in:
parent
e328e73d6a
commit
d64d355010
@ -2,17 +2,26 @@ CC = cl
|
||||
OPENSSL_DIR = C:\OpenSSL-Win32
|
||||
PTHREADS_DIR = C:\pthreads-w32
|
||||
PCRE_DIR = C:\pcre-7.9-src
|
||||
OPENCL_DIR = "C:\Program Files (x86)\AMD APP"
|
||||
OPENCL_INCLUDE = /I$(OPENCL_DIR)\include
|
||||
OPENCL_LIBS = $(OPENCL_DIR)\lib\x86\OpenCL.lib
|
||||
CFLAGS = /D_WIN32 /DPTW32_STATIC_LIB /DPCRE_STATIC /I$(OPENSSL_DIR)\include /I$(PTHREADS_DIR) /I$(PCRE_DIR)
|
||||
LIBS = $(OPENSSL_DIR)\lib\libeay32.lib $(PTHREADS_DIR)\pthread.lib $(PCRE_DIR)\pcre.lib ws2_32.lib
|
||||
OBJS = vanitygen.obj pattern.obj winglue.obj
|
||||
OBJS = vanitygen.obj oclvanitygen.obj pattern.obj winglue.obj
|
||||
|
||||
all: vanitygen.exe
|
||||
|
||||
vanitygen.exe: $(OBJS)
|
||||
link /nologo /out:vanitygen.exe $(OBJS) $(LIBS)
|
||||
vanitygen.exe: vanitygen.obj pattern.obj winglue.obj
|
||||
link /nologo /out:$@ $** $(LIBS)
|
||||
|
||||
oclvanitygen.exe: oclvanitygen.obj pattern.obj winglue.obj
|
||||
link /nologo /out:$@ $** $(LIBS) $(OPENCL_LIBS)
|
||||
|
||||
.c.obj:
|
||||
$(CC) $(CFLAGS) /c /Tp$< /Fo$@
|
||||
|
||||
oclvanitygen.obj: oclvanitygen.c
|
||||
$(CC) $(CFLAGS) $(OPENCL_INCLUDE) /c /Tpoclvanitygen.c /Fo$@
|
||||
|
||||
clean:
|
||||
del vanitygen.exe $(OBJS)
|
||||
|
303
calc_addrs.cl
303
calc_addrs.cl
@ -42,6 +42,21 @@
|
||||
* substantially reduces the cost of performing modular inversion.
|
||||
*/
|
||||
|
||||
|
||||
/* Byte-swapping and endianness */
|
||||
#define bswap32(v) \
|
||||
(((v) >> 24) | (((v) >> 8) & 0xff00) | \
|
||||
(((v) << 8) & 0xff0000) | ((v) << 24))
|
||||
|
||||
#if __ENDIAN_LITTLE__ != 1
|
||||
#define load_le32(v) bswap32(v)
|
||||
#define load_be32(v) (v)
|
||||
#else
|
||||
#define load_le32(v) (v)
|
||||
#define load_be32(v) bswap32(v)
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* BIGNUM mini-library
|
||||
* This module deals with fixed-size 256-bit bignums.
|
||||
@ -439,27 +454,6 @@ bn_from_mont(bignum *rb, bignum *b)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Montgomery multiplication test kernel */
|
||||
__kernel void
|
||||
test_mul_mont(__global bignum *products_out, __global bignum *nums_in,
|
||||
int count)
|
||||
{
|
||||
bignum x, y, tmp;
|
||||
int i, o, p;
|
||||
o = get_global_id(0) * count;
|
||||
p = o * 2;
|
||||
for (i = 0; i < count; i++) {
|
||||
x = nums_in[p++];
|
||||
y = nums_in[p++];
|
||||
bn_mul_mont(&tmp, &x, &y);
|
||||
bn_mul_mont(&tmp, &tmp, &x);
|
||||
bn_mul_mont(&tmp, &tmp, &y);
|
||||
bn_from_mont(&x, &tmp);
|
||||
products_out[o++] = x;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Modular inversion
|
||||
*/
|
||||
@ -526,22 +520,6 @@ bn_mod_inverse(bignum *r, bignum *n)
|
||||
return;
|
||||
}
|
||||
|
||||
/* modular inversion test kernel */
|
||||
__kernel void
|
||||
test_mod_inverse(__global bignum *inv_out, __global bignum *nums_in,
|
||||
int count)
|
||||
{
|
||||
bignum x, xp;
|
||||
int i, o;
|
||||
o = get_global_id(0) * count;
|
||||
for (i = 0; i < count; i++) {
|
||||
x = nums_in[o];
|
||||
bn_mod_inverse(&xp, &x);
|
||||
inv_out[o++] = xp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* HASH FUNCTIONS
|
||||
*
|
||||
@ -597,7 +575,7 @@ sha2_256_init(uint *out)
|
||||
}
|
||||
|
||||
/* The state variable remapping is really contorted */
|
||||
#define sha2_stvar(vals, i, v) vals[(i+(7-v)) % 8]
|
||||
#define sha2_stvar(vals, i, v) vals[(64+v-i) % 8]
|
||||
|
||||
void
|
||||
sha2_256_block(uint *out, uint *in)
|
||||
@ -608,7 +586,7 @@ sha2_256_block(uint *out, uint *in)
|
||||
#pragma unroll UNROLL_MAX
|
||||
#endif
|
||||
for (i = 0; i < 8; i++)
|
||||
state[7-i] = out[i];
|
||||
state[i] = out[i];
|
||||
#ifdef UNROLL_MAX
|
||||
#pragma unroll 64
|
||||
#endif
|
||||
@ -642,7 +620,7 @@ sha2_256_block(uint *out, uint *in)
|
||||
#pragma unroll UNROLL_MAX
|
||||
#endif
|
||||
for (i = 0; i < 8; i++)
|
||||
out[i] += state[7-i];
|
||||
out[i] += state[i];
|
||||
}
|
||||
|
||||
|
||||
@ -687,28 +665,36 @@ __constant uchar ripemd160_rlp[] = {
|
||||
8, 5, 12, 9, 12, 5, 14, 6, 8, 13, 6, 5, 15, 13, 11, 11
|
||||
};
|
||||
|
||||
#define ripemd160_val(v, i, n) (v)[(80+(n)-(i)) % 5]
|
||||
#define ripemd160_valp(v, i, n) (v)[5 + ((80+(n)-(i)) % 5)]
|
||||
#define ripemd160_f0(x, y, z) (x ^ y ^ z)
|
||||
#define ripemd160_f1(x, y, z) ((x & y) | (~x & z))
|
||||
#define ripemd160_f2(x, y, z) ((x | ~y) ^ z)
|
||||
#define ripemd160_f3(x, y, z) ((x & z) | (y & ~z))
|
||||
#define ripemd160_f4(x, y, z) (x ^ (y | ~z))
|
||||
#define ripemd160_round(i, in, vals, f, fp, t) do { \
|
||||
t = rotate(vals[0] + \
|
||||
f(vals[1], vals[2], vals[3]) + \
|
||||
in[ripemd160_ws[i]] + \
|
||||
ripemd160_k[i / 16], \
|
||||
(uint)ripemd160_rl[i]) + vals[4]; \
|
||||
vals[0] = vals[4]; vals[4] = vals[3]; \
|
||||
vals[3] = rotate(vals[2], 10U); vals[2] = vals[1]; \
|
||||
vals[1] = t; \
|
||||
t = rotate(vals[5] + \
|
||||
fp(vals[6], vals[7], vals[8]) + \
|
||||
in[ripemd160_wsp[i]] + \
|
||||
ripemd160_kp[i / 16], \
|
||||
(uint)ripemd160_rlp[i]) + vals[9]; \
|
||||
vals[5] = vals[9]; vals[9] = vals[8]; \
|
||||
vals[8] = rotate(vals[7], 10U); vals[7] = vals[6]; \
|
||||
vals[6] = t; \
|
||||
#define ripemd160_round(i, in, vals, f, fp, t) do { \
|
||||
ripemd160_val(vals, i, 0) = \
|
||||
rotate(ripemd160_val(vals, i, 0) + \
|
||||
f(ripemd160_val(vals, i, 1), \
|
||||
ripemd160_val(vals, i, 2), \
|
||||
ripemd160_val(vals, i, 3)) + \
|
||||
in[ripemd160_ws[i]] + \
|
||||
ripemd160_k[i / 16], \
|
||||
(uint)ripemd160_rl[i]) + \
|
||||
ripemd160_val(vals, i, 4); \
|
||||
ripemd160_val(vals, i, 2) = \
|
||||
rotate(ripemd160_val(vals, i, 2), 10U); \
|
||||
ripemd160_valp(vals, i, 0) = \
|
||||
rotate(ripemd160_valp(vals, i, 0) + \
|
||||
fp(ripemd160_valp(vals, i, 1), \
|
||||
ripemd160_valp(vals, i, 2), \
|
||||
ripemd160_valp(vals, i, 3)) + \
|
||||
in[ripemd160_wsp[i]] + \
|
||||
ripemd160_kp[i / 16], \
|
||||
(uint)ripemd160_rlp[i]) + \
|
||||
ripemd160_valp(vals, i, 4); \
|
||||
ripemd160_valp(vals, i, 2) = \
|
||||
rotate(ripemd160_valp(vals, i, 2), 10U); \
|
||||
} while (0)
|
||||
|
||||
void
|
||||
@ -771,9 +757,41 @@ ripemd160_block(uint *out, uint *in)
|
||||
}
|
||||
|
||||
|
||||
#define bswap32(v) \
|
||||
(((v) >> 24) | (((v) >> 8) & 0xff00) | \
|
||||
(((v) << 8) & 0xff0000) | ((v) << 24))
|
||||
#ifdef TEST_KERNELS
|
||||
/*
|
||||
* Test kernels
|
||||
*/
|
||||
|
||||
/* Montgomery multiplication test kernel */
|
||||
__kernel void
|
||||
test_mul_mont(__global bignum *products_out, __global bignum *nums_in)
|
||||
{
|
||||
bignum a, b, c;
|
||||
int o;
|
||||
o = get_global_id(0);
|
||||
nums_in += (2*o);
|
||||
|
||||
a = nums_in[0];
|
||||
b = nums_in[1];
|
||||
bn_mul_mont(&c, &a, &b);
|
||||
products_out[o] = c;
|
||||
}
|
||||
|
||||
/* modular inversion test kernel */
|
||||
__kernel void
|
||||
test_mod_inverse(__global bignum *inv_out, __global bignum *nums_in,
|
||||
int count)
|
||||
{
|
||||
bignum x, xp;
|
||||
int i, o;
|
||||
o = get_global_id(0) * count;
|
||||
for (i = 0; i < count; i++) {
|
||||
x = nums_in[o];
|
||||
bn_mod_inverse(&xp, &x);
|
||||
inv_out[o++] = xp;
|
||||
}
|
||||
}
|
||||
#endif /* TEST_KERNELS */
|
||||
|
||||
|
||||
#if 0
|
||||
@ -1028,24 +1046,13 @@ heap_invert(__global bignum *z_heap, int ncols)
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void
|
||||
hash_ec_point(__global uint *hashes_out,
|
||||
__global bignum *points_in, __global bignum *z_heap)
|
||||
void
|
||||
hash_ec_point(uint *hash_out, __global bignum *xy, __global bignum *zip)
|
||||
{
|
||||
uint hash1[16], hash2[16];
|
||||
bignum c, zi, zzi;
|
||||
bn_word wh, wl;
|
||||
bignum p, a, b;
|
||||
int i, o;
|
||||
|
||||
o = get_global_size(0);
|
||||
i = o * get_global_id(1);
|
||||
z_heap += (i * 2);
|
||||
points_in += (i * 2);
|
||||
hashes_out += (i * 5);
|
||||
|
||||
i = get_global_id(0);
|
||||
points_in += (i * 2);
|
||||
hashes_out += (i * 5);
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Multiply the coordinates by the inverted Z values.
|
||||
@ -1054,34 +1061,34 @@ hash_ec_point(__global uint *hashes_out,
|
||||
* is big-endian, so swapping is unnecessary, but
|
||||
* inserting the format byte in front causes a headache.
|
||||
*/
|
||||
a = z_heap[(o - 1) + i];
|
||||
bn_mul_mont(&b, &a, &a); /* Z^2 */
|
||||
p = points_in[0];
|
||||
bn_mul_mont(&p, &p, &b); /* X / Z^2 */
|
||||
bn_from_mont(&p, &p);
|
||||
zi = zip[0];
|
||||
bn_mul_mont(&zzi, &zi, &zi); /* 1 / Z^2 */
|
||||
c = xy[0];
|
||||
bn_mul_mont(&c, &c, &zzi); /* X / Z^2 */
|
||||
bn_from_mont(&c, &c);
|
||||
|
||||
wh = 0x00000004; /* POINT_CONVERSION_UNCOMPRESSED */
|
||||
#ifdef UNROLL_MAX
|
||||
#pragma unroll UNROLL_MAX
|
||||
#endif
|
||||
for (o = 0; o < BN_NWORDS; o++) {
|
||||
for (i = 0; i < BN_NWORDS; i++) {
|
||||
wl = wh;
|
||||
wh = p.d[(BN_NWORDS - 1) - o];
|
||||
hash1[o] = (wl << 24) | (wh >> 8);
|
||||
wh = c.d[(BN_NWORDS - 1) - i];
|
||||
hash1[i] = (wl << 24) | (wh >> 8);
|
||||
}
|
||||
|
||||
bn_mul_mont(&a, &a, &b); /* Z^3 */
|
||||
p = points_in[1];
|
||||
bn_mul_mont(&p, &p, &a); /* Y / Z^3 */
|
||||
bn_from_mont(&p, &p);
|
||||
bn_mul_mont(&zzi, &zzi, &zi); /* 1 / Z^3 */
|
||||
c = xy[1];
|
||||
bn_mul_mont(&c, &c, &zzi); /* Y / Z^3 */
|
||||
bn_from_mont(&c, &c);
|
||||
|
||||
#ifdef UNROLL_MAX
|
||||
#pragma unroll UNROLL_MAX
|
||||
#endif
|
||||
for (o = 0; o < BN_NWORDS; o++) {
|
||||
for (i = 0; i < BN_NWORDS; i++) {
|
||||
wl = wh;
|
||||
wh = p.d[(BN_NWORDS - 1) - o];
|
||||
hash1[BN_NWORDS + o] = (wl << 24) | (wh >> 8);
|
||||
wh = c.d[(BN_NWORDS - 1) - i];
|
||||
hash1[BN_NWORDS + i] = (wl << 24) | (wh >> 8);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1119,8 +1126,8 @@ hash_ec_point(__global uint *hashes_out,
|
||||
#ifdef UNROLL_MAX
|
||||
#pragma unroll UNROLL_MAX
|
||||
#endif
|
||||
for (o = 0; o < 8; o++)
|
||||
hash2[o] = bswap32(hash2[o]);
|
||||
for (i = 0; i < 8; i++)
|
||||
hash2[i] = bswap32(hash2[i]);
|
||||
hash2[8] = bswap32(0x80000000);
|
||||
hash2[9] = 0;
|
||||
hash2[10] = 0;
|
||||
@ -1129,12 +1136,116 @@ hash_ec_point(__global uint *hashes_out,
|
||||
hash2[13] = 0;
|
||||
hash2[14] = 32 * 8;
|
||||
hash2[15] = 0;
|
||||
ripemd160_init(hash1);
|
||||
ripemd160_block(hash1, hash2);
|
||||
ripemd160_init(hash_out);
|
||||
ripemd160_block(hash_out, hash2);
|
||||
}
|
||||
|
||||
|
||||
__kernel void
|
||||
hash_ec_point_get(__global uint *hashes_out,
|
||||
__global bignum *points_in, __global bignum *z_heap)
|
||||
{
|
||||
uint hash[5];
|
||||
int i, o;
|
||||
|
||||
o = get_global_size(0);
|
||||
i = o * get_global_id(1);
|
||||
z_heap += (i * 2);
|
||||
points_in += (i * 2);
|
||||
hashes_out += (i * 5);
|
||||
|
||||
i = get_global_id(0);
|
||||
points_in += (i * 2);
|
||||
hashes_out += (i * 5);
|
||||
|
||||
/* Complete the coordinates and hash */
|
||||
hash_ec_point(hash, points_in, &z_heap[i + (o - 1)]);
|
||||
|
||||
/* Output the hash in proper byte-order */
|
||||
#ifdef UNROLL_MAX
|
||||
#pragma unroll UNROLL_MAX
|
||||
#endif
|
||||
for (o = 0; o < 5; o++)
|
||||
hashes_out[o] = hash1[o];
|
||||
for (i = 0; i < 5; i++)
|
||||
hashes_out[i] = load_le32(hash[i]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Normally this would be one function that compared two hash160s.
|
||||
* This one compares a hash160 with an upper and lower bound in one
|
||||
* function to work around a problem with AMD's OpenCL compiler.
|
||||
*/
|
||||
int
|
||||
hash160_ucmp_g(uint *a, __global uint *bound)
|
||||
{
|
||||
uint gv;
|
||||
int i;
|
||||
#ifdef UNROLL_MAX
|
||||
#pragma unroll UNROLL_MAX
|
||||
#endif
|
||||
for (i = 0; i < 5; i++) {
|
||||
gv = load_be32(bound[i]);
|
||||
if (a[i] < gv) return -1;
|
||||
if (a[i] > gv) break;
|
||||
}
|
||||
#ifdef UNROLL_MAX
|
||||
#pragma unroll UNROLL_MAX
|
||||
#endif
|
||||
for (i = 0; i < 5; i++) {
|
||||
gv = load_be32(bound[5+i]);
|
||||
if (a[i] < gv) return 0;
|
||||
if (a[i] > gv) return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
__kernel void
|
||||
hash_ec_point_search_prefix(__global uint *found,
|
||||
__global bignum *points_in, __global bignum *z_heap,
|
||||
__global uint *target_table, int ntargets)
|
||||
{
|
||||
uint hash[5];
|
||||
int i, high, low, p;
|
||||
|
||||
p = get_global_size(0);
|
||||
i = p * get_global_id(1);
|
||||
z_heap += (i * 2);
|
||||
points_in += (i * 2);
|
||||
|
||||
i = get_global_id(0);
|
||||
points_in += (i * 2);
|
||||
|
||||
/* Complete the coordinates and hash */
|
||||
hash_ec_point(hash, points_in, &z_heap[(p - 1) + i]);
|
||||
|
||||
/*
|
||||
* Unconditionally byteswap the hash result, because:
|
||||
* - The byte-level convention of RIPEMD160 is little-endian
|
||||
* - We are comparing it in big-endian order
|
||||
*/
|
||||
#ifdef UNROLL_MAX
|
||||
#pragma unroll UNROLL_MAX
|
||||
#endif
|
||||
for (i = 0; i < 5; i++)
|
||||
hash[i] = bswap32(hash[i]);
|
||||
|
||||
/* Binary-search the target table for the hash we just computed */
|
||||
for (high = ntargets - 1, low = 0, i = high >> 1;
|
||||
high >= low;
|
||||
i = low + ((high - low) >> 1)) {
|
||||
p = hash160_ucmp_g(hash, &target_table[10*i]);
|
||||
low = (p > 0) ? (i + 1) : low;
|
||||
high = (p < 0) ? (i - 1) : high;
|
||||
if (p == 0) {
|
||||
/* For debugging purposes, write the hash value */
|
||||
found[0] = ((get_global_id(1) * get_global_size(0)) +
|
||||
get_global_id(0));
|
||||
found[1] = i;
|
||||
#ifdef UNROLL_MAX
|
||||
#pragma unroll UNROLL_MAX
|
||||
#endif
|
||||
for (p = 0; p < 5; p++)
|
||||
found[p+2] = load_be32(hash[p]);
|
||||
high = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
488
oclvanitygen.c
488
oclvanitygen.c
@ -33,8 +33,16 @@
|
||||
|
||||
|
||||
const char *version = "0.13";
|
||||
const int debug = 0;
|
||||
|
||||
#define MAX_SLOT 2
|
||||
#define MAX_ARG 6
|
||||
#define MAX_KERNEL 3
|
||||
|
||||
/* OpenCL address searching mode */
|
||||
struct _vg_ocl_context_s;
|
||||
typedef int (*vg_ocl_init_t)(struct _vg_ocl_context_s *);
|
||||
typedef int (*vg_ocl_check_t)(struct _vg_ocl_context_s *, int slot);
|
||||
|
||||
typedef struct _vg_ocl_context_s {
|
||||
vg_exec_context_t base;
|
||||
@ -42,10 +50,17 @@ typedef struct _vg_ocl_context_s {
|
||||
cl_context voc_oclctx;
|
||||
cl_command_queue voc_oclcmdq;
|
||||
cl_program voc_oclprog;
|
||||
cl_kernel voc_oclkernel[MAX_SLOT][3];
|
||||
vg_ocl_init_t voc_init_func;
|
||||
vg_ocl_init_t voc_rekey_func;
|
||||
vg_ocl_check_t voc_check_func;
|
||||
int voc_nslots;
|
||||
cl_kernel voc_oclkernel[MAX_SLOT][MAX_KERNEL];
|
||||
cl_event voc_oclkrnwait[MAX_SLOT];
|
||||
cl_mem voc_args[MAX_SLOT][6];
|
||||
size_t voc_arg_size[MAX_SLOT][6];
|
||||
cl_mem voc_args[MAX_SLOT][MAX_ARG];
|
||||
size_t voc_arg_size[MAX_SLOT][MAX_ARG];
|
||||
|
||||
int voc_pattern_rewrite;
|
||||
int voc_pattern_alloc;
|
||||
|
||||
pthread_t voc_ocl_thread;
|
||||
pthread_mutex_t voc_lock;
|
||||
@ -171,8 +186,7 @@ vg_ocl_load_program(vg_context_t *vcp, vg_ocl_context_t *vocp,
|
||||
|
||||
vocp->voc_oclprog = prog;
|
||||
if (!vg_ocl_create_kernel(vocp, 0, "ec_add_grid") ||
|
||||
!vg_ocl_create_kernel(vocp, 1, "heap_invert") ||
|
||||
!vg_ocl_create_kernel(vocp, 2, "hash_ec_point")) {
|
||||
!vg_ocl_create_kernel(vocp, 1, "heap_invert")) {
|
||||
clReleaseProgram(vocp->voc_oclprog);
|
||||
vocp->voc_oclprog = NULL;
|
||||
return 0;
|
||||
@ -181,7 +195,7 @@ vg_ocl_load_program(vg_context_t *vcp, vg_ocl_context_t *vocp,
|
||||
return 1;
|
||||
}
|
||||
|
||||
void
|
||||
void CL_CALLBACK
|
||||
vg_ocl_context_callback(const char *errinfo,
|
||||
const void *private_info,
|
||||
size_t cb,
|
||||
@ -223,7 +237,7 @@ vg_ocl_init(vg_context_t *vcp, vg_ocl_context_t *vocp, cl_device_id did)
|
||||
|
||||
if (!vg_ocl_load_program(vcp, vocp,
|
||||
"calc_addrs.cl",
|
||||
//"-cl-nv-verbose -cl-nv-maxrregcount=32 "
|
||||
//"-cl-nv-verbose "
|
||||
"-DUNROLL_MAX=16")) {
|
||||
printf("Could not load kernel\n");
|
||||
return 0;
|
||||
@ -251,6 +265,21 @@ vg_ocl_del(vg_ocl_context_t *vocp)
|
||||
vg_exec_context_del(&vocp->base);
|
||||
}
|
||||
|
||||
static int vg_ocl_arg_map[][8] = {
|
||||
/* hashes_out / found */
|
||||
{ 2, 0, -1 },
|
||||
/* z_heap */
|
||||
{ 0, 1, 1, 0, 2, 2, -1 },
|
||||
/* point_tmp */
|
||||
{ 0, 0, 2, 1, -1 },
|
||||
/* row_in */
|
||||
{ 0, 2, -1 },
|
||||
/* col_in */
|
||||
{ 0, 3, -1 },
|
||||
/* target_table */
|
||||
{ 2, 3, -1 },
|
||||
};
|
||||
|
||||
int
|
||||
vg_ocl_kernel_arg_alloc(vg_ocl_context_t *vocp, int slot,
|
||||
int arg, size_t size, int host)
|
||||
@ -259,18 +288,15 @@ vg_ocl_kernel_arg_alloc(vg_ocl_context_t *vocp, int slot,
|
||||
cl_int ret;
|
||||
int i, j, knum, karg;
|
||||
|
||||
static int arg_map[5][8] = {
|
||||
/* hashes_out */
|
||||
{ 2, 0, -1 },
|
||||
/* z_heap */
|
||||
{ 0, 1, 1, 0, 2, 2, -1 },
|
||||
/* point_tmp */
|
||||
{ 0, 0, 2, 1, -1 },
|
||||
/* row_in */
|
||||
{ 0, 2, -1 },
|
||||
/* col_in */
|
||||
{ 0, 3, -1 },
|
||||
};
|
||||
for (i = 0; i < MAX_SLOT; i++) {
|
||||
if ((i != slot) && (slot >= 0))
|
||||
continue;
|
||||
if (vocp->voc_args[i][arg]) {
|
||||
clReleaseMemObject(vocp->voc_args[i][arg]);
|
||||
vocp->voc_args[i][arg] = NULL;
|
||||
vocp->voc_arg_size[i][arg] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
clbuf = clCreateBuffer(vocp->voc_oclctx,
|
||||
CL_MEM_READ_WRITE |
|
||||
@ -287,24 +313,54 @@ vg_ocl_kernel_arg_alloc(vg_ocl_context_t *vocp, int slot,
|
||||
if ((i != slot) && (slot >= 0))
|
||||
continue;
|
||||
|
||||
for (j = 0; arg_map[arg][j] >= 0; j += 2) {
|
||||
knum = arg_map[arg][j];
|
||||
karg = arg_map[arg][j+1];
|
||||
clRetainMemObject(clbuf);
|
||||
vocp->voc_args[i][arg] = clbuf;
|
||||
vocp->voc_arg_size[i][arg] = size;
|
||||
|
||||
for (j = 0; vg_ocl_arg_map[arg][j] >= 0; j += 2) {
|
||||
knum = vg_ocl_arg_map[arg][j];
|
||||
karg = vg_ocl_arg_map[arg][j+1];
|
||||
ret = clSetKernelArg(vocp->voc_oclkernel[i][knum],
|
||||
karg,
|
||||
sizeof(clbuf),
|
||||
&clbuf);
|
||||
|
||||
if (ret) {
|
||||
clReleaseMemObject(clbuf);
|
||||
printf("Could not set kernel argument: %d\n",
|
||||
ret);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
vocp->voc_args[i][arg] = clbuf;
|
||||
vocp->voc_arg_size[i][arg] = size;
|
||||
}
|
||||
|
||||
clReleaseMemObject(clbuf);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int
|
||||
vg_ocl_copyout_arg(vg_ocl_context_t *vocp, int wslot, int arg,
|
||||
void *buffer, size_t size)
|
||||
{
|
||||
cl_int slot, ret;
|
||||
|
||||
slot = (wslot < 0) ? 0 : wslot;
|
||||
|
||||
assert((slot >= 0) && (slot < MAX_SLOT));
|
||||
assert(size <= vocp->voc_arg_size[slot][arg]);
|
||||
|
||||
ret = clEnqueueWriteBuffer(vocp->voc_oclcmdq,
|
||||
vocp->voc_args[slot][arg],
|
||||
CL_TRUE,
|
||||
0, size,
|
||||
buffer,
|
||||
0, NULL,
|
||||
NULL);
|
||||
|
||||
if (ret) {
|
||||
printf("Could not copyout argument buffer: %d\n", ret);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -320,7 +376,8 @@ vg_ocl_map_arg_buffer(vg_ocl_context_t *vocp, int slot,
|
||||
buf = clEnqueueMapBuffer(vocp->voc_oclcmdq,
|
||||
vocp->voc_args[slot][arg],
|
||||
CL_TRUE,
|
||||
rw ? CL_MAP_WRITE : CL_MAP_READ,
|
||||
(rw == 2) ? (CL_MAP_READ|CL_MAP_WRITE)
|
||||
: (rw ? CL_MAP_WRITE : CL_MAP_READ),
|
||||
0, vocp->voc_arg_size[slot][arg],
|
||||
0, NULL,
|
||||
NULL,
|
||||
@ -368,7 +425,7 @@ vg_ocl_kernel_int_arg(vg_ocl_context_t *vocp, int slot,
|
||||
for (i = 0; i < MAX_SLOT; i++) {
|
||||
if ((i != slot) && (slot >= 0))
|
||||
continue;
|
||||
ret = clSetKernelArg(vocp->voc_oclkernel[i][0],
|
||||
ret = clSetKernelArg(vocp->voc_oclkernel[i][2],
|
||||
arg,
|
||||
sizeof(value),
|
||||
&value);
|
||||
@ -507,6 +564,200 @@ show_elapsed(struct timeval *tv, const char *place)
|
||||
printf("%s spent %ld.%06lds\n", place, delta.tv_sec, delta.tv_usec);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GPU address matching methods
|
||||
*
|
||||
* gethash: GPU computes and returns all address hashes.
|
||||
* + Works with any matching method, including regular expressions.
|
||||
* - The CPU will not be able to keep up with mid- to high-end GPUs.
|
||||
*
|
||||
* prefix: GPU computes hash, searches a range list, and discards.
|
||||
* + Fast, minimal work for CPU.
|
||||
*/
|
||||
|
||||
int
|
||||
vg_ocl_gethash_check(vg_ocl_context_t *vocp, int slot)
|
||||
{
|
||||
vg_exec_context_t *vxcp = &vocp->base;
|
||||
vg_context_t *vcp = vocp->base.vxc_vc;
|
||||
vg_test_func_t test_func = vcp->vc_test;
|
||||
unsigned char *ocl_hashes_out;
|
||||
int i, res = 0, round;
|
||||
|
||||
ocl_hashes_out = (unsigned char *)
|
||||
vg_ocl_map_arg_buffer(vocp, slot, 0, 0);
|
||||
|
||||
round = vocp->voc_ocl_cols * vocp->voc_ocl_rows;
|
||||
|
||||
for (i = 0; i < round; i++, vxcp->vxc_delta++) {
|
||||
memcpy(&vxcp->vxc_binres[1],
|
||||
ocl_hashes_out + (20*i),
|
||||
20);
|
||||
|
||||
res = test_func(vxcp);
|
||||
if (res)
|
||||
break;
|
||||
}
|
||||
|
||||
vg_ocl_unmap_arg_buffer(vocp, slot, 0, ocl_hashes_out);
|
||||
return res;
|
||||
}
|
||||
|
||||
int
|
||||
vg_ocl_gethash_init(vg_ocl_context_t *vocp)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!vg_ocl_create_kernel(vocp, 2, "hash_ec_point_get"))
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < vocp->voc_nslots; i++) {
|
||||
/* Each slot gets its own hash output buffer */
|
||||
if (!vg_ocl_kernel_arg_alloc(vocp, i, 0,
|
||||
20 *
|
||||
vocp->voc_ocl_rows *
|
||||
vocp->voc_ocl_cols, 1))
|
||||
return 0;
|
||||
}
|
||||
|
||||
vocp->voc_rekey_func = NULL;
|
||||
vocp->voc_check_func = vg_ocl_gethash_check;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
vg_ocl_prefix_rekey(vg_ocl_context_t *vocp)
|
||||
{
|
||||
vg_context_t *vcp = vocp->base.vxc_vc;
|
||||
unsigned char *ocl_targets_in;
|
||||
uint32_t *ocl_found_out;
|
||||
int i;
|
||||
|
||||
/* Set the found indicator for each slot to -1 */
|
||||
for (i = 0; i < vocp->voc_nslots; i++) {
|
||||
ocl_found_out = (uint32_t *)
|
||||
vg_ocl_map_arg_buffer(vocp, i, 0, 1);
|
||||
ocl_found_out[0] = 0xffffffff;
|
||||
vg_ocl_unmap_arg_buffer(vocp, i, 0, ocl_found_out);
|
||||
}
|
||||
|
||||
if (vocp->voc_pattern_rewrite) {
|
||||
/* Count number of range records */
|
||||
i = vg_context_hash160_sort(vcp, NULL);
|
||||
if (!i) {
|
||||
printf("No range records available, exiting\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (i > vocp->voc_pattern_alloc) {
|
||||
/* (re)allocate target buffer */
|
||||
if (!vg_ocl_kernel_arg_alloc(vocp, -1, 5, 40 * i, 0))
|
||||
return 0;
|
||||
vocp->voc_pattern_alloc = i;
|
||||
}
|
||||
|
||||
/* Write range records */
|
||||
ocl_targets_in = (unsigned char *)
|
||||
vg_ocl_map_arg_buffer(vocp, 0, 5, 1);
|
||||
vg_context_hash160_sort(vcp, ocl_targets_in);
|
||||
vg_ocl_unmap_arg_buffer(vocp, 0, 5, ocl_targets_in);
|
||||
vg_ocl_kernel_int_arg(vocp, -1, 4, i);
|
||||
|
||||
vocp->voc_pattern_rewrite = 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
vg_ocl_prefix_check(vg_ocl_context_t *vocp, int slot)
|
||||
{
|
||||
vg_exec_context_t *vxcp = &vocp->base;
|
||||
vg_context_t *vcp = vocp->base.vxc_vc;
|
||||
vg_test_func_t test_func = vcp->vc_test;
|
||||
uint32_t *ocl_found_out;
|
||||
uint32_t found_delta;
|
||||
int orig_delta, tablesize;
|
||||
int res = 0;
|
||||
|
||||
/* Retrieve the found indicator */
|
||||
ocl_found_out = (uint32_t *)
|
||||
vg_ocl_map_arg_buffer(vocp, slot, 0, 2);
|
||||
found_delta = ocl_found_out[0];
|
||||
|
||||
if (found_delta != 0xffffffff) {
|
||||
/* GPU code claims match, verify with CPU version */
|
||||
orig_delta = vxcp->vxc_delta;
|
||||
vxcp->vxc_delta += found_delta;
|
||||
vg_exec_context_calc_address(vxcp);
|
||||
res = test_func(vxcp);
|
||||
if (res == 0) {
|
||||
/*
|
||||
* The match was not found in
|
||||
* the pattern list. Hmm.
|
||||
*/
|
||||
tablesize = ocl_found_out[2];
|
||||
printf("Match idx: %d\n", ocl_found_out[1]);
|
||||
printf("CPU hash: ");
|
||||
dumphex(vxcp->vxc_binres + 1, 20);
|
||||
printf("GPU hash: ");
|
||||
dumphex((unsigned char *) (ocl_found_out + 3), 20);
|
||||
printf("Table size: %d\n", ocl_found_out[2]);
|
||||
printf("Found delta: %d "
|
||||
"Start delta: %d\n",
|
||||
found_delta, orig_delta);
|
||||
res = 1;
|
||||
}
|
||||
vocp->voc_pattern_rewrite = 1;
|
||||
} else {
|
||||
vxcp->vxc_delta += (vocp->voc_ocl_cols * vocp->voc_ocl_rows);
|
||||
}
|
||||
|
||||
vg_ocl_unmap_arg_buffer(vocp, slot, 0, ocl_found_out);
|
||||
return res;
|
||||
}
|
||||
|
||||
int
|
||||
vg_ocl_prefix_init(vg_ocl_context_t *vocp)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!vg_ocl_create_kernel(vocp, 2, "hash_ec_point_search_prefix"))
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < vocp->voc_nslots; i++) {
|
||||
if (!vg_ocl_kernel_arg_alloc(vocp, i, 0, 28, 1))
|
||||
return 0;
|
||||
}
|
||||
vocp->voc_rekey_func = vg_ocl_prefix_rekey;
|
||||
vocp->voc_check_func = vg_ocl_prefix_check;
|
||||
vocp->voc_pattern_rewrite = 1;
|
||||
vocp->voc_pattern_alloc = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
vg_ocl_config_pattern(vg_ocl_context_t *vocp)
|
||||
{
|
||||
vg_context_t *vcp = vocp->base.vxc_vc;
|
||||
int i;
|
||||
|
||||
i = vg_context_hash160_sort(vcp, NULL);
|
||||
if (i > 0) {
|
||||
if (vcp->vc_verbose > 1)
|
||||
printf("Using GPU prefix matcher\n");
|
||||
/* Configure for prefix matching */
|
||||
return vg_ocl_prefix_init(vocp);
|
||||
}
|
||||
|
||||
if (vcp->vc_verbose > 0)
|
||||
printf("WARNING: Using CPU pattern matcher\n");
|
||||
return vg_ocl_gethash_init(vocp);
|
||||
}
|
||||
|
||||
|
||||
void *
|
||||
vg_opencl_thread(void *arg)
|
||||
{
|
||||
@ -561,24 +812,25 @@ vg_opencl_thread(void *arg)
|
||||
|
||||
if (!vg_ocl_kernel_wait(vocp, slot))
|
||||
halt = 1;
|
||||
gettimeofday(&tvt, NULL);
|
||||
timersub(&tvt, &tv, &tvd);
|
||||
timeradd(&tvd, &busy, &busy);
|
||||
|
||||
if ((vcp->vc_verbose > 1) &&
|
||||
((busy.tv_sec + idle.tv_sec) > 1)) {
|
||||
idleu = (1000000 * idle.tv_sec) + idle.tv_usec;
|
||||
busyu = (1000000 * busy.tv_sec) + busy.tv_usec;
|
||||
pidle = ((double) idleu) / (idleu + busyu);
|
||||
if (vcp->vc_verbose > 1) {
|
||||
gettimeofday(&tvt, NULL);
|
||||
timersub(&tvt, &tv, &tvd);
|
||||
timeradd(&tvd, &busy, &busy);
|
||||
if ((busy.tv_sec + idle.tv_sec) > 1) {
|
||||
idleu = (1000000 * idle.tv_sec) + idle.tv_usec;
|
||||
busyu = (1000000 * busy.tv_sec) + busy.tv_usec;
|
||||
pidle = ((double) idleu) / (idleu + busyu);
|
||||
|
||||
if (pidle > 0.05) {
|
||||
printf("\rGPU idle: %.2f%%"
|
||||
" "
|
||||
if (pidle > 0.01) {
|
||||
printf("\rGPU idle: %.2f%%"
|
||||
" "
|
||||
" \n",
|
||||
100 * pidle);
|
||||
100 * pidle);
|
||||
}
|
||||
memset(&idle, 0, sizeof(idle));
|
||||
memset(&busy, 0, sizeof(busy));
|
||||
}
|
||||
memset(&idle, 0, sizeof(idle));
|
||||
memset(&busy, 0, sizeof(busy));
|
||||
}
|
||||
}
|
||||
out:
|
||||
@ -586,6 +838,7 @@ out:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Address search thread main loop
|
||||
*/
|
||||
@ -605,12 +858,11 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int worksize)
|
||||
EC_POINT **ppbase = NULL, **pprow, *pbatchinc = NULL, *poffset = NULL;
|
||||
EC_POINT *pseek = NULL;
|
||||
|
||||
unsigned char *ocl_points_in, *ocl_strides_in, *ocl_hashes_out;
|
||||
unsigned char *ocl_points_in, *ocl_strides_in;
|
||||
|
||||
vg_ocl_context_t ctx;
|
||||
vg_ocl_context_t *vocp = &ctx;
|
||||
vg_exec_context_t *vxcp = &vocp->base;
|
||||
vg_test_func_t test_func = vcp->vc_test;
|
||||
|
||||
int slot, nslots;
|
||||
int slot_busy = 0, slot_done = 0, halt = 0;
|
||||
@ -637,6 +889,10 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int worksize)
|
||||
nslots = 2;
|
||||
slot = 0;
|
||||
|
||||
vocp->voc_ocl_cols = batchsize;
|
||||
vocp->voc_ocl_rows = worksize;
|
||||
vocp->voc_nslots = nslots;
|
||||
|
||||
ppbase = (EC_POINT **) malloc((batchsize + worksize) *
|
||||
sizeof(EC_POINT*));
|
||||
if (!ppbase)
|
||||
@ -667,26 +923,28 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int worksize)
|
||||
|
||||
round = batchsize * worksize;
|
||||
|
||||
if (!vg_ocl_config_pattern(vocp))
|
||||
goto enomem;
|
||||
|
||||
for (i = 0; i < nslots; i++) {
|
||||
/*
|
||||
* Each work group gets its own:
|
||||
* - Hash output array
|
||||
* - Point and z_heap scratch spaces
|
||||
* - Column point array
|
||||
*/
|
||||
if (!vg_ocl_kernel_arg_alloc(vocp, i, 0, 20 * round, 1) ||
|
||||
!vg_ocl_kernel_arg_alloc(vocp, i, 1, 32 * 2 * round, 0) ||
|
||||
!vg_ocl_kernel_arg_alloc(vocp, i, 2, 32 * 2 * round, 0) ||
|
||||
!vg_ocl_kernel_arg_alloc(vocp, i, 4, 32 * 2 * worksize, 1))
|
||||
if (!vg_ocl_kernel_arg_alloc(vocp, i, 4, 32 * 2 * worksize, 1))
|
||||
goto enomem;
|
||||
}
|
||||
|
||||
/* Same row point array for all instances */
|
||||
if (!vg_ocl_kernel_arg_alloc(vocp, -1, 3, 32 * 2 * batchsize, 1))
|
||||
/*
|
||||
* All instances share:
|
||||
* - The z_heap and point scratch spaces
|
||||
* - The row point array
|
||||
*/
|
||||
if (!vg_ocl_kernel_arg_alloc(vocp, -1, 1, 32 * 2 * round, 0) ||
|
||||
!vg_ocl_kernel_arg_alloc(vocp, -1, 2, 32 * 2 * round, 0) ||
|
||||
!vg_ocl_kernel_arg_alloc(vocp, -1, 3, 32 * 2 * batchsize, 1))
|
||||
goto enomem;
|
||||
|
||||
//vg_ocl_kernel_int_arg(vocp, -1, 5, batchsize);
|
||||
|
||||
npoints = 0;
|
||||
rekey_at = 0;
|
||||
vxcp->vxc_binres[0] = vcp->vc_addrtype;
|
||||
@ -698,6 +956,10 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int worksize)
|
||||
gettimeofday(&tvstart, NULL);
|
||||
|
||||
l_rekey:
|
||||
if (vocp->voc_rekey_func &&
|
||||
!vocp->voc_rekey_func(vocp))
|
||||
goto enomem;
|
||||
|
||||
/* Generate a new random private key */
|
||||
EC_KEY_generate_key(pkey);
|
||||
npoints = 0;
|
||||
@ -755,33 +1017,22 @@ l_rekey:
|
||||
|
||||
while (1) {
|
||||
if (slot_done) {
|
||||
assert(rekey_at > 0);
|
||||
slot_done = 0;
|
||||
|
||||
ocl_hashes_out = (unsigned char *)
|
||||
vg_ocl_map_arg_buffer(vocp, slot, 0, 0);
|
||||
|
||||
for (i = 0; i < round; i++, vxcp->vxc_delta++) {
|
||||
memcpy(&vxcp->vxc_binres[1],
|
||||
ocl_hashes_out + (20*i),
|
||||
20);
|
||||
|
||||
switch (test_func(vxcp)) {
|
||||
case 1:
|
||||
rekey_at = 0;
|
||||
i = round;
|
||||
break;
|
||||
case 2:
|
||||
halt = 1;
|
||||
i = round;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
/* Call the result check function */
|
||||
switch (vocp->voc_check_func(vocp, slot)) {
|
||||
case 1:
|
||||
rekey_at = 0;
|
||||
break;
|
||||
case 2:
|
||||
halt = 1;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
vg_ocl_unmap_arg_buffer(vocp, slot, 0, ocl_hashes_out);
|
||||
|
||||
c += (i + 1);
|
||||
c += round;
|
||||
if (!halt && (c >= output_interval)) {
|
||||
output_interval =
|
||||
vg_output_timing(vcp, c, &tvstart);
|
||||
@ -789,24 +1040,8 @@ l_rekey:
|
||||
}
|
||||
}
|
||||
|
||||
if (halt) {
|
||||
if (vcp->vc_verbose > 1)
|
||||
printf("Halting...");
|
||||
pthread_mutex_lock(&vocp->voc_lock);
|
||||
vocp->voc_halt = 1;
|
||||
pthread_cond_signal(&vocp->voc_wait);
|
||||
while (vocp->voc_ocl_slot != -1) {
|
||||
assert(slot_busy);
|
||||
pthread_cond_wait(&vocp->voc_wait,
|
||||
&vocp->voc_lock);
|
||||
}
|
||||
slot_busy = 0;
|
||||
pthread_mutex_unlock(&vocp->voc_lock);
|
||||
pthread_join(vocp->voc_ocl_thread, NULL);
|
||||
if (vcp->vc_verbose > 1)
|
||||
printf("done!\n");
|
||||
if (halt)
|
||||
break;
|
||||
}
|
||||
|
||||
if ((npoints + round) < rekey_at) {
|
||||
if (npoints > 1) {
|
||||
@ -843,35 +1078,36 @@ l_rekey:
|
||||
}
|
||||
|
||||
if (vocp->voc_halt) {
|
||||
halt = 1;
|
||||
} else {
|
||||
vocp->voc_ocl_slot = slot;
|
||||
vocp->voc_ocl_cols = batchsize;
|
||||
vocp->voc_ocl_rows = worksize;
|
||||
pthread_cond_signal(&vocp->voc_wait);
|
||||
pthread_mutex_unlock(&vocp->voc_lock);
|
||||
|
||||
if (slot_busy)
|
||||
slot_done = 1;
|
||||
slot_busy = 1;
|
||||
slot = (slot + 1) % nslots;
|
||||
halt = 1;
|
||||
break;
|
||||
}
|
||||
pthread_mutex_unlock(&vocp->voc_lock);
|
||||
}
|
||||
|
||||
else if (slot_busy) {
|
||||
pthread_mutex_lock(&vocp->voc_lock);
|
||||
while (vocp->voc_ocl_slot != -1) {
|
||||
pthread_cond_wait(&vocp->voc_wait,
|
||||
&vocp->voc_lock);
|
||||
vocp->voc_ocl_slot = slot;
|
||||
pthread_cond_signal(&vocp->voc_wait);
|
||||
pthread_mutex_unlock(&vocp->voc_lock);
|
||||
|
||||
slot_done = slot_busy;
|
||||
slot_busy = 1;
|
||||
slot = (slot + 1) % nslots;
|
||||
|
||||
} else {
|
||||
if (slot_busy) {
|
||||
pthread_mutex_lock(&vocp->voc_lock);
|
||||
while (vocp->voc_ocl_slot != -1) {
|
||||
assert(vocp->voc_ocl_slot ==
|
||||
((slot + nslots - 1) % nslots));
|
||||
pthread_cond_wait(&vocp->voc_wait,
|
||||
&vocp->voc_lock);
|
||||
}
|
||||
pthread_mutex_unlock(&vocp->voc_lock);
|
||||
slot_busy = 0;
|
||||
slot_done = 1;
|
||||
}
|
||||
slot_busy = 0;
|
||||
pthread_mutex_unlock(&vocp->voc_lock);
|
||||
slot_done = 1;
|
||||
}
|
||||
|
||||
else if (!rekey_at || ((npoints + round) >= rekey_at)) {
|
||||
goto l_rekey;
|
||||
if (!rekey_at ||
|
||||
(!slot_done && ((npoints + round) >= rekey_at)))
|
||||
goto l_rekey;
|
||||
}
|
||||
}
|
||||
|
||||
@ -880,6 +1116,24 @@ l_rekey:
|
||||
printf("ERROR: allocation failure?\n");
|
||||
}
|
||||
|
||||
if (halt) {
|
||||
if (vcp->vc_verbose > 1)
|
||||
printf("Halting...");
|
||||
pthread_mutex_lock(&vocp->voc_lock);
|
||||
vocp->voc_halt = 1;
|
||||
pthread_cond_signal(&vocp->voc_wait);
|
||||
while (vocp->voc_ocl_slot != -1) {
|
||||
assert(slot_busy);
|
||||
pthread_cond_wait(&vocp->voc_wait,
|
||||
&vocp->voc_lock);
|
||||
}
|
||||
slot_busy = 0;
|
||||
pthread_mutex_unlock(&vocp->voc_lock);
|
||||
pthread_join(vocp->voc_ocl_thread, NULL);
|
||||
if (vcp->vc_verbose > 1)
|
||||
printf("done!\n");
|
||||
}
|
||||
|
||||
if (ppbase) {
|
||||
for (i = 0; i < (batchsize + worksize); i++)
|
||||
if (ppbase[i])
|
||||
|
110
pattern.c
110
pattern.c
@ -207,6 +207,27 @@ vg_exec_context_consolidate_key(vg_exec_context_t *vxcp)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vg_exec_context_calc_address(vg_exec_context_t *vxcp)
|
||||
{
|
||||
const EC_GROUP *pgroup;
|
||||
unsigned char eckey_buf[96], hash1[32], hash2[20];
|
||||
int len;
|
||||
|
||||
vg_exec_context_consolidate_key(vxcp);
|
||||
pgroup = EC_KEY_get0_group(vxcp->vxc_key);
|
||||
len = EC_POINT_point2oct(pgroup,
|
||||
EC_KEY_get0_public_key(vxcp->vxc_key),
|
||||
POINT_CONVERSION_UNCOMPRESSED,
|
||||
eckey_buf,
|
||||
sizeof(eckey_buf),
|
||||
vxcp->vxc_bnctx);
|
||||
SHA256(eckey_buf, len, hash1);
|
||||
RIPEMD160(hash1, sizeof(hash1), hash2);
|
||||
memcpy(&vxcp->vxc_binres[1],
|
||||
hash2, 20);
|
||||
}
|
||||
|
||||
|
||||
typedef struct _timing_info_s {
|
||||
struct _timing_info_s *ti_next;
|
||||
@ -437,6 +458,14 @@ vg_context_add_patterns(vg_context_t *vcp,
|
||||
return vcp->vc_add_patterns(vcp, patterns, npatterns);
|
||||
}
|
||||
|
||||
int
|
||||
vg_context_hash160_sort(vg_context_t *vcp, void *buf)
|
||||
{
|
||||
if (!vcp->vc_hash160_sort)
|
||||
return 0;
|
||||
return vcp->vc_hash160_sort(vcp, buf);
|
||||
}
|
||||
|
||||
|
||||
static const signed char b58_reverse_map[256] = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
@ -1023,6 +1052,17 @@ avl_insert_fix(avl_root_t *rootp, avl_item_t *itemp)
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE avl_item_t *
|
||||
avl_first(avl_root_t *rootp)
|
||||
{
|
||||
avl_item_t *itemp = rootp->ar_root;
|
||||
if (itemp) {
|
||||
while (itemp->ai_left)
|
||||
itemp = itemp->ai_left;
|
||||
}
|
||||
return itemp;
|
||||
}
|
||||
|
||||
static INLINE avl_item_t *
|
||||
avl_next(avl_item_t *itemp)
|
||||
{
|
||||
@ -1181,6 +1221,26 @@ vg_prefix_avl_insert(avl_root_t *rootp, vg_prefix_t *vpnew)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static vg_prefix_t *
|
||||
vg_prefix_first(avl_root_t *rootp)
|
||||
{
|
||||
avl_item_t *itemp;
|
||||
itemp = avl_first(rootp);
|
||||
if (itemp)
|
||||
return avl_item_entry(itemp, vg_prefix_t, vp_item);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static vg_prefix_t *
|
||||
vg_prefix_next(vg_prefix_t *vp)
|
||||
{
|
||||
avl_item_t *itemp = &vp->vp_item;
|
||||
itemp = avl_next(itemp);
|
||||
if (itemp)
|
||||
return avl_item_entry(itemp, vg_prefix_t, vp_item);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static vg_prefix_t *
|
||||
vg_prefix_add(avl_root_t *rootp, const char *pattern, BIGNUM *low, BIGNUM *high)
|
||||
{
|
||||
@ -1568,6 +1628,54 @@ research:
|
||||
return res;
|
||||
}
|
||||
|
||||
int
|
||||
vg_prefix_hash160_sort(vg_context_t *vcp, void *buf)
|
||||
{
|
||||
vg_prefix_context_t *vcpp = (vg_prefix_context_t *) vcp;
|
||||
vg_prefix_t *vp;
|
||||
unsigned char *cbuf = (unsigned char *) buf;
|
||||
unsigned char bnbuf[25];
|
||||
int nbytes, ncopy, nskip, npfx = 0;
|
||||
|
||||
/*
|
||||
* Walk the prefix tree in order, copy the upper and lower bound
|
||||
* values into the hash160 buffer. Skip the lower four bytes
|
||||
* and anything above the 24th byte.
|
||||
*/
|
||||
for (vp = vg_prefix_first(&vcpp->vcp_avlroot);
|
||||
vp != NULL;
|
||||
vp = vg_prefix_next(vp)) {
|
||||
npfx++;
|
||||
if (!buf)
|
||||
continue;
|
||||
|
||||
/* Low */
|
||||
nbytes = BN_bn2bin(vp->vp_low, bnbuf);
|
||||
ncopy = ((nbytes >= 24) ? 20 :
|
||||
((nbytes > 4) ? (nbytes - 4) : 0));
|
||||
nskip = (nbytes >= 24) ? (nbytes - 24) : 0;
|
||||
if (ncopy < 20)
|
||||
memset(cbuf, 0, 20 - ncopy);
|
||||
memcpy(cbuf + (20 - ncopy),
|
||||
bnbuf + nskip,
|
||||
ncopy);
|
||||
cbuf += 20;
|
||||
|
||||
/* High */
|
||||
nbytes = BN_bn2bin(vp->vp_high, bnbuf);
|
||||
ncopy = ((nbytes >= 24) ? 20 :
|
||||
((nbytes > 4) ? (nbytes - 4) : 0));
|
||||
nskip = (nbytes >= 24) ? (nbytes - 24) : 0;
|
||||
if (ncopy < 20)
|
||||
memset(cbuf, 0, 20 - ncopy);
|
||||
memcpy(cbuf + (20 - ncopy),
|
||||
bnbuf + nskip,
|
||||
ncopy);
|
||||
cbuf += 20;
|
||||
}
|
||||
return npfx;
|
||||
}
|
||||
|
||||
vg_context_t *
|
||||
vg_prefix_context_new(int addrtype, int privtype, int caseinsensitive)
|
||||
{
|
||||
@ -1584,6 +1692,7 @@ vg_prefix_context_new(int addrtype, int privtype, int caseinsensitive)
|
||||
vcpp->base.vc_free = vg_prefix_context_free;
|
||||
vcpp->base.vc_add_patterns = vg_prefix_context_add_patterns;
|
||||
vcpp->base.vc_test = vg_prefix_test;
|
||||
vcpp->base.vc_hash160_sort = vg_prefix_hash160_sort;
|
||||
avl_root_init(&vcpp->vcp_avlroot);
|
||||
BN_init(&vcpp->vcp_difficulty);
|
||||
vcpp->vcp_caseinsensitive = caseinsensitive;
|
||||
@ -1811,6 +1920,7 @@ vg_regex_context_new(int addrtype, int privtype)
|
||||
vcrp->base.vc_free = vg_regex_context_free;
|
||||
vcrp->base.vc_add_patterns = vg_regex_context_add_patterns;
|
||||
vcrp->base.vc_test = vg_regex_test;
|
||||
vcrp->base.vc_hash160_sort = NULL;
|
||||
vcrp->vcr_regex = NULL;
|
||||
vcrp->vcr_nalloc = 0;
|
||||
}
|
||||
|
@ -59,6 +59,7 @@ typedef struct _vg_exec_context_s {
|
||||
extern int vg_exec_context_init(vg_context_t *vcp, vg_exec_context_t *vxcp);
|
||||
extern void vg_exec_context_del(vg_exec_context_t *vxcp);
|
||||
extern void vg_exec_context_consolidate_key(vg_exec_context_t *vxcp);
|
||||
extern void vg_exec_context_calc_address(vg_exec_context_t *vxcp);
|
||||
|
||||
/* Implementation-specific lock/unlock/consolidate */
|
||||
extern void vg_exec_downgrade_lock(vg_exec_context_t *vxcp);
|
||||
@ -69,6 +70,7 @@ typedef void (*vg_free_func_t)(vg_context_t *);
|
||||
typedef int (*vg_add_pattern_func_t)(vg_context_t *,
|
||||
char ** const patterns, int npatterns);
|
||||
typedef int (*vg_test_func_t)(vg_exec_context_t *);
|
||||
typedef int (*vg_hash160_sort_func_t)(vg_context_t *vcp, void *buf);
|
||||
|
||||
/* Application-level context, incl. parameters and global pattern store */
|
||||
struct _vg_context_s {
|
||||
@ -84,12 +86,14 @@ struct _vg_context_s {
|
||||
vg_free_func_t vc_free;
|
||||
vg_add_pattern_func_t vc_add_patterns;
|
||||
vg_test_func_t vc_test;
|
||||
vg_hash160_sort_func_t vc_hash160_sort;
|
||||
};
|
||||
|
||||
|
||||
extern void vg_context_free(vg_context_t *vcp);
|
||||
extern int vg_context_add_patterns(vg_context_t *vcp,
|
||||
char ** const patterns, int npatterns);
|
||||
extern int vg_context_hash160_sort(vg_context_t *vcp, void *buf);
|
||||
|
||||
|
||||
extern vg_context_t *vg_prefix_context_new(int addrtype, int privtype,
|
||||
|
11
winglue.c
11
winglue.c
@ -102,6 +102,17 @@ gettimeofday(struct timeval *tv, struct timezone *tz)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
timeradd(struct timeval *a, struct timeval *b, struct timeval *result)
|
||||
{
|
||||
result->tv_sec = a->tv_sec + b->tv_sec;
|
||||
result->tv_usec = a->tv_usec + b->tv_usec;
|
||||
if (result->tv_usec > 10000000) {
|
||||
result->tv_sec++;
|
||||
result->tv_usec -= 1000000;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
timersub(struct timeval *a, struct timeval *b, struct timeval *result)
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user