Checkpoint development of oclvanitygen.

2025-02-07 12:24:20 +00:00 · 2011-07-21 08:08:24 -07:00 · 2011-07-21 08:08:24 -07:00 · d64d355010
commit d64d355010
parent e328e73d6a
7 changed files with 717 additions and 216 deletions
--- a/Makefile.Win32
+++ b/Makefile.Win32
@ -2,17 +2,26 @@ CC = cl
 OPENSSL_DIR = C:\OpenSSL-Win32
 PTHREADS_DIR = C:\pthreads-w32
 PCRE_DIR = C:\pcre-7.9-src
+OPENCL_DIR = "C:\Program Files (x86)\AMD APP"
+OPENCL_INCLUDE = /I$(OPENCL_DIR)\include
+OPENCL_LIBS = $(OPENCL_DIR)\lib\x86\OpenCL.lib
 CFLAGS = /D_WIN32 /DPTW32_STATIC_LIB /DPCRE_STATIC /I$(OPENSSL_DIR)\include /I$(PTHREADS_DIR) /I$(PCRE_DIR)
 LIBS = $(OPENSSL_DIR)\lib\libeay32.lib $(PTHREADS_DIR)\pthread.lib $(PCRE_DIR)\pcre.lib ws2_32.lib
-OBJS = vanitygen.obj pattern.obj winglue.obj
+OBJS = vanitygen.obj oclvanitygen.obj pattern.obj winglue.obj

 all: vanitygen.exe

-vanitygen.exe: $(OBJS)
-	link /nologo /out:vanitygen.exe $(OBJS) $(LIBS)
+vanitygen.exe: vanitygen.obj pattern.obj winglue.obj
+	link /nologo /out:$@ $** $(LIBS)
+
+oclvanitygen.exe: oclvanitygen.obj pattern.obj winglue.obj
+	link /nologo /out:$@ $** $(LIBS) $(OPENCL_LIBS)

 .c.obj:
 	$(CC) $(CFLAGS) /c /Tp$< /Fo$@

+oclvanitygen.obj: oclvanitygen.c
+	$(CC) $(CFLAGS) $(OPENCL_INCLUDE) /c /Tpoclvanitygen.c /Fo$@
+
 clean:
 	del vanitygen.exe $(OBJS)
--- a/calc_addrs.cl
+++ b/calc_addrs.cl
@ -42,6 +42,21 @@
 * substantially reduces the cost of performing modular inversion.
 */

+
+/* Byte-swapping and endianness */
+#define bswap32(v)					\
+	(((v) >> 24) | (((v) >> 8) & 0xff00) |		\
+	 (((v) << 8) & 0xff0000) | ((v) << 24))
+
+#if __ENDIAN_LITTLE__ != 1
+#define load_le32(v) bswap32(v)
+#define load_be32(v) (v)
+#else
+#define load_le32(v) (v)
+#define load_be32(v) bswap32(v)
+#endif
+
+
 /*
 * BIGNUM mini-library
 * This module deals with fixed-size 256-bit bignums.
@ -439,27 +454,6 @@ bn_from_mont(bignum *rb, bignum *b)
 	}
 }

-
-/* Montgomery multiplication test kernel */
-__kernel void
-test_mul_mont(__global bignum *products_out, __global bignum *nums_in,
-	      int count)
-{
-	bignum x, y, tmp;
-	int i, o, p;
-	o = get_global_id(0) * count;
-	p = o * 2;
-	for (i = 0; i < count; i++) {
-		x = nums_in[p++];
-		y = nums_in[p++];
-		bn_mul_mont(&tmp, &x, &y);
-		bn_mul_mont(&tmp, &tmp, &x);
-		bn_mul_mont(&tmp, &tmp, &y);
-		bn_from_mont(&x, &tmp);
-		products_out[o++] = x;
-	}
-}
-
 /*
 * Modular inversion
 */
@ -526,22 +520,6 @@ bn_mod_inverse(bignum *r, bignum *n)
 	return;
 }

-/* modular inversion test kernel */
-__kernel void
-test_mod_inverse(__global bignum *inv_out, __global bignum *nums_in,
-		 int count)
-{
-	bignum x, xp;
-	int i, o;
-	o = get_global_id(0) * count;
-	for (i = 0; i < count; i++) {
-		x = nums_in[o];
-		bn_mod_inverse(&xp, &x);
-		inv_out[o++] = xp;
-	}
-}
-
-
 /*
 * HASH FUNCTIONS
 *
@ -597,7 +575,7 @@ sha2_256_init(uint *out)
 }

 /* The state variable remapping is really contorted */
-#define sha2_stvar(vals, i, v) vals[(i+(7-v)) % 8]
+#define sha2_stvar(vals, i, v) vals[(64+v-i) % 8]

 void
 sha2_256_block(uint *out, uint *in)
@ -608,7 +586,7 @@ sha2_256_block(uint *out, uint *in)
 #pragma unroll UNROLL_MAX
 #endif
 	for (i = 0; i < 8; i++)
-		state[7-i] = out[i];
+		state[i] = out[i];
 #ifdef UNROLL_MAX
 #pragma unroll 64
 #endif
@ -642,7 +620,7 @@ sha2_256_block(uint *out, uint *in)
 #pragma unroll UNROLL_MAX
 #endif
 	for (i = 0; i < 8; i++)
-		out[i] += state[7-i];
+		out[i] += state[i];
 }


@ -687,28 +665,36 @@ __constant uchar ripemd160_rlp[] = {
 	8, 5, 12, 9, 12, 5, 14, 6, 8, 13, 6, 5, 15, 13, 11, 11
 };

+#define ripemd160_val(v, i, n) (v)[(80+(n)-(i)) % 5]
+#define ripemd160_valp(v, i, n) (v)[5 + ((80+(n)-(i)) % 5)]
 #define ripemd160_f0(x, y, z) (x ^ y ^ z)
 #define ripemd160_f1(x, y, z) ((x & y) | (~x & z))
 #define ripemd160_f2(x, y, z) ((x | ~y) ^ z)
 #define ripemd160_f3(x, y, z) ((x & z) | (y & ~z))
 #define ripemd160_f4(x, y, z) (x ^ (y | ~z))
-#define ripemd160_round(i, in, vals, f, fp, t) do {		\
-	t = rotate(vals[0] +					\
-		   f(vals[1], vals[2], vals[3]) +		\
-		   in[ripemd160_ws[i]] +			\
-		   ripemd160_k[i / 16],				\
-		   (uint)ripemd160_rl[i]) + vals[4];		\
-	vals[0] = vals[4]; vals[4] = vals[3];			\
-	vals[3] = rotate(vals[2], 10U); vals[2] = vals[1];	\
-	vals[1] = t;						\
-	t = rotate(vals[5] +					\
-		   fp(vals[6], vals[7], vals[8]) +		\
-		   in[ripemd160_wsp[i]] +			\
-		   ripemd160_kp[i / 16],			\
-		   (uint)ripemd160_rlp[i]) + vals[9];		\
-	vals[5] = vals[9]; vals[9] = vals[8];			\
-	vals[8] = rotate(vals[7], 10U); vals[7] = vals[6];	\
-	vals[6] = t;						\
+#define ripemd160_round(i, in, vals, f, fp, t) do {			\
+		ripemd160_val(vals, i, 0) =				\
+			rotate(ripemd160_val(vals, i, 0) +		\
+			       f(ripemd160_val(vals, i, 1),		\
+				 ripemd160_val(vals, i, 2),		\
+				 ripemd160_val(vals, i, 3)) +		\
+			       in[ripemd160_ws[i]] +			\
+			       ripemd160_k[i / 16],			\
+			       (uint)ripemd160_rl[i]) +			\
+			ripemd160_val(vals, i, 4);			\
+		ripemd160_val(vals, i, 2) =				\
+			rotate(ripemd160_val(vals, i, 2), 10U);		\
+		ripemd160_valp(vals, i, 0) =				\
+			rotate(ripemd160_valp(vals, i, 0) +		\
+			       fp(ripemd160_valp(vals, i, 1),		\
+				  ripemd160_valp(vals, i, 2),		\
+				  ripemd160_valp(vals, i, 3)) +		\
+			       in[ripemd160_wsp[i]] +			\
+			       ripemd160_kp[i / 16],			\
+			       (uint)ripemd160_rlp[i]) +		\
+			ripemd160_valp(vals, i, 4);			\
+		ripemd160_valp(vals, i, 2) =				\
+			rotate(ripemd160_valp(vals, i, 2), 10U);	\
 	} while (0)

 void
@ -771,9 +757,41 @@ ripemd160_block(uint *out, uint *in)
 }


-#define bswap32(v)					\
-	(((v) >> 24) | (((v) >> 8) & 0xff00) |		\
-	 (((v) << 8) & 0xff0000) | ((v) << 24))
+#ifdef TEST_KERNELS
+/*
+ * Test kernels
+ */
+
+/* Montgomery multiplication test kernel */
+__kernel void
+test_mul_mont(__global bignum *products_out, __global bignum *nums_in)
+{
+	bignum a, b, c;
+	int o;
+	o = get_global_id(0);
+	nums_in += (2*o);
+
+	a = nums_in[0];
+	b = nums_in[1];
+	bn_mul_mont(&c, &a, &b);
+	products_out[o] = c;
+}
+
+/* modular inversion test kernel */
+__kernel void
+test_mod_inverse(__global bignum *inv_out, __global bignum *nums_in,
+		 int count)
+{
+	bignum x, xp;
+	int i, o;
+	o = get_global_id(0) * count;
+	for (i = 0; i < count; i++) {
+		x = nums_in[o];
+		bn_mod_inverse(&xp, &x);
+		inv_out[o++] = xp;
+	}
+}
+#endif  /* TEST_KERNELS */


 #if 0
@ -1028,24 +1046,13 @@ heap_invert(__global bignum *z_heap, int ncols)
 	}
 }

-__kernel void
-hash_ec_point(__global uint *hashes_out,
-	      __global bignum *points_in, __global bignum *z_heap)
+void
+hash_ec_point(uint *hash_out, __global bignum *xy, __global bignum *zip)
 {
 	uint hash1[16], hash2[16];
+	bignum c, zi, zzi;
 	bn_word wh, wl;
-	bignum p, a, b;
-	int i, o;
-
-	o = get_global_size(0);
-	i = o * get_global_id(1);
-	z_heap += (i * 2);
-	points_in += (i * 2);
-	hashes_out += (i * 5);
-
-	i = get_global_id(0);
-	points_in += (i * 2);
-	hashes_out += (i * 5);
+	int i;

 	/*
 	 * Multiply the coordinates by the inverted Z values.
@ -1054,34 +1061,34 @@ hash_ec_point(__global uint *hashes_out,
 	 * is big-endian, so swapping is unnecessary, but
 	 * inserting the format byte in front causes a headache.
 	 */
-	a = z_heap[(o - 1) + i];
-	bn_mul_mont(&b, &a, &a);  /* Z^2 */
-	p = points_in[0];
-	bn_mul_mont(&p, &p, &b);  /* X / Z^2 */
-	bn_from_mont(&p, &p);
+	zi = zip[0];
+	bn_mul_mont(&zzi, &zi, &zi);  /* 1 / Z^2 */
+	c = xy[0];
+	bn_mul_mont(&c, &c, &zzi);  /* X / Z^2 */
+	bn_from_mont(&c, &c);

 	wh = 0x00000004;  /* POINT_CONVERSION_UNCOMPRESSED */
 #ifdef UNROLL_MAX
 #pragma unroll UNROLL_MAX
 #endif
-	for (o = 0; o < BN_NWORDS; o++) {
+	for (i = 0; i < BN_NWORDS; i++) {
 		wl = wh;
-		wh = p.d[(BN_NWORDS - 1) - o];
-		hash1[o] = (wl << 24) | (wh >> 8);
+		wh = c.d[(BN_NWORDS - 1) - i];
+		hash1[i] = (wl << 24) | (wh >> 8);
 	}

-	bn_mul_mont(&a, &a, &b);  /* Z^3 */
-	p = points_in[1];
-	bn_mul_mont(&p, &p, &a);  /* Y / Z^3 */
-	bn_from_mont(&p, &p);
+	bn_mul_mont(&zzi, &zzi, &zi);  /* 1 / Z^3 */
+	c = xy[1];
+	bn_mul_mont(&c, &c, &zzi);  /* Y / Z^3 */
+	bn_from_mont(&c, &c);

 #ifdef UNROLL_MAX
 #pragma unroll UNROLL_MAX
 #endif
-	for (o = 0; o < BN_NWORDS; o++) {
+	for (i = 0; i < BN_NWORDS; i++) {
 		wl = wh;
-		wh = p.d[(BN_NWORDS - 1) - o];
-		hash1[BN_NWORDS + o] = (wl << 24) | (wh >> 8);
+		wh = c.d[(BN_NWORDS - 1) - i];
+		hash1[BN_NWORDS + i] = (wl << 24) | (wh >> 8);
 	}

 	/*
@ -1119,8 +1126,8 @@ hash_ec_point(__global uint *hashes_out,
 #ifdef UNROLL_MAX
 #pragma unroll UNROLL_MAX
 #endif
-	for (o = 0; o < 8; o++)
-		hash2[o] = bswap32(hash2[o]);
+	for (i = 0; i < 8; i++)
+		hash2[i] = bswap32(hash2[i]);
 	hash2[8] = bswap32(0x80000000);
 	hash2[9] = 0;
 	hash2[10] = 0;
@ -1129,12 +1136,116 @@ hash_ec_point(__global uint *hashes_out,
 	hash2[13] = 0;
 	hash2[14] = 32 * 8;
 	hash2[15] = 0;
-	ripemd160_init(hash1);
-	ripemd160_block(hash1, hash2);
+	ripemd160_init(hash_out);
+	ripemd160_block(hash_out, hash2);
+}

+
+__kernel void
+hash_ec_point_get(__global uint *hashes_out,
+		  __global bignum *points_in, __global bignum *z_heap)
+{
+	uint hash[5];
+	int i, o;
+
+	o = get_global_size(0);
+	i = o * get_global_id(1);
+	z_heap += (i * 2);
+	points_in += (i * 2);
+	hashes_out += (i * 5);
+
+	i = get_global_id(0);
+	points_in += (i * 2);
+	hashes_out += (i * 5);
+
+	/* Complete the coordinates and hash */
+	hash_ec_point(hash, points_in, &z_heap[i + (o - 1)]);
+
+	/* Output the hash in proper byte-order */
 #ifdef UNROLL_MAX
 #pragma unroll UNROLL_MAX
 #endif
-	for (o = 0; o < 5; o++)
-		hashes_out[o] = hash1[o];
+	for (i = 0; i < 5; i++)
+		hashes_out[i] = load_le32(hash[i]);
+}
+
+/*
+ * Normally this would be one function that compared two hash160s.
+ * This one compares a hash160 with an upper and lower bound in one
+ * function to work around a problem with AMD's OpenCL compiler.
+ */
+int
+hash160_ucmp_g(uint *a, __global uint *bound)
+{
+	uint gv;
+	int i;
+#ifdef UNROLL_MAX
+#pragma unroll UNROLL_MAX
+#endif
+	for (i = 0; i < 5; i++) {
+		gv = load_be32(bound[i]);
+		if (a[i] < gv) return -1;
+		if (a[i] > gv) break;
+	}
+#ifdef UNROLL_MAX
+#pragma unroll UNROLL_MAX
+#endif
+	for (i = 0; i < 5; i++) {
+		gv = load_be32(bound[5+i]);
+		if (a[i] < gv) return 0;
+		if (a[i] > gv) return 1;
+	}
+	return 0;
+}
+
+__kernel void
+hash_ec_point_search_prefix(__global uint *found,
+			    __global bignum *points_in, __global bignum *z_heap,
+			    __global uint *target_table, int ntargets)
+{
+	uint hash[5];
+	int i, high, low, p;
+
+	p = get_global_size(0);
+	i = p * get_global_id(1);
+	z_heap += (i * 2);
+	points_in += (i * 2);
+
+	i = get_global_id(0);
+	points_in += (i * 2);
+
+	/* Complete the coordinates and hash */
+	hash_ec_point(hash, points_in, &z_heap[(p - 1) + i]);
+
+	/*
+	 * Unconditionally byteswap the hash result, because:
+	 * - The byte-level convention of RIPEMD160 is little-endian
+	 * - We are comparing it in big-endian order
+	 */
+#ifdef UNROLL_MAX
+#pragma unroll UNROLL_MAX
+#endif
+	for (i = 0; i < 5; i++)
+		hash[i] = bswap32(hash[i]);
+
+	/* Binary-search the target table for the hash we just computed */
+	for (high = ntargets - 1, low = 0, i = high >> 1;
+	     high >= low;
+	     i = low + ((high - low) >> 1)) {
+		p = hash160_ucmp_g(hash, &target_table[10*i]);
+		low = (p > 0) ? (i + 1) : low;
+		high = (p < 0) ? (i - 1) : high;
+		if (p == 0) {
+			/* For debugging purposes, write the hash value */
+			found[0] = ((get_global_id(1) * get_global_size(0)) +
+				    get_global_id(0));
+			found[1] = i;
+#ifdef UNROLL_MAX
+#pragma unroll UNROLL_MAX
+#endif
+			for (p = 0; p < 5; p++)
+				found[p+2] = load_be32(hash[p]);
+			high = -1;
+		}
+	}
 }
--- a/oclvanitygen.c
+++ b/oclvanitygen.c
@ -33,8 +33,16 @@


 const char *version = "0.13";
+const int debug = 0;

 #define MAX_SLOT 2
+#define MAX_ARG 6
+#define MAX_KERNEL 3
+
+/* OpenCL address searching mode */
+struct _vg_ocl_context_s;
+typedef int (*vg_ocl_init_t)(struct _vg_ocl_context_s *);
+typedef int (*vg_ocl_check_t)(struct _vg_ocl_context_s *, int slot);

 typedef struct _vg_ocl_context_s {
 	vg_exec_context_t		base;
@ -42,10 +50,17 @@ typedef struct _vg_ocl_context_s {
 	cl_context			voc_oclctx;
 	cl_command_queue		voc_oclcmdq;
 	cl_program			voc_oclprog;
-	cl_kernel			voc_oclkernel[MAX_SLOT][3];
+	vg_ocl_init_t			voc_init_func;
+	vg_ocl_init_t			voc_rekey_func;
+	vg_ocl_check_t			voc_check_func;
+	int				voc_nslots;
+	cl_kernel			voc_oclkernel[MAX_SLOT][MAX_KERNEL];
 	cl_event			voc_oclkrnwait[MAX_SLOT];
-	cl_mem				voc_args[MAX_SLOT][6];
-	size_t				voc_arg_size[MAX_SLOT][6];
+	cl_mem				voc_args[MAX_SLOT][MAX_ARG];
+	size_t				voc_arg_size[MAX_SLOT][MAX_ARG];
+
+	int				voc_pattern_rewrite;
+	int				voc_pattern_alloc;

 	pthread_t			voc_ocl_thread;
 	pthread_mutex_t			voc_lock;
@ -171,8 +186,7 @@ vg_ocl_load_program(vg_context_t *vcp, vg_ocl_context_t *vocp,

 	vocp->voc_oclprog = prog;
 	if (!vg_ocl_create_kernel(vocp, 0, "ec_add_grid") ||
-	    !vg_ocl_create_kernel(vocp, 1, "heap_invert") ||
-	    !vg_ocl_create_kernel(vocp, 2, "hash_ec_point")) {
+	    !vg_ocl_create_kernel(vocp, 1, "heap_invert")) {
 		clReleaseProgram(vocp->voc_oclprog);
 		vocp->voc_oclprog = NULL;
 		return 0;
@ -181,7 +195,7 @@ vg_ocl_load_program(vg_context_t *vcp, vg_ocl_context_t *vocp,
 	return 1;
 }

-void
+void CL_CALLBACK
 vg_ocl_context_callback(const char *errinfo,
 			const void *private_info,
 			size_t cb,
@ -223,7 +237,7 @@ vg_ocl_init(vg_context_t *vcp, vg_ocl_context_t *vocp, cl_device_id did)

 	if (!vg_ocl_load_program(vcp, vocp,
 				 "calc_addrs.cl",
-				 //"-cl-nv-verbose -cl-nv-maxrregcount=32 "
+				 //"-cl-nv-verbose "
 				 "-DUNROLL_MAX=16")) {
 		printf("Could not load kernel\n");
 		return 0;
@ -251,6 +265,21 @@ vg_ocl_del(vg_ocl_context_t *vocp)
 	vg_exec_context_del(&vocp->base);
 }

+static int vg_ocl_arg_map[][8] = {
+	/* hashes_out / found */
+	{ 2, 0, -1 },
+	/* z_heap */
+	{ 0, 1, 1, 0, 2, 2, -1 },
+	/* point_tmp */
+	{ 0, 0, 2, 1, -1 },
+	/* row_in */
+	{ 0, 2, -1 },
+	/* col_in */
+	{ 0, 3, -1 },
+	/* target_table */
+	{ 2, 3, -1 },
+};
+
 int
 vg_ocl_kernel_arg_alloc(vg_ocl_context_t *vocp, int slot,
 			int arg, size_t size, int host)
@ -259,18 +288,15 @@ vg_ocl_kernel_arg_alloc(vg_ocl_context_t *vocp, int slot,
 	cl_int ret;
 	int i, j, knum, karg;

-	static int arg_map[5][8] = {
-		/* hashes_out */
-		{ 2, 0, -1 },
-		/* z_heap */
-		{ 0, 1, 1, 0, 2, 2, -1 },
-		/* point_tmp */
-		{ 0, 0, 2, 1, -1 },
-		/* row_in */
-		{ 0, 2, -1 },
-		/* col_in */
-		{ 0, 3, -1 },
-	};
+	for (i = 0; i < MAX_SLOT; i++) {
+		if ((i != slot) && (slot >= 0))
+			continue;
+		if (vocp->voc_args[i][arg]) {
+			clReleaseMemObject(vocp->voc_args[i][arg]);
+			vocp->voc_args[i][arg] = NULL;
+			vocp->voc_arg_size[i][arg] = 0;
+		}
+	}

 	clbuf = clCreateBuffer(vocp->voc_oclctx,
 			       CL_MEM_READ_WRITE |
@ -287,24 +313,54 @@ vg_ocl_kernel_arg_alloc(vg_ocl_context_t *vocp, int slot,
 		if ((i != slot) && (slot >= 0))
 			continue;

-		for (j = 0; arg_map[arg][j] >= 0; j += 2) {
-			knum = arg_map[arg][j];
-			karg = arg_map[arg][j+1];
+		clRetainMemObject(clbuf);
+		vocp->voc_args[i][arg] = clbuf;
+		vocp->voc_arg_size[i][arg] = size;
+
+		for (j = 0; vg_ocl_arg_map[arg][j] >= 0; j += 2) {
+			knum = vg_ocl_arg_map[arg][j];
+			karg = vg_ocl_arg_map[arg][j+1];
 			ret = clSetKernelArg(vocp->voc_oclkernel[i][knum],
 					     karg,
 					     sizeof(clbuf),
 					     &clbuf);
 			
 			if (ret) {
-				clReleaseMemObject(clbuf);
 				printf("Could not set kernel argument: %d\n",
 				       ret);
 				return 0;
 			}
 		}
-		vocp->voc_args[i][arg] = clbuf;
-		vocp->voc_arg_size[i][arg] = size;
 	}
+
+	clReleaseMemObject(clbuf);
+	return 1;
+}
+
+int
+vg_ocl_copyout_arg(vg_ocl_context_t *vocp, int wslot, int arg,
+		   void *buffer, size_t size)
+{
+	cl_int slot, ret;
+
+	slot = (wslot < 0) ? 0 : wslot;
+
+	assert((slot >= 0) && (slot < MAX_SLOT));
+	assert(size <= vocp->voc_arg_size[slot][arg]);
+
+	ret = clEnqueueWriteBuffer(vocp->voc_oclcmdq,
+				   vocp->voc_args[slot][arg],
+				   CL_TRUE,
+				   0, size,
+				   buffer,
+				   0, NULL,
+				   NULL);
+			
+	if (ret) {
+		printf("Could not copyout argument buffer: %d\n", ret);
+		return 0;
+	}
+
 	return 1;
 }

@ -320,7 +376,8 @@ vg_ocl_map_arg_buffer(vg_ocl_context_t *vocp, int slot,
 	buf = clEnqueueMapBuffer(vocp->voc_oclcmdq,
 				 vocp->voc_args[slot][arg],
 				 CL_TRUE,
-				 rw ? CL_MAP_WRITE : CL_MAP_READ,
+				 (rw == 2) ? (CL_MAP_READ|CL_MAP_WRITE)
+				           : (rw ? CL_MAP_WRITE : CL_MAP_READ),
 				 0, vocp->voc_arg_size[slot][arg],
 				 0, NULL,
 				 NULL,
@ -368,7 +425,7 @@ vg_ocl_kernel_int_arg(vg_ocl_context_t *vocp, int slot,
 	for (i = 0; i < MAX_SLOT; i++) {
 		if ((i != slot) && (slot >= 0))
 			continue;
-		ret = clSetKernelArg(vocp->voc_oclkernel[i][0],
+		ret = clSetKernelArg(vocp->voc_oclkernel[i][2],
 				     arg,
 				     sizeof(value),
 				     &value);
@ -507,6 +564,200 @@ show_elapsed(struct timeval *tv, const char *place)
 	printf("%s spent %ld.%06lds\n", place, delta.tv_sec, delta.tv_usec);
 }

+
+/*
+ * GPU address matching methods
+ *
+ * gethash: GPU computes and returns all address hashes.
+ *  + Works with any matching method, including regular expressions.
+ *  - The CPU will not be able to keep up with mid- to high-end GPUs.
+ *
+ * prefix: GPU computes hash, searches a range list, and discards.
+ *  + Fast, minimal work for CPU.
+ */
+
+int
+vg_ocl_gethash_check(vg_ocl_context_t *vocp, int slot)
+{
+	vg_exec_context_t *vxcp = &vocp->base;
+	vg_context_t *vcp = vocp->base.vxc_vc;
+	vg_test_func_t test_func = vcp->vc_test;
+	unsigned char *ocl_hashes_out;
+	int i, res = 0, round;
+
+	ocl_hashes_out = (unsigned char *)
+		vg_ocl_map_arg_buffer(vocp, slot, 0, 0);
+
+	round = vocp->voc_ocl_cols * vocp->voc_ocl_rows;
+
+	for (i = 0; i < round; i++, vxcp->vxc_delta++) {
+		memcpy(&vxcp->vxc_binres[1],
+		       ocl_hashes_out + (20*i),
+		       20);
+
+		res = test_func(vxcp);
+		if (res)
+			break;
+	}
+
+	vg_ocl_unmap_arg_buffer(vocp, slot, 0, ocl_hashes_out);
+	return res;
+}
+
+int
+vg_ocl_gethash_init(vg_ocl_context_t *vocp)
+{
+	int i;
+
+	if (!vg_ocl_create_kernel(vocp, 2, "hash_ec_point_get"))
+		return 0;
+
+	for (i = 0; i < vocp->voc_nslots; i++) {
+		/* Each slot gets its own hash output buffer */
+		if (!vg_ocl_kernel_arg_alloc(vocp, i, 0,
+					     20 *
+					     vocp->voc_ocl_rows *
+					     vocp->voc_ocl_cols, 1))
+			return 0;
+	}
+
+	vocp->voc_rekey_func = NULL;
+	vocp->voc_check_func = vg_ocl_gethash_check;
+	return 1;
+}
+
+
+static int
+vg_ocl_prefix_rekey(vg_ocl_context_t *vocp)
+{
+	vg_context_t *vcp = vocp->base.vxc_vc;
+	unsigned char *ocl_targets_in;
+	uint32_t *ocl_found_out;
+	int i;
+
+	/* Set the found indicator for each slot to -1 */
+	for (i = 0; i < vocp->voc_nslots; i++) {
+		ocl_found_out = (uint32_t *)
+			vg_ocl_map_arg_buffer(vocp, i, 0, 1);
+		ocl_found_out[0] = 0xffffffff;
+		vg_ocl_unmap_arg_buffer(vocp, i, 0, ocl_found_out);
+	}
+
+	if (vocp->voc_pattern_rewrite) {
+		/* Count number of range records */
+		i = vg_context_hash160_sort(vcp, NULL);
+		if (!i) {
+			printf("No range records available, exiting\n");
+			return 0;
+		}
+
+		if (i > vocp->voc_pattern_alloc) {
+			/* (re)allocate target buffer */
+			if (!vg_ocl_kernel_arg_alloc(vocp, -1, 5, 40 * i, 0))
+				return 0;
+			vocp->voc_pattern_alloc = i;
+		}
+
+		/* Write range records */
+		ocl_targets_in = (unsigned char *)
+			vg_ocl_map_arg_buffer(vocp, 0, 5, 1);
+		vg_context_hash160_sort(vcp, ocl_targets_in);
+		vg_ocl_unmap_arg_buffer(vocp, 0, 5, ocl_targets_in);
+		vg_ocl_kernel_int_arg(vocp, -1, 4, i);
+
+		vocp->voc_pattern_rewrite = 0;
+	}
+	return 1;
+}
+
+static int
+vg_ocl_prefix_check(vg_ocl_context_t *vocp, int slot)
+{
+	vg_exec_context_t *vxcp = &vocp->base;
+	vg_context_t *vcp = vocp->base.vxc_vc;
+	vg_test_func_t test_func = vcp->vc_test;
+	uint32_t *ocl_found_out;
+	uint32_t found_delta;
+	int orig_delta, tablesize;
+	int res = 0;
+
+	/* Retrieve the found indicator */
+	ocl_found_out = (uint32_t *)
+		vg_ocl_map_arg_buffer(vocp, slot, 0, 2);
+	found_delta = ocl_found_out[0];
+
+	if (found_delta != 0xffffffff) {
+		/* GPU code claims match, verify with CPU version */
+		orig_delta = vxcp->vxc_delta;
+		vxcp->vxc_delta += found_delta;
+		vg_exec_context_calc_address(vxcp);
+		res = test_func(vxcp);
+		if (res == 0) {
+			/*
+			 * The match was not found in
+			 * the pattern list.  Hmm.
+			 */
+			tablesize = ocl_found_out[2];
+			printf("Match idx: %d\n", ocl_found_out[1]);
+			printf("CPU hash: ");
+			dumphex(vxcp->vxc_binres + 1, 20);
+			printf("GPU hash: ");
+			dumphex((unsigned char *) (ocl_found_out + 3), 20);
+			printf("Table size: %d\n", ocl_found_out[2]);
+			printf("Found delta: %d "
+			       "Start delta: %d\n",
+			       found_delta, orig_delta);
+			res = 1;
+		}
+		vocp->voc_pattern_rewrite = 1;
+	} else {
+		vxcp->vxc_delta += (vocp->voc_ocl_cols * vocp->voc_ocl_rows);
+	}
+
+	vg_ocl_unmap_arg_buffer(vocp, slot, 0, ocl_found_out);
+	return res;
+}
+
+int
+vg_ocl_prefix_init(vg_ocl_context_t *vocp)
+{
+	int i;
+
+	if (!vg_ocl_create_kernel(vocp, 2, "hash_ec_point_search_prefix"))
+		return 0;
+
+	for (i = 0; i < vocp->voc_nslots; i++) {
+		if (!vg_ocl_kernel_arg_alloc(vocp, i, 0, 28, 1))
+			return 0;
+	}
+	vocp->voc_rekey_func = vg_ocl_prefix_rekey;
+	vocp->voc_check_func = vg_ocl_prefix_check;
+	vocp->voc_pattern_rewrite = 1;
+	vocp->voc_pattern_alloc = 0;
+	return 1;
+}
+
+
+int
+vg_ocl_config_pattern(vg_ocl_context_t *vocp)
+{
+	vg_context_t *vcp = vocp->base.vxc_vc;
+	int i;
+
+	i = vg_context_hash160_sort(vcp, NULL);
+	if (i > 0) {
+		if (vcp->vc_verbose > 1)
+			printf("Using GPU prefix matcher\n");
+		/* Configure for prefix matching */
+		return vg_ocl_prefix_init(vocp);
+	}
+
+	if (vcp->vc_verbose > 0)
+		printf("WARNING: Using CPU pattern matcher\n");
+	return vg_ocl_gethash_init(vocp);
+}
+
+
 void *
 vg_opencl_thread(void *arg)
 {
@ -561,24 +812,25 @@ vg_opencl_thread(void *arg)

 		if (!vg_ocl_kernel_wait(vocp, slot))
 			halt = 1;
-		gettimeofday(&tvt, NULL);
-		timersub(&tvt, &tv, &tvd);
-		timeradd(&tvd, &busy, &busy);

-		if ((vcp->vc_verbose > 1) &&
-		    ((busy.tv_sec + idle.tv_sec) > 1)) {
-			idleu = (1000000 * idle.tv_sec) + idle.tv_usec;
-			busyu = (1000000 * busy.tv_sec) + busy.tv_usec;
-			pidle = ((double) idleu) / (idleu + busyu);
+		if (vcp->vc_verbose > 1) {
+			gettimeofday(&tvt, NULL);
+			timersub(&tvt, &tv, &tvd);
+			timeradd(&tvd, &busy, &busy);
+			if ((busy.tv_sec + idle.tv_sec) > 1) {
+				idleu = (1000000 * idle.tv_sec) + idle.tv_usec;
+				busyu = (1000000 * busy.tv_sec) + busy.tv_usec;
+				pidle = ((double) idleu) / (idleu + busyu);

-			if (pidle > 0.05) {
-				printf("\rGPU idle: %.2f%%"
-				       "                              "
+				if (pidle > 0.01) {
+					printf("\rGPU idle: %.2f%%"
+					       "                              "
 				       "                                \n",
-				       100 * pidle);
+					       100 * pidle);
+				}
+				memset(&idle, 0, sizeof(idle));
+				memset(&busy, 0, sizeof(busy));
 			}
-			memset(&idle, 0, sizeof(idle));
-			memset(&busy, 0, sizeof(busy));
 		}
 	}
 out:
@ -586,6 +838,7 @@ out:
 	return NULL;
 }

+
 /*
 * Address search thread main loop
 */
@ -605,12 +858,11 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int worksize)
 	EC_POINT **ppbase = NULL, **pprow, *pbatchinc = NULL, *poffset = NULL;
 	EC_POINT *pseek = NULL;

-	unsigned char *ocl_points_in, *ocl_strides_in, *ocl_hashes_out;
+	unsigned char *ocl_points_in, *ocl_strides_in;

 	vg_ocl_context_t ctx;
 	vg_ocl_context_t *vocp = &ctx;
 	vg_exec_context_t *vxcp = &vocp->base;
-	vg_test_func_t test_func = vcp->vc_test;

 	int slot, nslots;
 	int slot_busy = 0, slot_done = 0, halt = 0;
@ -637,6 +889,10 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int worksize)
 	nslots = 2;
 	slot = 0;

+	vocp->voc_ocl_cols = batchsize;
+	vocp->voc_ocl_rows = worksize;
+	vocp->voc_nslots = nslots;
+
 	ppbase = (EC_POINT **) malloc((batchsize + worksize) *
 				      sizeof(EC_POINT*));
 	if (!ppbase)
@ -667,26 +923,28 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int worksize)

 	round = batchsize * worksize;

+	if (!vg_ocl_config_pattern(vocp))
+		goto enomem;
+
 	for (i = 0; i < nslots; i++) {
 		/*
 		 * Each work group gets its own:
-		 * - Hash output array
-		 * - Point and z_heap scratch spaces
 		 * - Column point array
 		 */
-		if (!vg_ocl_kernel_arg_alloc(vocp, i, 0, 20 * round, 1) ||
-		    !vg_ocl_kernel_arg_alloc(vocp, i, 1, 32 * 2 * round, 0) ||
-		    !vg_ocl_kernel_arg_alloc(vocp, i, 2, 32 * 2 * round, 0) ||
-		    !vg_ocl_kernel_arg_alloc(vocp, i, 4, 32 * 2 * worksize, 1))
+		if (!vg_ocl_kernel_arg_alloc(vocp, i, 4, 32 * 2 * worksize, 1))
 			goto enomem;
 	}

-	/* Same row point array for all instances */
-	if (!vg_ocl_kernel_arg_alloc(vocp, -1, 3, 32 * 2 * batchsize, 1))
+	/*
+	 * All instances share:
+	 * - The z_heap and point scratch spaces
+	 * - The row point array
+	 */
+	if (!vg_ocl_kernel_arg_alloc(vocp, -1, 1, 32 * 2 * round, 0) ||
+	    !vg_ocl_kernel_arg_alloc(vocp, -1, 2, 32 * 2 * round, 0) ||
+	    !vg_ocl_kernel_arg_alloc(vocp, -1, 3, 32 * 2 * batchsize, 1))
 		goto enomem;

-	//vg_ocl_kernel_int_arg(vocp, -1, 5, batchsize);
-
 	npoints = 0;
 	rekey_at = 0;
 	vxcp->vxc_binres[0] = vcp->vc_addrtype;
@ -698,6 +956,10 @@ vg_opencl_loop(vg_context_t *vcp, cl_device_id did, int worksize)
 	gettimeofday(&tvstart, NULL);

 l_rekey:
+	if (vocp->voc_rekey_func &&
+	    !vocp->voc_rekey_func(vocp))
+		goto enomem;
+
 	/* Generate a new random private key */
 	EC_KEY_generate_key(pkey);
 	npoints = 0;
@ -755,33 +1017,22 @@ l_rekey:

 	while (1) {
 		if (slot_done) {
+			assert(rekey_at > 0);
 			slot_done = 0;

-			ocl_hashes_out = (unsigned char *)
-				vg_ocl_map_arg_buffer(vocp, slot, 0, 0);
-
-			for (i = 0; i < round; i++, vxcp->vxc_delta++) {
-				memcpy(&vxcp->vxc_binres[1],
-				       ocl_hashes_out + (20*i),
-				       20);
-
-				switch (test_func(vxcp)) {
-				case 1:
-					rekey_at = 0;
-					i = round;
-					break;
-				case 2:
-					halt = 1;
-					i = round;
-					break;
-				default:
-					break;
-				}
+			/* Call the result check function */
+			switch (vocp->voc_check_func(vocp, slot)) {
+			case 1:
+				rekey_at = 0;
+				break;
+			case 2:
+				halt = 1;
+				break;
+			default:
+				break;
 			}

-			vg_ocl_unmap_arg_buffer(vocp, slot, 0, ocl_hashes_out);
-
-			c += (i + 1);
+			c += round;
 			if (!halt && (c >= output_interval)) {
 				output_interval =
 					vg_output_timing(vcp, c, &tvstart);
@ -789,24 +1040,8 @@ l_rekey:
 			}
 		}

-		if (halt) {
-			if (vcp->vc_verbose > 1)
-				printf("Halting...");
-			pthread_mutex_lock(&vocp->voc_lock);
-			vocp->voc_halt = 1;
-			pthread_cond_signal(&vocp->voc_wait);
-			while (vocp->voc_ocl_slot != -1) {
-				assert(slot_busy);
-				pthread_cond_wait(&vocp->voc_wait,
-						  &vocp->voc_lock);
-			}
-			slot_busy = 0;
-			pthread_mutex_unlock(&vocp->voc_lock);
-			pthread_join(vocp->voc_ocl_thread, NULL);
-			if (vcp->vc_verbose > 1)
-				printf("done!\n");
+		if (halt)
 			break;
-		}

 		if ((npoints + round) < rekey_at) {
 			if (npoints > 1) {
@ -843,35 +1078,36 @@ l_rekey:
 			}

 			if (vocp->voc_halt) {
-				halt = 1;
-			} else {
-				vocp->voc_ocl_slot = slot;
-				vocp->voc_ocl_cols = batchsize;
-				vocp->voc_ocl_rows = worksize;
-				pthread_cond_signal(&vocp->voc_wait);
 				pthread_mutex_unlock(&vocp->voc_lock);
-
-				if (slot_busy)
-					slot_done = 1;
-				slot_busy = 1;
-				slot = (slot + 1) % nslots;
+				halt = 1;
+				break;
 			}
-			pthread_mutex_unlock(&vocp->voc_lock);
-		}

-		else if (slot_busy) {
-			pthread_mutex_lock(&vocp->voc_lock);
-			while (vocp->voc_ocl_slot != -1) {
-				pthread_cond_wait(&vocp->voc_wait,
-						  &vocp->voc_lock);
+			vocp->voc_ocl_slot = slot;
+			pthread_cond_signal(&vocp->voc_wait);
+			pthread_mutex_unlock(&vocp->voc_lock);
+
+			slot_done = slot_busy;
+			slot_busy = 1;
+			slot = (slot + 1) % nslots;
+
+		} else { 
+			if (slot_busy) {
+				pthread_mutex_lock(&vocp->voc_lock);
+				while (vocp->voc_ocl_slot != -1) {
+					assert(vocp->voc_ocl_slot ==
+					       ((slot + nslots - 1) % nslots));
+					pthread_cond_wait(&vocp->voc_wait,
+							  &vocp->voc_lock);
+				}
+				pthread_mutex_unlock(&vocp->voc_lock);
+				slot_busy = 0;
+				slot_done = 1;
 			}
-			slot_busy = 0;
-			pthread_mutex_unlock(&vocp->voc_lock);
-			slot_done = 1;
-		}

-		else if (!rekey_at || ((npoints + round) >= rekey_at)) {
-			goto l_rekey;
+			if (!rekey_at ||
+			    (!slot_done && ((npoints + round) >= rekey_at)))
+				goto l_rekey;
 		}
 	}

@ -880,6 +1116,24 @@ l_rekey:
 		printf("ERROR: allocation failure?\n");
 	}

+	if (halt) {
+		if (vcp->vc_verbose > 1)
+			printf("Halting...");
+		pthread_mutex_lock(&vocp->voc_lock);
+		vocp->voc_halt = 1;
+		pthread_cond_signal(&vocp->voc_wait);
+		while (vocp->voc_ocl_slot != -1) {
+			assert(slot_busy);
+			pthread_cond_wait(&vocp->voc_wait,
+					  &vocp->voc_lock);
+		}
+		slot_busy = 0;
+		pthread_mutex_unlock(&vocp->voc_lock);
+		pthread_join(vocp->voc_ocl_thread, NULL);
+		if (vcp->vc_verbose > 1)
+			printf("done!\n");
+	}
+
 	if (ppbase) {
 		for (i = 0; i < (batchsize + worksize); i++)
 			if (ppbase[i])
--- a/pattern.c
+++ b/pattern.c
@ -207,6 +207,27 @@ vg_exec_context_consolidate_key(vg_exec_context_t *vxcp)
 	}
 }

+void
+vg_exec_context_calc_address(vg_exec_context_t *vxcp)
+{
+	const EC_GROUP *pgroup;
+	unsigned char eckey_buf[96], hash1[32], hash2[20];
+	int len;
+
+	vg_exec_context_consolidate_key(vxcp);
+	pgroup = EC_KEY_get0_group(vxcp->vxc_key);
+	len = EC_POINT_point2oct(pgroup,
+				 EC_KEY_get0_public_key(vxcp->vxc_key),
+				 POINT_CONVERSION_UNCOMPRESSED,
+				 eckey_buf,
+				 sizeof(eckey_buf),
+				 vxcp->vxc_bnctx);
+	SHA256(eckey_buf, len, hash1);
+	RIPEMD160(hash1, sizeof(hash1), hash2);
+	memcpy(&vxcp->vxc_binres[1],
+	       hash2, 20);
+}
+

 typedef struct _timing_info_s {
 	struct _timing_info_s	*ti_next;
@ -437,6 +458,14 @@ vg_context_add_patterns(vg_context_t *vcp,
 	return vcp->vc_add_patterns(vcp, patterns, npatterns);
 }

+int
+vg_context_hash160_sort(vg_context_t *vcp, void *buf)
+{
+	if (!vcp->vc_hash160_sort)
+		return 0;
+	return vcp->vc_hash160_sort(vcp, buf);
+}
+

 static const signed char b58_reverse_map[256] = {
 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
@ -1023,6 +1052,17 @@ avl_insert_fix(avl_root_t *rootp, avl_item_t *itemp)
 	}
 }

+static INLINE avl_item_t *
+avl_first(avl_root_t *rootp)
+{
+	avl_item_t *itemp = rootp->ar_root;
+	if (itemp) {
+		while (itemp->ai_left)
+			itemp = itemp->ai_left;
+	}
+	return itemp;
+}
+
 static INLINE avl_item_t *
 avl_next(avl_item_t *itemp)
 {
@ -1181,6 +1221,26 @@ vg_prefix_avl_insert(avl_root_t *rootp, vg_prefix_t *vpnew)
 	return NULL;
 }

+static vg_prefix_t *
+vg_prefix_first(avl_root_t *rootp)
+{
+	avl_item_t *itemp;
+	itemp = avl_first(rootp);
+	if (itemp)
+		return avl_item_entry(itemp, vg_prefix_t, vp_item);
+	return NULL;
+}
+
+static vg_prefix_t *
+vg_prefix_next(vg_prefix_t *vp)
+{
+	avl_item_t *itemp = &vp->vp_item;
+	itemp = avl_next(itemp);
+	if (itemp)
+		return avl_item_entry(itemp, vg_prefix_t, vp_item);
+	return NULL;
+}
+
 static vg_prefix_t *
 vg_prefix_add(avl_root_t *rootp, const char *pattern, BIGNUM *low, BIGNUM *high)
 {
@ -1568,6 +1628,54 @@ research:
 	return res;
 }

+int
+vg_prefix_hash160_sort(vg_context_t *vcp, void *buf)
+{
+	vg_prefix_context_t *vcpp = (vg_prefix_context_t *) vcp;
+	vg_prefix_t *vp;
+	unsigned char *cbuf = (unsigned char *) buf;
+	unsigned char bnbuf[25];
+	int nbytes, ncopy, nskip, npfx = 0;
+
+	/*
+	 * Walk the prefix tree in order, copy the upper and lower bound
+	 * values into the hash160 buffer.  Skip the lower four bytes
+	 * and anything above the 24th byte.
+	 */
+	for (vp = vg_prefix_first(&vcpp->vcp_avlroot);
+	     vp != NULL;
+	     vp = vg_prefix_next(vp)) {
+		npfx++;
+		if (!buf)
+			continue;
+
+		/* Low */
+		nbytes = BN_bn2bin(vp->vp_low, bnbuf);
+		ncopy = ((nbytes >= 24) ? 20 :
+			 ((nbytes > 4) ? (nbytes - 4) : 0));
+		nskip = (nbytes >= 24) ? (nbytes - 24) : 0;
+		if (ncopy < 20)
+			memset(cbuf, 0, 20 - ncopy);
+		memcpy(cbuf + (20 - ncopy),
+		       bnbuf + nskip,
+		       ncopy);
+		cbuf += 20;
+
+		/* High */
+		nbytes = BN_bn2bin(vp->vp_high, bnbuf);
+		ncopy = ((nbytes >= 24) ? 20 :
+			 ((nbytes > 4) ? (nbytes - 4) : 0));
+		nskip = (nbytes >= 24) ? (nbytes - 24) : 0;
+		if (ncopy < 20)
+			memset(cbuf, 0, 20 - ncopy);
+		memcpy(cbuf + (20 - ncopy),
+		       bnbuf + nskip,
+		       ncopy);
+		cbuf += 20;
+	}
+	return npfx;
+}
+
 vg_context_t *
 vg_prefix_context_new(int addrtype, int privtype, int caseinsensitive)
 {
@ -1584,6 +1692,7 @@ vg_prefix_context_new(int addrtype, int privtype, int caseinsensitive)
 		vcpp->base.vc_free = vg_prefix_context_free;
 		vcpp->base.vc_add_patterns = vg_prefix_context_add_patterns;
 		vcpp->base.vc_test = vg_prefix_test;
+		vcpp->base.vc_hash160_sort = vg_prefix_hash160_sort;
 		avl_root_init(&vcpp->vcp_avlroot);
 		BN_init(&vcpp->vcp_difficulty);
 		vcpp->vcp_caseinsensitive = caseinsensitive;
@ -1811,6 +1920,7 @@ vg_regex_context_new(int addrtype, int privtype)
 		vcrp->base.vc_free = vg_regex_context_free;
 		vcrp->base.vc_add_patterns = vg_regex_context_add_patterns;
 		vcrp->base.vc_test = vg_regex_test;
+		vcrp->base.vc_hash160_sort = NULL;
 		vcrp->vcr_regex = NULL;
 		vcrp->vcr_nalloc = 0;
 	}
--- a/pattern.h
+++ b/pattern.h
@ -59,6 +59,7 @@ typedef struct _vg_exec_context_s {
 extern int vg_exec_context_init(vg_context_t *vcp, vg_exec_context_t *vxcp);
 extern void vg_exec_context_del(vg_exec_context_t *vxcp);
 extern void vg_exec_context_consolidate_key(vg_exec_context_t *vxcp);
+extern void vg_exec_context_calc_address(vg_exec_context_t *vxcp);

 /* Implementation-specific lock/unlock/consolidate */
 extern void vg_exec_downgrade_lock(vg_exec_context_t *vxcp);
@ -69,6 +70,7 @@ typedef void (*vg_free_func_t)(vg_context_t *);
 typedef int (*vg_add_pattern_func_t)(vg_context_t *,
 				     char ** const patterns, int npatterns);
 typedef int (*vg_test_func_t)(vg_exec_context_t *);
+typedef int (*vg_hash160_sort_func_t)(vg_context_t *vcp, void *buf);

 /* Application-level context, incl. parameters and global pattern store */
 struct _vg_context_s {
@ -84,12 +86,14 @@ struct _vg_context_s {
 	vg_free_func_t		vc_free;
 	vg_add_pattern_func_t	vc_add_patterns;
 	vg_test_func_t		vc_test;
+	vg_hash160_sort_func_t	vc_hash160_sort;
 };


 extern void vg_context_free(vg_context_t *vcp);
 extern int vg_context_add_patterns(vg_context_t *vcp,
 				   char ** const patterns, int npatterns);
+extern int vg_context_hash160_sort(vg_context_t *vcp, void *buf);


 extern vg_context_t *vg_prefix_context_new(int addrtype, int privtype,
--- a/winglue.c
+++ b/winglue.c
@ -102,6 +102,17 @@ gettimeofday(struct timeval *tv, struct timezone *tz)
 	return 0;
 }

+void
+timeradd(struct timeval *a, struct timeval *b, struct timeval *result)
+{
+	result->tv_sec = a->tv_sec + b->tv_sec;
+	result->tv_usec = a->tv_usec + b->tv_usec;
+	if (result->tv_usec > 10000000) {
+		result->tv_sec++;
+		result->tv_usec -= 1000000;
+	}
+}
+
 void
 timersub(struct timeval *a, struct timeval *b, struct timeval *result)
 {
--- a/winglue.h
+++ b/winglue.h
@ -29,6 +29,8 @@
 struct timezone;

 extern int gettimeofday(struct timeval *tv, struct timezone *tz);
+extern void timeradd(struct timeval *a, struct timeval *b,
+		     struct timeval *result);
 extern void timersub(struct timeval *a, struct timeval *b,
 		     struct timeval *result);