mirror of
https://github.com/GOSTSec/sgminer
synced 2025-03-10 04:31:03 +00:00
Find the nearest power of 2 maximum alloc size for the scrypt buffer that can successfully be allocated and is large enough to accomodate the thread concurrency chosen, thus mapping it to an intensity.
This commit is contained in:
parent
bff58c3bed
commit
ea10b08dce
24
ocl.c
24
ocl.c
@ -472,17 +472,35 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
||||
|
||||
#ifdef USE_SCRYPT
|
||||
if (opt_scrypt) {
|
||||
cl_ulong ma = gpus[gpu].max_alloc, mt;
|
||||
int pow2 = 0;
|
||||
|
||||
if (!gpus[gpu].lookup_gap) {
|
||||
applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
|
||||
gpus[gpu].lookup_gap = 2;
|
||||
}
|
||||
if (!gpus[gpu].thread_concurrency) {
|
||||
gpus[gpu].thread_concurrency = gpus[gpu].max_alloc / 32768 / gpus[gpu].lookup_gap;
|
||||
gpus[gpu].thread_concurrency = ma / 32768 / gpus[gpu].lookup_gap;
|
||||
if (gpus[gpu].shaders && gpus[gpu].thread_concurrency > gpus[gpu].shaders)
|
||||
gpus[gpu].thread_concurrency -= gpus[gpu].thread_concurrency % gpus[gpu].shaders;
|
||||
|
||||
applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %u",gpu, gpus[gpu].thread_concurrency);
|
||||
}
|
||||
|
||||
/* If we have memory to spare, try to find a power of 2 value
|
||||
* >= required amount to map nicely to an intensity */
|
||||
mt = gpus[gpu].thread_concurrency * 32768 * gpus[gpu].lookup_gap;
|
||||
if (ma > mt) {
|
||||
while (ma >>= 1)
|
||||
pow2++;
|
||||
ma = 1;
|
||||
while (--pow2 && ma < mt)
|
||||
ma <<= 1;
|
||||
if (ma >= mt) {
|
||||
gpus[gpu].max_alloc = ma;
|
||||
applog(LOG_DEBUG, "Max alloc decreased to %lu", gpus[gpu].max_alloc);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -776,8 +794,8 @@ built:
|
||||
size_t ipt = (1024 / gpus[gpu].lookup_gap + (1024 % gpus[gpu].lookup_gap > 0));
|
||||
size_t bufsize = 128 * ipt * gpus[gpu].thread_concurrency;
|
||||
|
||||
/* Always allocate the largest possible buffer allowed, even if we're not initially requiring it
|
||||
* based on thread_concurrency, giving us some headroom for intensity levels. */
|
||||
/* Use the max alloc value which has been rounded to a power of
|
||||
* 2 greater >= required amount earlier */
|
||||
if (bufsize > gpus[gpu].max_alloc) {
|
||||
applog(LOG_WARNING, "Maximum buffer memory device %d supports says %u, your scrypt settings come to %u",
|
||||
gpu, gpus[gpu].max_alloc, bufsize);
|
||||
|
Loading…
x
Reference in New Issue
Block a user