mirror of
https://github.com/GOSTSec/ccminer
synced 2025-08-26 13:51:51 +00:00
benchmark: free last memory leaks on algo switch
remains my original lyra2 implementation to fix... (cuda_lyra2.cu) I guess some kind of memory overflow force the driver to allocate memory... but was unable to free it without device reset.
This commit is contained in:
parent
4e1e03b891
commit
c2214091ae
@ -10,14 +10,15 @@
|
|||||||
|
|
||||||
uint32_t *d_fugue256_hashoutput[MAX_GPUS];
|
uint32_t *d_fugue256_hashoutput[MAX_GPUS];
|
||||||
static uint32_t *d_resultNonce[MAX_GPUS];
|
static uint32_t *d_resultNonce[MAX_GPUS];
|
||||||
|
static unsigned int* d_textures[MAX_GPUS][8];
|
||||||
|
|
||||||
__constant__ uint32_t GPUstate[30]; // Single GPU
|
__constant__ uint32_t GPUstate[30]; // Single GPU
|
||||||
__constant__ uint32_t pTarget[8]; // Single GPU
|
__constant__ uint32_t pTarget[8]; // Single GPU
|
||||||
|
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> mixTab0Tex;
|
static texture<unsigned int, 1, cudaReadModeElementType> mixTab0Tex;
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> mixTab1Tex;
|
static texture<unsigned int, 1, cudaReadModeElementType> mixTab1Tex;
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> mixTab2Tex;
|
static texture<unsigned int, 1, cudaReadModeElementType> mixTab2Tex;
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> mixTab3Tex;
|
static texture<unsigned int, 1, cudaReadModeElementType> mixTab3Tex;
|
||||||
|
|
||||||
#if USE_SHARED
|
#if USE_SHARED
|
||||||
#define mixtab0(x) (*((uint32_t*)mixtabs + ( (x))))
|
#define mixtab0(x) (*((uint32_t*)mixtabs + ( (x))))
|
||||||
@ -707,28 +708,30 @@ fugue256_gpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outp
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define texDef(texname, texmem, texsource, texsize) \
|
#define texDef(id, texname, texmem, texsource, texsize) { \
|
||||||
unsigned int *texmem; \
|
unsigned int *texmem; \
|
||||||
cudaMalloc(&texmem, texsize); \
|
cudaMalloc(&texmem, texsize); \
|
||||||
|
d_textures[thr_id][id] = texmem; \
|
||||||
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
|
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
|
||||||
texname.normalized = 0; \
|
texname.normalized = 0; \
|
||||||
texname.filterMode = cudaFilterModePoint; \
|
texname.filterMode = cudaFilterModePoint; \
|
||||||
texname.addressMode[0] = cudaAddressModeClamp; \
|
texname.addressMode[0] = cudaAddressModeClamp; \
|
||||||
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
|
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
|
||||||
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); }
|
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
void fugue256_cpu_init(int thr_id, uint32_t threads)
|
void fugue256_cpu_init(int thr_id, uint32_t threads)
|
||||||
{
|
{
|
||||||
// Kopiere die Hash-Tabellen in den GPU-Speicher
|
// Kopiere die Hash-Tabellen in den GPU-Speicher
|
||||||
texDef(mixTab0Tex, mixTab0m, mixtab0_cpu, sizeof(uint32_t)*256);
|
texDef(0, mixTab0Tex, mixTab0m, mixtab0_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(mixTab1Tex, mixTab1m, mixtab1_cpu, sizeof(uint32_t)*256);
|
texDef(1, mixTab1Tex, mixTab1m, mixtab1_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(mixTab2Tex, mixTab2m, mixtab2_cpu, sizeof(uint32_t)*256);
|
texDef(2, mixTab2Tex, mixTab2m, mixtab2_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(mixTab3Tex, mixTab3m, mixtab3_cpu, sizeof(uint32_t)*256);
|
texDef(3, mixTab3Tex, mixTab3m, mixtab3_cpu, sizeof(uint32_t)*256);
|
||||||
|
|
||||||
// Speicher für alle Ergebnisse belegen
|
// Speicher für alle Ergebnisse belegen
|
||||||
cudaMalloc(&d_fugue256_hashoutput[thr_id], 8 * sizeof(uint32_t) * threads);
|
cudaMalloc(&d_fugue256_hashoutput[thr_id], (size_t) 32 * threads);
|
||||||
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
|
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -737,6 +740,14 @@ void fugue256_cpu_free(int thr_id)
|
|||||||
{
|
{
|
||||||
cudaFree(d_fugue256_hashoutput[thr_id]);
|
cudaFree(d_fugue256_hashoutput[thr_id]);
|
||||||
cudaFree(d_resultNonce[thr_id]);
|
cudaFree(d_resultNonce[thr_id]);
|
||||||
|
|
||||||
|
cudaUnbindTexture(mixTab0Tex);
|
||||||
|
cudaUnbindTexture(mixTab1Tex);
|
||||||
|
cudaUnbindTexture(mixTab2Tex);
|
||||||
|
cudaUnbindTexture(mixTab3Tex);
|
||||||
|
|
||||||
|
for (int i=0; i<4; i++)
|
||||||
|
cudaFree(d_textures[thr_id][i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
|
@ -67,14 +67,14 @@ __constant__ uint32_t pTarget[8];
|
|||||||
#define T3dn(x) tex1Dfetch(t3dn2, x)
|
#define T3dn(x) tex1Dfetch(t3dn2, x)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> t0up2;
|
static texture<unsigned int, 1, cudaReadModeElementType> t0up2;
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> t0dn2;
|
static texture<unsigned int, 1, cudaReadModeElementType> t0dn2;
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> t1up2;
|
static texture<unsigned int, 1, cudaReadModeElementType> t1up2;
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> t1dn2;
|
static texture<unsigned int, 1, cudaReadModeElementType> t1dn2;
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> t2up2;
|
static texture<unsigned int, 1, cudaReadModeElementType> t2up2;
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> t2dn2;
|
static texture<unsigned int, 1, cudaReadModeElementType> t2dn2;
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> t3up2;
|
static texture<unsigned int, 1, cudaReadModeElementType> t3up2;
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> t3dn2;
|
static texture<unsigned int, 1, cudaReadModeElementType> t3dn2;
|
||||||
|
|
||||||
#define RSTT(d0, d1, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \
|
#define RSTT(d0, d1, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \
|
||||||
t[d0] = T0up(B32_0(a[b0])) \
|
t[d0] = T0up(B32_0(a[b0])) \
|
||||||
@ -283,6 +283,15 @@ void groestl256_cpu_init(int thr_id, uint32_t threads)
|
|||||||
__host__
|
__host__
|
||||||
void groestl256_cpu_free(int thr_id)
|
void groestl256_cpu_free(int thr_id)
|
||||||
{
|
{
|
||||||
|
cudaUnbindTexture(t0up2);
|
||||||
|
cudaUnbindTexture(t0dn2);
|
||||||
|
cudaUnbindTexture(t1up2);
|
||||||
|
cudaUnbindTexture(t1dn2);
|
||||||
|
cudaUnbindTexture(t2up2);
|
||||||
|
cudaUnbindTexture(t2dn2);
|
||||||
|
cudaUnbindTexture(t3up2);
|
||||||
|
cudaUnbindTexture(t3dn2);
|
||||||
|
|
||||||
for (int i=0; i<8; i++)
|
for (int i=0; i<8; i++)
|
||||||
cudaFree(d_textures[thr_id][i]);
|
cudaFree(d_textures[thr_id][i]);
|
||||||
|
|
||||||
|
@ -1632,6 +1632,7 @@ bool algo_switch_next(int thr_id)
|
|||||||
|
|
||||||
stats_purge_all();
|
stats_purge_all();
|
||||||
global_hashrate = 0;
|
global_hashrate = 0;
|
||||||
|
thr_hashrates[thr_id] = 0; // reset for minmax64
|
||||||
|
|
||||||
if (thr_id == 0)
|
if (thr_id == 0)
|
||||||
applog(LOG_BLUE, "Benchmark algo %s...", algo_names[algo]);
|
applog(LOG_BLUE, "Benchmark algo %s...", algo_names[algo]);
|
||||||
|
@ -433,6 +433,9 @@ void free_scrypt_jane(int thr_id)
|
|||||||
{
|
{
|
||||||
int dev_id = device_map[thr_id];
|
int dev_id = device_map[thr_id];
|
||||||
|
|
||||||
|
if (!init[thr_id])
|
||||||
|
return;
|
||||||
|
|
||||||
cudaSetDevice(dev_id);
|
cudaSetDevice(dev_id);
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
cudaDeviceReset(); // well, simple way to free ;)
|
cudaDeviceReset(); // well, simple way to free ;)
|
||||||
|
@ -692,6 +692,9 @@ void free_scrypt(int thr_id)
|
|||||||
{
|
{
|
||||||
int dev_id = device_map[thr_id];
|
int dev_id = device_map[thr_id];
|
||||||
|
|
||||||
|
if (!init[thr_id])
|
||||||
|
return;
|
||||||
|
|
||||||
// trivial way to free all...
|
// trivial way to free all...
|
||||||
cudaSetDevice(dev_id);
|
cudaSetDevice(dev_id);
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
6
skein.cu
6
skein.cu
@ -376,7 +376,7 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
|
|||||||
if (sm5) {
|
if (sm5) {
|
||||||
skeincoin_init(thr_id);
|
skeincoin_init(thr_id);
|
||||||
} else {
|
} else {
|
||||||
cudaMalloc(&d_hash[thr_id], throughput * 64U);
|
cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput);
|
||||||
quark_skein512_cpu_init(thr_id, throughput);
|
quark_skein512_cpu_init(thr_id, throughput);
|
||||||
cuda_check_cpu_init(thr_id, throughput);
|
cuda_check_cpu_init(thr_id, throughput);
|
||||||
CUDA_SAFE_CALL(cudaDeviceSynchronize());
|
CUDA_SAFE_CALL(cudaDeviceSynchronize());
|
||||||
@ -475,7 +475,9 @@ extern "C" void free_skeincoin(int thr_id)
|
|||||||
|
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
|
||||||
if (!sm5) {
|
if (sm5)
|
||||||
|
skeincoin_free(thr_id);
|
||||||
|
else {
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
}
|
}
|
||||||
|
27
util.cpp
27
util.cpp
@ -1829,33 +1829,20 @@ void do_gpu_tests(void)
|
|||||||
unsigned long done;
|
unsigned long done;
|
||||||
char s[128] = { '\0' };
|
char s[128] = { '\0' };
|
||||||
uchar buf[160];
|
uchar buf[160];
|
||||||
uint32_t tgt[8] = { 0 };
|
struct work work;
|
||||||
|
memset(&work, 0, sizeof(work));
|
||||||
|
|
||||||
opt_tracegpu = true;
|
opt_tracegpu = true;
|
||||||
work_restart = (struct work_restart*) malloc(sizeof(struct work_restart));
|
work_restart = (struct work_restart*) malloc(sizeof(struct work_restart));
|
||||||
work_restart[0].restart = 1;
|
work_restart[0].restart = 1;
|
||||||
tgt[7] = 0xffff;
|
work.target[7] = 0xffff;
|
||||||
|
|
||||||
//memset(buf, 0, sizeof buf);
|
|
||||||
//scanhash_skeincoin(0, (uint32_t*)buf, tgt, 1, &done);
|
|
||||||
|
|
||||||
//memset(buf, 0, sizeof buf);
|
|
||||||
//memcpy(buf, zrtest, 80);
|
|
||||||
//scanhash_zr5(0, (uint32_t*)buf, tgt, zrtest[19]+1, &done);
|
|
||||||
|
|
||||||
//struct timeval tv;
|
//struct timeval tv;
|
||||||
//memset(buf, 0, sizeof buf);
|
//memset(work.data, 0, sizeof(work.data));
|
||||||
//scanhash_scrypt_jane(0, (uint32_t*)buf, tgt, NULL, 1, &done, &tv, &tv);
|
//scanhash_scrypt_jane(0, &work, NULL, 1, &done, &tv, &tv);
|
||||||
|
|
||||||
memset(buf, 0, sizeof buf);
|
memset(work.data, 0, sizeof(work.data));
|
||||||
scanhash_lyra2(0, (uint32_t*)buf, tgt, 1, &done);
|
scanhash_lyra2(0, &work, 1, &done);
|
||||||
|
|
||||||
//memset(buf, 0, sizeof buf);
|
|
||||||
// buf[0] = 1; buf[64] = 2; // for endian tests
|
|
||||||
//scanhash_blake256(0, (uint32_t*)buf, tgt, 1, &done, 14);
|
|
||||||
|
|
||||||
//memset(buf, 0, sizeof buf);
|
|
||||||
//scanhash_heavy(0, (uint32_t*)buf, tgt, 1, &done, 1, 84); // HEAVYCOIN_BLKHDR_SZ=84
|
|
||||||
|
|
||||||
free(work_restart);
|
free(work_restart);
|
||||||
work_restart = NULL;
|
work_restart = NULL;
|
||||||
|
@ -46,10 +46,10 @@ static unsigned int* d_textures[MAX_GPUS][4];
|
|||||||
#define mixtab2(x) (*((uint32_t*)mixtabs + (512+(x))))
|
#define mixtab2(x) (*((uint32_t*)mixtabs + (512+(x))))
|
||||||
#define mixtab3(x) (*((uint32_t*)mixtabs + (768+(x))))
|
#define mixtab3(x) (*((uint32_t*)mixtabs + (768+(x))))
|
||||||
|
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> mixTab0Tex;
|
static texture<unsigned int, 1, cudaReadModeElementType> mixTab0Tex;
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> mixTab1Tex;
|
static texture<unsigned int, 1, cudaReadModeElementType> mixTab1Tex;
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> mixTab2Tex;
|
static texture<unsigned int, 1, cudaReadModeElementType> mixTab2Tex;
|
||||||
texture<unsigned int, 1, cudaReadModeElementType> mixTab3Tex;
|
static texture<unsigned int, 1, cudaReadModeElementType> mixTab3Tex;
|
||||||
|
|
||||||
static const uint32_t mixtab0_cpu[] = {
|
static const uint32_t mixtab0_cpu[] = {
|
||||||
SPH_C32(0x63633297), SPH_C32(0x7c7c6feb), SPH_C32(0x77775ec7),
|
SPH_C32(0x63633297), SPH_C32(0x7c7c6feb), SPH_C32(0x77775ec7),
|
||||||
@ -685,6 +685,11 @@ void x13_fugue512_cpu_init(int thr_id, uint32_t threads)
|
|||||||
__host__
|
__host__
|
||||||
void x13_fugue512_cpu_free(int thr_id)
|
void x13_fugue512_cpu_free(int thr_id)
|
||||||
{
|
{
|
||||||
|
cudaUnbindTexture(mixTab0Tex);
|
||||||
|
cudaUnbindTexture(mixTab1Tex);
|
||||||
|
cudaUnbindTexture(mixTab2Tex);
|
||||||
|
cudaUnbindTexture(mixTab3Tex);
|
||||||
|
|
||||||
for (int i=0; i<4; i++)
|
for (int i=0; i<4; i++)
|
||||||
cudaFree(d_textures[thr_id][i]);
|
cudaFree(d_textures[thr_id][i]);
|
||||||
}
|
}
|
||||||
|
@ -14,8 +14,8 @@
|
|||||||
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
|
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
|
||||||
__constant__ uint32_t pTarget[8];
|
__constant__ uint32_t pTarget[8];
|
||||||
|
|
||||||
uint32_t *d_wnounce[MAX_GPUS];
|
static uint32_t *d_wnounce[MAX_GPUS];
|
||||||
uint32_t *d_WNonce[MAX_GPUS];
|
static uint32_t *d_WNonce[MAX_GPUS];
|
||||||
|
|
||||||
#define USE_ALL_TABLES 1
|
#define USE_ALL_TABLES 1
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user