GOSTCoin CUDA miner project, compatible with most nvidia cards, containing only gostd algo
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

151 lines
3.8 KiB

/*
* Copyright 2008-2012 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*! \file odd_even_sort.h
* \brief Block versions of Batcher's Odd-Even Merge Sort
*/
#pragma once
namespace thrust
{
namespace system
{
namespace cuda
{
namespace detail
{
namespace block
{
/*! Block-wise implementation of Batcher's Odd-Even Merge Sort
* This implementation is based on Nadathur Satish's.
*/
template<typename KeyType,
typename ValueType,
typename StrictWeakOrdering>
__device__ void odd_even_sort(KeyType *keys,
ValueType *data,
const unsigned int n,
StrictWeakOrdering comp)
{
for(unsigned int p = blockDim.x>>1; p > 0; p >>= 1)
{
unsigned int q = blockDim.x>>1, r = 0, d = p;
while(q >= p)
{
unsigned int j = threadIdx.x + d;
// if j lies beyond the end of the array, we consider it "sorted" wrt i
// regardless of whether i lies beyond the end of the array
if(threadIdx.x < (blockDim.x-d) && (threadIdx.x & p) == r && j < n)
{
KeyType xikey = keys[threadIdx.x];
KeyType xjkey = keys[j];
ValueType xivalue = data[threadIdx.x];
ValueType xjvalue = data[j];
// does xj sort before xi?
if(comp(xjkey, xikey))
{
keys[threadIdx.x] = xjkey;
keys[j] = xikey;
data[threadIdx.x] = xjvalue;
data[j] = xivalue;
} // end if
} // end if
d = q - p;
q >>= 1;
r = p;
__syncthreads();
} // end while
} // end for p
} // end odd_even_sort()
template<typename KeyType,
typename ValueType,
typename StrictWeakOrdering>
__device__ void stable_odd_even_sort(KeyType *keys,
ValueType *data,
const unsigned int n,
StrictWeakOrdering comp)
{
for(unsigned int i = 0;
i < blockDim.x>>1;
++i)
{
bool thread_is_odd = threadIdx.x & 0x1;
// do odds first
if(thread_is_odd && threadIdx.x + 1 < n)
{
KeyType xikey = keys[threadIdx.x];
KeyType xjkey = keys[threadIdx.x + 1];
ValueType xivalue = data[threadIdx.x];
ValueType xjvalue = data[threadIdx.x + 1];
// does xj sort before xi?
if(comp(xjkey, xikey))
{
keys[threadIdx.x] = xjkey;
keys[threadIdx.x + 1] = xikey;
data[threadIdx.x] = xjvalue;
data[threadIdx.x + 1] = xivalue;
} // end if
} // end if
__syncthreads();
// do evens second
if(!thread_is_odd && threadIdx.x + 1 < n)
{
KeyType xikey = keys[threadIdx.x];
KeyType xjkey = keys[threadIdx.x + 1];
ValueType xivalue = data[threadIdx.x];
ValueType xjvalue = data[threadIdx.x + 1];
// does xj sort before xi?
if(comp(xjkey, xikey))
{
keys[threadIdx.x] = xjkey;
keys[threadIdx.x + 1] = xikey;
data[threadIdx.x] = xjvalue;
data[threadIdx.x + 1] = xivalue;
} // end if
} // end if
__syncthreads();
} // end for i
} // end stable_odd_even_sort()
} // end namespace block
} // end namespace detail
} // end namespace cuda
} // end namespace system
} // end namespace thrust