ccminer-gostd-lite/compat/thrust/system/cuda/detail/adjacent_difference.inl

198 lines
6.1 KiB
Plaintext
Raw Normal View History

2014-03-18 22:17:40 +01:00
/*
* Copyright 2008-2012 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <thrust/detail/config.h>
#include <thrust/gather.h>
#include <thrust/functional.h>
#include <thrust/iterator/iterator_traits.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/detail/temporary_array.h>
#include <thrust/system/detail/internal/decompose.h>
#include <thrust/system/cuda/detail/default_decomposition.h>
#include <thrust/system/cuda/detail/detail/launch_closure.h>
#include <thrust/system/cuda/detail/detail/launch_calculator.h>
#include <thrust/system/cuda/detail/execution_policy.h>
namespace thrust
{
namespace system
{
namespace cuda
{
namespace detail
{
namespace detail
{
// Unary functor that maps an interval number of a decomposition to the index
// of the last element contained in that interval.
//
// Decomposition must provide an index_type typedef and an operator[] whose
// result exposes end() (one past the final index of the interval).
template <typename Decomposition>
struct last_index_in_each_interval
  : public thrust::unary_function<typename Decomposition::index_type,
                                  typename Decomposition::index_type>
{
  typedef typename Decomposition::index_type index_type;

  // copy of the decomposition being queried
  Decomposition decomp;

  // Stores a copy of d for later lookups.
  last_index_in_each_interval(Decomposition d)
    : decomp(d)
  {}

  // Returns the index of the final element of the given interval.
  __host__ __device__
  index_type operator()(index_type interval)
  {
    // end() is exclusive, so the last valid index sits one position before it
    return this->decomp[interval].end() - 1;
  }
};
// Closure run by every thread of a launch. Each block computes the adjacent
// differences for one interval of the decomposition and writes them to output.
//
// Members:
//   input      - iterator to the first element of the whole input range
//   input_copy - iterator to the saved boundary values; entry (b - 1) is read
//                by block b > 0 as the left neighbor of its first element
//   output     - iterator to the first element of the whole output range
//   binary_op  - invoked as binary_op(current_value, left_neighbor)
//   decomp     - maps a block index to the index range that block processes
//   context    - provides block_index(), thread_index(), block_dimension()
//                and barrier()
template <typename InputIterator1,
          typename InputIterator2,
          typename OutputIterator,
          typename BinaryFunction,
          typename Decomposition,
          typename Context>
struct adjacent_difference_closure
{
  InputIterator1 input;
  InputIterator2 input_copy;
  OutputIterator output;
  BinaryFunction binary_op;
  Decomposition  decomp;
  Context        context;

  typedef Context context_type;

  // Stores copies of all arguments; context defaults to Context().
  adjacent_difference_closure(InputIterator1 input,
                              InputIterator2 input_copy,
                              OutputIterator output,
                              BinaryFunction binary_op,
                              Decomposition  decomp,
                              Context        context = Context())
    : input(input), input_copy(input_copy), output(output), binary_op(binary_op), decomp(decomp), context(context) {}

  // Device-side body: processes the interval assigned to this thread's block,
  // block_dimension() elements per loop iteration.
  __device__ __thrust_forceinline__
  void operator()(void)
  {
    typedef typename thrust::iterator_value<InputIterator1>::type InputType;
    typedef typename Decomposition::index_type index_type;

    // this block processes results in [range.begin(), range.end())
    thrust::system::detail::internal::index_range<index_type> range = decomp[context.block_index()];

    // Only blocks after the first read a saved boundary value, located at
    // entry (block_index - 1). Block 0 never dereferences input_copy, so it is
    // left untouched there: stepping it by (0 - 1) would move the iterator
    // outside the boundary array (before its start, or far past its end if the
    // index type is unsigned).
    if (context.block_index() > 0)
    {
      input_copy += context.block_index() - 1;
    }

    // prime the temp values for all threads so we don't need to launch a default constructor
    InputType next_left = (context.block_index() == 0) ? *input : *input_copy;

    // base advances identically in every thread of the block; i is this
    // thread's own element index
    index_type base = range.begin();
    index_type i    = range.begin() + context.thread_index();

    if (i < range.end())
    {
      // every thread except the first of the block reads its left neighbor
      // straight from the input
      if (context.thread_index() > 0)
      {
        InputIterator1 temp = input + (i - 1);
        next_left = *temp;
      }
    }

    // from here on input and output refer to this thread's current element
    input  += i;
    output += i;

    // the loop condition depends only on base and range, not on the thread
    // index, so every thread of the block executes the barrier below
    while (base < range.end())
    {
      InputType curr_left = next_left;

      // fetch the left neighbor needed in the next iteration before anything
      // is written in this one
      if (i + context.block_dimension() < range.end())
      {
        InputIterator1 temp = input + (context.block_dimension() - 1);
        next_left = *temp;
      }

      // separates the neighbor reads above from the writes below
      // NOTE(review): presumably this is what keeps in-place use (output
      // aliasing input) correct within a block - confirm
      context.barrier();

      if (i < range.end())
      {
        // the very first element has no left neighbor and is copied unchanged
        if (i == 0)
          *output = *input;
        else
        {
          InputType x = *input;
          *output = binary_op(x, curr_left);
        }
      }

      i      += context.block_dimension();
      base   += context.block_dimension();
      input  += context.block_dimension();
      output += context.block_dimension();
    }
  }
};
} // end namespace detail
__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN
// CUDA implementation of adjacent_difference over [first, last).
//
// The range is split into intervals with default_decomposition. The last
// input value of every interval except the final one is first gathered into
// temporary storage, then adjacent_difference_closure is launched with
// decomp.size() as the launch size; it reads those saved values as the left
// neighbor of each later interval's first element.
//
// exec      - execution policy, used for the temporary storage and the gather
// first     - beginning of the input range
// last      - end of the input range
// result    - beginning of the output range
// binary_op - applied as binary_op(current_value, left_neighbor)
//
// Returns result advanced by the number of input elements; for an empty input
// result is returned unchanged and nothing is launched.
template <typename DerivedPolicy,
          typename InputIterator,
          typename OutputIterator,
          typename BinaryFunction>
OutputIterator adjacent_difference(execution_policy<DerivedPolicy> &exec,
                                   InputIterator first, InputIterator last,
                                   OutputIterator result,
                                   BinaryFunction binary_op)
{
  typedef typename thrust::iterator_value<InputIterator>::type               InputType;
  typedef typename thrust::iterator_difference<InputIterator>::type          IndexType;
  typedef thrust::system::detail::internal::uniform_decomposition<IndexType> Decomposition;

  typedef thrust::detail::temporary_array<InputType,DerivedPolicy> BoundaryArray;
  typedef typename BoundaryArray::iterator                         InputIterator2;

  typedef detail::last_index_in_each_interval<Decomposition> LastIndexOf;
  typedef detail::blocked_thread_array                       Context;
  typedef detail::adjacent_difference_closure<InputIterator,InputIterator2,OutputIterator,BinaryFunction,Decomposition,Context> Closure;

  IndexType n = last - first;

  // nothing to do for an empty range
  if (n == 0)
  {
    return result;
  }

  Decomposition decomp = default_decomposition(last - first);

  // one saved value per boundary between consecutive intervals
  BoundaryArray boundary_values(exec, decomp.size() - 1);

  // copy the last input value of every interval but the final one
  LastIndexOf last_index_of(decomp);
  thrust::counting_iterator<IndexType> interval_ids(0);

  thrust::gather(exec,
                 thrust::make_transform_iterator(interval_ids, last_index_of),
                 thrust::make_transform_iterator(interval_ids, last_index_of) + (decomp.size() - 1),
                 first,
                 boundary_values.begin());

  // launch with decomp.size() as the launch size
  Closure kernel_body(first, boundary_values.begin(), result, binary_op, decomp);
  detail::launch_closure(kernel_body, decomp.size());

  return result + n;
}
__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END
} // end namespace detail
} // end namespace cuda
} // end namespace system
} // end namespace thrust