You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
239 lines
9.4 KiB
239 lines
9.4 KiB
/* |
|
* Copyright 2008-2012 NVIDIA Corporation |
|
* |
|
* Licensed under the Apache License, Version 2.0 (the "License"); |
|
* you may not use this file except in compliance with the License. |
|
* You may obtain a copy of the License at |
|
* |
|
* http://www.apache.org/licenses/LICENSE-2.0 |
|
* |
|
* Unless required by applicable law or agreed to in writing, software |
|
* distributed under the License is distributed on an "AS IS" BASIS, |
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
* See the License for the specific language governing permissions and |
|
* limitations under the License. |
|
*/ |
|
|
|
|
|
#include <thrust/detail/config.h> |
|
#include <thrust/system/detail/generic/scan_by_key.h> |
|
#include <thrust/functional.h> |
|
#include <thrust/transform.h> |
|
#include <thrust/replace.h> |
|
#include <thrust/iterator/zip_iterator.h> |
|
#include <thrust/iterator/iterator_traits.h> |
|
#include <thrust/detail/temporary_array.h> |
|
#include <thrust/detail/internal_functional.h> |
|
#include <thrust/scan.h> |
|
|
|
namespace thrust |
|
{ |
|
namespace system |
|
{ |
|
namespace detail |
|
{ |
|
namespace generic |
|
{ |
|
namespace detail |
|
{ |
|
|
|
template <typename OutputType, typename HeadFlagType, typename AssociativeOperator> |
|
struct segmented_scan_functor |
|
{ |
|
AssociativeOperator binary_op; |
|
|
|
typedef typename thrust::tuple<OutputType, HeadFlagType> result_type; |
|
|
|
__host__ __device__ |
|
segmented_scan_functor(AssociativeOperator _binary_op) : binary_op(_binary_op) {} |
|
|
|
__host__ __device__ |
|
result_type operator()(result_type a, result_type b) |
|
{ |
|
return result_type(thrust::get<1>(b) ? thrust::get<0>(b) : binary_op(thrust::get<0>(a), thrust::get<0>(b)), |
|
thrust::get<1>(a) | thrust::get<1>(b)); |
|
} |
|
}; |
|
|
|
} // end namespace detail |
|
|
|
|
|
template<typename DerivedPolicy, |
|
typename InputIterator1, |
|
typename InputIterator2, |
|
typename OutputIterator> |
|
OutputIterator inclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec, |
|
InputIterator1 first1, |
|
InputIterator1 last1, |
|
InputIterator2 first2, |
|
OutputIterator result) |
|
{ |
|
typedef typename thrust::iterator_traits<InputIterator1>::value_type InputType1; |
|
return thrust::inclusive_scan_by_key(exec, first1, last1, first2, result, thrust::equal_to<InputType1>()); |
|
} |
|
|
|
|
|
template<typename DerivedPolicy, |
|
typename InputIterator1, |
|
typename InputIterator2, |
|
typename OutputIterator, |
|
typename BinaryPredicate> |
|
OutputIterator inclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec, |
|
InputIterator1 first1, |
|
InputIterator1 last1, |
|
InputIterator2 first2, |
|
OutputIterator result, |
|
BinaryPredicate binary_pred) |
|
{ |
|
typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType; |
|
return thrust::inclusive_scan_by_key(exec, first1, last1, first2, result, binary_pred, thrust::plus<OutputType>()); |
|
} |
|
|
|
|
|
template<typename DerivedPolicy, |
|
typename InputIterator1, |
|
typename InputIterator2, |
|
typename OutputIterator, |
|
typename BinaryPredicate, |
|
typename AssociativeOperator> |
|
OutputIterator inclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec, |
|
InputIterator1 first1, |
|
InputIterator1 last1, |
|
InputIterator2 first2, |
|
OutputIterator result, |
|
BinaryPredicate binary_pred, |
|
AssociativeOperator binary_op) |
|
{ |
|
typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType; |
|
typedef unsigned int HeadFlagType; |
|
|
|
const size_t n = last1 - first1; |
|
|
|
if(n != 0) |
|
{ |
|
// compute head flags |
|
thrust::detail::temporary_array<HeadFlagType,DerivedPolicy> flags(exec, n); |
|
flags[0] = 1; thrust::transform(exec, first1, last1 - 1, first1 + 1, flags.begin() + 1, thrust::detail::not2(binary_pred)); |
|
|
|
// scan key-flag tuples, |
|
// For additional details refer to Section 2 of the following paper |
|
// S. Sengupta, M. Harris, and M. Garland. "Efficient parallel scan algorithms for GPUs" |
|
// NVIDIA Technical Report NVR-2008-003, December 2008 |
|
// http://mgarland.org/files/papers/nvr-2008-003.pdf |
|
thrust::inclusive_scan |
|
(exec, |
|
thrust::make_zip_iterator(thrust::make_tuple(first2, flags.begin())), |
|
thrust::make_zip_iterator(thrust::make_tuple(first2, flags.begin())) + n, |
|
thrust::make_zip_iterator(thrust::make_tuple(result, flags.begin())), |
|
detail::segmented_scan_functor<OutputType, HeadFlagType, AssociativeOperator>(binary_op)); |
|
} |
|
|
|
return result + n; |
|
} |
|
|
|
|
|
template<typename DerivedPolicy, |
|
typename InputIterator1, |
|
typename InputIterator2, |
|
typename OutputIterator> |
|
OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec, |
|
InputIterator1 first1, |
|
InputIterator1 last1, |
|
InputIterator2 first2, |
|
OutputIterator result) |
|
{ |
|
typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType; |
|
return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, OutputType(0)); |
|
} |
|
|
|
|
|
template<typename DerivedPolicy, |
|
typename InputIterator1, |
|
typename InputIterator2, |
|
typename OutputIterator, |
|
typename T> |
|
OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec, |
|
InputIterator1 first1, |
|
InputIterator1 last1, |
|
InputIterator2 first2, |
|
OutputIterator result, |
|
T init) |
|
{ |
|
typedef typename thrust::iterator_traits<InputIterator1>::value_type InputType1; |
|
return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, init, thrust::equal_to<InputType1>()); |
|
} |
|
|
|
|
|
template<typename DerivedPolicy, |
|
typename InputIterator1, |
|
typename InputIterator2, |
|
typename OutputIterator, |
|
typename T, |
|
typename BinaryPredicate> |
|
OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec, |
|
InputIterator1 first1, |
|
InputIterator1 last1, |
|
InputIterator2 first2, |
|
OutputIterator result, |
|
T init, |
|
BinaryPredicate binary_pred) |
|
{ |
|
typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType; |
|
return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, init, binary_pred, thrust::plus<OutputType>()); |
|
} |
|
|
|
|
|
template<typename DerivedPolicy, |
|
typename InputIterator1, |
|
typename InputIterator2, |
|
typename OutputIterator, |
|
typename T, |
|
typename BinaryPredicate, |
|
typename AssociativeOperator> |
|
OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec, |
|
InputIterator1 first1, |
|
InputIterator1 last1, |
|
InputIterator2 first2, |
|
OutputIterator result, |
|
T init, |
|
BinaryPredicate binary_pred, |
|
AssociativeOperator binary_op) |
|
{ |
|
typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType; |
|
typedef unsigned int HeadFlagType; |
|
|
|
const size_t n = last1 - first1; |
|
|
|
if(n != 0) |
|
{ |
|
InputIterator2 last2 = first2 + n; |
|
|
|
// compute head flags |
|
thrust::detail::temporary_array<HeadFlagType,DerivedPolicy> flags(exec, n); |
|
flags[0] = 1; thrust::transform(exec, first1, last1 - 1, first1 + 1, flags.begin() + 1, thrust::detail::not2(binary_pred)); |
|
|
|
// shift input one to the right and initialize segments with init |
|
thrust::detail::temporary_array<OutputType,DerivedPolicy> temp(exec, n); |
|
thrust::replace_copy_if(exec, first2, last2 - 1, flags.begin() + 1, temp.begin() + 1, thrust::negate<HeadFlagType>(), init); |
|
temp[0] = init; |
|
|
|
// scan key-flag tuples, |
|
// For additional details refer to Section 2 of the following paper |
|
// S. Sengupta, M. Harris, and M. Garland. "Efficient parallel scan algorithms for GPUs" |
|
// NVIDIA Technical Report NVR-2008-003, December 2008 |
|
// http://mgarland.org/files/papers/nvr-2008-003.pdf |
|
thrust::inclusive_scan(exec, |
|
thrust::make_zip_iterator(thrust::make_tuple(temp.begin(), flags.begin())), |
|
thrust::make_zip_iterator(thrust::make_tuple(temp.begin(), flags.begin())) + n, |
|
thrust::make_zip_iterator(thrust::make_tuple(result, flags.begin())), |
|
detail::segmented_scan_functor<OutputType, HeadFlagType, AssociativeOperator>(binary_op)); |
|
} |
|
|
|
return result + n; |
|
} |
|
|
|
} // end namespace generic |
|
} // end namespace detail |
|
} // end namespace system |
|
} // end namespace thrust |
|
|
|
|