ccminer/compat/thrust/system/detail/generic/scan_by_key.inl

/*
 *  Copyright 2008-2012 NVIDIA Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */


#include <thrust/detail/config.h>
#include <thrust/system/detail/generic/scan_by_key.h>
#include <thrust/functional.h>
#include <thrust/transform.h>
#include <thrust/replace.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/iterator_traits.h>
#include <thrust/detail/temporary_array.h>
#include <thrust/detail/internal_functional.h>
#include <thrust/scan.h>

namespace thrust
{
namespace system
{
namespace detail
{
namespace generic
{
namespace detail
{

template <typename OutputType, typename HeadFlagType, typename AssociativeOperator>
struct segmented_scan_functor
{
    AssociativeOperator binary_op;

    typedef typename thrust::tuple<OutputType, HeadFlagType> result_type;

    __host__ __device__
    segmented_scan_functor(AssociativeOperator _binary_op) : binary_op(_binary_op) {}

    __host__ __device__
    result_type operator()(result_type a, result_type b)
    {
        return result_type(thrust::get<1>(b) ? thrust::get<0>(b) : binary_op(thrust::get<0>(a), thrust::get<0>(b)),
                           thrust::get<1>(a) | thrust::get<1>(b));
    }
};

} // end namespace detail


template<typename DerivedPolicy,
         typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator>
  OutputIterator inclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
                                       InputIterator1 first1,
                                       InputIterator1 last1,
                                       InputIterator2 first2,
                                       OutputIterator result)
{
  typedef typename thrust::iterator_traits<InputIterator1>::value_type InputType1;
  return thrust::inclusive_scan_by_key(exec, first1, last1, first2, result, thrust::equal_to<InputType1>());
}


template<typename DerivedPolicy,
         typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator,
         typename BinaryPredicate>
  OutputIterator inclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
                                       InputIterator1 first1,
                                       InputIterator1 last1,
                                       InputIterator2 first2,
                                       OutputIterator result,
                                       BinaryPredicate binary_pred)
{
  typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;
  return thrust::inclusive_scan_by_key(exec, first1, last1, first2, result, binary_pred, thrust::plus<OutputType>());
}


template<typename DerivedPolicy,
         typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator,
         typename BinaryPredicate,
         typename AssociativeOperator>
  OutputIterator inclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
                                       InputIterator1 first1,
                                       InputIterator1 last1,
                                       InputIterator2 first2,
                                       OutputIterator result,
                                       BinaryPredicate binary_pred,
                                       AssociativeOperator binary_op)
{
  typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;
  typedef unsigned int HeadFlagType;

  const size_t n = last1 - first1;

  if(n != 0)
  {
    // compute head flags
    thrust::detail::temporary_array<HeadFlagType,DerivedPolicy> flags(exec, n);
    flags[0] = 1; thrust::transform(exec, first1, last1 - 1, first1 + 1, flags.begin() + 1, thrust::detail::not2(binary_pred));

    // scan key-flag tuples, 
    // For additional details refer to Section 2 of the following paper
    //    S. Sengupta, M. Harris, and M. Garland. "Efficient parallel scan algorithms for GPUs"
    //    NVIDIA Technical Report NVR-2008-003, December 2008
    //    http://mgarland.org/files/papers/nvr-2008-003.pdf
    thrust::inclusive_scan
        (exec,
         thrust::make_zip_iterator(thrust::make_tuple(first2, flags.begin())),
         thrust::make_zip_iterator(thrust::make_tuple(first2, flags.begin())) + n,
         thrust::make_zip_iterator(thrust::make_tuple(result, flags.begin())),
         detail::segmented_scan_functor<OutputType, HeadFlagType, AssociativeOperator>(binary_op));
  }

  return result + n;
}


template<typename DerivedPolicy,
         typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator>
  OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
                                       InputIterator1 first1,
                                       InputIterator1 last1,
                                       InputIterator2 first2,
                                       OutputIterator result)
{
  typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;
  return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, OutputType(0));
}


template<typename DerivedPolicy,
         typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator,
         typename T>
  OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
                                       InputIterator1 first1,
                                       InputIterator1 last1,
                                       InputIterator2 first2,
                                       OutputIterator result,
                                       T init)
{
  typedef typename thrust::iterator_traits<InputIterator1>::value_type InputType1;
  return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, init, thrust::equal_to<InputType1>());
}


template<typename DerivedPolicy,
         typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator,
         typename T,
         typename BinaryPredicate>
  OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
                                       InputIterator1 first1,
                                       InputIterator1 last1,
                                       InputIterator2 first2,
                                       OutputIterator result,
                                       T init,
                                       BinaryPredicate binary_pred)
{
  typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;
  return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, init, binary_pred, thrust::plus<OutputType>());
}


template<typename DerivedPolicy,
         typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator,
         typename T,
         typename BinaryPredicate,
         typename AssociativeOperator>
  OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
                                       InputIterator1 first1,
                                       InputIterator1 last1,
                                       InputIterator2 first2,
                                       OutputIterator result,
                                       T init,
                                       BinaryPredicate binary_pred,
                                       AssociativeOperator binary_op)
{
  typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;
  typedef unsigned int HeadFlagType;

  const size_t n = last1 - first1;

  if(n != 0)
  {
    InputIterator2 last2 = first2 + n;

    // compute head flags
    thrust::detail::temporary_array<HeadFlagType,DerivedPolicy> flags(exec, n);
    flags[0] = 1; thrust::transform(exec, first1, last1 - 1, first1 + 1, flags.begin() + 1, thrust::detail::not2(binary_pred));

    // shift input one to the right and initialize segments with init
    thrust::detail::temporary_array<OutputType,DerivedPolicy> temp(exec, n);
    thrust::replace_copy_if(exec, first2, last2 - 1, flags.begin() + 1, temp.begin() + 1, thrust::negate<HeadFlagType>(), init);
    temp[0] = init;

    // scan key-flag tuples, 
    // For additional details refer to Section 2 of the following paper
    //    S. Sengupta, M. Harris, and M. Garland. "Efficient parallel scan algorithms for GPUs"
    //    NVIDIA Technical Report NVR-2008-003, December 2008
    //    http://mgarland.org/files/papers/nvr-2008-003.pdf
    thrust::inclusive_scan(exec,
                           thrust::make_zip_iterator(thrust::make_tuple(temp.begin(), flags.begin())),
                           thrust::make_zip_iterator(thrust::make_tuple(temp.begin(), flags.begin())) + n,
                           thrust::make_zip_iterator(thrust::make_tuple(result,       flags.begin())),
                           detail::segmented_scan_functor<OutputType, HeadFlagType, AssociativeOperator>(binary_op));
  }

  return result + n;
}

} // end namespace generic
} // end namespace detail
} // end namespace system
} // end namespace thrust
commit initial version 0.1 11 years ago			`/*`
			`* Copyright 2008-2012 NVIDIA Corporation`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`


			`#include <thrust/detail/config.h>`
			`#include <thrust/system/detail/generic/scan_by_key.h>`
			`#include <thrust/functional.h>`
			`#include <thrust/transform.h>`
			`#include <thrust/replace.h>`
			`#include <thrust/iterator/zip_iterator.h>`
			`#include <thrust/iterator/iterator_traits.h>`
			`#include <thrust/detail/temporary_array.h>`
			`#include <thrust/detail/internal_functional.h>`
			`#include <thrust/scan.h>`

			`namespace thrust`
			`{`
			`namespace system`
			`{`
			`namespace detail`
			`{`
			`namespace generic`
			`{`
			`namespace detail`
			`{`

			`template <typename OutputType, typename HeadFlagType, typename AssociativeOperator>`
			`struct segmented_scan_functor`
			`{`
			`AssociativeOperator binary_op;`

			`typedef typename thrust::tuple<OutputType, HeadFlagType> result_type;`

			`__host__ __device__`
			`segmented_scan_functor(AssociativeOperator _binary_op) : binary_op(_binary_op) {}`

			`__host__ __device__`
			`result_type operator()(result_type a, result_type b)`
			`{`
			`return result_type(thrust::get<1>(b) ? thrust::get<0>(b) : binary_op(thrust::get<0>(a), thrust::get<0>(b)),`
			`thrust::get<1>(a) \| thrust::get<1>(b));`
			`}`
			`};`

			`} // end namespace detail`


			`template<typename DerivedPolicy,`
			`typename InputIterator1,`
			`typename InputIterator2,`
			`typename OutputIterator>`
			`OutputIterator inclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,`
			`InputIterator1 first1,`
			`InputIterator1 last1,`
			`InputIterator2 first2,`
			`OutputIterator result)`
			`{`
			`typedef typename thrust::iterator_traits<InputIterator1>::value_type InputType1;`
			`return thrust::inclusive_scan_by_key(exec, first1, last1, first2, result, thrust::equal_to<InputType1>());`
			`}`


			`template<typename DerivedPolicy,`
			`typename InputIterator1,`
			`typename InputIterator2,`
			`typename OutputIterator,`
			`typename BinaryPredicate>`
			`OutputIterator inclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,`
			`InputIterator1 first1,`
			`InputIterator1 last1,`
			`InputIterator2 first2,`
			`OutputIterator result,`
			`BinaryPredicate binary_pred)`
			`{`
			`typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;`
			`return thrust::inclusive_scan_by_key(exec, first1, last1, first2, result, binary_pred, thrust::plus<OutputType>());`
			`}`


			`template<typename DerivedPolicy,`
			`typename InputIterator1,`
			`typename InputIterator2,`
			`typename OutputIterator,`
			`typename BinaryPredicate,`
			`typename AssociativeOperator>`
			`OutputIterator inclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,`
			`InputIterator1 first1,`
			`InputIterator1 last1,`
			`InputIterator2 first2,`
			`OutputIterator result,`
			`BinaryPredicate binary_pred,`
			`AssociativeOperator binary_op)`
			`{`
			`typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;`
			`typedef unsigned int HeadFlagType;`

			`const size_t n = last1 - first1;`

			`if(n != 0)`
			`{`
			`// compute head flags`
			`thrust::detail::temporary_array<HeadFlagType,DerivedPolicy> flags(exec, n);`
			`flags[0] = 1; thrust::transform(exec, first1, last1 - 1, first1 + 1, flags.begin() + 1, thrust::detail::not2(binary_pred));`

			`// scan key-flag tuples,`
			`// For additional details refer to Section 2 of the following paper`
			`// S. Sengupta, M. Harris, and M. Garland. "Efficient parallel scan algorithms for GPUs"`
			`// NVIDIA Technical Report NVR-2008-003, December 2008`
			`// http://mgarland.org/files/papers/nvr-2008-003.pdf`
			`thrust::inclusive_scan`
			`(exec,`
			`thrust::make_zip_iterator(thrust::make_tuple(first2, flags.begin())),`
			`thrust::make_zip_iterator(thrust::make_tuple(first2, flags.begin())) + n,`
			`thrust::make_zip_iterator(thrust::make_tuple(result, flags.begin())),`
			`detail::segmented_scan_functor<OutputType, HeadFlagType, AssociativeOperator>(binary_op));`
			`}`

			`return result + n;`
			`}`


			`template<typename DerivedPolicy,`
			`typename InputIterator1,`
			`typename InputIterator2,`
			`typename OutputIterator>`
			`OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,`
			`InputIterator1 first1,`
			`InputIterator1 last1,`
			`InputIterator2 first2,`
			`OutputIterator result)`
			`{`
			`typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;`
			`return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, OutputType(0));`
			`}`


			`template<typename DerivedPolicy,`
			`typename InputIterator1,`
			`typename InputIterator2,`
			`typename OutputIterator,`
			`typename T>`
			`OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,`
			`InputIterator1 first1,`
			`InputIterator1 last1,`
			`InputIterator2 first2,`
			`OutputIterator result,`
			`T init)`
			`{`
			`typedef typename thrust::iterator_traits<InputIterator1>::value_type InputType1;`
			`return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, init, thrust::equal_to<InputType1>());`
			`}`


			`template<typename DerivedPolicy,`
			`typename InputIterator1,`
			`typename InputIterator2,`
			`typename OutputIterator,`
			`typename T,`
			`typename BinaryPredicate>`
			`OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,`
			`InputIterator1 first1,`
			`InputIterator1 last1,`
			`InputIterator2 first2,`
			`OutputIterator result,`
			`T init,`
			`BinaryPredicate binary_pred)`
			`{`
			`typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;`
			`return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, init, binary_pred, thrust::plus<OutputType>());`
			`}`


			`template<typename DerivedPolicy,`
			`typename InputIterator1,`
			`typename InputIterator2,`
			`typename OutputIterator,`
			`typename T,`
			`typename BinaryPredicate,`
			`typename AssociativeOperator>`
			`OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,`
			`InputIterator1 first1,`
			`InputIterator1 last1,`
			`InputIterator2 first2,`
			`OutputIterator result,`
			`T init,`
			`BinaryPredicate binary_pred,`
			`AssociativeOperator binary_op)`
			`{`
			`typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;`
			`typedef unsigned int HeadFlagType;`

			`const size_t n = last1 - first1;`

			`if(n != 0)`
			`{`
			`InputIterator2 last2 = first2 + n;`

			`// compute head flags`
			`thrust::detail::temporary_array<HeadFlagType,DerivedPolicy> flags(exec, n);`
			`flags[0] = 1; thrust::transform(exec, first1, last1 - 1, first1 + 1, flags.begin() + 1, thrust::detail::not2(binary_pred));`

			`// shift input one to the right and initialize segments with init`
			`thrust::detail::temporary_array<OutputType,DerivedPolicy> temp(exec, n);`
			`thrust::replace_copy_if(exec, first2, last2 - 1, flags.begin() + 1, temp.begin() + 1, thrust::negate<HeadFlagType>(), init);`
			`temp[0] = init;`

			`// scan key-flag tuples,`
			`// For additional details refer to Section 2 of the following paper`
			`// S. Sengupta, M. Harris, and M. Garland. "Efficient parallel scan algorithms for GPUs"`
			`// NVIDIA Technical Report NVR-2008-003, December 2008`
			`// http://mgarland.org/files/papers/nvr-2008-003.pdf`
			`thrust::inclusive_scan(exec,`
			`thrust::make_zip_iterator(thrust::make_tuple(temp.begin(), flags.begin())),`
			`thrust::make_zip_iterator(thrust::make_tuple(temp.begin(), flags.begin())) + n,`
			`thrust::make_zip_iterator(thrust::make_tuple(result, flags.begin())),`
			`detail::segmented_scan_functor<OutputType, HeadFlagType, AssociativeOperator>(binary_op));`
			`}`

			`return result + n;`
			`}`

			`} // end namespace generic`
			`} // end namespace detail`
			`} // end namespace system`
			`} // end namespace thrust`