/*
 *  Copyright 2008-2012 NVIDIA Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

// NOTE(review): the include targets were lost when this file was extracted
// (only bare `#include` tokens survived). The list below is reconstructed
// from the names this file actually uses -- confirm against upstream.
#include <thrust/detail/config.h>
#include <thrust/functional.h>
#include <thrust/gather.h>
#include <thrust/iterator/iterator_traits.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/detail/temporary_array.h>
#include <thrust/system/detail/internal/decompose.h>
#include <thrust/system/cuda/detail/default_decomposition.h>
#include <thrust/system/cuda/detail/detail/launch_closure.h>
#include <thrust/system/cuda/detail/execution_policy.h>

namespace thrust
{
namespace system
{
namespace cuda
{
namespace detail
{
namespace detail
{

// Unary functor mapping an interval number to the index of the last element
// of that interval, i.e. decomp[interval].end() - 1.
template <typename Decomposition>
struct last_index_in_each_interval
  : public thrust::unary_function<typename Decomposition::index_type,
                                  typename Decomposition::index_type>
{
  typedef typename Decomposition::index_type index_type;

  Decomposition decomp;

  last_index_in_each_interval(Decomposition decomp) : decomp(decomp) {}

  __host__ __device__
  index_type operator()(index_type interval)
  {
    return decomp[interval].end() - 1;
  }
};

// Device-side closure computing
//   output[0] = input[0]
//   output[i] = binary_op(input[i], input[i - 1])   for i > 0
// over the interval decomp[context.block_index()].
//
// Members:
//   input      - beginning of the whole input sequence
//   input_copy - beginning of a side buffer in which element k is a copy of
//                the last input element of interval k (only intervals
//                0 .. decomp.size() - 2 are stored by the caller below)
//   output     - beginning of the whole output sequence
//   binary_op  - called as binary_op(current, left_neighbour)
//   decomp     - partition of the index space into intervals
//   context    - supplies block_index(), thread_index(), block_dimension()
//                and barrier()
template <typename InputIterator1,
          typename InputIterator2,
          typename OutputIterator,
          typename BinaryFunction,
          typename Decomposition,
          typename Context>
struct adjacent_difference_closure
{
  InputIterator1 input;
  InputIterator2 input_copy;
  OutputIterator output;
  BinaryFunction binary_op;
  Decomposition  decomp;
  Context        context;

  typedef Context context_type;

  adjacent_difference_closure(InputIterator1 input,
                              InputIterator2 input_copy,
                              OutputIterator output,
                              BinaryFunction binary_op,
                              Decomposition  decomp,
                              Context        context = Context())
    : input(input), input_copy(input_copy), output(output),
      binary_op(binary_op), decomp(decomp), context(context)
  {}

  __device__ __thrust_forceinline__
  void operator()(void)
  {
    typedef typename thrust::iterator_value<InputIterator1>::type InputType;
    typedef typename Decomposition::index_type                    index_type;

    // this block processes results in [range.begin(), range.end())
    thrust::system::detail::internal::index_range<index_type> range = decomp[context.block_index()];

    // Block b > 0 needs the last element of interval b - 1, stored at
    // input_copy[b - 1]. Block 0 has no predecessor, so the iterator is left
    // untouched there instead of being stepped outside the buffer
    // (block_index() - 1 is out of range for block 0).
    const bool first_block = (context.block_index() == 0);

    if (!first_block)
    {
      input_copy += context.block_index() - 1;
    }

    // prime the temp values for all threads so we don't need to launch a default constructor
    InputType next_left = first_block ? *input : *input_copy;

    // base is identical for every thread of the block, so all threads run the
    // same number of loop iterations and every thread reaches each barrier.
    index_type base = range.begin();
    index_type i    = range.begin() + context.thread_index();

    // Every thread but the first reads its left neighbour straight from the
    // input; thread 0 keeps the value primed above.
    if (i < range.end())
    {
      if (context.thread_index() > 0)
      {
        InputIterator1 temp = input + (i - 1);
        next_left = *temp;
      }
    }

    input  += i;
    output += i;

    while (base < range.end())
    {
      InputType curr_left = next_left;

      // Fetch the left neighbour for the NEXT iteration (element
      // i + block_dimension() - 1) before anything is written in this one.
      if (i + context.block_dimension() < range.end())
      {
        InputIterator1 temp = input + (context.block_dimension() - 1);
        next_left = *temp;
      }

      // All reads above complete before any write below.
      // NOTE(review): presumably this is what keeps the algorithm correct
      // when output aliases input (in-place use) -- confirm.
      context.barrier();

      if (i < range.end())
      {
        if (i == 0)
        {
          // The very first element has no left neighbour; copy it through.
          *output = *input;
        }
        else
        {
          InputType x = *input;
          *output = binary_op(x, curr_left);
        }
      }

      i      += context.block_dimension();
      base   += context.block_dimension();
      input  += context.block_dimension();
      output += context.block_dimension();
    }
  }
};

} // end namespace detail


__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN

// CUDA backend for adjacent_difference.
//
// Writes *result = *first and, for every later position i in [first, last),
// result[i] = binary_op(first[i], first[i - 1]).
//
// exec      - execution policy; also used to allocate the temporary buffer
// first     - beginning of the input range
// last      - end of the input range
// result    - beginning of the output range
// binary_op - called as binary_op(current, left_neighbour)
//
// Returns result unchanged when the input is empty, otherwise result + n
// where n = last - first.
template <typename DerivedPolicy,
          typename InputIterator,
          typename OutputIterator,
          typename BinaryFunction>
OutputIterator adjacent_difference(execution_policy<DerivedPolicy> &exec,
                                   InputIterator first, InputIterator last,
                                   OutputIterator result,
                                   BinaryFunction binary_op)
{
  typedef typename thrust::iterator_value<InputIterator>::type                InputType;
  typedef typename thrust::iterator_difference<InputIterator>::type           IndexType;
  typedef thrust::system::detail::internal::uniform_decomposition<IndexType>  Decomposition;

  IndexType n = last - first;

  if (n == 0)
    return result;

  Decomposition decomp = default_decomposition(n);

  // allocate temporary storage: one slot for every interval except the last,
  // since no interval follows the last one
  thrust::detail::temporary_array<InputType, DerivedPolicy> temp(exec, decomp.size() - 1);

  // gather last value in each interval
  // (done before the closure is launched, so the closure reads interval
  // boundaries from temp rather than from the input)
  detail::last_index_in_each_interval<Decomposition> unary_op(decomp);
  thrust::gather(exec,
                 thrust::make_transform_iterator(thrust::counting_iterator<IndexType>(0), unary_op),
                 thrust::make_transform_iterator(thrust::counting_iterator<IndexType>(0), unary_op) + (decomp.size() - 1),
                 first,
                 temp.begin());

  typedef typename thrust::detail::temporary_array<InputType, DerivedPolicy>::iterator InputIterator2;
  typedef detail::blocked_thread_array Context;
  typedef detail::adjacent_difference_closure<InputIterator,
                                              InputIterator2,
                                              OutputIterator,
                                              BinaryFunction,
                                              Decomposition,
                                              Context> Closure;

  Closure closure(first, temp.begin(), result, binary_op, decomp);

  // The closure indexes decomp by block_index(), so one block is requested
  // per interval.
  detail::launch_closure(closure, decomp.size());

  return result + n;
}

__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END

} // end namespace detail
} // end namespace cuda
} // end namespace system
} // end namespace thrust