ccminer/compat/thrust/system/cuda/detail/runtime_introspection.inl

/*
 *  Copyright 2008-2012 NVIDIA Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

#include <thrust/detail/config.h>
#include <thrust/system/cuda/detail/runtime_introspection.h>
#include <thrust/system/cuda/detail/guarded_cuda_runtime_api.h>
#include <thrust/detail/util/blocking.h>
#include <thrust/detail/minmax.h>
#include <thrust/system_error.h>
#include <thrust/system/cuda/error.h>


namespace thrust
{
namespace system
{
namespace cuda
{
namespace detail
{
namespace runtime_introspection_detail
{


// Queries the CUDA runtime for the properties of device_id and copies the
// fields that device_properties_t stores into p.
//
// Throws thrust::system_error (in thrust::cuda_category()) when
// cudaGetDeviceProperties reports an error; p is not written in that case.
inline void get_device_properties(device_properties_t &p, int device_id)
{
  cudaDeviceProp raw;

  const cudaError_t status = cudaGetDeviceProperties(&raw, device_id);

  if(status)
  {
    throw thrust::system_error(status, thrust::cuda_category());
  }

  // aggregate initialization: the initializers below are positional, so
  // their order has to follow the member order of device_properties_t
  const device_properties_t converted = {
    raw.major,
    {
      raw.maxGridSize[0],
      raw.maxGridSize[1],
      raw.maxGridSize[2]
    },
    raw.maxThreadsPerBlock,
    raw.maxThreadsPerMultiProcessor,
    raw.minor,
    raw.multiProcessorCount,
    raw.regsPerBlock,
    raw.sharedMemPerBlock,
    raw.warpSize
  };

  p = converted;
} // end get_device_properties()


} // end runtime_introspection_detail


// Returns the properties of the CUDA device with ordinal device_id.
//
// Querying the runtime is slow, so the results for device ids in
// [0, max_num_devices) are cached in function-local statics. Any other id
// (negative, or >= max_num_devices) is never cached and is forwarded
// straight to runtime_introspection_detail::get_device_properties, which
// throws thrust::system_error if the runtime reports an error.
inline device_properties_t device_properties(int device_id)
{
  // only cache the first few devices
  static const int max_num_devices                               = 16;

  static bool properties_exist[max_num_devices]                  = {0};
  static device_properties_t cached_properties[max_num_devices]  = {};

  // ids that cannot index the cache take the uncached path. this includes
  // negative ids: indexing the static arrays with them would be out of
  // bounds, so the runtime query is left to reject them instead
  // (cudaGetDeviceProperties is expected to report an invalid device)
  if(device_id < 0 || device_id >= max_num_devices)
  {
    device_properties_t result;
    runtime_introspection_detail::get_device_properties(result, device_id);
    return result;
  }

  if(!properties_exist[device_id])
  {
    runtime_introspection_detail::get_device_properties(cached_properties[device_id], device_id);

    // disallow the compiler to move the write to properties_exist[device_id]
    // before the initialization of cached_properties[device_id]
    __thrust_compiler_fence();

    properties_exist[device_id] = true;
  }

  return cached_properties[device_id];
}

inline int current_device()
{
  int result = -1;

  cudaError_t error = cudaGetDevice(&result);

  if(error)
    throw thrust::system_error(error, thrust::cuda_category());

  if(result < 0)
    throw thrust::system_error(cudaErrorNoDevice, thrust::cuda_category());

  return result;
}

// Returns the properties of the device reported by current_device().
inline device_properties_t device_properties(void)
{
  const int active_device = current_device();

  return device_properties(active_device);
}

// Returns the attributes the CUDA runtime reports for kernel.
//
// When the translation unit is not compiled with __CUDACC__ defined, no
// query is made and a value-initialized function_attributes_t is returned.
// Otherwise throws thrust::system_error (in thrust::cuda_category()) if
// cudaFuncGetAttributes reports an error.
template <typename KernelFunction>
inline function_attributes_t function_attributes(KernelFunction kernel)
{
// cudaFuncGetAttributes(), used below, only exists when __CUDACC__ is defined
#ifndef __CUDACC__
  return function_attributes_t();
#else
  // the runtime query is made through a plain function pointer type
  typedef void (*kernel_ptr_type)();

  kernel_ptr_type kernel_ptr = reinterpret_cast<kernel_ptr_type>(kernel);

  cudaFuncAttributes raw;

  const cudaError_t status = cudaFuncGetAttributes(&raw, kernel_ptr);

  if(status)
  {
    throw thrust::system_error(status, thrust::cuda_category());
  }

  // aggregate initialization: the initializers below are positional, so
  // their order has to follow the member order of function_attributes_t
  function_attributes_t converted = {
    raw.constSizeBytes,
    raw.localSizeBytes,
    raw.maxThreadsPerBlock,
    raw.numRegs,
    raw.sharedSizeBytes
  };

  return converted;
#endif // __CUDACC__
}

// Encodes the major/minor version stored in properties as a single number,
// 10 * major + minor.
inline size_t compute_capability(const device_properties_t &properties)
{
  const size_t encoded = 10 * properties.major + properties.minor;

  return encoded;
}

inline size_t compute_capability(void)
{
  return compute_capability(device_properties());
}


} // end namespace detail
} // end namespace cuda
} // end namespace system
} // end namespace thrust
commit initial version 0.1 11 years ago			`/*`
			`* Copyright 2008-2012 NVIDIA Corporation`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`

			`#include <thrust/detail/config.h>`
			`#include <thrust/system/cuda/detail/runtime_introspection.h>`
			`#include <thrust/system/cuda/detail/guarded_cuda_runtime_api.h>`
			`#include <thrust/detail/util/blocking.h>`
			`#include <thrust/detail/minmax.h>`
			`#include <thrust/system_error.h>`
			`#include <thrust/system/cuda/error.h>`


			`namespace thrust`
			`{`
			`namespace system`
			`{`
			`namespace cuda`
			`{`
			`namespace detail`
			`{`
			`namespace runtime_introspection_detail`
			`{`


			`inline void get_device_properties(device_properties_t &p, int device_id)`
			`{`
			`cudaDeviceProp properties;`

			`cudaError_t error = cudaGetDeviceProperties(&properties, device_id);`

			`if(error)`
			`throw thrust::system_error(error, thrust::cuda_category());`

			`// be careful about how this is initialized!`
			`device_properties_t temp = {`
			`properties.major,`
			`{`
			`properties.maxGridSize[0],`
			`properties.maxGridSize[1],`
			`properties.maxGridSize[2]`
			`},`
			`properties.maxThreadsPerBlock,`
			`properties.maxThreadsPerMultiProcessor,`
			`properties.minor,`
			`properties.multiProcessorCount,`
			`properties.regsPerBlock,`
			`properties.sharedMemPerBlock,`
			`properties.warpSize`
			`};`

			`p = temp;`
			`} // end get_device_properties()`


			`} // end runtime_introspection_detail`


			`inline device_properties_t device_properties(int device_id)`
			`{`
			`// cache the result of get_device_properties, because it is slow`
			`// only cache the first few devices`
			`static const int max_num_devices = 16;`

			`static bool properties_exist[max_num_devices] = {0};`
			`static device_properties_t device_properties[max_num_devices] = {};`

			`if(device_id >= max_num_devices)`
			`{`
			`device_properties_t result;`
			`runtime_introspection_detail::get_device_properties(result, device_id);`
			`return result;`
			`}`

			`if(!properties_exist[device_id])`
			`{`
			`runtime_introspection_detail::get_device_properties(device_properties[device_id], device_id);`

			`// disallow the compiler to move the write to properties_exist[device_id]`
			`// before the initialization of device_properties[device_id]`
			`__thrust_compiler_fence();`

			`properties_exist[device_id] = true;`
			`}`

			`return device_properties[device_id];`
			`}`

			`inline int current_device()`
			`{`
			`int result = -1;`

			`cudaError_t error = cudaGetDevice(&result);`

			`if(error)`
			`throw thrust::system_error(error, thrust::cuda_category());`

			`if(result < 0)`
			`throw thrust::system_error(cudaErrorNoDevice, thrust::cuda_category());`

			`return result;`
			`}`

			`inline device_properties_t device_properties(void)`
			`{`
			`return device_properties(current_device());`
			`}`

			`template <typename KernelFunction>`
			`inline function_attributes_t function_attributes(KernelFunction kernel)`
			`{`
			`// cudaFuncGetAttributes(), used below, only exists when __CUDACC__ is defined`
			`#ifdef __CUDACC__`
			`typedef void (*fun_ptr_type)();`

			`fun_ptr_type fun_ptr = reinterpret_cast<fun_ptr_type>(kernel);`

			`cudaFuncAttributes attributes;`

			`cudaError_t error = cudaFuncGetAttributes(&attributes, fun_ptr);`

			`if(error)`
			`{`
			`throw thrust::system_error(error, thrust::cuda_category());`
			`}`

			`// be careful about how this is initialized!`
			`function_attributes_t result = {`
			`attributes.constSizeBytes,`
			`attributes.localSizeBytes,`
			`attributes.maxThreadsPerBlock,`
			`attributes.numRegs,`
			`attributes.sharedSizeBytes`
			`};`

			`return result;`
			`#else`
			`return function_attributes_t();`
			`#endif // __CUDACC__`
			`}`

			`inline size_t compute_capability(const device_properties_t &properties)`
			`{`
			`return 10 * properties.major + properties.minor;`
			`}`

			`inline size_t compute_capability(void)`
			`{`
			`return compute_capability(device_properties());`
			`}`


			`} // end namespace detail`
			`} // end namespace cuda`
			`} // end namespace system`
			`} // end namespace thrust`