/*
 *  Copyright 2008-2012 NVIDIA Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

#include <thrust/detail/config.h>
#include <thrust/system/cuda/detail/runtime_introspection.h>
#include <thrust/system/cuda/detail/guarded_cuda_runtime_api.h>
#include <thrust/detail/util/blocking.h>
#include <thrust/detail/minmax.h>
#include <thrust/system_error.h>
#include <thrust/system/cuda/error.h>


namespace thrust
{
namespace system
{
namespace cuda
{
namespace detail
{
namespace runtime_introspection_detail
{


inline void get_device_properties(device_properties_t &p, int device_id)
{
  cudaDeviceProp properties;
  
  cudaError_t error = cudaGetDeviceProperties(&properties, device_id);
  
  if(error)
    throw thrust::system_error(error, thrust::cuda_category());

  // be careful about how this is initialized!
  device_properties_t temp = {
    properties.major,
    {
      properties.maxGridSize[0],
      properties.maxGridSize[1],
      properties.maxGridSize[2]
    },
    properties.maxThreadsPerBlock,
    properties.maxThreadsPerMultiProcessor,
    properties.minor,
    properties.multiProcessorCount,
    properties.regsPerBlock,
    properties.sharedMemPerBlock,
    properties.warpSize
  };

  p = temp;
} // end get_device_properties()


} // end runtime_introspection_detail


inline device_properties_t device_properties(int device_id)
{
  // cache the result of get_device_properties, because it is slow
  // only cache the first few devices
  static const int max_num_devices                              = 16;

  static bool properties_exist[max_num_devices]                 = {0};
  static device_properties_t device_properties[max_num_devices] = {};

  if(device_id >= max_num_devices)
  {
    device_properties_t result;
    runtime_introspection_detail::get_device_properties(result, device_id);
    return result;
  }

  if(!properties_exist[device_id])
  {
    runtime_introspection_detail::get_device_properties(device_properties[device_id], device_id);

    // disallow the compiler to move the write to properties_exist[device_id]
    // before the initialization of device_properties[device_id]
    __thrust_compiler_fence();
    
    properties_exist[device_id] = true;
  }

  return device_properties[device_id];
}

inline int current_device()
{
  int result = -1;

  cudaError_t error = cudaGetDevice(&result);

  if(error)
    throw thrust::system_error(error, thrust::cuda_category());

  if(result < 0)
    throw thrust::system_error(cudaErrorNoDevice, thrust::cuda_category());

  return result;
}

inline device_properties_t device_properties(void)
{
  return device_properties(current_device());
}

template <typename KernelFunction>
inline function_attributes_t function_attributes(KernelFunction kernel)
{
// cudaFuncGetAttributes(), used below, only exists when __CUDACC__ is defined
#ifdef __CUDACC__
  typedef void (*fun_ptr_type)();

  fun_ptr_type fun_ptr = reinterpret_cast<fun_ptr_type>(kernel);

  cudaFuncAttributes attributes;
  
  cudaError_t error = cudaFuncGetAttributes(&attributes, fun_ptr);
  
  if(error)
  {
    throw thrust::system_error(error, thrust::cuda_category());
  }

  // be careful about how this is initialized!
  function_attributes_t result = {
    attributes.constSizeBytes,
    attributes.localSizeBytes,
    attributes.maxThreadsPerBlock,
    attributes.numRegs,
    attributes.sharedSizeBytes
  };

  return result;
#else
  return function_attributes_t();
#endif // __CUDACC__
}

inline size_t compute_capability(const device_properties_t &properties)
{
  return 10 * properties.major + properties.minor;
}

inline size_t compute_capability(void)
{
  return compute_capability(device_properties());
}


} // end namespace detail
} // end namespace cuda
} // end namespace system
} // end namespace thrust