From 4148de2f0b5c15b909824bfa0de8ba67b0956b32 Mon Sep 17 00:00:00 2001 From: DepthDeluxe Date: Sat, 23 Jan 2016 20:43:25 -0500 Subject: [PATCH] added NVIDIA kernel compatible build environment --- CMakeLists.txt | 1 - app/br/CMakeLists.txt | 1 - openbr/CMakeLists.txt | 24 +++++++++++------------- openbr/cuda-plugins/cglg/copyfrom.cpp | 43 ------------------------------------------- openbr/cuda-plugins/cglg/copyto.cpp | 54 ------------------------------------------------------ openbr/cuda-plugins/cglg/customthreshold.cu | 72 ------------------------------------------------------------------------ openbr/cuda-plugins/cglg/passthrough.cpp | 23 ----------------------- openbr/cuda-plugins/cglg/threshold.cpp | 44 -------------------------------------------- openbr/cuda-plugins/cuda-plugins.cmake | 50 -------------------------------------------------- openbr/plugins/cuda/copyfrom.cpp | 43 +++++++++++++++++++++++++++++++++++++++++++ openbr/plugins/cuda/copyto.cpp | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ openbr/plugins/cuda/passthrough.cpp | 36 ++++++++++++++++++++++++++++++++++++ openbr/plugins/cuda/passthrough.cu | 7 +++++++ openbr/plugins/cuda/threshold.cpp | 44 ++++++++++++++++++++++++++++++++++++++++++++ openbr/plugins/plugins.cmake | 2 +- 15 files changed, 196 insertions(+), 302 deletions(-) delete mode 100644 openbr/cuda-plugins/cglg/copyfrom.cpp delete mode 100644 openbr/cuda-plugins/cglg/copyto.cpp delete mode 100644 openbr/cuda-plugins/cglg/customthreshold.cu delete mode 100644 openbr/cuda-plugins/cglg/passthrough.cpp delete mode 100644 openbr/cuda-plugins/cglg/threshold.cpp delete mode 100644 openbr/cuda-plugins/cuda-plugins.cmake create mode 100644 openbr/plugins/cuda/copyfrom.cpp create mode 100644 openbr/plugins/cuda/copyto.cpp create mode 100644 openbr/plugins/cuda/passthrough.cpp create mode 100644 openbr/plugins/cuda/passthrough.cu create mode 100644 openbr/plugins/cuda/threshold.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f41dc3..d8047ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,7 +108,6 @@ endif() if(UNIX) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-strict-overflow -Wno-comment -Wno-unknown-pragmas -fvisibility=hidden -fno-omit-frame-pointer") set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/lib ${_qt5Core_install_prefix}/lib) - set(CUDA_NVCC_FLAGS "") # put NVCC compiler flags here if(NOT APPLE) if(${CMAKE_CXX_COMPILER} STREQUAL "/opt/intel/bin/icpc") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-intel -wd2196") diff --git a/app/br/CMakeLists.txt b/app/br/CMakeLists.txt index dfa60ed..3c3b6a9 100644 --- a/app/br/CMakeLists.txt +++ b/app/br/CMakeLists.txt @@ -8,4 +8,3 @@ qt5_use_modules(br ${QT_DEPENDENCIES}) install(TARGETS br RUNTIME DESTINATION bin) add_test(NAME br_initialize WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND br) - diff --git a/openbr/CMakeLists.txt b/openbr/CMakeLists.txt index c09d502..bd07993 100644 --- a/openbr/CMakeLists.txt +++ b/openbr/CMakeLists.txt @@ -8,7 +8,12 @@ set(SRC openbr.cpp universal_template.cpp) aux_source_directory(core BR_CORE) include(plugins/plugins.cmake) -include(cuda-plugins/cuda-plugins.cmake) + +# CUDA shit +FIND_PACKAGE(CUDA REQUIRED) +SET(CUDA_SEPARABLE_COMPILATION ON) +SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -arch=sm_20;--compiler-options "-fPIC") +SET(CUDA_VERBOSE_BUILD ON) # Janus API option(BR_WITH_JANUS "Build IARPA Janus related applications." ON) @@ -29,25 +34,18 @@ if(NOT BR_EMBEDDED) install(FILES ${HEADERS} DESTINATION include/openbr/gui) endif() -# normal BR library declaration -add_library(openbr SHARED ${SRC} ${BR_CORE} ${BR_JANUS} ${BR_GUI} ${BR_ICONS} ${BR_THIRDPARTY_SRC} ${BR_RESOURCES} ${NATURALSTRINGCOMPARE_SRC}) +# normal BR library declaration - added openbr-cuda library +message(STATUS "BR_THIRDPARTY_SRC") +message(STATUS ${BR_THIRDPARTY_SRC}) +cuda_add_library(openbr SHARED ${SRC} ${BR_CORE} ${BR_JANUS} ${BR_GUI} ${BR_ICONS} ${BR_THIRDPARTY_SRC} ${BR_RESOURCES} ${NATURALSTRINGCOMPARE_SRC}) qt5_use_modules(openbr ${QT_DEPENDENCIES}) set_target_properties(openbr PROPERTIES DEFINE_SYMBOL BR_LIBRARY VERSION ${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH} SOVERSION ${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}) -target_link_libraries(openbr ${BR_THIRDPARTY_LIBS}) +target_link_libraries(openbr ${BR_THIRDPARTY_LIBS} ${CUDA_LIBRARIES}) add_cppcheck(openbr) -# CUDA portion of BR - -cuda_add_library(openbr-cuda SHARED ${BR_THIRDPARTY_CUDA_SRC}) -qt5_use_modules(openbr-cuda ${QT_DEPENDENCIES}) -set_target_properties(openbr-cuda PROPERTIES - DEFINE_SYMBOL BR_CUDA_LIBRARY - VERSION ${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH} - SOVERSION ${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}) - # Janus implementation if(BR_WITH_JANUS) set(JANUS_BUILD_PP5_WRAPPER ${BR_WITH_PP5} CACHE BOOL "Build Janus implementation using PittPatt 5") diff --git a/openbr/cuda-plugins/cglg/copyfrom.cpp b/openbr/cuda-plugins/cglg/copyfrom.cpp deleted file mode 100644 index e624aae..0000000 --- a/openbr/cuda-plugins/cglg/copyfrom.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include - -#include -#include - -#include - -using namespace std; - -using namespace cv; -using namespace cv::gpu; - - -namespace br -{ - class CUDACopyFrom : public UntrainableTransform - { - Q_OBJECT - -private: - void project(const Template &src, Template &dst) const - { - // reassemble the integer and then build pointer to it - uint64_t gpuMatInt = (((uint64_t)src.m().at(1,0)) << (uint64_t)32) + ((uint64_t)src.m().at(0,0)); - GpuMat* gpuMat = (GpuMat*)gpuMatInt; - - printf("gpuMatInt: %li\n", gpuMatInt); - printf("m.at(0,0): %i\nm.at(1,0): %i\n", src.m().at(0,0), src.m().at(1,0)); - - // download the data back into the destination - Size size = gpuMat->size(); - Mat out = Mat(size.height, size.width, gpuMat->depth()); - - gpuMat->download(out); - - dst = out; - } - }; - - BR_REGISTER(Transform, CUDACopyFrom); -} - -#include "cuda/copyfrom.moc" diff --git a/openbr/cuda-plugins/cglg/copyto.cpp b/openbr/cuda-plugins/cglg/copyto.cpp deleted file mode 100644 index 36f683f..0000000 --- a/openbr/cuda-plugins/cglg/copyto.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include - -#include -#include - -#include - -using namespace std; - -using namespace cv; -using namespace cv::gpu; - -namespace br -{ - class CUDACopyTo : public UntrainableTransform - { - Q_OBJECT - -private: - void project(const Template &src, Template &dst) const - { - // get the mat to send to the GPU - GpuMat* gpuMat = new GpuMat; - - try - { - // copy the contents to the GPU - gpuMat->upload(src.m()); - } - catch(const cv::Exception& ex) - { - cout << "Error: " << ex.what() << endl; - } - - // now create a new Mat that contains the 64-bit pointer - Mat m = Mat(2, 1, CV_32S); - - // pointer magic - uint64_t gpuMatInt = (uint64_t)gpuMat; - m.at(0,0) = (int32_t)(gpuMatInt & 0x00000000FFFFFFFF); - m.at(1,0) = (int32_t)((gpuMatInt & 0xFFFFFFFF00000000) >> (uint64_t)32); - - printf("gpuMatInt: %li\n", gpuMatInt); - printf("m.at(0,0): %i\nm.at(1,0): %i\n", m.at(0,0), m.at(1,0)); - - // save away in the destination mat - dst += m; - } - }; - - BR_REGISTER(Transform, CUDACopyTo); -} - -#include "cuda/copyto.moc" diff --git a/openbr/cuda-plugins/cglg/customthreshold.cu b/openbr/cuda-plugins/cglg/customthreshold.cu deleted file mode 100644 index b414b57..0000000 --- a/openbr/cuda-plugins/cglg/customthreshold.cu +++ /dev/null @@ -1,72 +0,0 @@ -/* -#include - -// external opencv CUDA interface -#include -#include - -// internal CUDA stuff -#include -#include -#include -#include -#include - -#include - -using namespace std; - -using namespace cv; -using namespace cv::gpu; -using namespace cv::cuda; -using namespace cv::cuda::device; - -namespace br -{ - class CUDACustomThresholdTransform : public UntrainableTransform - { - Q_OBJECT - -private: - void project(const Template &src, Template &dst) const - { - // get the mat to send to the GPU - GpuMat gpuMat_src, gpuMat_dst; - - try - { - // copy the contents to the GPU - gpuMat_src.upload(src.m()); - - threshold(gpuMat_src, gpuMat_dst, 128.0, 255.0, CV_THRESH_BINARY); - - gpuMat_dst.download(dst.m()); - } - catch(const cv::Exception& ex) - { - cout << "Error: " << ex.what() << endl; - } - } - }; - - BR_REGISTER(Transform, CUDACustomThresholdTransform); - - namespace cuda { namespace customthreshold { - texture tex_src(false, cudaFilterModePoint, cudaAddressModeClamp); - struct SrcTex { - __host__ SrcTex(int _xoff, int _yoff) : xoff(_xoff), yoff(_yoff) {} - __device__ __forceinline__ int operator ()(int y, int x) const { - return tex2D(tex_src, x + xoff, y + yoff); - } - } - __global__ void testKernel(const SrcTex src) { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - src(x, y) = 1;sajfflksajlkfjdsalkfjsadjflkdsaf - } - } -} - -#include "cglg/customthreshold.moc" -*/ diff --git a/openbr/cuda-plugins/cglg/passthrough.cpp b/openbr/cuda-plugins/cglg/passthrough.cpp deleted file mode 100644 index 5b1044d..0000000 --- a/openbr/cuda-plugins/cglg/passthrough.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include - -#include - -using namespace cv; - -namespace br -{ - class CUDAPassthroughTransform : public UntrainableTransform - { - Q_OBJECT - -private: - void project(const Template &src, Template &dst) const - { - dst = src; - } - }; - - BR_REGISTER(Transform, CUDAPassthroughTransform); -} - -#include "cuda/passthrough.moc" diff --git a/openbr/cuda-plugins/cglg/threshold.cpp b/openbr/cuda-plugins/cglg/threshold.cpp deleted file mode 100644 index 219dccf..0000000 --- a/openbr/cuda-plugins/cglg/threshold.cpp +++ /dev/null @@ -1,44 +0,0 @@ -#include - -#include -#include - -#include - -using namespace std; - -using namespace cv; -using namespace cv::gpu; - -namespace br -{ - class CUDAThreshold : public UntrainableTransform - { - Q_OBJECT - -private: - void project(const Template &src, Template &dst) const - { - // get the mat to send to the GPU - GpuMat gpuMat_src, gpuMat_dst; - - try - { - // copy the contents to the GPU - gpuMat_src.upload(src.m()); - - threshold(gpuMat_src, gpuMat_dst, 128.0, 255.0, CV_THRESH_BINARY); - - gpuMat_dst.download(dst.m()); - } - catch(const cv::Exception& ex) - { - cout << "Error: " << ex.what() << endl; - } - } - }; - - BR_REGISTER(Transform, CUDAThreshold); -} - -#include "cuda/threshold.moc" diff --git a/openbr/cuda-plugins/cuda-plugins.cmake b/openbr/cuda-plugins/cuda-plugins.cmake deleted file mode 100644 index a4bf886..0000000 --- a/openbr/cuda-plugins/cuda-plugins.cmake +++ /dev/null @@ -1,50 +0,0 @@ -# Optional Appendable CMake Variables: -# BR_THIRDPARTY_CUDA_PLUGINS - Additional plugins -# BR_THIRDPARTY_CUDA_PLUGINS_DIR - Additional folder(s) of plugins -# BR_EXCLUDED_CUDA_PLUGINS - Plugins that should not be built -# BR_THIRDPARTY_SRC - Additional source code needed by a plugin -# BR_THIRDPARTY_LIBS - Additional libaries needed by a plugin - -# Also look for CMake modules in the thirdparty plugins folder(s) -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${BR_THIRDPARTY_CUDA_PLUGINS_DIR}) - -# Gather all of the plugin subdirectories -file(GLOB SUBFILES cuda-plugins/*) -foreach(FILE ${SUBFILES}) - if(IS_DIRECTORY ${FILE}) - set(BR_CUDA_PLUGINS_DIR ${BR_CUDA_PLUGINS_DIR} ${FILE}) - endif() -endforeach() -set(BR_CUDA_PLUGINS_DIR ${BR_CUDA_PLUGINS_DIR} cuda-plugins/) # Remove this when finished with reorg - -# Exclude pertinent plugins based on .cmake files -mark_as_advanced(BR_EXCLUDED_CUDA_PLUGINS) -foreach(DIR cuda-plugins/cmake ${BR_THIRDPARTY_CUDA_PLUGINS_DIR}) - file(GLOB CMAKE_FILES ${DIR}/*.cmake) - foreach(CMAKE_FILE ${CMAKE_FILES}) - if (NOT ${CMAKE_FILE} MATCHES "Find.*cmake") - include(${CMAKE_FILE}) - endif() - endforeach() -endforeach() - -# Collect all source files except for excluded plugins -foreach(DIR ${BR_CUDA_PLUGINS_DIR} ${BR_THIRDPARTY_CUDA_PLUGINS_DIR}) - get_filename_component(DIR_NAME ${DIR} NAME) - file(GLOB CUDA_PLUGINS ${DIR}/*.cu ${DIR}/*.cpp ${DIR}/*.h) - foreach(CUDA_PLUGIN ${CUDA_PLUGINS}) - get_filename_component(CUDA_PLUGIN_NAME ${CUDA_PLUGIN} NAME) - set(EXCLUDE FALSE) - foreach(EXCLUDED_CUDA_PLUGIN ${BR_EXCLUDED_CUDA_PLUGINS}) - get_filename_component(EXCLUDED_CUDA_PLUGIN_NAME ${EXCLUDED_CUDA_PLUGIN} NAME) - if (${CUDA_PLUGIN_NAME} STREQUAL ${EXCLUDED_CUDA_PLUGIN_NAME}) - set(EXCLUDE TRUE) - endif() - endforeach() - if(NOT ${EXCLUDE}) - set(BR_THIRDPARTY_CUDA_PLUGINS ${BR_THIRDPARTY_CUDA_PLUGINS} ${CUDA_PLUGIN}) - endif() - endforeach() -endforeach() - -set(BR_THIRDPARTY_CUDA_SRC ${BR_THIRDPARTY_CUDA_SRC} ${BR_THIRDPARTY_CUDA_PLUGINS}) diff --git a/openbr/plugins/cuda/copyfrom.cpp b/openbr/plugins/cuda/copyfrom.cpp new file mode 100644 index 0000000..e624aae --- /dev/null +++ b/openbr/plugins/cuda/copyfrom.cpp @@ -0,0 +1,43 @@ +#include + +#include +#include + +#include + +using namespace std; + +using namespace cv; +using namespace cv::gpu; + + +namespace br +{ + class CUDACopyFrom : public UntrainableTransform + { + Q_OBJECT + +private: + void project(const Template &src, Template &dst) const + { + // reassemble the integer and then build pointer to it + uint64_t gpuMatInt = (((uint64_t)src.m().at(1,0)) << (uint64_t)32) + ((uint64_t)src.m().at(0,0)); + GpuMat* gpuMat = (GpuMat*)gpuMatInt; + + printf("gpuMatInt: %li\n", gpuMatInt); + printf("m.at(0,0): %i\nm.at(1,0): %i\n", src.m().at(0,0), src.m().at(1,0)); + + // download the data back into the destination + Size size = gpuMat->size(); + Mat out = Mat(size.height, size.width, gpuMat->depth()); + + gpuMat->download(out); + + dst = out; + } + }; + + BR_REGISTER(Transform, CUDACopyFrom); +} + +#include "cuda/copyfrom.moc" diff --git a/openbr/plugins/cuda/copyto.cpp b/openbr/plugins/cuda/copyto.cpp new file mode 100644 index 0000000..36f683f --- /dev/null +++ b/openbr/plugins/cuda/copyto.cpp @@ -0,0 +1,54 @@ +#include + +#include +#include + +#include + +using namespace std; + +using namespace cv; +using namespace cv::gpu; + +namespace br +{ + class CUDACopyTo : public UntrainableTransform + { + Q_OBJECT + +private: + void project(const Template &src, Template &dst) const + { + // get the mat to send to the GPU + GpuMat* gpuMat = new GpuMat; + + try + { + // copy the contents to the GPU + gpuMat->upload(src.m()); + } + catch(const cv::Exception& ex) + { + cout << "Error: " << ex.what() << endl; + } + + // now create a new Mat that contains the 64-bit pointer + Mat m = Mat(2, 1, CV_32S); + + // pointer magic + uint64_t gpuMatInt = (uint64_t)gpuMat; + m.at(0,0) = (int32_t)(gpuMatInt & 0x00000000FFFFFFFF); + m.at(1,0) = (int32_t)((gpuMatInt & 0xFFFFFFFF00000000) >> (uint64_t)32); + + printf("gpuMatInt: %li\n", gpuMatInt); + printf("m.at(0,0): %i\nm.at(1,0): %i\n", m.at(0,0), m.at(1,0)); + + // save away in the destination mat + dst += m; + } + }; + + BR_REGISTER(Transform, CUDACopyTo); +} + +#include "cuda/copyto.moc" diff --git a/openbr/plugins/cuda/passthrough.cpp b/openbr/plugins/cuda/passthrough.cpp new file mode 100644 index 0000000..13b9f19 --- /dev/null +++ b/openbr/plugins/cuda/passthrough.cpp @@ -0,0 +1,36 @@ +#include + +#include +#include + +using namespace cv; +using namespace cv::gpu; + +extern void br_cuda_device_wrapper(); + +namespace br +{ + class CUDAPassthroughTransform : public UntrainableTransform + { + Q_OBJECT + +private: + void project(const Template &src, Template &dst) const + { + // upload the src mat to the GPU + GpuMat srcGpuMat, dstGpuMat; + srcGpuMat.upload(src.m()); + dstGpuMat.upload(src.m()); + + br_cuda_device_wrapper(); + + dstGpuMat.download(dst.m()); + + // TODO(colin): add delete code + } + }; + + BR_REGISTER(Transform, CUDAPassthroughTransform); +} + +#include "cuda/passthrough.moc" diff --git a/openbr/plugins/cuda/passthrough.cu b/openbr/plugins/cuda/passthrough.cu new file mode 100644 index 0000000..983a24e --- /dev/null +++ b/openbr/plugins/cuda/passthrough.cu @@ -0,0 +1,7 @@ +__global__ void br_cuda_device_kernel() { + +} + +void br_cuda_device_wrapper() { + br_cuda_device_kernel<<<1,1>>>(); +} diff --git a/openbr/plugins/cuda/threshold.cpp b/openbr/plugins/cuda/threshold.cpp new file mode 100644 index 0000000..219dccf --- /dev/null +++ b/openbr/plugins/cuda/threshold.cpp @@ -0,0 +1,44 @@ +#include + +#include +#include + +#include + +using namespace std; + +using namespace cv; +using namespace cv::gpu; + +namespace br +{ + class CUDAThreshold : public UntrainableTransform + { + Q_OBJECT + +private: + void project(const Template &src, Template &dst) const + { + // get the mat to send to the GPU + GpuMat gpuMat_src, gpuMat_dst; + + try + { + // copy the contents to the GPU + gpuMat_src.upload(src.m()); + + threshold(gpuMat_src, gpuMat_dst, 128.0, 255.0, CV_THRESH_BINARY); + + gpuMat_dst.download(dst.m()); + } + catch(const cv::Exception& ex) + { + cout << "Error: " << ex.what() << endl; + } + } + }; + + BR_REGISTER(Transform, CUDAThreshold); +} + +#include "cuda/threshold.moc" diff --git a/openbr/plugins/plugins.cmake b/openbr/plugins/plugins.cmake index c4ee8f3..094ab06 100644 --- a/openbr/plugins/plugins.cmake +++ b/openbr/plugins/plugins.cmake @@ -31,7 +31,7 @@ endforeach() # Collect all source files except for excluded plugins foreach(DIR ${BR_PLUGINS_DIR} ${BR_THIRDPARTY_PLUGINS_DIR}) get_filename_component(DIR_NAME ${DIR} NAME) - file(GLOB PLUGINS ${DIR}/*.cpp ${DIR}/*.h) + file(GLOB PLUGINS ${DIR}/*.cpp ${DIR}/*.cu ${DIR}/*.h) foreach(PLUGIN ${PLUGINS}) get_filename_component(PLUGIN_NAME ${PLUGIN} NAME) set(EXCLUDE FALSE) -- libgit2 0.21.4