Commit c868cd3ebd50494898ac6c8accf17aa823667a23
1 parent
be489c22
added basic documentation in the cuda plugins folder
Showing
1 changed file
with
117 additions
and
18 deletions
openbr/plugins/cuda/README.md
| 1 | 1 | # CUDA Plugins |
| 2 | -This folder contains CUDA-accelerated OpenBR plugins. They are structured in the following format. | |
| 2 | +CUDA plugins are very similar to normal plugins. A single plugin is split into | |
| 3 | +two files: the `.cpp` file with the BR standard plugin definition and the `.cu` | |
| 4 | +file with your kernel and wrapper functions. | |
| 3 | 5 | |
| 4 | -## File Structure | |
| 5 | -We will use a plugin called `CUDAPlugin` as an example. | |
| 6 | +## The `.cpp` file | |
| 7 | +Every main plugin file must have the names of the kernel wrapper functions | |
| 8 | +defined at the top of the program. Once the definitions are there, just call | |
| 9 | +the CUDA functions as you need them | |
| 6 | 10 | |
| 7 | -Each plugin has 3 files associated with it: a CUDA file, CPP file, and HPP header file. | |
| 11 | +## The `.cu` file | |
| 12 | +All functions within the CUDA file must be declared inside their own namespace | |
| 13 | +under `br::cuda`. For example the plugin `passthrough` must have all functions | |
| 14 | +inside it declared under the namespace `br::cuda::passthrough`. | |
| 15 | + | |
| 16 | +## CPU Template object format | |
| 17 | +Like any other BR Transform, the plugin must return an object for the next | |
| 18 | +plugin to consume. For performance reasons, we don't copy data to and from | |
| 19 | +the graphics card for every transform. Instead, we use this space to transfer | |
| 20 | +data about how to access the image data and its type. The Mat is an array of data type `void*`. | |
| 21 | + | |
| 22 | +Index | Item Name | Type | Description | |
| 23 | +--------|-------------|-----------|------------ | |
| 24 | +0 | GpuData | void* | Pointer to the graphics card data | |
| 25 | +1 | rows | int | Number of rows in the Mat | |
| 26 | +2 | cols | int | Number of colums in the Mat | |
| 27 | +3 | type | int | OpenCV mat data type code (i.e. `mat.type()`) | |
| 28 | + | |
| 29 | +It is expected that the wrapper function does the proper GPU memory handling | |
| 30 | +to make sure that the GpuData pointer in the output mat is pointing to the | |
| 31 | +data that the plugin is outputting. | |
| 32 | + | |
| 33 | +## Example: Passthrough | |
| 34 | +This example plugin takes in input data and passes it straight to the output. | |
| 35 | +The BR transform calls the wrapper function which exists in the CUDA file which | |
| 36 | +in turn calls the kernel routine to copy the data in the GPU. | |
| 37 | + | |
| 38 | +**Note**: This program assumes that a previous Transform, namely `CUDACopyTo` has | |
| 39 | +copied the data to the GPU. | |
| 40 | + | |
| 41 | +### **passthrough.cpp** | |
| 42 | +```c++ | |
| 43 | +#include <openbr/plugins/openbr_internal.h> | |
| 44 | +#include <opencv2/opencv.hpp> | |
| 45 | + | |
| 46 | +// wrapper function within the CUDA file | |
| 47 | +namespace br { namespace cuda { namespace passthrough { | |
| 48 | + void wrapper(void* srcGpuData, void** dstGpuData); | |
| 49 | +}}}; | |
| 50 | + | |
| 51 | +#include <iostream> | |
| 52 | +namespace br | |
| 53 | +{ | |
| 54 | + class CUDAPassthroughTransform : public UntrainableTransform | |
| 55 | + { | |
| 56 | + Q_OBJECT | |
| 57 | + | |
| 58 | + void project(const Template &src, Template &dst) { | |
| 59 | + // extract the parameters out of the Mat passed from the previous plugin | |
| 60 | + void* const* srcDataPtr = src.m().ptr<void*>(); | |
| 61 | + int rows = *((int*)srcDataPtr[1]); | |
| 62 | + int cols = *((int*)srcDataPtr[2]); | |
| 63 | + int type = *((int*)srcDataPtr[3]); | |
| 64 | + | |
| 65 | + // generate a new Mat to be passed to the next plugin | |
| 66 | + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); | |
| 67 | + void** dstDataPtr = dstMat.ptr<void*>(); | |
| 68 | + dstDataPtr[1] = srcDataPtr[1]; | |
| 69 | + dstDataPtr[2] = srcDataPtr[2]; | |
| 70 | + dstDataPtr[3] = srcDataPtr[3]; | |
| 71 | + | |
| 72 | + // call the wrapper and set the dst output to the newly created Mat | |
| 73 | + br::cuda::passthrough::wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); | |
| 74 | + dst = dstMat; | |
| 75 | + } | |
| 76 | + }; | |
| 77 | + | |
| 78 | + BR_REGISTER(Transform, CUDAPassthroughTransform); | |
| 79 | +} | |
| 80 | + | |
| 81 | +#include "cuda/passthrough.moc" | |
| 8 | 82 | ``` |
| 9 | -cudaplugin.cu | |
| 10 | -cudaplugin.cpp | |
| 11 | -cudaplugin.hpp | |
| 83 | + | |
| 84 | +### **passthrough.cu** | |
| 85 | +```c++ | |
| 86 | +#include <opencv2/opencv.hpp> | |
| 87 | + | |
| 88 | +namespace br { namespace cuda { namespace passthrough { | |
| 89 | + __global__ void kernel(char* srcPtr, char* dstPtr, int rows, int cols) { | |
| 90 | + // get the current index | |
| 91 | + int rowInd = blockIdx.y*blockDim.y+threadIdx.y; | |
| 92 | + int colInd = blockIdx.x*blockDim.x+threadIdx.x; | |
| 93 | + | |
| 94 | + // don't do anything if we are outside the allowable positions | |
| 95 | + if (rowInd >= rows || colInd >= cols) | |
| 96 | + return; | |
| 97 | + | |
| 98 | + // write the input to the output | |
| 99 | + rowDstPtr[rowInd*cols + colInd] = srcVal; | |
| 100 | + } | |
| 101 | + | |
| 102 | + void wrapper(char* srcPtr, char** dstPtr, int rows, int cols, int type) { | |
| 103 | + // verify the proper image type | |
| 104 | + if (type != CV_8UC1) { | |
| 105 | + cout << "Error: image type not supported" | |
| 106 | + return; | |
| 107 | + } | |
| 108 | + | |
| 109 | + *dstPtr = cudaMalloc(rows*cols*sizeof(char)); | |
| 110 | + | |
| 111 | + dim3 threadsPerBlock(8, 8); | |
| 112 | + dim3 numBlocks(imageWidth / threadsPerBlock.x + 1, | |
| 113 | + imageHeight / threadsPerBlock.y + 1); | |
| 114 | + | |
| 115 | + // run the kernel function | |
| 116 | + kernel<<<numBlocks, threadPerBlock>>>(srcPtr, dstPtr, rows, cols); | |
| 117 | + | |
| 118 | + // free the memory as it isn't used anymore | |
| 119 | + cudaFree(srcPtr); | |
| 120 | + } | |
| 121 | +}}} | |
| 12 | 122 | ``` |
| 13 | -The `.cu` file contains CUDA kernel functions and the corresponding wrapper functions | |
| 14 | -that directly call the kernel functions. The `.cpp` files contain the OpenBR | |
| 15 | -standard plugin declaration. Functions in this file call the wrappers. The `.hpp` | |
| 16 | -contains header declarations for the CUDA wrapper functions so the `.cpp` file | |
| 17 | -knows how to call them. | |
| 18 | - | |
| 19 | -# CUDA Files | |
| 20 | -All functions for a particular CUDA plugin are defined in a namespace of that | |
| 21 | -plugin's name which is defined within `br::cuda` namespace. For example, if | |
| 22 | -we have a plugin called CUDAPlugin, both wrapper and kernel functions should | |
| 23 | -be globally defined within `br::cuda::cudaplugin`. | ... | ... |