Commit c9a58b18b6db3f96d330f71fbbee507b108e637d

Authored by DepthDeluxe
1 parent 4e59b2b5

added L2 distance norm plugin

openbr/plugins/cuda/CUDAL2.cpp 0 → 100644
  1 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
  2 + * Copyright 2012 The MITRE Corporation *
  3 + * *
  4 + * Licensed under the Apache License, Version 2.0 (the "License"); *
  5 + * you may not use this file except in compliance with the License. *
  6 + * You may obtain a copy of the License at *
  7 + * *
  8 + * http://www.apache.org/licenses/LICENSE-2.0 *
  9 + * *
  10 + * Unless required by applicable law or agreed to in writing, software *
  11 + * distributed under the License is distributed on an "AS IS" BASIS, *
  12 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
  13 + * See the License for the specific language governing permissions and *
  14 + * limitations under the License. *
  15 + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
  16 +
  17 +#include <iostream>
  18 +using namespace std;
  19 +
  20 +#include <openbr/plugins/openbr_internal.h>
  21 +
  22 +namespace br { namespace cuda { namespace L2{
  23 + void wrapper(float* cudaAPtr, float* cudaBPtr, int length, float* outPtr);
  24 +}}}
  25 +
  26 +namespace br
  27 +{
  28 +
  29 +/*!
  30 + * \ingroup distances
  31 + * \brief L2 distance computed using eigen.
  32 + * \author Josh Klontz \cite jklontz
  33 + */
  34 +class CUDAL2Distance : public UntrainableDistance
  35 +{
  36 + Q_OBJECT
  37 +
  38 + float compare(const cv::Mat &a, const cv::Mat &b) const
  39 + {
  40 + void* const* srcDataPtr = a.ptr<void*>();
  41 + float* cudaAPtr = (float*)srcDataPtr[0];
  42 + int rows = *((int*)srcDataPtr[1]);
  43 + int cols = *((int*)srcDataPtr[2]);
  44 +
  45 + float* cudaBPtr = (float*)b.ptr<void*>()[0];
  46 +
  47 + float out;
  48 + br::cuda::L2::wrapper(cudaAPtr, cudaBPtr, rows*cols, &out);
  49 +
  50 + return out;
  51 + }
  52 +};
  53 +
  54 +BR_REGISTER(Distance, CUDAL2Distance)
  55 +
  56 +} // namespace br
  57 +
  58 +#include "cuda/CUDAL2.moc"
... ...
openbr/plugins/cuda/CUDAL2.cu 0 → 100644
  1 +#include <math.h>
  2 +
  3 +#include "cudadefines.hpp"
  4 +
  5 +namespace br { namespace cuda { namespace L2 {
  6 +
  7 + __global__ void my_subtract_kernel(float* aPtr, float* bPtr, float* workPtr, int length) {
  8 + int index = blockIdx.x*blockDim.x+threadIdx.x;
  9 +
  10 + if (index >= length) {
  11 + return;
  12 + }
  13 +
  14 + // perform the subtraction in-place
  15 + // use b because it is the comparison
  16 + // image
  17 + workPtr[index] = aPtr[index] - bPtr[index];
  18 + workPtr[index] = workPtr[index] * workPtr[index];
  19 + }
  20 +
  21 + __global__ void collapse_kernel(float* inPtr, float* outPtr, int length) {
  22 + // make sure there is only one thread that we are calling
  23 + if (blockIdx.x != 0 || threadIdx.x != 0) {
  24 + return;
  25 + }
  26 +
  27 + // sum up all the values
  28 + *outPtr = 0;
  29 + for (int i=0; i < length; i++) {
  30 + *outPtr = *outPtr + inPtr[i];
  31 + }
  32 +
  33 + // take the square root
  34 + *outPtr = sqrtf(*outPtr);
  35 + }
  36 +
  37 + void wrapper(float* cudaAPtr, float* cudaBPtr, int length, float* outPtr) {
  38 + cudaError_t err;
  39 + float* cudaOutPtr;
  40 + CUDA_SAFE_MALLOC(&cudaOutPtr, sizeof(float), &err);
  41 +
  42 + float* cudaWorkBufferPtr;
  43 + CUDA_SAFE_MALLOC(&cudaWorkBufferPtr, sizeof(float)*length, &err);
  44 +
  45 + // perform the subtraction
  46 + int threadsPerBlock = 64;
  47 + int numBlocks = length / threadsPerBlock + 1;
  48 + my_subtract_kernel<<<threadsPerBlock, numBlocks>>>(cudaAPtr, cudaBPtr, cudaWorkBufferPtr, length);
  49 + CUDA_KERNEL_ERR_CHK(&err);
  50 +
  51 + // perform the collapse
  52 + collapse_kernel<<<1,1>>>(cudaWorkBufferPtr, cudaOutPtr, length);
  53 + CUDA_KERNEL_ERR_CHK(&err);
  54 +
  55 + // copy the single value back to the destinsion
  56 + CUDA_SAFE_MEMCPY(outPtr, cudaOutPtr, sizeof(float), cudaMemcpyDeviceToHost, &err);
  57 +
  58 + CUDA_SAFE_FREE(cudaOutPtr, &err);
  59 +
  60 + // do not free aPtr which should be the reference library
  61 + // only free bPtr, which is the image we are comparing
  62 + CUDA_SAFE_FREE(cudaBPtr, &err);
  63 + CUDA_SAFE_FREE(cudaWorkBufferPtr, &err);
  64 + }
  65 +}}}
  66 +
  67 +// 128CUDAEigenfaces on 6400 ATT: 54.367s
  68 +// 128CUDAEigenfacesL2 on 6400 ATT:
... ...