Commit 59574f5a6d8400be03e80601e293f26b71b6072a

Authored by Josh Klontz
1 parent a469e14c

added CustomSIFTTransform

openbr/plugins/imgproc/custom_sift.cpp 0 → 100644
  1 +/*M///////////////////////////////////////////////////////////////////////////////////////
  2 +//
  3 +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4 +//
  5 +// By downloading, copying, installing or using the software you agree to this license.
  6 +// If you do not agree to this license, do not download, install,
  7 +// copy or use the software.
  8 +//
  9 +//
  10 +// License Agreement
  11 +// For Open Source Computer Vision Library
  12 +//
  13 +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 +// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 +// Third party copyrights are property of their respective owners.
  16 +//
  17 +// Redistribution and use in source and binary forms, with or without modification,
  18 +// are permitted provided that the following conditions are met:
  19 +//
  20 +// * Redistribution's of source code must retain the above copyright notice,
  21 +// this list of conditions and the following disclaimer.
  22 +//
  23 +// * Redistribution's in binary form must reproduce the above copyright notice,
  24 +// this list of conditions and the following disclaimer in the documentation
  25 +// and/or other materials provided with the distribution.
  26 +//
  27 +// * The name of the copyright holders may not be used to endorse or promote products
  28 +// derived from this software without specific prior written permission.
  29 +//
  30 +// This software is provided by the copyright holders and contributors "as is" and
  31 +// any express or implied warranties, including, but not limited to, the implied
  32 +// warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 +// In no event shall the Intel Corporation or contributors be liable for any direct,
  34 +// indirect, incidental, special, exemplary, or consequential damages
  35 +// (including, but not limited to, procurement of substitute goods or services;
  36 +// loss of use, data, or profits; or business interruption) however caused
  37 +// and on any theory of liability, whether in contract, strict liability,
  38 +// or tort (including negligence or otherwise) arising in any way out of
  39 +// the use of this software, even if advised of the possibility of such damage.
  40 +//
  41 +//M*/
  42 +
  43 +/**********************************************************************************************\
  44 + Implementation of SIFT is based on the code from http://blogs.oregonstate.edu/hess/code/sift/
  45 + Below is the original copyright.
  46 +
  47 +// Copyright (c) 2006-2010, Rob Hess <hess@eecs.oregonstate.edu>
  48 +// All rights reserved.
  49 +
  50 +// The following patent has been issued for methods embodied in this
  51 +// software: "Method and apparatus for identifying scale invariant features
  52 +// in an image and use of same for locating an object in an image," David
  53 +// G. Lowe, US Patent 6,711,293 (March 23, 2004). Provisional application
  54 +// filed March 8, 1999. Asignee: The University of British Columbia. For
  55 +// further details, contact David Lowe (lowe@cs.ubc.ca) or the
  56 +// University-Industry Liaison Office of the University of British
  57 +// Columbia.
  58 +
  59 +// Note that restrictions imposed by this patent (and possibly others)
  60 +// exist independently of and may be in conflict with the freedoms granted
  61 +// in this license, which refers to copyright of the program, not patents
  62 +// for any methods that it implements. Both copyright and patent law must
  63 +// be obeyed to legally use and redistribute this program and it is not the
  64 +// purpose of this license to induce you to infringe any patents or other
  65 +// property right claims or to contest validity of any such claims. If you
  66 +// redistribute or use the program, then this license merely protects you
  67 +// from committing copyright infringement. It does not protect you from
  68 +// committing patent infringement. So, before you do anything with this
  69 +// program, make sure that you have permission to do so not merely in terms
  70 +// of copyright, but also in terms of patent law.
  71 +
  72 +// Please note that this license is not to be understood as a guarantee
  73 +// either. If you use the program according to this license, but in
  74 +// conflict with patent law, it does not mean that the licensor will refund
  75 +// you for any losses that you incur if you are sued for your patent
  76 +// infringement.
  77 +
  78 +// Redistribution and use in source and binary forms, with or without
  79 +// modification, are permitted provided that the following conditions are
  80 +// met:
  81 +// * Redistributions of source code must retain the above copyright and
  82 +// patent notices, this list of conditions and the following
  83 +// disclaimer.
  84 +// * Redistributions in binary form must reproduce the above copyright
  85 +// notice, this list of conditions and the following disclaimer in
  86 +// the documentation and/or other materials provided with the
  87 +// distribution.
  88 +// * Neither the name of Oregon State University nor the names of its
  89 +// contributors may be used to endorse or promote products derived
  90 +// from this software without specific prior written permission.
  91 +
  92 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  93 +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  94 +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  95 +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  96 +// HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  97 +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  98 +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  99 +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  100 +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  101 +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  102 +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  103 +\**********************************************************************************************/
  104 +
  105 +#include <iostream>
  106 +#include <stdarg.h>
  107 +#include <opencv2/imgproc/imgproc.hpp>
  108 +#include <opencv2/nonfree/features2d.hpp>
  109 +
  110 +using namespace cv;
  111 +
  112 +/******************************* Defs and macros *****************************/
  113 +
  114 +// determines the size of a single descriptor orientation histogram
  115 +static const float SIFT_DESCR_SCL_FCTR = 3.f;
  116 +
  117 +// threshold on magnitude of elements of descriptor vector
  118 +static const float SIFT_DESCR_MAG_THR = 0.2f;
  119 +
  120 +// factor used to convert floating-point descriptor to unsigned char
  121 +static const float SIFT_INT_DESCR_FCTR = 512.f;
  122 +
  123 +// intermediate type used for DoG pyramids
  124 +typedef short sift_wt;
  125 +static const int SIFT_FIXPT_SCALE = 48;
  126 +
  127 +static inline void unpackOctave(const KeyPoint &kpt, int &octave, int &layer, float &scale)
  128 +{
  129 + octave = kpt.octave & 255;
  130 + layer = (kpt.octave >> 8) & 255;
  131 + octave = octave < 128 ? octave : (-128 | octave);
  132 + scale = octave >= 0 ? 1.f/(1 << octave) : (float)(1 << -octave);
  133 +}
  134 +
  135 +static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma, float initSigma )
  136 +{
  137 + Mat gray, gray_fpt;
  138 + if( img.channels() == 3 || img.channels() == 4 )
  139 + cvtColor(img, gray, COLOR_BGR2GRAY);
  140 + else
  141 + img.copyTo(gray);
  142 + gray.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);
  143 +
  144 + float sig_diff;
  145 +
  146 + if( doubleImageSize )
  147 + {
  148 + sig_diff = sqrtf( std::max(sigma * sigma - initSigma * initSigma * 4, 0.01f) );
  149 + Mat dbl;
  150 + resize(gray_fpt, dbl, Size(gray.cols*2, gray.rows*2), 0, 0, INTER_LINEAR);
  151 + GaussianBlur(dbl, dbl, Size(), sig_diff, sig_diff);
  152 + return dbl;
  153 + }
  154 + else
  155 + {
  156 + sig_diff = sqrtf( std::max(sigma * sigma - initSigma * initSigma, 0.01f) );
  157 + GaussianBlur(gray_fpt, gray_fpt, Size(), sig_diff, sig_diff);
  158 + return gray_fpt;
  159 + }
  160 +}
  161 +
  162 +
  163 +static void buildGaussianPyramid( const Mat& base, vector<Mat>& pyr, int nOctaves, int nOctaveLayers, double sigma )
  164 +{
  165 + vector<double> sig(nOctaveLayers + 3);
  166 + pyr.resize(nOctaves*(nOctaveLayers + 3));
  167 +
  168 + // precompute Gaussian sigmas using the following formula:
  169 + // \sigma_{total}^2 = \sigma_{i}^2 + \sigma_{i-1}^2
  170 + sig[0] = sigma;
  171 + double k = pow( 2., 1. / nOctaveLayers );
  172 + for( int i = 1; i < nOctaveLayers + 3; i++ )
  173 + {
  174 + double sig_prev = pow(k, (double)(i-1))*sigma;
  175 + double sig_total = sig_prev*k;
  176 + sig[i] = std::sqrt(sig_total*sig_total - sig_prev*sig_prev);
  177 + }
  178 +
  179 + for( int o = 0; o < nOctaves; o++ )
  180 + {
  181 + for( int i = 0; i < nOctaveLayers + 3; i++ )
  182 + {
  183 + Mat& dst = pyr[o*(nOctaveLayers + 3) + i];
  184 + if( o == 0 && i == 0 )
  185 + dst = base;
  186 + // base of new octave is halved image from end of previous octave
  187 + else if( i == 0 )
  188 + {
  189 + const Mat& src = pyr[(o-1)*(nOctaveLayers + 3) + nOctaveLayers];
  190 + resize(src, dst, Size(src.cols/2, src.rows/2),
  191 + 0, 0, INTER_NEAREST);
  192 + }
  193 + else
  194 + {
  195 + const Mat& src = pyr[o*(nOctaveLayers + 3) + i-1];
  196 + GaussianBlur(src, dst, Size(), sig[i], sig[i]);
  197 + }
  198 + }
  199 + }
  200 +}
  201 +
  202 +
  203 +static void buildDoGPyramid( const vector<Mat>& gpyr, vector<Mat>& dogpyr, int nOctaveLayers )
  204 +{
  205 + int nOctaves = (int)gpyr.size()/(nOctaveLayers + 3);
  206 + dogpyr.resize( nOctaves*(nOctaveLayers + 2) );
  207 +
  208 + for( int o = 0; o < nOctaves; o++ )
  209 + {
  210 + for( int i = 0; i < nOctaveLayers + 2; i++ )
  211 + {
  212 + const Mat& src1 = gpyr[o*(nOctaveLayers + 3) + i];
  213 + const Mat& src2 = gpyr[o*(nOctaveLayers + 3) + i + 1];
  214 + Mat& dst = dogpyr[o*(nOctaveLayers + 2) + i];
  215 + subtract(src2, src1, dst, noArray(), DataType<sift_wt>::type);
  216 + }
  217 + }
  218 +}
  219 +
  220 +static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float scl,
  221 + int d, int n, float* dst )
  222 +{
  223 + Point pt(cvRound(ptf.x), cvRound(ptf.y));
  224 + float cos_t = cosf(ori*(float)(CV_PI/180));
  225 + float sin_t = sinf(ori*(float)(CV_PI/180));
  226 + float bins_per_rad = n / 360.f;
  227 + float exp_scale = -1.f/(d * d * 0.5f);
  228 + float hist_width = SIFT_DESCR_SCL_FCTR * scl;
  229 + int radius = cvRound(hist_width * 1.4142135623730951f * (d + 1) * 0.5f);
  230 + // Clip the radius to the diagonal of the image to avoid autobuffer too large exception
  231 + radius = std::min(radius, (int) sqrt((double) img.cols*img.cols + img.rows*img.rows));
  232 + cos_t /= hist_width;
  233 + sin_t /= hist_width;
  234 +
  235 + int i, j, k, len = (radius*2+1)*(radius*2+1), histlen = (d+2)*(d+2)*(n+2);
  236 + int rows = img.rows, cols = img.cols;
  237 +
  238 + AutoBuffer<float> buf(len*6 + histlen);
  239 + float *X = buf, *Y = X + len, *Mag = Y, *Ori = Mag + len, *W = Ori + len;
  240 + float *RBin = W + len, *CBin = RBin + len, *hist = CBin + len;
  241 +
  242 + for( i = 0; i < d+2; i++ )
  243 + {
  244 + for( j = 0; j < d+2; j++ )
  245 + for( k = 0; k < n+2; k++ )
  246 + hist[(i*(d+2) + j)*(n+2) + k] = 0.;
  247 + }
  248 +
  249 + for( i = -radius, k = 0; i <= radius; i++ )
  250 + for( j = -radius; j <= radius; j++ )
  251 + {
  252 + // Calculate sample's histogram array coords rotated relative to ori.
  253 + // Subtract 0.5 so samples that fall e.g. in the center of row 1 (i.e.
  254 + // r_rot = 1.5) have full weight placed in row 1 after interpolation.
  255 + float c_rot = j * cos_t - i * sin_t;
  256 + float r_rot = j * sin_t + i * cos_t;
  257 + float rbin = r_rot + d/2 - 0.5f;
  258 + float cbin = c_rot + d/2 - 0.5f;
  259 + int r = pt.y + i, c = pt.x + j;
  260 +
  261 + if( rbin > -1 && rbin < d && cbin > -1 && cbin < d &&
  262 + r > 0 && r < rows - 1 && c > 0 && c < cols - 1 )
  263 + {
  264 + float dx = (float)(img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1));
  265 + float dy = (float)(img.at<sift_wt>(r-1, c) - img.at<sift_wt>(r+1, c));
  266 + X[k] = dx; Y[k] = dy; RBin[k] = rbin; CBin[k] = cbin;
  267 + W[k] = (c_rot * c_rot + r_rot * r_rot)*exp_scale;
  268 + k++;
  269 + }
  270 + }
  271 +
  272 + len = k;
  273 + fastAtan2(Y, X, Ori, len, true);
  274 + magnitude(X, Y, Mag, len);
  275 + exp(W, W, len);
  276 +
  277 + for( k = 0; k < len; k++ )
  278 + {
  279 + float rbin = RBin[k], cbin = CBin[k];
  280 + float obin = (Ori[k] - ori)*bins_per_rad;
  281 + float mag = Mag[k]*W[k];
  282 +
  283 + int r0 = cvFloor( rbin );
  284 + int c0 = cvFloor( cbin );
  285 + int o0 = cvFloor( obin );
  286 + rbin -= r0;
  287 + cbin -= c0;
  288 + obin -= o0;
  289 +
  290 + if( o0 < 0 )
  291 + o0 += n;
  292 + if( o0 >= n )
  293 + o0 -= n;
  294 +
  295 + // histogram update using tri-linear interpolation
  296 + float v_r1 = mag*rbin, v_r0 = mag - v_r1;
  297 + float v_rc11 = v_r1*cbin, v_rc10 = v_r1 - v_rc11;
  298 + float v_rc01 = v_r0*cbin, v_rc00 = v_r0 - v_rc01;
  299 + float v_rco111 = v_rc11*obin, v_rco110 = v_rc11 - v_rco111;
  300 + float v_rco101 = v_rc10*obin, v_rco100 = v_rc10 - v_rco101;
  301 + float v_rco011 = v_rc01*obin, v_rco010 = v_rc01 - v_rco011;
  302 + float v_rco001 = v_rc00*obin, v_rco000 = v_rc00 - v_rco001;
  303 +
  304 + int idx = ((r0+1)*(d+2) + c0+1)*(n+2) + o0;
  305 + hist[idx] += v_rco000;
  306 + hist[idx+1] += v_rco001;
  307 + hist[idx+(n+2)] += v_rco010;
  308 + hist[idx+(n+3)] += v_rco011;
  309 + hist[idx+(d+2)*(n+2)] += v_rco100;
  310 + hist[idx+(d+2)*(n+2)+1] += v_rco101;
  311 + hist[idx+(d+3)*(n+2)] += v_rco110;
  312 + hist[idx+(d+3)*(n+2)+1] += v_rco111;
  313 + }
  314 +
  315 + // finalize histogram, since the orientation histograms are circular
  316 + for( i = 0; i < d; i++ )
  317 + for( j = 0; j < d; j++ )
  318 + {
  319 + int idx = ((i+1)*(d+2) + (j+1))*(n+2);
  320 + hist[idx] += hist[idx+n];
  321 + hist[idx+1] += hist[idx+n+1];
  322 + for( k = 0; k < n; k++ )
  323 + dst[(i*d + j)*n + k] = hist[idx+k];
  324 + }
  325 + // copy histogram to the descriptor,
  326 + // apply hysteresis thresholding
  327 + // and scale the result, so that it can be easily converted
  328 + // to byte array
  329 + float nrm2 = 0;
  330 + len = d*d*n;
  331 + for( k = 0; k < len; k++ )
  332 + nrm2 += dst[k]*dst[k];
  333 + float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR;
  334 + for( i = 0, nrm2 = 0; i < k; i++ )
  335 + {
  336 + float val = std::min(dst[i], thr);
  337 + dst[i] = val;
  338 + nrm2 += val*val;
  339 + }
  340 + nrm2 = SIFT_INT_DESCR_FCTR/std::max(std::sqrt(nrm2), FLT_EPSILON);
  341 +
  342 +#if 1
  343 + for( k = 0; k < len; k++ )
  344 + {
  345 + dst[k] = saturate_cast<uchar>(dst[k]*nrm2);
  346 + }
  347 +#else
  348 + float nrm1 = 0;
  349 + for( k = 0; k < len; k++ )
  350 + {
  351 + dst[k] *= nrm2;
  352 + nrm1 += dst[k];
  353 + }
  354 + nrm1 = 1.f/std::max(nrm1, FLT_EPSILON);
  355 + for( k = 0; k < len; k++ )
  356 + {
  357 + dst[k] = std::sqrt(dst[k] * nrm1);//saturate_cast<uchar>(std::sqrt(dst[k] * nrm1)*SIFT_INT_DESCR_FCTR);
  358 + }
  359 +#endif
  360 +}
  361 +
  362 +static void calcDescriptors(const vector<Mat>& gpyr, const vector<KeyPoint>& keypoints,
  363 + Mat& descriptors, int nOctaveLayers, int firstOctave, int n /* bins */, int d /* width */)
  364 +{
  365 + for( size_t i = 0; i < keypoints.size(); i++ )
  366 + {
  367 + KeyPoint kpt = keypoints[i];
  368 + int octave, layer;
  369 + float scale;
  370 + unpackOctave(kpt, octave, layer, scale);
  371 + CV_Assert(octave >= firstOctave && layer <= nOctaveLayers+2);
  372 + float size=kpt.size*scale;
  373 + Point2f ptf(kpt.pt.x*scale, kpt.pt.y*scale);
  374 + const Mat& img = gpyr[(octave - firstOctave)*(nOctaveLayers + 3) + layer];
  375 +
  376 + float angle = 360.f - kpt.angle;
  377 + if(std::abs(angle - 360.f) < FLT_EPSILON)
  378 + angle = 0.f;
  379 + calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors.ptr<float>((int)i));
  380 + }
  381 +}
  382 +
  383 +static int descriptorSize(int bins, int width)
  384 +{
  385 + return width*width*bins;
  386 +}
  387 +
  388 +static void extractSIFT(const Mat &image, vector<KeyPoint> &keypoints, Mat &descriptors, int nOctaveLayers, double sigma, int bins, int width, float initSigma)
  389 +{
  390 + if( image.empty() || image.depth() != CV_8U )
  391 + CV_Error( CV_StsBadArg, "image is empty or has incorrect depth (!=CV_8U)" );
  392 +
  393 + int firstOctave = 0, actualNOctaves = 0, actualNLayers = 0, maxOctave = INT_MIN;
  394 + for (size_t i=0; i<keypoints.size(); i++) {
  395 + int octave, layer;
  396 + float scale;
  397 + unpackOctave(keypoints[i], octave, layer, scale);
  398 + firstOctave = std::min(firstOctave, octave);
  399 + maxOctave = std::max(maxOctave, octave);
  400 + actualNLayers = std::max(actualNLayers, layer-2);
  401 + }
  402 +
  403 + firstOctave = std::min(firstOctave, 0);
  404 + CV_Assert( firstOctave >= -1 && actualNLayers <= nOctaveLayers );
  405 + actualNOctaves = maxOctave - firstOctave + 1;
  406 +
  407 + const Mat base = createInitialImage(image, firstOctave < 0, (float)sigma, initSigma);
  408 + const int nOctaves = actualNOctaves > 0 ? actualNOctaves : cvRound(log( (double)std::min( base.cols, base.rows ) ) / log(2.) - 2) - firstOctave;
  409 +
  410 + vector<Mat> gpyr, dogpyr;
  411 + buildGaussianPyramid(base, gpyr, nOctaves, nOctaveLayers, sigma);
  412 + buildDoGPyramid(gpyr, dogpyr, nOctaveLayers);
  413 +
  414 + descriptors.create((int)keypoints.size(), descriptorSize(bins, width), CV_32F);
  415 + calcDescriptors(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave, bins, width);
  416 +}
  417 +
  418 +#include "openbr/plugins/openbr_internal.h"
  419 +
  420 +namespace br
  421 +{
  422 +
  423 +/*!
  424 + * \ingroup transforms
  425 + * \brief Specialize wrapper OpenCV SIFT wrapper
  426 + * \author Josh Klontz \cite jklontz
  427 + */
  428 +class CustomSIFTTransform : public UntrainableTransform
  429 +{
  430 + Q_OBJECT
  431 + Q_PROPERTY(int size READ get_size WRITE set_size RESET reset_size STORED false)
  432 + Q_PROPERTY(QList<int> sizes READ get_sizes WRITE set_sizes RESET reset_sizes STORED false)
  433 + Q_PROPERTY(int bins READ get_bins WRITE set_bins RESET reset_bins STORED false)
  434 + Q_PROPERTY(int width READ get_width WRITE set_width RESET reset_width STORED false)
  435 + Q_PROPERTY(float initSigma READ get_initSigma WRITE set_initSigma RESET reset_initSigma STORED false)
  436 + BR_PROPERTY(int, size, 1)
  437 + BR_PROPERTY(QList<int>, sizes, QList<int>())
  438 + BR_PROPERTY(int, bins, 8)
  439 + BR_PROPERTY(int, width, 4)
  440 + BR_PROPERTY(float, initSigma, 0.5f)
  441 +
  442 + void init()
  443 + {
  444 + if (sizes.empty())
  445 + sizes.append(size);
  446 + }
  447 +
  448 + void project(const Template &src, Template &dst) const
  449 + {
  450 + std::vector<KeyPoint> keyPoints;
  451 + foreach (const QPointF &val, src.file.points())
  452 + foreach (const int sz, sizes)
  453 + keyPoints.push_back(KeyPoint(val.x(), val.y(), sz));
  454 +
  455 + Mat m;
  456 + extractSIFT(src, keyPoints, m, 3, 1.6f, bins, width, initSigma);
  457 + m.setTo(0, m<0); // SIFT returns large negative values when it goes off the edge of the image.
  458 + dst += m;
  459 + }
  460 +};
  461 +
  462 +BR_REGISTER(Transform, CustomSIFTTransform)
  463 +
  464 +}
  465 +
  466 +#include "custom_sift.moc"