From 59574f5a6d8400be03e80601e293f26b71b6072a Mon Sep 17 00:00:00 2001 From: Josh Klontz Date: Tue, 10 Mar 2015 10:23:16 -0400 Subject: [PATCH] added CustomSIFTTransform --- openbr/plugins/imgproc/custom_sift.cpp | 466 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 466 insertions(+), 0 deletions(-) create mode 100644 openbr/plugins/imgproc/custom_sift.cpp diff --git a/openbr/plugins/imgproc/custom_sift.cpp b/openbr/plugins/imgproc/custom_sift.cpp new file mode 100644 index 0000000..b508120 --- /dev/null +++ b/openbr/plugins/imgproc/custom_sift.cpp @@ -0,0 +1,466 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +/**********************************************************************************************\ + Implementation of SIFT is based on the code from http://blogs.oregonstate.edu/hess/code/sift/ + Below is the original copyright. + +// Copyright (c) 2006-2010, Rob Hess +// All rights reserved. + +// The following patent has been issued for methods embodied in this +// software: "Method and apparatus for identifying scale invariant features +// in an image and use of same for locating an object in an image," David +// G. Lowe, US Patent 6,711,293 (March 23, 2004). Provisional application +// filed March 8, 1999. Asignee: The University of British Columbia. For +// further details, contact David Lowe (lowe@cs.ubc.ca) or the +// University-Industry Liaison Office of the University of British +// Columbia. + +// Note that restrictions imposed by this patent (and possibly others) +// exist independently of and may be in conflict with the freedoms granted +// in this license, which refers to copyright of the program, not patents +// for any methods that it implements. Both copyright and patent law must +// be obeyed to legally use and redistribute this program and it is not the +// purpose of this license to induce you to infringe any patents or other +// property right claims or to contest validity of any such claims. If you +// redistribute or use the program, then this license merely protects you +// from committing copyright infringement. It does not protect you from +// committing patent infringement. So, before you do anything with this +// program, make sure that you have permission to do so not merely in terms +// of copyright, but also in terms of patent law. + +// Please note that this license is not to be understood as a guarantee +// either. If you use the program according to this license, but in +// conflict with patent law, it does not mean that the licensor will refund +// you for any losses that you incur if you are sued for your patent +// infringement. + +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright and +// patent notices, this list of conditions and the following +// disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Oregon State University nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. + +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +\**********************************************************************************************/ + +#include +#include +#include +#include + +using namespace cv; + +/******************************* Defs and macros *****************************/ + +// determines the size of a single descriptor orientation histogram +static const float SIFT_DESCR_SCL_FCTR = 3.f; + +// threshold on magnitude of elements of descriptor vector +static const float SIFT_DESCR_MAG_THR = 0.2f; + +// factor used to convert floating-point descriptor to unsigned char +static const float SIFT_INT_DESCR_FCTR = 512.f; + +// intermediate type used for DoG pyramids +typedef short sift_wt; +static const int SIFT_FIXPT_SCALE = 48; + +static inline void unpackOctave(const KeyPoint &kpt, int &octave, int &layer, float &scale) +{ + octave = kpt.octave & 255; + layer = (kpt.octave >> 8) & 255; + octave = octave < 128 ? octave : (-128 | octave); + scale = octave >= 0 ? 1.f/(1 << octave) : (float)(1 << -octave); +} + +static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma, float initSigma ) +{ + Mat gray, gray_fpt; + if( img.channels() == 3 || img.channels() == 4 ) + cvtColor(img, gray, COLOR_BGR2GRAY); + else + img.copyTo(gray); + gray.convertTo(gray_fpt, DataType::type, SIFT_FIXPT_SCALE, 0); + + float sig_diff; + + if( doubleImageSize ) + { + sig_diff = sqrtf( std::max(sigma * sigma - initSigma * initSigma * 4, 0.01f) ); + Mat dbl; + resize(gray_fpt, dbl, Size(gray.cols*2, gray.rows*2), 0, 0, INTER_LINEAR); + GaussianBlur(dbl, dbl, Size(), sig_diff, sig_diff); + return dbl; + } + else + { + sig_diff = sqrtf( std::max(sigma * sigma - initSigma * initSigma, 0.01f) ); + GaussianBlur(gray_fpt, gray_fpt, Size(), sig_diff, sig_diff); + return gray_fpt; + } +} + + +static void buildGaussianPyramid( const Mat& base, vector& pyr, int nOctaves, int nOctaveLayers, double sigma ) +{ + vector sig(nOctaveLayers + 3); + pyr.resize(nOctaves*(nOctaveLayers + 3)); + + // precompute Gaussian sigmas using the following formula: + // \sigma_{total}^2 = \sigma_{i}^2 + \sigma_{i-1}^2 + sig[0] = sigma; + double k = pow( 2., 1. / nOctaveLayers ); + for( int i = 1; i < nOctaveLayers + 3; i++ ) + { + double sig_prev = pow(k, (double)(i-1))*sigma; + double sig_total = sig_prev*k; + sig[i] = std::sqrt(sig_total*sig_total - sig_prev*sig_prev); + } + + for( int o = 0; o < nOctaves; o++ ) + { + for( int i = 0; i < nOctaveLayers + 3; i++ ) + { + Mat& dst = pyr[o*(nOctaveLayers + 3) + i]; + if( o == 0 && i == 0 ) + dst = base; + // base of new octave is halved image from end of previous octave + else if( i == 0 ) + { + const Mat& src = pyr[(o-1)*(nOctaveLayers + 3) + nOctaveLayers]; + resize(src, dst, Size(src.cols/2, src.rows/2), + 0, 0, INTER_NEAREST); + } + else + { + const Mat& src = pyr[o*(nOctaveLayers + 3) + i-1]; + GaussianBlur(src, dst, Size(), sig[i], sig[i]); + } + } + } +} + + +static void buildDoGPyramid( const vector& gpyr, vector& dogpyr, int nOctaveLayers ) +{ + int nOctaves = (int)gpyr.size()/(nOctaveLayers + 3); + dogpyr.resize( nOctaves*(nOctaveLayers + 2) ); + + for( int o = 0; o < nOctaves; o++ ) + { + for( int i = 0; i < nOctaveLayers + 2; i++ ) + { + const Mat& src1 = gpyr[o*(nOctaveLayers + 3) + i]; + const Mat& src2 = gpyr[o*(nOctaveLayers + 3) + i + 1]; + Mat& dst = dogpyr[o*(nOctaveLayers + 2) + i]; + subtract(src2, src1, dst, noArray(), DataType::type); + } + } +} + +static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float scl, + int d, int n, float* dst ) +{ + Point pt(cvRound(ptf.x), cvRound(ptf.y)); + float cos_t = cosf(ori*(float)(CV_PI/180)); + float sin_t = sinf(ori*(float)(CV_PI/180)); + float bins_per_rad = n / 360.f; + float exp_scale = -1.f/(d * d * 0.5f); + float hist_width = SIFT_DESCR_SCL_FCTR * scl; + int radius = cvRound(hist_width * 1.4142135623730951f * (d + 1) * 0.5f); + // Clip the radius to the diagonal of the image to avoid autobuffer too large exception + radius = std::min(radius, (int) sqrt((double) img.cols*img.cols + img.rows*img.rows)); + cos_t /= hist_width; + sin_t /= hist_width; + + int i, j, k, len = (radius*2+1)*(radius*2+1), histlen = (d+2)*(d+2)*(n+2); + int rows = img.rows, cols = img.cols; + + AutoBuffer buf(len*6 + histlen); + float *X = buf, *Y = X + len, *Mag = Y, *Ori = Mag + len, *W = Ori + len; + float *RBin = W + len, *CBin = RBin + len, *hist = CBin + len; + + for( i = 0; i < d+2; i++ ) + { + for( j = 0; j < d+2; j++ ) + for( k = 0; k < n+2; k++ ) + hist[(i*(d+2) + j)*(n+2) + k] = 0.; + } + + for( i = -radius, k = 0; i <= radius; i++ ) + for( j = -radius; j <= radius; j++ ) + { + // Calculate sample's histogram array coords rotated relative to ori. + // Subtract 0.5 so samples that fall e.g. in the center of row 1 (i.e. + // r_rot = 1.5) have full weight placed in row 1 after interpolation. + float c_rot = j * cos_t - i * sin_t; + float r_rot = j * sin_t + i * cos_t; + float rbin = r_rot + d/2 - 0.5f; + float cbin = c_rot + d/2 - 0.5f; + int r = pt.y + i, c = pt.x + j; + + if( rbin > -1 && rbin < d && cbin > -1 && cbin < d && + r > 0 && r < rows - 1 && c > 0 && c < cols - 1 ) + { + float dx = (float)(img.at(r, c+1) - img.at(r, c-1)); + float dy = (float)(img.at(r-1, c) - img.at(r+1, c)); + X[k] = dx; Y[k] = dy; RBin[k] = rbin; CBin[k] = cbin; + W[k] = (c_rot * c_rot + r_rot * r_rot)*exp_scale; + k++; + } + } + + len = k; + fastAtan2(Y, X, Ori, len, true); + magnitude(X, Y, Mag, len); + exp(W, W, len); + + for( k = 0; k < len; k++ ) + { + float rbin = RBin[k], cbin = CBin[k]; + float obin = (Ori[k] - ori)*bins_per_rad; + float mag = Mag[k]*W[k]; + + int r0 = cvFloor( rbin ); + int c0 = cvFloor( cbin ); + int o0 = cvFloor( obin ); + rbin -= r0; + cbin -= c0; + obin -= o0; + + if( o0 < 0 ) + o0 += n; + if( o0 >= n ) + o0 -= n; + + // histogram update using tri-linear interpolation + float v_r1 = mag*rbin, v_r0 = mag - v_r1; + float v_rc11 = v_r1*cbin, v_rc10 = v_r1 - v_rc11; + float v_rc01 = v_r0*cbin, v_rc00 = v_r0 - v_rc01; + float v_rco111 = v_rc11*obin, v_rco110 = v_rc11 - v_rco111; + float v_rco101 = v_rc10*obin, v_rco100 = v_rc10 - v_rco101; + float v_rco011 = v_rc01*obin, v_rco010 = v_rc01 - v_rco011; + float v_rco001 = v_rc00*obin, v_rco000 = v_rc00 - v_rco001; + + int idx = ((r0+1)*(d+2) + c0+1)*(n+2) + o0; + hist[idx] += v_rco000; + hist[idx+1] += v_rco001; + hist[idx+(n+2)] += v_rco010; + hist[idx+(n+3)] += v_rco011; + hist[idx+(d+2)*(n+2)] += v_rco100; + hist[idx+(d+2)*(n+2)+1] += v_rco101; + hist[idx+(d+3)*(n+2)] += v_rco110; + hist[idx+(d+3)*(n+2)+1] += v_rco111; + } + + // finalize histogram, since the orientation histograms are circular + for( i = 0; i < d; i++ ) + for( j = 0; j < d; j++ ) + { + int idx = ((i+1)*(d+2) + (j+1))*(n+2); + hist[idx] += hist[idx+n]; + hist[idx+1] += hist[idx+n+1]; + for( k = 0; k < n; k++ ) + dst[(i*d + j)*n + k] = hist[idx+k]; + } + // copy histogram to the descriptor, + // apply hysteresis thresholding + // and scale the result, so that it can be easily converted + // to byte array + float nrm2 = 0; + len = d*d*n; + for( k = 0; k < len; k++ ) + nrm2 += dst[k]*dst[k]; + float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR; + for( i = 0, nrm2 = 0; i < k; i++ ) + { + float val = std::min(dst[i], thr); + dst[i] = val; + nrm2 += val*val; + } + nrm2 = SIFT_INT_DESCR_FCTR/std::max(std::sqrt(nrm2), FLT_EPSILON); + +#if 1 + for( k = 0; k < len; k++ ) + { + dst[k] = saturate_cast(dst[k]*nrm2); + } +#else + float nrm1 = 0; + for( k = 0; k < len; k++ ) + { + dst[k] *= nrm2; + nrm1 += dst[k]; + } + nrm1 = 1.f/std::max(nrm1, FLT_EPSILON); + for( k = 0; k < len; k++ ) + { + dst[k] = std::sqrt(dst[k] * nrm1);//saturate_cast(std::sqrt(dst[k] * nrm1)*SIFT_INT_DESCR_FCTR); + } +#endif +} + +static void calcDescriptors(const vector& gpyr, const vector& keypoints, + Mat& descriptors, int nOctaveLayers, int firstOctave, int n /* bins */, int d /* width */) +{ + for( size_t i = 0; i < keypoints.size(); i++ ) + { + KeyPoint kpt = keypoints[i]; + int octave, layer; + float scale; + unpackOctave(kpt, octave, layer, scale); + CV_Assert(octave >= firstOctave && layer <= nOctaveLayers+2); + float size=kpt.size*scale; + Point2f ptf(kpt.pt.x*scale, kpt.pt.y*scale); + const Mat& img = gpyr[(octave - firstOctave)*(nOctaveLayers + 3) + layer]; + + float angle = 360.f - kpt.angle; + if(std::abs(angle - 360.f) < FLT_EPSILON) + angle = 0.f; + calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors.ptr((int)i)); + } +} + +static int descriptorSize(int bins, int width) +{ + return width*width*bins; +} + +static void extractSIFT(const Mat &image, vector &keypoints, Mat &descriptors, int nOctaveLayers, double sigma, int bins, int width, float initSigma) +{ + if( image.empty() || image.depth() != CV_8U ) + CV_Error( CV_StsBadArg, "image is empty or has incorrect depth (!=CV_8U)" ); + + int firstOctave = 0, actualNOctaves = 0, actualNLayers = 0, maxOctave = INT_MIN; + for (size_t i=0; i= -1 && actualNLayers <= nOctaveLayers ); + actualNOctaves = maxOctave - firstOctave + 1; + + const Mat base = createInitialImage(image, firstOctave < 0, (float)sigma, initSigma); + const int nOctaves = actualNOctaves > 0 ? actualNOctaves : cvRound(log( (double)std::min( base.cols, base.rows ) ) / log(2.) - 2) - firstOctave; + + vector gpyr, dogpyr; + buildGaussianPyramid(base, gpyr, nOctaves, nOctaveLayers, sigma); + buildDoGPyramid(gpyr, dogpyr, nOctaveLayers); + + descriptors.create((int)keypoints.size(), descriptorSize(bins, width), CV_32F); + calcDescriptors(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave, bins, width); +} + +#include "openbr/plugins/openbr_internal.h" + +namespace br +{ + +/*! + * \ingroup transforms + * \brief Specialize wrapper OpenCV SIFT wrapper + * \author Josh Klontz \cite jklontz + */ +class CustomSIFTTransform : public UntrainableTransform +{ + Q_OBJECT + Q_PROPERTY(int size READ get_size WRITE set_size RESET reset_size STORED false) + Q_PROPERTY(QList sizes READ get_sizes WRITE set_sizes RESET reset_sizes STORED false) + Q_PROPERTY(int bins READ get_bins WRITE set_bins RESET reset_bins STORED false) + Q_PROPERTY(int width READ get_width WRITE set_width RESET reset_width STORED false) + Q_PROPERTY(float initSigma READ get_initSigma WRITE set_initSigma RESET reset_initSigma STORED false) + BR_PROPERTY(int, size, 1) + BR_PROPERTY(QList, sizes, QList()) + BR_PROPERTY(int, bins, 8) + BR_PROPERTY(int, width, 4) + BR_PROPERTY(float, initSigma, 0.5f) + + void init() + { + if (sizes.empty()) + sizes.append(size); + } + + void project(const Template &src, Template &dst) const + { + std::vector keyPoints; + foreach (const QPointF &val, src.file.points()) + foreach (const int sz, sizes) + keyPoints.push_back(KeyPoint(val.x(), val.y(), sz)); + + Mat m; + extractSIFT(src, keyPoints, m, 3, 1.6f, bins, width, initSigma); + m.setTo(0, m<0); // SIFT returns large negative values when it goes off the edge of the image. + dst += m; + } +}; + +BR_REGISTER(Transform, CustomSIFTTransform) + +} + +#include "custom_sift.moc" -- libgit2 0.21.4