// hat.cpp: Histogram Array Transform descriptors // // Rob Hess' opensift implementation was used a reference: // http://blogs.oregonstate.edu/hess/code/sift // // Copyright (C) 2005-2013, Stephen Milborrow #include "stasm.h" namespace stasm { static const int GRIDHEIGHT = 4; // 4 x 5 grid of histograms in descriptor static const int GRIDWIDTH = 5; static const int BINS_PER_HIST = 8; // 8 gives a 45 degree range per bin static const double WINDOW_SIGMA = .5; // gaussian window as frac of patch width // .5 implies patch border downweight is .368 static const double FINAL_SCALE = 10; // arb but 10 is good for %g printing of descriptors // Get gradient magnitude and orientation of pixels in given img. // We use a [1,-1] convolution mask rather than [1,0,-1] because it gives as good // Stasm results and doesn't "waste" pixels on the left and top image boundary. // Before scaling by bins_per_degree, orientations are from 0 to 359.99... // degrees, with 0 being due east, and anticlockwise increasing. static void InitGradMagAndOrientMats( MAT& magmat, // out: grad mag mat MAT& orientmat, // out: grad ori mat const Image& img) // in: ROI scaled to current pyramid level { const int nrows = img.rows, nrows1 = img.rows-1; const int ncols = img.cols, ncols1 = img.cols-1; const double bins_per_degree = BINS_PER_HIST / 360.; magmat.create(nrows, ncols); orientmat.create(nrows, ncols); for (int y = 0; y < nrows1; y++) { const byte* const buf = (byte*)(img.data) + y * ncols; const byte* const buf_x1 = (byte*)(img.data) + y * ncols + 1; const byte* const buf_y1 = (byte*)(img.data) + (y+1) * ncols; double* const magbuf = Buf(magmat) + y * ncols; double* const orientbuf = Buf(orientmat) + y * ncols; for (int x = 0; x < ncols1; x++) { const byte pixel = buf[x]; const double xdelta = buf_x1[x] - pixel; const double ydelta = buf_y1[x] - pixel; magbuf[x] = sqrt(SQ(xdelta) + SQ(ydelta)); double orient = RadsToDegrees(atan2(ydelta, xdelta)); // -180 <= orient < 180 if (orient < 0) orient += 360; // 0 <= orient < 360 orientbuf[x] = orient * bins_per_degree; // 0 <= orient < BINS_PER_HIST } } // fill bottom and right edges magmat.row(nrows1) = 0; magmat.col(ncols1) = 0; orientmat.row(nrows1) = 0; orientmat.col(ncols1) = 0; } // Init the indices which map a patch row,col to the corresponding // histogram grid row,col. The mapping depends only on the image // patchwidth and the histogram GRIDHEIGHT and WIDTH. // // The first pixel in the image patch maps to histogram grid x coord -0.5. // Therefore after TrilinearAccumulate, the pixel will be equally smeared // across histogram bin -1 and histogram bin 0. The histogram row indices // for this pixel are irow=-1 row_frac=0.5. static inline void InitIndices( vec_int& row_indices, // out vec_double& row_fracs, // out vec_int& col_indices, // out vec_double& col_fracs, // out vec_double& pixelweights, // out const int patchwidth) // in: in pixels { CV_Assert(patchwidth % 2 == 1); // patchwidth must be odd in this implementation const int npix = SQ(patchwidth); // number of pixels in image patch row_indices.resize(npix); row_fracs.resize(npix); col_indices.resize(npix); col_fracs.resize(npix); pixelweights.resize(npix); const int halfpatchwidth = (patchwidth-1) / 2; const double grid_rows_per_img_row = GRIDHEIGHT / (patchwidth-1.); const double row_offset = GRIDHEIGHT / 2. - .5; // see header comment const double grid_cols_per_img_col = GRIDWIDTH / (patchwidth-1.); const double col_offset = GRIDWIDTH / 2. - .5; // downweight at border of patch is exp(-1 / (2 * WINDOW_SIGMA)) const double weight = -1 / (WINDOW_SIGMA * GRIDHEIGHT * GRIDWIDTH ); int ipix = 0; for (double patchrow = -halfpatchwidth; patchrow <= halfpatchwidth; patchrow++) { const double signed_row = patchrow * grid_rows_per_img_row; const double row = signed_row + row_offset; const int irow = int(floor(row)); const double row_frac = row - irow; CV_DbgAssert(row >= -.5 && row <= GRIDHEIGHT - .5); // same applies to col below for (double patchcol = -halfpatchwidth; patchcol <= halfpatchwidth; patchcol++) { row_indices[ipix] = irow; row_fracs[ipix] = row_frac; const double signed_col = patchcol * grid_cols_per_img_col; const double col = signed_col + col_offset; const int icol = int(floor(col)); col_indices[ipix] = icol; col_fracs[ipix] = col - icol; pixelweights[ipix] = // TODO this weights col and row offsets equally exp(weight * (SQ(signed_row) + SQ(signed_col))); ipix++; } } } // Init the data that doesn't change unless the image, patch width, or // GRIDHEIGHT or WIDTH changes (i.e. for Stasm this must be called // once per pyramid lev). void Hat::Init_( const Image& img, // in: image scaled to current pyramid level const int patchwidth) // in: patch will be patchwidth x patchwidth pixels { patchwidth_ = patchwidth; InitGradMagAndOrientMats(magmat_, orientmat_, img); InitIndices(row_indices_, row_fracs_, col_indices_, col_fracs_, pixelweights_, patchwidth_); } // Calculate the image patch gradient mags and orients. // Note that the mag for a pixel out of the image boundaries is set // to 0 and thus contributes nothing later in TrilinearAccumulate. static void GetMagsAndOrients_GeneralCase( vec_double& mags, // out vec_double& orients, // out const int ix, // in: x coord of center of patch const int iy, // in: y coord of center of patch const int patchwidth, // in const MAT& magmat, // in const MAT& orientmat, // in const vec_double& pixelweights) // in { const int halfpatchwidth = (patchwidth-1) / 2; int ipix = 0; for (int x = iy - halfpatchwidth; x <= iy + halfpatchwidth; x++) { const double* const magbuf = Buf(magmat) + x * magmat.cols; const double* const orientbuf = Buf(orientmat) + x * orientmat.cols; for (int y = ix - halfpatchwidth; y <= ix + halfpatchwidth; y++) { if (x < 0 || x >= magmat.rows || y < 0 || y >= magmat.cols) { mags[ipix] = 0; // off image orients[ipix] = 0; } else // in image { mags[ipix] = pixelweights[ipix] * magbuf[y]; orients[ipix] = orientbuf[y]; } ipix++; } } CV_DbgAssert(ipix == NSIZE(mags)); } // Calculate the image patch gradient mags and orients for // an image patch that is entirely in the image boundaries. static inline void GetMagsAndOrients_AllInImg( vec_double& mags, // out vec_double& orients, // out const int ix, // in: x coord of center of patch const int iy, // in: y coord of center of patch const int patchwidth, // in const MAT& magmat, // in const MAT& orientmat, // in const vec_double& pixelweights) // in { const int halfpatchwidth = (patchwidth-1) / 2; int ipix = 0; for (int x = iy - halfpatchwidth; x <= iy + halfpatchwidth; x++) { const double* const magbuf = Buf(magmat) + x * magmat.cols; const double* const orientbuf = Buf(orientmat) + x * orientmat.cols; for (int y = ix - halfpatchwidth; y <= ix + halfpatchwidth; y++) { mags[ipix] = pixelweights[ipix] * magbuf[y]; orients[ipix] = orientbuf[y]; ipix++; } } CV_DbgAssert(ipix == NSIZE(mags)); } void GetMagsAndOrients( // get mags and orients for patch at ix,iy vec_double& mags, // out vec_double& orients, // out const int ix, // in: x coord of center of patch (may be off image) const int iy, // in: y coord of center of patch (may be off image) const int patchwidth, // in: in pixels const MAT& magmat, // in const MAT& orientmat, // in const vec_double& pixelweights) // in { CV_Assert(patchwidth % 2 == 1); // patchwidth must be odd in this implementation const int npix = SQ(patchwidth); // number of pixels in image patch const int halfpatchwidth = (patchwidth-1) / 2; mags.resize(npix); orients.resize(npix); if (ix - halfpatchwidth < 0 || ix + halfpatchwidth >= magmat.cols || iy - halfpatchwidth < 0 || iy + halfpatchwidth >= magmat.rows) { // Part or all of the patch is out the image area. GetMagsAndOrients_GeneralCase(mags, orients, ix, iy, patchwidth, magmat, orientmat, pixelweights); } else { // Patch is entirely in the image area. The following function returns // results identical to GetMagsAndOrients_GeneralCase, but is faster // because it doesn't have to worry about the edges of the image. GetMagsAndOrients_AllInImg(mags, orients, ix, iy, patchwidth, magmat, orientmat, pixelweights); } } // Apportion the gradient magnitude of a pixel across 8 orientation bins. // "Accumulate" is in the func name because we "+=" the interpolated values. // This routine needs to be fast. static inline void TrilinearAccumulate( double& b000, double& b001, // io: histogram bins double& b010, double& b011, // io double& b100, double& b101, // io double& b110, double& b111, // io const double mag, // in: the mag that gets apportioned const double rowfrac, // in const double colfrac, // in const double orientfrac) // in { const double a1 = mag * rowfrac, a0 = mag - a1, a11 = a1 * colfrac, a10 = a1 - a11, a01 = a0 * colfrac, a00 = a0 - a01, a111 = a11 * orientfrac, a101 = a10 * orientfrac, a011 = a01 * orientfrac, a001 = a00 * orientfrac; b000 += a00 - a001; b001 += a001; b010 += a01 - a011; b011 += a011; b100 += a10 - a101; b101 += a101; b110 += a11 - a111; b111 += a111; } // The dimension of histbins is 1+GRIDHEIGHT+1 by 1+GRIDWIDTH+1 by BINS_PER_HIST+1. // The extra bins are for fast trilinear accumulation (boundary checks unneeded). // The final bin in each histogram is for degrees greater than 360, needed as // degrees less than but near 360 get smeared out by trilinear interpolation. static inline int HistIndex(int row, int col, int iorient) // index into histbins { return ((row+1) * (1+GRIDWIDTH+1) + (col+1)) * (BINS_PER_HIST+1) + iorient; } void GetHistograms( // get all histogram bins vec_double& histbins, // out const int patchwidth, // in: in pixels const vec_double& mags, // in const vec_double& orients, // in const vec_int& row_indices, // in const vec_double& row_fracs, // in const vec_int& col_indices, // in const vec_double& col_fracs) // in { const int npix = SQ(patchwidth); // number of pixels in image patch const int nhistbins = (1 + GRIDHEIGHT + 1) * (1 + GRIDWIDTH + 1) * (BINS_PER_HIST + 1); // resize and clear (the fill is needed if the size of histbins // doesn't change, because in that case resize does nothing) histbins.resize(nhistbins); fill(histbins.begin(), histbins.end(), 0.); for (int ipix = 0; ipix < npix; ipix++) { const double orient = orients[ipix]; const int iorient = int(floor(orient)); CV_DbgAssert(iorient >= 0 && iorient < BINS_PER_HIST); const int ibin = HistIndex(row_indices[ipix], col_indices[ipix], iorient); double* const p = &histbins[ibin]; TrilinearAccumulate( // apportion grad mag across eight orientation bins p[0], // ThisOrient p[1], // NextOrient p[BINS_PER_HIST + 1], // NextCol ThisOrient p[BINS_PER_HIST + 2], // NextCol NextOrient p[(GRIDWIDTH+2) * (BINS_PER_HIST+1)], // NextRow ThisOrient p[(GRIDWIDTH+2) * (BINS_PER_HIST+1) + 1], // NextRow NextOrient p[(GRIDWIDTH+3) * (BINS_PER_HIST+1)], // NextRow NextCol ThisOrient p[(GRIDWIDTH+3) * (BINS_PER_HIST+1) + 1], // NextRow NextCol NextOrient mags[ipix], // the mag that gets apportioned row_fracs[ipix], // rowfrac col_fracs[ipix], // colfrac orient - iorient); // orientfrac } } static void WrapHistograms( vec_double& histbins) { for (int row = 0; row < GRIDHEIGHT; row++) for (int col = 0; col < GRIDWIDTH; col++) { const int ibin = HistIndex(row, col, 0); histbins[ibin] += histbins[ibin + BINS_PER_HIST]; // 360 wraps to 0 } } static void CopyHistsToDesc( // copy histograms to descriptor, skipping pad bins VEC& desc, // out const vec_double& histbins) // in { for (int row = 0; row < GRIDHEIGHT; row++) for (int col = 0; col < GRIDWIDTH; col++) memcpy(Buf(desc) + (row * GRIDWIDTH + col) * BINS_PER_HIST, &histbins[HistIndex(row, col, 0)], BINS_PER_HIST * sizeof(histbins[0])); } static void NormalizeDesc( // take sqrt of elems and divide by L2 norm VEC& desc) // io { double* const data = Buf(desc); for (int i = 0; i < NSIZE(desc); i++) data[i] = sqrt(data[i]); // sqrt reduces effect of outliers const double norm = cv::norm(desc); // L2 norm if (!IsZero(norm)) { const double scale = FINAL_SCALE / norm; for (int i = 0; i < NSIZE(desc); i++) data[i] *= scale; } } // Hat::Init_ must be called before calling this function. // // A HAT descriptor is a vector of doubles of length // GRIDHEIGHT * GRIDWIDTH * BINS_PER_HIST (currently 4 * 5 * 8 = 160). // // The descriptor is a vector of doubles (instead of say bytes) primarily // so the HatFit function we apply later is fast (because byte-to-double // type conversions are unneeded when applying the formula). // // Note also that a trial implementation that used floats instead of // doubles (and with a float form of HatFit) was slower. VEC Hat::Desc_( // return HAT descriptor, Init_ must be called first const double x, // in: x coord of center of patch (may be off image) const double y) // in: y coord of center of patch (may be off image) const { CV_Assert(magmat_.rows); // verify that Hat::Init_ was called #if _OPENMP // can't be static because multiple instances vec_double mags, orients; vec_double histbins; #else // static faster since size rarely changes vec_double mags, orients; // the image patch grad mags and orientations vec_double histbins; // the histograms #endif GetMagsAndOrients(mags, orients, cvRound(x), cvRound(y), patchwidth_, magmat_, orientmat_, pixelweights_); GetHistograms(histbins, patchwidth_, mags, orients, row_indices_, row_fracs_, col_indices_, col_fracs_); WrapHistograms(histbins); // wrap 360 degrees back to 0 VEC desc(GRIDHEIGHT * GRIDWIDTH * BINS_PER_HIST, 1); // the HAT descriptor CopyHistsToDesc(desc, histbins); NormalizeDesc(desc); return desc; } } // namespace stasm