hat.cpp
16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
// hat.cpp: Histogram Array Transform descriptors
//
// Rob Hess' opensift implementation was used a reference:
// http://blogs.oregonstate.edu/hess/code/sift
//
// Copyright (C) 2005-2013, Stephen Milborrow
#include "stasm.h"
namespace stasm
{
static const int GRIDHEIGHT = 4; // 4 x 5 grid of histograms in descriptor
static const int GRIDWIDTH = 5;
static const int BINS_PER_HIST = 8; // 8 gives a 45 degree range per bin
static const double WINDOW_SIGMA = .5; // gaussian window as frac of patch width
// .5 implies patch border downweight is .368
static const double FINAL_SCALE = 10; // arb but 10 is good for %g printing of descriptors
// Get gradient magnitude and orientation of pixels in given img.
// We use a [1,-1] convolution mask rather than [1,0,-1] because it gives as good
// Stasm results and doesn't "waste" pixels on the left and top image boundary.
// Before scaling by bins_per_degree, orientations are from 0 to 359.99...
// degrees, with 0 being due east, and anticlockwise increasing.
static void InitGradMagAndOrientMats(
MAT& magmat, // out: grad mag mat
MAT& orientmat, // out: grad ori mat
const Image& img) // in: ROI scaled to current pyramid level
{
const int nrows = img.rows, nrows1 = img.rows-1;
const int ncols = img.cols, ncols1 = img.cols-1;
const double bins_per_degree = BINS_PER_HIST / 360.;
magmat.create(nrows, ncols);
orientmat.create(nrows, ncols);
for (int y = 0; y < nrows1; y++)
{
const byte* const buf = (byte*)(img.data) + y * ncols;
const byte* const buf_x1 = (byte*)(img.data) + y * ncols + 1;
const byte* const buf_y1 = (byte*)(img.data) + (y+1) * ncols;
double* const magbuf = Buf(magmat) + y * ncols;
double* const orientbuf = Buf(orientmat) + y * ncols;
for (int x = 0; x < ncols1; x++)
{
const byte pixel = buf[x];
const double xdelta = buf_x1[x] - pixel;
const double ydelta = buf_y1[x] - pixel;
magbuf[x] = sqrt(SQ(xdelta) + SQ(ydelta));
double orient =
RadsToDegrees(atan2(ydelta, xdelta)); // -180 <= orient < 180
if (orient < 0)
orient += 360; // 0 <= orient < 360
orientbuf[x] = orient * bins_per_degree; // 0 <= orient < BINS_PER_HIST
}
}
// fill bottom and right edges
magmat.row(nrows1) = 0;
magmat.col(ncols1) = 0;
orientmat.row(nrows1) = 0;
orientmat.col(ncols1) = 0;
}
// Init the indices which map a patch row,col to the corresponding
// histogram grid row,col. The mapping depends only on the image
// patchwidth and the histogram GRIDHEIGHT and WIDTH.
//
// The first pixel in the image patch maps to histogram grid x coord -0.5.
// Therefore after TrilinearAccumulate, the pixel will be equally smeared
// across histogram bin -1 and histogram bin 0. The histogram row indices
// for this pixel are irow=-1 row_frac=0.5.
static inline void InitIndices(
vec_int& row_indices, // out
vec_double& row_fracs, // out
vec_int& col_indices, // out
vec_double& col_fracs, // out
vec_double& pixelweights, // out
const int patchwidth) // in: in pixels
{
CV_Assert(patchwidth % 2 == 1); // patchwidth must be odd in this implementation
const int npix = SQ(patchwidth); // number of pixels in image patch
row_indices.resize(npix);
row_fracs.resize(npix);
col_indices.resize(npix);
col_fracs.resize(npix);
pixelweights.resize(npix);
const int halfpatchwidth = (patchwidth-1) / 2;
const double grid_rows_per_img_row = GRIDHEIGHT / (patchwidth-1.);
const double row_offset = GRIDHEIGHT / 2. - .5; // see header comment
const double grid_cols_per_img_col = GRIDWIDTH / (patchwidth-1.);
const double col_offset = GRIDWIDTH / 2. - .5;
// downweight at border of patch is exp(-1 / (2 * WINDOW_SIGMA))
const double weight = -1 / (WINDOW_SIGMA * GRIDHEIGHT * GRIDWIDTH );
int ipix = 0;
for (double patchrow = -halfpatchwidth; patchrow <= halfpatchwidth; patchrow++)
{
const double signed_row = patchrow * grid_rows_per_img_row;
const double row = signed_row + row_offset;
const int irow = int(floor(row));
const double row_frac = row - irow;
CV_DbgAssert(row >= -.5 && row <= GRIDHEIGHT - .5); // same applies to col below
for (double patchcol = -halfpatchwidth; patchcol <= halfpatchwidth; patchcol++)
{
row_indices[ipix] = irow;
row_fracs[ipix] = row_frac;
const double signed_col = patchcol * grid_cols_per_img_col;
const double col = signed_col + col_offset;
const int icol = int(floor(col));
col_indices[ipix] = icol;
col_fracs[ipix] = col - icol;
pixelweights[ipix] = // TODO this weights col and row offsets equally
exp(weight * (SQ(signed_row) + SQ(signed_col)));
ipix++;
}
}
}
// Init the data that doesn't change unless the image, patch width, or
// GRIDHEIGHT or WIDTH changes (i.e. for Stasm this must be called
// once per pyramid lev).
void Hat::Init_(
const Image& img, // in: image scaled to current pyramid level
const int patchwidth) // in: patch will be patchwidth x patchwidth pixels
{
patchwidth_ = patchwidth;
InitGradMagAndOrientMats(magmat_, orientmat_, img);
InitIndices(row_indices_, row_fracs_, col_indices_, col_fracs_, pixelweights_,
patchwidth_);
}
// Calculate the image patch gradient mags and orients.
// Note that the mag for a pixel out of the image boundaries is set
// to 0 and thus contributes nothing later in TrilinearAccumulate.
static void GetMagsAndOrients_GeneralCase(
vec_double& mags, // out
vec_double& orients, // out
const int ix, // in: x coord of center of patch
const int iy, // in: y coord of center of patch
const int patchwidth, // in
const MAT& magmat, // in
const MAT& orientmat, // in
const vec_double& pixelweights) // in
{
const int halfpatchwidth = (patchwidth-1) / 2;
int ipix = 0;
for (int x = iy - halfpatchwidth; x <= iy + halfpatchwidth; x++)
{
const double* const magbuf = Buf(magmat) + x * magmat.cols;
const double* const orientbuf = Buf(orientmat) + x * orientmat.cols;
for (int y = ix - halfpatchwidth; y <= ix + halfpatchwidth; y++)
{
if (x < 0 || x >= magmat.rows || y < 0 || y >= magmat.cols)
{
mags[ipix] = 0; // off image
orients[ipix] = 0;
}
else // in image
{
mags[ipix] = pixelweights[ipix] * magbuf[y];
orients[ipix] = orientbuf[y];
}
ipix++;
}
}
CV_DbgAssert(ipix == NSIZE(mags));
}
// Calculate the image patch gradient mags and orients for
// an image patch that is entirely in the image boundaries.
static inline void GetMagsAndOrients_AllInImg(
vec_double& mags, // out
vec_double& orients, // out
const int ix, // in: x coord of center of patch
const int iy, // in: y coord of center of patch
const int patchwidth, // in
const MAT& magmat, // in
const MAT& orientmat, // in
const vec_double& pixelweights) // in
{
const int halfpatchwidth = (patchwidth-1) / 2;
int ipix = 0;
for (int x = iy - halfpatchwidth; x <= iy + halfpatchwidth; x++)
{
const double* const magbuf = Buf(magmat) + x * magmat.cols;
const double* const orientbuf = Buf(orientmat) + x * orientmat.cols;
for (int y = ix - halfpatchwidth; y <= ix + halfpatchwidth; y++)
{
mags[ipix] = pixelweights[ipix] * magbuf[y];
orients[ipix] = orientbuf[y];
ipix++;
}
}
CV_DbgAssert(ipix == NSIZE(mags));
}
void GetMagsAndOrients( // get mags and orients for patch at ix,iy
vec_double& mags, // out
vec_double& orients, // out
const int ix, // in: x coord of center of patch (may be off image)
const int iy, // in: y coord of center of patch (may be off image)
const int patchwidth, // in: in pixels
const MAT& magmat, // in
const MAT& orientmat, // in
const vec_double& pixelweights) // in
{
CV_Assert(patchwidth % 2 == 1); // patchwidth must be odd in this implementation
const int npix = SQ(patchwidth); // number of pixels in image patch
const int halfpatchwidth = (patchwidth-1) / 2;
mags.resize(npix);
orients.resize(npix);
if (ix - halfpatchwidth < 0 || ix + halfpatchwidth >= magmat.cols ||
iy - halfpatchwidth < 0 || iy + halfpatchwidth >= magmat.rows)
{
// Part or all of the patch is out the image area.
GetMagsAndOrients_GeneralCase(mags, orients,
ix, iy, patchwidth, magmat, orientmat, pixelweights);
}
else
{
// Patch is entirely in the image area. The following function returns
// results identical to GetMagsAndOrients_GeneralCase, but is faster
// because it doesn't have to worry about the edges of the image.
GetMagsAndOrients_AllInImg(mags, orients,
ix, iy, patchwidth, magmat, orientmat, pixelweights);
}
}
// Apportion the gradient magnitude of a pixel across 8 orientation bins.
// "Accumulate" is in the func name because we "+=" the interpolated values.
// This routine needs to be fast.
static inline void TrilinearAccumulate(
double& b000, double& b001, // io: histogram bins
double& b010, double& b011, // io
double& b100, double& b101, // io
double& b110, double& b111, // io
const double mag, // in: the mag that gets apportioned
const double rowfrac, // in
const double colfrac, // in
const double orientfrac) // in
{
const double
a1 = mag * rowfrac, a0 = mag - a1,
a11 = a1 * colfrac, a10 = a1 - a11,
a01 = a0 * colfrac, a00 = a0 - a01,
a111 = a11 * orientfrac,
a101 = a10 * orientfrac,
a011 = a01 * orientfrac,
a001 = a00 * orientfrac;
b000 += a00 - a001; b001 += a001;
b010 += a01 - a011; b011 += a011;
b100 += a10 - a101; b101 += a101;
b110 += a11 - a111; b111 += a111;
}
// The dimension of histbins is 1+GRIDHEIGHT+1 by 1+GRIDWIDTH+1 by BINS_PER_HIST+1.
// The extra bins are for fast trilinear accumulation (boundary checks unneeded).
// The final bin in each histogram is for degrees greater than 360, needed as
// degrees less than but near 360 get smeared out by trilinear interpolation.
static inline int HistIndex(int row, int col, int iorient) // index into histbins
{
return ((row+1) * (1+GRIDWIDTH+1) + (col+1)) * (BINS_PER_HIST+1) + iorient;
}
void GetHistograms( // get all histogram bins
vec_double& histbins, // out
const int patchwidth, // in: in pixels
const vec_double& mags, // in
const vec_double& orients, // in
const vec_int& row_indices, // in
const vec_double& row_fracs, // in
const vec_int& col_indices, // in
const vec_double& col_fracs) // in
{
const int npix = SQ(patchwidth); // number of pixels in image patch
const int nhistbins =
(1 + GRIDHEIGHT + 1) * (1 + GRIDWIDTH + 1) * (BINS_PER_HIST + 1);
// resize and clear (the fill is needed if the size of histbins
// doesn't change, because in that case resize does nothing)
histbins.resize(nhistbins);
fill(histbins.begin(), histbins.end(), 0.);
for (int ipix = 0; ipix < npix; ipix++)
{
const double orient = orients[ipix];
const int iorient = int(floor(orient));
CV_DbgAssert(iorient >= 0 && iorient < BINS_PER_HIST);
const int ibin =
HistIndex(row_indices[ipix], col_indices[ipix], iorient);
double* const p = &histbins[ibin];
TrilinearAccumulate( // apportion grad mag across eight orientation bins
p[0], // ThisOrient
p[1], // NextOrient
p[BINS_PER_HIST + 1], // NextCol ThisOrient
p[BINS_PER_HIST + 2], // NextCol NextOrient
p[(GRIDWIDTH+2) * (BINS_PER_HIST+1)], // NextRow ThisOrient
p[(GRIDWIDTH+2) * (BINS_PER_HIST+1) + 1], // NextRow NextOrient
p[(GRIDWIDTH+3) * (BINS_PER_HIST+1)], // NextRow NextCol ThisOrient
p[(GRIDWIDTH+3) * (BINS_PER_HIST+1) + 1], // NextRow NextCol NextOrient
mags[ipix], // the mag that gets apportioned
row_fracs[ipix], // rowfrac
col_fracs[ipix], // colfrac
orient - iorient); // orientfrac
}
}
static void WrapHistograms(
vec_double& histbins)
{
for (int row = 0; row < GRIDHEIGHT; row++)
for (int col = 0; col < GRIDWIDTH; col++)
{
const int ibin = HistIndex(row, col, 0);
histbins[ibin] += histbins[ibin + BINS_PER_HIST]; // 360 wraps to 0
}
}
static void CopyHistsToDesc( // copy histograms to descriptor, skipping pad bins
VEC& desc, // out
const vec_double& histbins) // in
{
for (int row = 0; row < GRIDHEIGHT; row++)
for (int col = 0; col < GRIDWIDTH; col++)
memcpy(Buf(desc) +
(row * GRIDWIDTH + col) * BINS_PER_HIST,
&histbins[HistIndex(row, col, 0)],
BINS_PER_HIST * sizeof(histbins[0]));
}
static void NormalizeDesc( // take sqrt of elems and divide by L2 norm
VEC& desc) // io
{
double* const data = Buf(desc);
for (int i = 0; i < NSIZE(desc); i++)
data[i] = sqrt(data[i]); // sqrt reduces effect of outliers
const double norm = cv::norm(desc); // L2 norm
if (!IsZero(norm))
{
const double scale = FINAL_SCALE / norm;
for (int i = 0; i < NSIZE(desc); i++)
data[i] *= scale;
}
}
// Hat::Init_ must be called before calling this function.
//
// A HAT descriptor is a vector of doubles of length
// GRIDHEIGHT * GRIDWIDTH * BINS_PER_HIST (currently 4 * 5 * 8 = 160).
//
// The descriptor is a vector of doubles (instead of say bytes) primarily
// so the HatFit function we apply later is fast (because byte-to-double
// type conversions are unneeded when applying the formula).
//
// Note also that a trial implementation that used floats instead of
// doubles (and with a float form of HatFit) was slower.
VEC Hat::Desc_( // return HAT descriptor, Init_ must be called first
const double x, // in: x coord of center of patch (may be off image)
const double y) // in: y coord of center of patch (may be off image)
const
{
CV_Assert(magmat_.rows); // verify that Hat::Init_ was called
#if _OPENMP // can't be static because multiple instances
vec_double mags, orients;
vec_double histbins;
#else // static faster since size rarely changes
vec_double mags, orients; // the image patch grad mags and orientations
vec_double histbins; // the histograms
#endif
GetMagsAndOrients(mags, orients,
cvRound(x), cvRound(y), patchwidth_,
magmat_, orientmat_, pixelweights_);
GetHistograms(histbins,
patchwidth_, mags, orients,
row_indices_, row_fracs_, col_indices_, col_fracs_);
WrapHistograms(histbins); // wrap 360 degrees back to 0
VEC desc(GRIDHEIGHT * GRIDWIDTH * BINS_PER_HIST, 1); // the HAT descriptor
CopyHistsToDesc(desc,
histbins);
NormalizeDesc(desc);
return desc;
}
} // namespace stasm