// pinstart.cpp (10.8 KB)
// pinstart.cpp: utilities for creating a start shape from manually pinned points
//
// Copyright (C) 2005-2013, Stephen Milborrow
#include "pinstart.h"
#include "stasm_landmarks.h"
#include "faceroi.h"
#include "err.h"
#include "landmarks.h"
#include "print.h"
namespace stasm
{
// Estimate the yaw from a five point shape stored as 10 doubles.
//
// The following model was machine generated by running
// 5pointpose.R on the training shapes and their reflections.
// It is an intercept plus hinge terms of the form MAX(0, x[i] - knot) and
// MAX(0, knot - x[i]) on elements 3, 4, 5, and 7 of x, some of them multiplied
// together. Do not edit the constants by hand, regenerate the model instead.
// The terms are summed in the same order as in the generated expression.
static double EstYawFrom5PointShape(const double* x) // x has 10 elements
{
    // hinges on x[3] (used on their own and in the products with x[5])
    const double x3_above = MAX(0, x[3] - -0.34708);
    const double x3_below = MAX(0, -0.34708 - x[3]);

    // hinges on x[4]
    const double x4_above = MAX(0, x[4] - 0.21454);
    const double x4_below = MAX(0, 0.21454 - x[4]);

    // hinges on x[7]
    const double x7_above = MAX(0, x[7] - 0.3384);
    const double x7_below = MAX(0, 0.3384 - x[7]);

    // hinges on x[5], there are two different knots for this element
    const double x5_above_a = MAX(0, x[5] - -0.010838);
    const double x5_below_a = MAX(0, -0.010838 - x[5]);
    const double x5_above_b = MAX(0, x[5] - -0.055581);
    const double x5_below_b = MAX(0, -0.055581 - x[5]);

    return 34.342
         - 7.0267 * x3_above
         + 10.739 * x3_below
         + 116.29 * x4_above
         - 159.56 * x4_below
         + 12.513 * x7_above
         + 7.2764 * x7_below
         + 260.14 * x3_above * x5_above_a
         - 160.64 * x3_above * x5_below_a
         - 284.88 * x3_below * x5_above_b
         + 654.54 * x3_below * x5_below_b;
}
// Rotate the shape about the point (x,y), modifying the shape itself.
static void RotShapeInPlace(
    Shape& shape, // io
    double rot,   // in: in-plane rotation angle in degrees, pos is anticlock
    double x,     // in: rotation origin
    double y)     // in
{
    CV_Assert(rot >= -360 && rot <= 360); // sanity check, 360 is arb

    // getRotationMatrix2D wants the origin as a single precision point
    const cv::Point2f origin(float(x), float(y));

    // the final argument of 1 means rotate only, do not scale
    const MAT rotmat = getRotationMatrix2D(origin, rot, 1.);

    AlignShapeInPlace(shape, rotmat);
}
static double SumElems( // return the sum of the elemens in mat
const MAT& mat) // in
{
return cv::sum(mat)[0];
}
// If shape does not have 5 points, return rot and yaw of 0.
// Else assume that the following five points are present, in this order:
// 0 LEyeOuter
// 1 REyeOuter
// 2 CNoseTip
// 3 LMouthCorner
// 4 RMouthCorner
static void EstRotAndYawFrom5PointShape(
double& rot, // out
double& yaw, // out
const Shape shape) // in
{
if (shape.rows != 5 || // basic sanity checks
shape(0, IX) > shape(1, IX) || // eye corners
shape(3, IX) > shape(4, IX)) // mouth corners
{
rot = yaw = 0;
return;
}
Shape workshape(shape.clone()); // local copy we can modify
// Derotate shape using eye angle as estimate of in-plane rotation.
// We rotate about the shape centroid.
// TODO EstYawFrom5PointShape was trained on shapes without this
// derotation, so must retrain the model for best results.
rot = RadsToDegrees(-atan2(workshape(1, IY) - workshape(0, IY),
workshape(1, IX) - workshape(0, IX)));
PossiblySetRotToZero(rot); // treat small Rots as zero Rots
if (rot)
RotShapeInPlace(workshape,
-rot,
SumElems(workshape.col(IX)) / 5,
SumElems(workshape.col(IY)) / 5);
// mean-center x and y
MAT X(workshape.col(IX)); X -= SumElems(X) / 5;
MAT Y(workshape.col(IY)); Y -= SumElems(Y) / 5;
// normalize so shape size is 1
double norm = 0;
for (int i = 0; i < 5; i++)
norm += SQ(X(i)) + SQ(Y(i));
workshape /= sqrt(norm);
yaw = EstYawFrom5PointShape(Buf(workshape));
}
// Return the mean shape transformed so that its points line up with the
// landmarks that are set in pinned.  Calls Err if fewer than two landmarks
// are set in pinned.
static Shape PinMeanShape(  // align mean shape to the pinned points
    const Shape& pinned,    // in: at least two of these points must be set
    const Shape& meanshape) // in
{
    CV_Assert(pinned.rows == meanshape.rows);

    const int npoints = meanshape.rows;

    // first pass: count the landmarks that are in use in pinned

    int nused = 0;
    for (int ipin = 0; ipin < npoints; ipin++)
    {
        if (PointUsed(pinned, ipin))
            nused++;
    }
    if (nused < 2)
        Err("Need at least two pinned landmarks");

    // Second pass: build two shapes with nused rows each.  The anchor shape
    // pinned_used gets the used points of pinned, and the alignment shape
    // mean_used gets the points of meanshape at the same landmark indices.

    Shape pinned_used(nused, 2), mean_used(nused, 2);
    int i = 0; // current row in pinned_used and mean_used
    for (int ipin = 0; ipin < npoints; ipin++)
    {
        if (!PointUsed(pinned, ipin))
            continue; // this landmark is not pinned

        pinned_used(i, IX) = pinned(ipin, IX);
        pinned_used(i, IY) = pinned(ipin, IY);

        mean_used(i, IX) = meanshape(ipin, IX);
        mean_used(i, IY) = meanshape(ipin, IY);

        i++;
    }
    CV_Assert(i == nused);

    // Apply to the entire mean shape the transform which aligns mean_used to
    // pinned_used.

    Shape aligned(
        AlignShape(meanshape, AlignmentMat(mean_used, pinned_used)));

    return JitterPointsAt00(aligned);
}
// Return true if all five canonical landmarks (the outer eye corners, the
// nose tip, and the mouth corners) are in use in pinned.
static bool HaveCanonical5Points(
    const Shape& pinned) // in: pinned landmarks
{
    // bail out at the first missing landmark, checked in the canonical order

    if (!PointUsed(pinned, L_LEyeOuter))
        return false;

    if (!PointUsed(pinned, L_REyeOuter))
        return false;

    if (!PointUsed(pinned, L_CNoseTip))
        return false;

    if (!PointUsed(pinned, L_LMouthCorner))
        return false;

    return PointUsed(pinned, L_RMouthCorner);
}
// Extract the five canonical landmarks into a shape with five rows, in the
// order LEyeOuter, REyeOuter, CNoseTip, LMouthCorner, RMouthCorner.
static Shape As5PointShape( // return a 5 point shape
    const Shape& pinned,    // in: pinned landmarks, canonical 5 points are best
    const Shape& meanshape) // in: used only if pinned landmarks are not canonical
{
    CV_Assert(pinned.rows == stasm_NLANDMARKS);
    CV_Assert(meanshape.rows == stasm_NLANDMARKS);

    // landmark index for each row of the five point shape
    static const int canonical[5] =
    {
        L_LEyeOuter,
        L_REyeOuter,
        L_CNoseTip,
        L_LMouthCorner,
        L_RMouthCorner
    };

    // If the pinned landmarks are not the canonical five, take the points
    // from the mean shape aligned to whatever landmarks are pinned.
    // This is not an optimal situation but will at least allow estimation
    // of pose from an arb set of pinned landmarks.

    const Shape source(HaveCanonical5Points(pinned)?
                           pinned: PinMeanShape(pinned, meanshape));

    Shape outshape(5, 2); // 5 point shape
    for (int irow = 0; irow < 5; irow++)
    {
        outshape(irow, IX) = source(canonical[irow], IX);
        outshape(irow, IY) = source(canonical[irow], IY);
    }
    return outshape;
}
// Copy the pupil and bottom-of-bottom-lip positions from the shape into the
// eye and mouth fields of detpar.  A field pair is left untouched if the
// corresponding landmark is not in use in the shape.
static void InitDetParEyeMouthFromShape( // fill in eye and mouth fields of detpar
    DetPar& detpar,
    Shape& shape)
{
    const bool have_leye  = PointUsed(shape, L_LPupil);
    const bool have_reye  = PointUsed(shape, L_RPupil);
    const bool have_mouth = PointUsed(shape, L_CBotOfBotLip);

    if (have_leye) // left eye
    {
        detpar.lex = shape(L_LPupil, IX);
        detpar.ley = shape(L_LPupil, IY);
    }
    if (have_reye) // right eye
    {
        detpar.rex = shape(L_RPupil, IX);
        detpar.rey = shape(L_RPupil, IY);
    }
    if (have_mouth) // mouth
    {
        detpar.mouthx = shape(L_CBotOfBotLip, IX);
        detpar.mouthy = shape(L_CBotOfBotLip, IY);
    }
}
// The startshape was created without running the face detector.  Here we
// work backwards from the startshape to a detpar, so that detpar can be
// handled uniformly in PinnedStartShapeAndRoi.  The position of this detpar
// does not have to exactly match the detpar that would generate the
// startshape.
//
// The pupils and the bottom of the bottom lip must be in use in startshape.
static DetPar PseudoDetParFromStartShape(
    const Shape& startshape,
    double       rot,
    double       yaw,
    int          nmods)
{
    // pick the eyes and the mouth out of the start shape

    const double lex = startshape(L_LPupil, IX);          // left eye
    const double ley = startshape(L_LPupil, IY);

    const double rex = startshape(L_RPupil, IX);          // right eye
    const double rey = startshape(L_RPupil, IY);

    const double mouthx = startshape(L_CBotOfBotLip, IX); // mouth
    const double mouthy = startshape(L_CBotOfBotLip, IY);

    CV_Assert(PointUsed(lex, ley));
    CV_Assert(PointUsed(rex, rey));
    CV_Assert(PointUsed(mouthx, mouthy));

    // midpoint of the eyes, and its distance to the mouth

    const double eyes_x = (lex + rex) / 2;
    const double eyes_y = (ley + rey) / 2;
    const double eyemouth = PointDist(eyes_x, eyes_y, mouthx, mouthy);

    DetPar detpar;

    // pose

    detpar.rot  = rot;
    detpar.yaw  = yaw;
    detpar.eyaw = DegreesAsEyaw(yaw, nmods); // determines what ASM model to use

    // eyes and mouth, copied directly from the start shape

    detpar.lex    = lex;
    detpar.ley    = ley;
    detpar.rex    = rex;
    detpar.rey    = rey;
    detpar.mouthx = mouthx;
    detpar.mouthy = mouthy;

    // The face box is centered on the line from the eye midpoint to the
    // mouth (30% of the way down), and is a square with a side of twice the
    // eye-mouth distance.

    detpar.x      = .7 * eyes_x + .3 * mouthx;
    detpar.y      = .7 * eyes_y + .3 * mouthy;
    detpar.width  = 2.0 * eyemouth;
    detpar.height = 2.0 * eyemouth;

    return detpar;
}
// Use the given pinned face landmarks to init the start shape. The
// current implementation works best if the pinned landmarks are the five
// canonical pinned landmarks (viz. LEyeOuter, REyeOuter, CNoseTip,
// LMouthCorner, RMouthCorner). This is because it was trained on those
// points. But the routine also works if any two or more points are pinned.
//
// Outline:
//   1. Estimate rot and yaw from the pinned points, and select the ASM model
//      for that yaw.
//   2. For a left facing face, mirror the pinned points and a private copy
//      of the image (our models are for right facing faces).
//   3. Align the model's mean shape to the pinned points and conform it to
//      the model, giving the start shape.
//   4. Back generate a detpar from the start shape, get the face ROI, and
//      translate the shapes to the ROI frame.
//   5. For a left facing face, flip detpar back to the frame of img.
//
// The pixels of img are not modified.
void PinnedStartShapeAndRoi(   // use the pinned landmarks to init the start shape
    Shape&         startshape, // out: the start shape (in ROI frame)
    Image&         face_roi,   // out: ROI around face, possibly rotated upright
    DetPar&        detpar_roi, // out: detpar wrt to face_roi
    DetPar&        detpar,     // out: detpar wrt to img
    Shape&         pinned_roi, // out: pinned arg translated to ROI frame
    const Image&   img,        // in: the image (grayscale)
    const vec_Mod& mods,       // in: a vector of models, one for each yaw range
    const Shape&   pinned)     // in: manually pinned landmarks
{
    CV_Assert(NSIZE(mods) > 0); // sanity check, we access mods[0] below

    double rot, yaw;
    EstRotAndYawFrom5PointShape(rot, yaw, As5PointShape(pinned, mods[0]->MeanShape_()));
    const EYAW eyaw = DegreesAsEyaw(yaw, NSIZE(mods));
    const int imod = EyawAsModIndex(eyaw, mods); // select ASM model based on yaw
    if (trace_g)
        lprintf("%-6.6s yaw %3.0f rot %3.0f ", EyawAsString(eyaw), yaw, rot);

    pinned_roi = pinned; // use pinned_roi as a temp shape we can change
    Image workimg(img);  // possibly flipped image
    if (IsLeftFacing(eyaw)) // left facing? (our models are for right facing faces)
    {
        pinned_roi = FlipShape(pinned_roi, workimg.cols);
        // Flip a deep copy, not the caller's pixels.  Image is assumed to be
        // an OpenCV matrix type, in which case the copy constructor above
        // shares its data with img (TODO confirm against the Image typedef).
        // Flipping workimg directly would then mirror the caller's image
        // too, and the detpar that we flip back to the img frame at the end
        // of this function would no longer correspond to img.
        workimg = img.clone();
        FlipImgInPlace(workimg);
    }
    const Mod* mod = mods[ABS(imod)];
    startshape = PinMeanShape(pinned_roi, mod->MeanShape_());
    startshape = mod->ConformShapeToMod_Pinned_(startshape, pinned_roi);
    detpar = PseudoDetParFromStartShape(startshape, rot, yaw, NSIZE(mods));
    if (IsLeftFacing(eyaw))
        detpar.rot *= -1; // the rotation is mirrored in the flipped image
    FaceRoiAndDetPar(face_roi, detpar_roi, workimg, detpar, false);
    startshape = ImgShapeToRoiFrame(startshape, detpar_roi, detpar);
    pinned_roi = ImgShapeToRoiFrame(pinned_roi, detpar_roi, detpar);
    // following line not strictly necessary because don't actually need eyes/mouth
    InitDetParEyeMouthFromShape(detpar_roi, startshape);
    if (IsLeftFacing(eyaw))
    {
        // convert detpar from the flipped frame back to the frame of img
        detpar = FlipDetPar(detpar, img.cols);
        detpar.rot = -detpar.rot;
        // mirror detpar_roi.x about the vertical center line of face_roi
        detpar_roi.x += 2. * (face_roi.cols/2. - detpar_roi.x);
    }
}
} // namespace stasm