startshape.cpp
18.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
// startshape.cpp: routines for finding the start shape for an ASM search
//
// The model "estart" determines the method we use to create the start shape.
// (The InitMods function initializes estart during Stasm initialization.)
// The current open-source version of Stasm uses estart=ESTART_EYES.
//
// 1. With the model estart=ESTART_RECT_ONLY, the start shape is created by
// aligning the model mean face shape to the face rectangle. (The face
// rectangle is found by the face detector prior to calling routines in
// this file.)
//
// 2. With the model estart=ESTART_EYES (currently used for the frontal
// model), the start shape is created as follows. Using the face rectangle
// found by the face detector, Stasm searches for the eyes in the
// appropriate subregions within the rectangle. If both eyes are found the
// face is rotated so the eyes are horizontal. The start shape is then
// formed by aligning the mean training shape to the eyes. If either eye
// isn't found, the start shape is aligned to the face detector rectangle.
//
// Note however that if the eye angle is less than +-5 degrees, we treat it
// as 0 degrees (and don't rotate the face as described above). This
// minimizes preprocessing.
//
// 3. With the model estart=ESTART_EYE_AND_MOUTH (currently used for the
// three-quarter models), the start shape is generated as above, but we
// search for the mouth too and use it if it is detected.
//
// Copyright (C) 2005-2013, Stephen Milborrow
#include "stasm.h"
namespace stasm
{
// The constant 200 is arbitrary, except that the value used by Stasm
// must match that used by Tasm when training the model. Using 200 instead
// of say, 1, means that the detector average face is displayable at a decent
// size which is useful for debugging.
static const int DET_FACE_WIDTH = 200;

// Following used if we did not detect eyes.  We empirically get slightly
// better Stasm results if we slightly reduce the size of the detected
// face rectangle.  (const added: this value is never modified at runtime.)
static const double FACERECT_SCALE_WHEN_NO_EYES = .95;
//-----------------------------------------------------------------------------
// Align meanshape to the face detector rectangle and return it as startshape.
// This ignores the eye and mouth, if any.

static Shape AlignMeanShapeToFaceDetRect(
    const DetPar& detpar,    // in
    const Shape&  meanshape, // in
    double        scale,     // in: scale the face rectangle
    const Image&  img)       // in: the image (grayscale), used for its width
{
    if (trace_g)
        lprintf("AlignToFaceDetBox ");

    // Work with a right-facing detpar (mirror left-facing faces).
    DetPar detpar1(detpar);
    if (IsLeftFacing(detpar.eyaw))
        detpar1 = FlipDetPar(detpar, img.cols);

    CV_Assert(meanshape.rows > 0 && meanshape.cols == 2);

    // Scale the meanshape from the reference face width down (or up) to
    // the detected rectangle's size, then translate to its position.
    const double scale_x = scale * detpar1.width  / DET_FACE_WIDTH;
    const double scale_y = scale * detpar1.height / DET_FACE_WIDTH;

    return AlignShape(meanshape,
                      scale_x, 0,       detpar1.x,
                      0,       scale_y, detpar1.y);
}
// Return the model meanshape aligned to both eyes and the mouth.
//
// The central idea is to form a triangular shape of the eyes and
// bottom-of-mouth from the face detector params, and align the same
// triangle in the meanshape to this triangle.

static Shape AlignMeanShapeToBothEyesAndMouth(
    const DetPar& detpar,    // in
    const Shape&  meanshape) // in
{
    if (trace_g)
        lprintf("AlignToBothEyesAndMouth ");

    CV_Assert(NSIZE(meanshape) > 0 && PointUsed(meanshape, 0));
    CV_Assert(Valid(detpar.mouthx));
    CV_Assert(Valid(detpar.lex));
    CV_Assert(Valid(detpar.rex));

    // The meanshape's mouth point is the midpoint of the top of the
    // top lip and the bottom of the bottom lip.
    const double mouthx =
        (meanshape(L_CTopOfTopLip, IX) + meanshape(L_CBotOfBotLip, IX)) / 2.;
    const double mouthy =
        (meanshape(L_CTopOfTopLip, IY) + meanshape(L_CBotOfBotLip, IY)) / 2.;

    Shape from_tri(3, 2); // eye-eye-mouth triangle in the meanshape
    from_tri(0, IX) = meanshape(L_LPupil, IX); // left eye
    from_tri(0, IY) = meanshape(L_LPupil, IY);
    from_tri(1, IX) = meanshape(L_RPupil, IX); // right eye
    from_tri(1, IY) = meanshape(L_RPupil, IY);
    from_tri(2, IX) = mouthx;                  // mouth
    from_tri(2, IY) = mouthy;

    Shape to_tri(3, 2);   // the same triangle from the feature detectors
    to_tri(0, IX) = detpar.lex;    // left eye
    to_tri(0, IY) = detpar.ley;
    to_tri(1, IX) = detpar.rex;    // right eye
    to_tri(1, IY) = detpar.rey;
    to_tri(2, IX) = detpar.mouthx; // mouth
    to_tri(2, IY) = detpar.mouthy;

    return AlignShape(meanshape, AlignmentMat(from_tri, to_tri));
}
// Return the model meanshape aligned to both eyes (mouth is not avail).

static Shape AlignMeanShapeToBothEyesNoMouth(
    const DetPar& detpar,    // in
    const Shape&  meanshape) // in
{
    if (trace_g)
        lprintf("AlignToBothEyesNoMouth ");

    CV_Assert(NSIZE(meanshape) > 0 && PointUsed(meanshape, 0));
    CV_Assert(Valid(detpar.lex));
    CV_Assert(Valid(detpar.rex));

    // Align the meanshape's pupil-to-pupil segment to the detected
    // eye-to-eye segment.
    Shape from_line(2, 2), to_line(2, 2);

    from_line(0, IX) = meanshape(L_LPupil, IX); // left eye
    from_line(0, IY) = meanshape(L_LPupil, IY);
    from_line(1, IX) = meanshape(L_RPupil, IX); // right eye
    from_line(1, IY) = meanshape(L_RPupil, IY);

    to_line(0, IX) = detpar.lex;                // left eye
    to_line(0, IY) = detpar.ley;
    to_line(1, IX) = detpar.rex;                // right eye
    to_line(1, IY) = detpar.rey;

    return AlignShape(meanshape, AlignmentMat(from_line, to_line));
}
// Return the model meanshape aligned to both eyes, with the mouth
// position ESTIMATED from the eyes and the face rectangle width.
// (The original header comment was a stale copy of the NoMouth variant's;
// unlike that variant, this one synthesizes a mouth point so the
// alignment uses a triangle rather than just the inter-eye segment.)

static Shape AlignMeanShapeToBothEyesEstMouth(
    const DetPar& detpar,    // in
    const Shape&  meanshape) // in
{
    // .48 was tested to give slightly better worst case results than .50
    static const double EYEMOUTH_TO_FACERECT_RATIO = .48;

    if (trace_g)
        lprintf("AlignToBothEyesNoMouth(EstMouth) ");

    CV_Assert(NSIZE(meanshape) > 0 && PointUsed(meanshape, 0));
    CV_Assert(Valid(detpar.lex));
    CV_Assert(Valid(detpar.rex));

    // Estimate the mouth's horizontal position from the eye positions.
    double x_eyemid = 0;
    switch (detpar.eyaw)
    {
    case EYAW00:   // frontal: mid point between the eyes
        x_eyemid = .50 * detpar.lex + .50 * detpar.rex;
        break;
    // TODO The constants below have not been empirically optimized.
    // NOTE(review): all four non-frontal yaws used the identical
    // weighting (collapsed here to a fallthrough, behavior unchanged).
    // The weighting favors the right eye, although the original comments
    // on EYAW_45 and EYAW_22 claimed "closer to left eye" -- confirm the
    // intended weights for left-facing yaws.
    case EYAW_45:
    case EYAW_22:
    case EYAW22:
    case EYAW45:
        x_eyemid = .30 * detpar.lex + .70 * detpar.rex;
        break;
    default:
        Err("AlignMeanShapeToBothEyesEstMouth: Invalid eyaw %d", detpar.eyaw);
        break;
    }
    const double y_eyemid = (detpar.ley + detpar.rey) / 2;

    Shape mean_tri(3, 2), det_tri(3, 2); // triangle of eyes and mouth

    mean_tri(0, IX) = meanshape(L_LPupil, IX);       // left eye
    mean_tri(0, IY) = meanshape(L_LPupil, IY);
    mean_tri(1, IX) = meanshape(L_RPupil, IX);       // right eye
    mean_tri(1, IY) = meanshape(L_RPupil, IY);
    mean_tri(2, IX) = meanshape(L_CBotOfBotLip, IX); // mouth
    mean_tri(2, IY) = meanshape(L_CBotOfBotLip, IY);

    det_tri(0, IX) = detpar.lex;  // left eye
    det_tri(0, IY) = detpar.ley;
    det_tri(1, IX) = detpar.rex;  // right eye
    det_tri(1, IY) = detpar.rey;
    det_tri(2, IX) = x_eyemid;    // estimated mouth position
    det_tri(2, IY) = y_eyemid + EYEMOUTH_TO_FACERECT_RATIO * detpar.width;

    return AlignShape(meanshape, AlignmentMat(mean_tri, det_tri));
}
// Return the model meanshape aligned to the left eye and the mouth
// (used when the right eye was not detected).

static Shape AlignMeanShapeToLeftEyeAndMouth(
    const DetPar& detpar,    // in
    const Shape&  meanshape) // in
{
    if (trace_g)
        lprintf("AlignToLeftEyeAndMouth ");

    CV_Assert(NSIZE(meanshape) > 0 && PointUsed(meanshape, 0));
    CV_Assert(Valid(detpar.lex));    // left eye valid?
    CV_Assert(!Valid(detpar.rex));   // right eye invalid? (else why are we here?)
    CV_Assert(Valid(detpar.mouthx)); // mouth valid?

    // The meanshape's mouth point is the midpoint of the top of the
    // top lip and the bottom of the bottom lip.
    const double mouthx =
        (meanshape(L_CTopOfTopLip, IX) + meanshape(L_CBotOfBotLip, IX)) / 2;
    const double mouthy =
        (meanshape(L_CTopOfTopLip, IY) + meanshape(L_CBotOfBotLip, IY)) / 2;

    // Align the meanshape's eye-to-mouth segment to the detected one.
    Shape from_line(2, 2), to_line(2, 2);

    from_line(0, IX) = meanshape(L_LPupil, IX); // left eye
    from_line(0, IY) = meanshape(L_LPupil, IY);
    from_line(1, IX) = mouthx;                  // mouth
    from_line(1, IY) = mouthy;

    to_line(0, IX) = detpar.lex;    // left eye
    to_line(0, IY) = detpar.ley;
    to_line(1, IX) = detpar.mouthx; // mouth
    to_line(1, IY) = detpar.mouthy;

    return AlignShape(meanshape, AlignmentMat(from_line, to_line));
}
// Return the model meanshape aligned to the right eye and the mouth
// (used when the left eye was not detected).

static Shape AlignMeanShapeToRightEyeAndMouth(
    const DetPar& detpar,    // in
    const Shape&  meanshape) // in
{
    if (trace_g)
        lprintf("AlignToRightEyeAndMouth ");

    CV_Assert(NSIZE(meanshape) > 0 && PointUsed(meanshape, 0));
    CV_Assert(!Valid(detpar.lex));   // left eye invalid? (else why are we here?)
    CV_Assert(Valid(detpar.rex));    // right eye valid?
    CV_Assert(Valid(detpar.mouthx)); // mouth valid?

    // The meanshape's mouth point is the midpoint of the top of the
    // top lip and the bottom of the bottom lip.
    const double mouthx =
        (meanshape(L_CTopOfTopLip, IX) + meanshape(L_CBotOfBotLip, IX)) / 2;
    const double mouthy =
        (meanshape(L_CTopOfTopLip, IY) + meanshape(L_CBotOfBotLip, IY)) / 2;

    // Align the meanshape's eye-to-mouth segment to the detected one.
    Shape from_line(2, 2), to_line(2, 2);

    from_line(0, IX) = meanshape(L_RPupil, IX); // right eye
    from_line(0, IY) = meanshape(L_RPupil, IY);
    from_line(1, IX) = mouthx;                  // mouth
    from_line(1, IY) = mouthy;

    to_line(0, IX) = detpar.rex;    // right eye
    to_line(0, IY) = detpar.rey;
    to_line(1, IX) = detpar.mouthx; // mouth
    to_line(1, IY) = detpar.mouthy;

    return AlignShape(meanshape, AlignmentMat(from_line, to_line));
}
// Mirror the shape horizontally, but only if the pose is left facing.

static void FlipIfLeftFacing(
    Shape& shape, // io
    EYAW   eyaw,  // in
    int    ncols) // in: image width in pixels
{
    if (!IsLeftFacing(eyaw))
        return; // right facing or frontal: leave the shape untouched
    shape = FlipShape(shape, ncols);
}
// Align the model meanshape to the detpar from the face and feature dets.
// Complexity enters in because the detected eyes and mouth may be useful
// if available. The "left facing" code is needed because our three
// quarter models are for right facing faces (wrt the viewer).
//
// The branches below are tried in order of decreasing reliability:
// both eyes and mouth, both eyes (mouth estimated or ignored), one eye
// and mouth, and finally the face rectangle alone.  For left-facing
// faces the meanshape is flipped before alignment (the alignment
// helpers assume a right-facing face) and the result is flipped back.
static Shape StartShapeFromDetPar(
const DetPar& detpar_roi, // in: detpar wrt the ROI
const Image& face_roi, // in
const Shape& meanshape, // in
ESTART estart) // in: use mouth etc. to posn start shape?
{
CV_Assert(estart == ESTART_RECT_ONLY ||
estart == ESTART_EYES ||
estart == ESTART_EYE_AND_MOUTH);
Shape startshape;
Shape meanshape1(meanshape);
// Best case: model wants eyes and mouth, and all three were detected.
if (estart == ESTART_EYE_AND_MOUTH && // use both eyes and mouth?
Valid(detpar_roi.mouthx) &&
Valid(detpar_roi.lex) &&
Valid(detpar_roi.rex))
{
FlipIfLeftFacing(meanshape1, detpar_roi.eyaw, face_roi.cols);
startshape = AlignMeanShapeToBothEyesAndMouth(detpar_roi, meanshape1);
FlipIfLeftFacing(startshape, detpar_roi.eyaw, face_roi.cols);
}
// Both eyes detected (mouth missing, or model doesn't want the mouth).
else if (Valid(detpar_roi.lex) && // use both eyes?
Valid(detpar_roi.rex))
{
FlipIfLeftFacing(meanshape1, detpar_roi.eyaw, face_roi.cols);
// TODO Tune the following code, what approach is best?
// Frontal faces get an estimated mouth point added to the alignment;
// non-frontal faces align on the eye-to-eye segment only.
if (detpar_roi.eyaw == EYAW00)
startshape = AlignMeanShapeToBothEyesEstMouth(detpar_roi, meanshape1);
else
startshape = AlignMeanShapeToBothEyesNoMouth(detpar_roi, meanshape1);
FlipIfLeftFacing(startshape, detpar_roi.eyaw, face_roi.cols);
}
// Only the left eye and the mouth were detected.
else if (estart == ESTART_EYE_AND_MOUTH && // use left eye and mouth?
Valid(detpar_roi.mouthx) &&
Valid(detpar_roi.lex))
{
FlipIfLeftFacing(meanshape1, detpar_roi.eyaw, face_roi.cols);
startshape = AlignMeanShapeToLeftEyeAndMouth(detpar_roi, meanshape1);
FlipIfLeftFacing(startshape, detpar_roi.eyaw, face_roi.cols);
}
// Only the right eye and the mouth were detected.
else if (estart == ESTART_EYE_AND_MOUTH && // use right eye and mouth?
Valid(detpar_roi.mouthx) &&
Valid(detpar_roi.rex))
{
FlipIfLeftFacing(meanshape1, detpar_roi.eyaw, face_roi.cols);
startshape = AlignMeanShapeToRightEyeAndMouth(detpar_roi, meanshape1);
FlipIfLeftFacing(startshape, detpar_roi.eyaw, face_roi.cols);
}
else // last resort: use the face det rectangle (can't use facial features)
{
// No flip needed here: AlignMeanShapeToFaceDetRect handles left-facing
// detpars internally (via FlipDetPar).
startshape =
AlignMeanShapeToFaceDetRect(detpar_roi, meanshape1,
FACERECT_SCALE_WHEN_NO_EYES, face_roi);
}
return JitterPointsAt00(startshape);
}
// Estimate the face rotation in degrees from the inter-eye angle.
// Returns 0 if both eyes were not detected.
static double EstRotFromEyeAngle(
    const DetPar& detpar) // in: detpar wrt the ROI
{
    double rot = 0;
    // Fix: the original guard tested Valid(detpar.rey) -- mixing the left
    // eye's x with the right eye's y -- whereas every other eye-validity
    // check in this file tests lex/rex.  Test rex for consistency.
    if (Valid(detpar.lex) && Valid(detpar.rex)) // both eyes detected?
        rot = RadsToDegrees(-atan2(detpar.rey - detpar.ley,
                                   detpar.rex - detpar.lex));
    return rot;
}
// Get the start shape and the ROI around it, given the face rectangle.
// Depending on the estart field in the model, we detect the eyes
// and mouth and use those to help fit the start shape.
// (Note also that the ROI is flipped if necessary because our three-quarter
// models are right facing and the face may be left facing.)

static void StartShapeAndRoi(     // we have the facerect, now get the rest
    Shape&       startshape,      // out: the start shape we are looking for
    Image&       face_roi,        // out: ROI around face, possibly rotated upright
    DetPar&      detpar_roi,      // out: detpar wrt to face_roi
    DetPar&      detpar,          // io:  detpar wrt to img (has face rect on entry)
    const Image& img,             // in:  the image (grayscale)
    const vec_Mod& mods,          // in:  a vector of models, one for each yaw range
                                  //      (use only estart, and meanshape)
    StasmCascadeClassifier cascade)
{
    PossiblySetRotToZero(detpar.rot); // treat small rots as zero rots

    FaceRoiAndDetPar(face_roi, detpar_roi, // get ROI around face
                     img, detpar, false);

    DetectEyesAndMouth(detpar_roi, // use OpenCV eye and mouth detectors
                       face_roi, cascade);

    // Some face detectors return the face rotation, some don't (in
    // the call to NextFace_ just made via NextStartShapeAndRoi).
    // If we don't have the rotation, then estimate it from the eye
    // angle, if the eyes are available.
    if (!Valid(detpar.rot)) // don't have the face rotation?
    {
        detpar_roi.rot = EstRotFromEyeAngle(detpar_roi);
        PossiblySetRotToZero(detpar_roi.rot);
        detpar.rot = detpar_roi.rot;
        if (detpar.rot != 0)
        {
            // face is rotated: rotate ROI and re-get the eyes and mouth
            // TODO: Prevent bogus OpenCV assert fail face_roi.data == img.data.
            face_roi = Image(0,0);
            FaceRoiAndDetPar(face_roi, detpar_roi,
                             img, detpar, false);
            DetectEyesAndMouth(detpar_roi, // use OpenCV eye and mouth detectors
                               face_roi, cascade);
        }
    }
    if (trace_g)
        lprintf("%-6.6s yaw %3.0f rot %3.0f ",
                EyawAsString(detpar_roi.eyaw), detpar_roi.yaw, detpar_roi.rot);
    else
        logprintf("%-6.6s yaw %3.0f rot %3.0f ",
                  EyawAsString(detpar_roi.eyaw), detpar_roi.yaw, detpar_roi.rot);

    // select an ASM model based on the face's yaw
    const Mod* mod = mods[ABS(EyawAsModIndex(detpar_roi.eyaw, mods))];

    const ESTART estart = mod->Estart_();
    CV_Assert(estart == ESTART_EYES || estart == ESTART_EYE_AND_MOUTH);

    startshape = StartShapeFromDetPar(detpar_roi,
                                      face_roi, mod->MeanShape_(), estart);

    if (IsLeftFacing(detpar_roi.eyaw))
        FlipImgInPlace(face_roi);

    // Fix: the original discarded JitterPointsAt00's return value, which
    // made the call a no-op (elsewhere in this file, StartShapeFromDetPar
    // uses its return value).  Assign the result back so the jitter takes
    // effect; harmless if the shape was already jittered.
    startshape = JitterPointsAt00(startshape);
}
// Get the start shape for the next face in the image, and the ROI around it.
// The returned shape is wrt the ROI frame.
//
// Note that we previously called the face detector, and the face
// rectangle(s) were saved privately in facedet, and are now ready for
// immediate retrieval by NextFace_.
//
// The following comment applies for three-quarter models (not for frontal
// models): If the three-quarter face is left-facing, we flip the ROI so
// the returned face is right-facing. This is because our three-quarter
// ASM models are for right-facing faces. For frontal faces (the yaw00
// model), faces are not flipped.

bool NextStartShapeAndRoi( // use face detector results to estimate start shape
    Shape&       startshape, // out: the start shape
    Image&       face_roi,   // out: ROI around face, possibly rotated upright
    DetPar&      detpar_roi, // out: detpar wrt to face_roi
    DetPar&      detpar,     // out: detpar wrt to img
    const Image& img,        // in: the image (grayscale)
    const vec_Mod& mods,     // in: a vector of models, one for each yaw range
                             //     (use only estart, and meanshape)
    FaceDet&     facedet,    // io: the face detector (internal face index bumped)
    StasmCascadeClassifier cascade)
{
    detpar = facedet.NextFace_(); // get next face's detpar from the face det

    const bool got_face = Valid(detpar.x); // did NextFace_ return a face?
    if (got_face)
        StartShapeAndRoi(startshape, face_roi, detpar_roi, detpar,
                         img, mods, cascade);

    return got_face;
}
} // namespace stasm