// pinstart.cpp: utilities for creating a start shape from manually pinned points // // Copyright (C) 2005-2013, Stephen Milborrow #include "stasm.h" namespace stasm { // The following model was machine generated by running // 5pointpose.R on the training shapes and their reflections. static double EstYawFrom5PointShape(const double* x) // x has 10 elements { return 34.342 - 7.0267 * MAX(0, x[3] - -0.34708) + 10.739 * MAX(0, -0.34708 - x[3]) + 116.29 * MAX(0, x[4] - 0.21454) - 159.56 * MAX(0, 0.21454 - x[4]) + 12.513 * MAX(0, x[7] - 0.3384) + 7.2764 * MAX(0, 0.3384 - x[7]) + 260.14 * MAX(0, x[3] - -0.34708) * MAX(0, x[5] - -0.010838) - 160.64 * MAX(0, x[3] - -0.34708) * MAX(0, -0.010838 - x[5]) - 284.88 * MAX(0, -0.34708 - x[3]) * MAX(0, x[5] - -0.055581) + 654.54 * MAX(0, -0.34708 - x[3]) * MAX(0, -0.055581 - x[5]) ; } static void RotShapeInPlace( Shape& shape, // io double rot, // in: in-plane rotation angle in degrees, pos is anticlock double x, // in: rotation origin double y) // in { CV_Assert(rot >= -360 && rot <= 360); // sanity check, 360 is arb const MAT rotmat = getRotationMatrix2D(cv::Point2f(float(x), float(y)), rot, 1.); AlignShapeInPlace(shape, rotmat); } static double SumElems( // return the sum of the elemens in mat const MAT& mat) // in { return cv::sum(mat)[0]; } // If shape does not have 5 points, return rot and yaw of 0. // Else assume that the following five points are present, in this order: // 0 LEyeOuter // 1 REyeOuter // 2 CNoseTip // 3 LMouthCorner // 4 RMouthCorner static void EstRotAndYawFrom5PointShape( double& rot, // out double& yaw, // out const Shape shape) // in { if (shape.rows != 5 || // basic sanity checks shape(0, IX) > shape(1, IX) || // eye corners shape(3, IX) > shape(4, IX)) // mouth corners { rot = yaw = 0; return; } Shape workshape(shape.clone()); // local copy we can modify // Derotate shape using eye angle as estimate of in-plane rotation. // We rotate about the shape centroid. 
// TODO EstYawFrom5PointShape was trained on shapes without this // derotation, so must retrain the model for best results. rot = RadsToDegrees(-atan2(workshape(1, IY) - workshape(0, IY), workshape(1, IX) - workshape(0, IX))); PossiblySetRotToZero(rot); // treat small Rots as zero Rots if (rot) RotShapeInPlace(workshape, -rot, SumElems(workshape.col(IX)) / 5, SumElems(workshape.col(IY)) / 5); // mean-center x and y MAT X(workshape.col(IX)); X -= SumElems(X) / 5; MAT Y(workshape.col(IY)); Y -= SumElems(Y) / 5; // normalize so shape size is 1 double norm = 0; for (int i = 0; i < 5; i++) norm += SQ(X(i)) + SQ(Y(i)); workshape /= sqrt(norm); yaw = EstYawFrom5PointShape(Buf(workshape)); } static Shape PinMeanShape( // align mean shape to the pinned points const Shape& pinned, // in: at least two of these points must be set const Shape& meanshape) // in { CV_Assert(pinned.rows == meanshape.rows); int ipoint, nused = 0; // number of points used in pinned for (ipoint = 0; ipoint < meanshape.rows; ipoint++) if (PointUsed(pinned, ipoint)) nused++; if (nused < 2) Err("Need at least two pinned landmarks"); // Create an anchor shape (the pinned landmarks) and an alignment shape (the // points in meanshape that correspond to those pinned landmarks). Do that by // copying the used points in pinned to pinned_used, and the corresponding // points in meanshape to meanused. 
Shape pinned_used(nused, 2), mean_used(nused, 2); int i = 0; for (ipoint = 0; ipoint < meanshape.rows; ipoint++) if (PointUsed(pinned, ipoint)) { pinned_used(i, IX) = pinned(ipoint, IX); pinned_used(i, IY) = pinned(ipoint, IY); mean_used(i, IX) = meanshape(ipoint, IX); mean_used(i, IY) = meanshape(ipoint, IY); i++; } CV_Assert(i == nused); // transform meanshape to pose generated by aligning mean_used to pinned_used Shape TransformedShape( AlignShape(meanshape, AlignmentMat(mean_used, pinned_used))); return JitterPointsAt00(TransformedShape); } static bool HaveCanonical5Points( const Shape& pinned) // in: pinned landmarks { return PointUsed(pinned, L_LEyeOuter) && PointUsed(pinned, L_REyeOuter) && PointUsed(pinned, L_CNoseTip) && PointUsed(pinned, L_LMouthCorner) && PointUsed(pinned, L_RMouthCorner); } static Shape As5PointShape( // return a 5 point shape const Shape& pinned, // in: pinned landmarks, canonical 5 points are best const Shape& meanshape) // in: used only if pinned landmarks are not canonical { CV_Assert(pinned.rows == stasm_NLANDMARKS); CV_Assert(meanshape.rows == stasm_NLANDMARKS); Shape newpinned(pinned); if (!HaveCanonical5Points(pinned)) { // Not canonical 5 point pinned landmarks. Impute the missing points. // This is not an optimal situation but will at least allow estimation // of pose from an arb set of pinned landmarks. 
newpinned = PinMeanShape(pinned, meanshape); } Shape outshape(5, 2); // 5 point shape outshape(0, IX) = newpinned(L_LEyeOuter, IX); outshape(0, IY) = newpinned(L_LEyeOuter, IY); outshape(1, IX) = newpinned(L_REyeOuter, IX); outshape(1, IY) = newpinned(L_REyeOuter, IY); outshape(2, IX) = newpinned(L_CNoseTip, IX); outshape(2, IY) = newpinned(L_CNoseTip, IY); outshape(3, IX) = newpinned(L_LMouthCorner, IX); outshape(3, IY) = newpinned(L_LMouthCorner, IY); outshape(4, IX) = newpinned(L_RMouthCorner, IX); outshape(4, IY) = newpinned(L_RMouthCorner, IY); return outshape; } static void InitDetParEyeMouthFromShape( // fill in eye and mouth fields of detpar DetPar& detpar, Shape& shape) { if (PointUsed(shape, L_LPupil)) { detpar.lex = shape(L_LPupil, IX); detpar.ley = shape(L_LPupil, IY); } if (PointUsed(shape, L_RPupil)) { detpar.rex = shape(L_RPupil, IX); detpar.rey = shape(L_RPupil, IY); } if (PointUsed(shape, L_CBotOfBotLip)) { detpar.mouthx = shape(L_CBotOfBotLip, IX); detpar.mouthy = shape(L_CBotOfBotLip, IY); } } // We generated the startshape without using the face detector, now "back // generate" the detpar (the position of this does not have to exactly // match the detpar that would generate the startshape). This approach // allows detpar to be handled uniformly in PinnedStartShapeAndRoi. 
static DetPar PseudoDetParFromStartShape(
    const Shape& startshape, // in: pupils and bottom of bottom lip must be set
    double       rot,        // in: copied to detpar.rot
    double       yaw,        // in: copied to detpar.yaw, also determines detpar.eyaw
    int          nmods)      // in: passed on to DegreesAsEyaw
{
    // Landmarks that anchor the pseudo face rectangle.
    const double lefteye_x  = startshape(L_LPupil, IX);
    const double lefteye_y  = startshape(L_LPupil, IY);
    const double righteye_x = startshape(L_RPupil, IX);
    const double righteye_y = startshape(L_RPupil, IY);
    const double mouth_x    = startshape(L_CBotOfBotLip, IX);
    const double mouth_y    = startshape(L_CBotOfBotLip, IY);

    // All three landmarks must be present in the start shape.
    CV_Assert(PointUsed(lefteye_x, lefteye_y));
    CV_Assert(PointUsed(righteye_x, righteye_y));
    CV_Assert(PointUsed(mouth_x, mouth_y));

    // Point midway between the eyes.
    const double eyemid_x = (lefteye_x + righteye_x) / 2;
    const double eyemid_y = (lefteye_y + righteye_y) / 2;

    // The rectangle is square, with sides twice the eye-mouth distance.
    const double side = 2.0 * PointDist(eyemid_x, eyemid_y, mouth_x, mouth_y);

    DetPar result;

    // Position is on the line from the eye midpoint to the mouth,
    // weighted 70% towards the eyes.
    result.x      = .7 * eyemid_x + .3 * mouth_x;
    result.y      = .7 * eyemid_y + .3 * mouth_y;
    result.width  = side;
    result.height = side;

    result.lex    = lefteye_x;
    result.ley    = lefteye_y;
    result.rex    = righteye_x;
    result.rey    = righteye_y;
    result.mouthx = mouth_x;
    result.mouthy = mouth_y;

    result.rot  = rot;
    result.eyaw = DegreesAsEyaw(yaw, nmods); // determines what ASM model to use
    result.yaw  = yaw;

    return result;
}

// Use the given pinned face landmarks to init the start shape.  The
// current implementation works best if the pinned landmarks are the five
// canonical pinned landmarks (viz. LEyeOuter, REyeOuter, CNoseTip,
// LMouthCorner, RMouthCorner).  This is because it was trained on those
// points.  But the routine also works if any two or more points are pinned.

void PinnedStartShapeAndRoi( // use the pinned landmarks to init the start shape Shape& startshape, // out: the start shape (in ROI frame) Image& face_roi, // out: ROI around face, possibly rotated upright DetPar& detpar_roi, // out: detpar wrt to face_roi DetPar& detpar, // out: detpar wrt to img Shape& pinned_roi, // out: pinned arg translated to ROI frame const Image& img, // in: the image (grayscale) const vec_Mod& mods, // in: a vector of models, one for each yaw range const Shape& pinned) // in: manually pinned landmarks { double rot, yaw; EstRotAndYawFrom5PointShape(rot, yaw, As5PointShape(pinned, mods[0]->MeanShape_())); const EYAW eyaw = DegreesAsEyaw(yaw, NSIZE(mods)); const int imod = EyawAsModIndex(eyaw, mods); // select ASM model based on yaw if (trace_g) lprintf("%-6.6s yaw %3.0f rot %3.0f ", EyawAsString(eyaw), yaw, rot); pinned_roi = pinned; // use pinned_roi as a temp shape we can change Image workimg(img); // possibly flipped image if (IsLeftFacing(eyaw)) // left facing? (our models are for right facing faces) { pinned_roi = FlipShape(pinned_roi, workimg.cols); FlipImgInPlace(workimg); } const Mod* mod = mods[ABS(imod)]; startshape = PinMeanShape(pinned_roi, mod->MeanShape_()); startshape = mod->ConformShapeToMod_Pinned_(startshape, pinned_roi); detpar = PseudoDetParFromStartShape(startshape, rot, yaw, NSIZE(mods)); if (IsLeftFacing(eyaw)) detpar.rot *= -1; FaceRoiAndDetPar(face_roi, detpar_roi, workimg, detpar, false); startshape = ImgShapeToRoiFrame(startshape, detpar_roi, detpar); pinned_roi = ImgShapeToRoiFrame(pinned_roi, detpar_roi, detpar); // following line not strictly necessary because don't actually need eyes/mouth InitDetParEyeMouthFromShape(detpar_roi, startshape); if (IsLeftFacing(eyaw)) { detpar = FlipDetPar(detpar, img.cols); detpar.rot = -detpar.rot; detpar_roi.x += 2. * (face_roi.cols/2. - detpar_roi.x); } } } // namespace stasm