startshape.cpp 18.2 KB

Edit Raw Blame History

// startshape.cpp: routines for finding the start shape for an ASM search
//
// The model "estart" determines the method we use to create the start shape.
// (The InitMods function initializes estart during Stasm initialization.)
// The current open-source version of Stasm uses estart=ESTART_EYES.
//
// 1. With the model estart=ESTART_RECT_ONLY, the start shape is created by
// aligning the model mean face shape to the face rectangle.  (The face
// rectangle is found by the face detector prior to calling routines in
// this file.)
//
// 2. With the model estart=ESTART_EYES (currently used for the frontal
// model), the start shape is created as follows.  Using the face rectangle
// found by the face detector, Stasm searches for the eyes in the
// appropriate subregions within the rectangle.  If both eyes are found the
// face is rotated so the eyes are horizontal.  The start shape is then
// formed by aligning the mean training shape to the eyes.  If either eye
// isn't found, the start shape is aligned to the face detector rectangle.
//
// Note however that if the eye angle is less than +-5 degrees, we treat it
// as 0 degrees (and don't rotate the face as described above).  This
// minimizes preprocessing.
//
// 3. With the model estart=ESTART_EYE_AND_MOUTH (currently used for the
// three-quarter models), the start shape is generated as above, but we
// search for the mouth too and use it if is detected.
//
// Copyright (C) 2005-2013, Stephen Milborrow
#include "stasm.h"
namespace stasm
{
// The constant 200 is arbitrary, except that the value used by Stasm
// must match that used by Tasm when training the model.  Using 200 instead
// of say, 1, means that the detector average face is displayable at a decent
// size which is useful for debugging.
static const int DET_FACE_WIDTH = 200;

// Following used if we did not detect eyes.  We empirically get slighter better
// Stasm results if we slightly reduce the size of the detected face rectangle.
static double FACERECT_SCALE_WHEN_NO_EYES = .95;

//-----------------------------------------------------------------------------
// Align meanshape to the face detector rectangle and return it as startshape
// This ignores the eye and mouth, if any.
static Shape AlignMeanShapeToFaceDetRect(
    const DetPar& detpar,                 // in
    const Shape&  meanshape,              // in
    double        scale,                  // in: scale the face rectangle
    const Image&  img)                    // io: the image (grayscale)
{
    if (trace_g)
        lprintf("AlignToFaceDetBox                ");

    DetPar detpar1(detpar);

    if (IsLeftFacing(detpar.eyaw))
        detpar1 = FlipDetPar(detpar, img.cols);

    CV_Assert(meanshape.rows > 0 && meanshape.cols == 2);

    const double xscale = detpar1.width  * scale / DET_FACE_WIDTH;
    const double yscale = detpar1.height * scale / DET_FACE_WIDTH;

    Shape startshape = AlignShape(meanshape,
                                     xscale,      0, detpar1.x,
                                          0, yscale, detpar1.y);

    return startshape;
}

// Return the model meanshape aligned to both eyes and the mouth.
//
// The central idea is to form a triangular shape of the eyes and
// bottom-of-mouth from the face detector params, and align the same
// triangle in the meanshape to this triangle.
static Shape AlignMeanShapeToBothEyesAndMouth(
    const DetPar& detpar,                      // in
    const Shape&  meanshape)                   // in
{
    if (trace_g)
        lprintf("AlignToBothEyesAndMouth          ");

    CV_Assert(NSIZE(meanshape) > 0 && PointUsed(meanshape, 0));
    CV_Assert(Valid(detpar.mouthx));
    CV_Assert(Valid(detpar.lex));
    CV_Assert(Valid(detpar.rex));

    Shape mean_tri(3, 2), det_tri(3, 2);       // triangle of eyes and mouth
    const double x_meanmouth =
       (meanshape(L_CTopOfTopLip, IX) + meanshape(L_CBotOfBotLip, IX)) / 2.;

    const double y_meanmouth =
       (meanshape(L_CTopOfTopLip, IY) + meanshape(L_CBotOfBotLip, IY)) / 2.;

    mean_tri(0, IX) = meanshape(L_LPupil, IX); // left eye
    mean_tri(0, IY) = meanshape(L_LPupil, IY);
    mean_tri(1, IX) = meanshape(L_RPupil, IX); // right eye
    mean_tri(1, IY) = meanshape(L_RPupil, IY);
    mean_tri(2, IX) = x_meanmouth;             // mouth
    mean_tri(2, IY) = y_meanmouth;

    det_tri(0, IX) = detpar.lex;               // left eye
    det_tri(0, IY) = detpar.ley;
    det_tri(1, IX) = detpar.rex;               // right eye
    det_tri(1, IY) = detpar.rey;
    det_tri(2, IX) = detpar.mouthx;            // mouth
    det_tri(2, IY) = detpar.mouthy;

    return AlignShape(meanshape, AlignmentMat(mean_tri, det_tri));
}

// return the model meanshape aligned to both eyes (mouth is not avail)
static Shape AlignMeanShapeToBothEyesNoMouth(
    const DetPar& detpar,                      // in
    const Shape&  meanshape)                   // in
{
    if (trace_g)
        lprintf("AlignToBothEyesNoMouth           ");

    CV_Assert(NSIZE(meanshape) > 0 && PointUsed(meanshape, 0));
    CV_Assert(Valid(detpar.lex));
    CV_Assert(Valid(detpar.rex));

    Shape meanline(2, 2), detline(2, 2);       // line from eye to eye
    meanline(0, IX) = meanshape(L_LPupil, IX); // left eye
    meanline(0, IY) = meanshape(L_LPupil, IY);
    meanline(1, IX) = meanshape(L_RPupil, IX); // right eye
    meanline(1, IY) = meanshape(L_RPupil, IY);

    detline(0, IX) = detpar.lex;               // left eye
    detline(0, IY) = detpar.ley;
    detline(1, IX) = detpar.rex;               // right eye
    detline(1, IY) = detpar.rey;

    return AlignShape(meanshape, AlignmentMat(meanline, detline));
}

// return the model meanshape aligned to both eyes (mouth is not avail)
static Shape AlignMeanShapeToBothEyesEstMouth(
    const DetPar& detpar,                      // in
    const Shape&  meanshape)                   // in
{
    // .48 was tested to give slightly better worse case results than .50
    static double EYEMOUTH_TO_FACERECT_RATIO = .48;

    if (trace_g)
        lprintf("AlignToBothEyesNoMouth(EstMouth) ");

    CV_Assert(NSIZE(meanshape) > 0 && PointUsed(meanshape, 0));
    CV_Assert(Valid(detpar.lex));
    CV_Assert(Valid(detpar.rex));

    // estimate the mouth's position
    double x_eyemid = 0;
    switch (detpar.eyaw)
    {
        case EYAW00:                                 //  mid point
            x_eyemid = .50 * detpar.lex + .50 * detpar.rex;
            break;
        // TODO The constants below have not been empirically optimized.
        case EYAW_45:                                // closer to left eye
            x_eyemid = .30 * detpar.lex + .70 * detpar.rex;
            break;
        case EYAW_22:                                // closer to left eye
            x_eyemid = .30 * detpar.lex + .70 * detpar.rex;
            break;
        case EYAW22:                                 // closer to right eye
            x_eyemid = .30 * detpar.lex + .70 * detpar.rex;
            break;
        case EYAW45:                                 // closer to right eye
            x_eyemid = .30 * detpar.lex + .70 * detpar.rex;
            break;
        default:
            Err("AlignMeanShapeToBothEyesEstMouth: Invalid eyaw %d", detpar.eyaw);
            break;
    }
    const double y_eyemid = (detpar.ley + detpar.rey) / 2;

    Shape mean_tri(3, 2), det_tri(3, 2);             // triangle of eyes and mouth
    mean_tri(0, IX) = meanshape(L_LPupil, IX);       // left eye
    mean_tri(0, IY) = meanshape(L_LPupil, IY);
    mean_tri(1, IX) = meanshape(L_RPupil, IX);       // right eye
    mean_tri(1, IY) = meanshape(L_RPupil, IY);
    mean_tri(2, IX) = meanshape(L_CBotOfBotLip, IX); // mouth
    mean_tri(2, IY) = meanshape(L_CBotOfBotLip, IY);

    det_tri(0, IX) = detpar.lex;                     // left eye
    det_tri(0, IY) = detpar.ley;
    det_tri(1, IX) = detpar.rex;                     // right eye
    det_tri(1, IY) = detpar.rey;
    det_tri(2, IX) = x_eyemid;                       // mouth
    det_tri(2, IY) = y_eyemid + EYEMOUTH_TO_FACERECT_RATIO * detpar.width;

    return AlignShape(meanshape, AlignmentMat(mean_tri, det_tri));
}

static Shape AlignMeanShapeToLeftEyeAndMouth(
    const DetPar& detpar,                             // in
    const Shape&  meanshape)                          // in
{
    if (trace_g)
        lprintf("AlignToLeftEyeAndMouth           ");

    CV_Assert(NSIZE(meanshape) > 0 && PointUsed(meanshape, 0));
    CV_Assert(Valid(detpar.lex));    // left eye valid?
    CV_Assert(!Valid(detpar.rex));   // right eye invalid? (else why are we here?)
    CV_Assert(Valid(detpar.mouthx)); // mouth valid?
    Shape meanline(2, 2), detline(2, 2);              // line from eye to mouth
    const double x_meanmouth =
       (meanshape(L_CTopOfTopLip, IX) + meanshape(L_CBotOfBotLip, IX)) / 2;

    const double y_meanmouth =
       (meanshape(L_CTopOfTopLip, IY) + meanshape(L_CBotOfBotLip, IY)) / 2;

    meanline(0, IX) = meanshape(L_LPupil, IX);        // left eye
    meanline(0, IY) = meanshape(L_LPupil, IY);
    meanline(1, IX) = x_meanmouth;                    // mouth
    meanline(1, IY) = y_meanmouth;

    detline(0, IX) = detpar.lex;                      // left eye
    detline(0, IY) = detpar.ley;
    detline(1, IX) = detpar.mouthx;                   // mouth
    detline(1, IY) = detpar.mouthy;

    return AlignShape(meanshape, AlignmentMat(meanline, detline));
}

static Shape AlignMeanShapeToRightEyeAndMouth(
    const DetPar& detpar,                             // in
    const Shape&  meanshape)                          // in
{
    if (trace_g)
        lprintf("AlignToRightEyeAndMouth          ");

    CV_Assert(NSIZE(meanshape) > 0 && PointUsed(meanshape, 0));
    CV_Assert(!Valid(detpar.lex));   // left eye invalid? (else why are we here?)
    CV_Assert(Valid(detpar.rex));    // right eye valid?
    CV_Assert(Valid(detpar.mouthx)); // mouth valid?
    const double x_meanmouth =
       (meanshape(L_CTopOfTopLip, IX) + meanshape(L_CBotOfBotLip, IX)) / 2;

    const double y_meanmouth =
       (meanshape(L_CTopOfTopLip, IY) + meanshape(L_CBotOfBotLip, IY)) / 2;

    Shape meanline(2, 2), detline(2, 2);              // line from eye to mouth
    meanline(0, IX) = meanshape(L_RPupil, IX);        // right eye
    meanline(0, IY) = meanshape(L_RPupil, IY);
    meanline(1, IX) = x_meanmouth;                    // mouth
    meanline(1, IY) = y_meanmouth;

    detline(0, IX) = detpar.rex;                      // right eye
    detline(0, IY) = detpar.rey;
    detline(1, IX) = detpar.mouthx;                   // mouth
    detline(1, IY) = detpar.mouthy;

    return AlignShape(meanshape, AlignmentMat(meanline, detline));
}

static void FlipIfLeftFacing(
    Shape& shape,             // io
    EYAW   eyaw,              // in
    int    ncols)             // in
{
    if (IsLeftFacing(eyaw))
        shape = FlipShape(shape, ncols);
}

// Align the model meanshape to the detpar from the face and feature dets.
// Complexity enters in because the detected eyes and mouth may be useful
// if available.  The "left facing" code is needed because our three
// quarter models are for right facing faces (wrt the viewer).
static Shape StartShapeFromDetPar(
    const DetPar& detpar_roi,      // in: detpar wrt the ROI
    const Image&  face_roi,        // in
    const Shape&  meanshape,       // in
    ESTART        estart)          // in: use mouth etc. to posn start shape?
{
    CV_Assert(estart == ESTART_RECT_ONLY ||
              estart == ESTART_EYES ||
              estart == ESTART_EYE_AND_MOUTH);

    Shape startshape;
    Shape meanshape1(meanshape);

    if (estart == ESTART_EYE_AND_MOUTH &&             // use both eyes and mouth?
        Valid(detpar_roi.mouthx) &&
        Valid(detpar_roi.lex) &&
        Valid(detpar_roi.rex))
    {
        FlipIfLeftFacing(meanshape1, detpar_roi.eyaw, face_roi.cols);
        startshape = AlignMeanShapeToBothEyesAndMouth(detpar_roi, meanshape1);
        FlipIfLeftFacing(startshape, detpar_roi.eyaw, face_roi.cols);
    }
    else if (Valid(detpar_roi.lex) &&                 // use both eyes?
             Valid(detpar_roi.rex))
    {
        FlipIfLeftFacing(meanshape1, detpar_roi.eyaw, face_roi.cols);
        // TODO Tune the following code, what approach is best?
        if (detpar_roi.eyaw == EYAW00)
            startshape = AlignMeanShapeToBothEyesEstMouth(detpar_roi, meanshape1);
        else
            startshape = AlignMeanShapeToBothEyesNoMouth(detpar_roi, meanshape1);
        FlipIfLeftFacing(startshape, detpar_roi.eyaw, face_roi.cols);
    }
    else if (estart == ESTART_EYE_AND_MOUTH &&        // use left eye and mouth?
             Valid(detpar_roi.mouthx) &&
             Valid(detpar_roi.lex))
    {
        FlipIfLeftFacing(meanshape1, detpar_roi.eyaw, face_roi.cols);
        startshape = AlignMeanShapeToLeftEyeAndMouth(detpar_roi, meanshape1);
        FlipIfLeftFacing(startshape, detpar_roi.eyaw, face_roi.cols);
    }
    else if (estart == ESTART_EYE_AND_MOUTH &&        // use right eye and mouth?
             Valid(detpar_roi.mouthx) &&
             Valid(detpar_roi.rex))
    {
        FlipIfLeftFacing(meanshape1, detpar_roi.eyaw, face_roi.cols);
        startshape = AlignMeanShapeToRightEyeAndMouth(detpar_roi, meanshape1);
        FlipIfLeftFacing(startshape, detpar_roi.eyaw, face_roi.cols);
    }
    else // last resort: use the face det rectangle (can't use facial features)
    {
        startshape =
            AlignMeanShapeToFaceDetRect(detpar_roi, meanshape1,
                                        FACERECT_SCALE_WHEN_NO_EYES, face_roi);
    }
    return JitterPointsAt00(startshape);
}

static double EstRotFromEyeAngle( // estimate face rotation from intereye angle
    const DetPar& detpar)         // in: detpar wrt the ROI
{
    double rot = 0;

    if (Valid(detpar.lex) && Valid(detpar.rey)) // both eyes detected?
        rot = RadsToDegrees(-atan2(detpar.rey - detpar.ley,
                                   detpar.rex - detpar.lex));

    return rot;
}

// Get the start shape and the ROI around it, given the face rectangle.
// Depending on the estart field in the model, we detect the eyes
// and mouth and use those to help fit the start shape.
// (Note also that the ROI is flipped if necessary because our three-quarter
// models are right facing and the face may be left facing.)
static void StartShapeAndRoi(  // we have the facerect, now get the rest
    Shape&         startshape, // out: the start shape we are looking for
    Image&         face_roi,   // out: ROI around face, possibly rotated upright
    DetPar&        detpar_roi, // out: detpar wrt to face_roi
    DetPar&        detpar,     // io:  detpar wrt to img (has face rect on entry)
    const Image&   img,        // in:  the image (grayscale)
    const vec_Mod& mods,       // in:  a vector of models, one for each yaw range
                               //       (use only estart, and meanshape)
    StasmCascadeClassifier cascade)
{
    PossiblySetRotToZero(detpar.rot);         // treat small rots as zero rots
    FaceRoiAndDetPar(face_roi, detpar_roi,    // get ROI around face
                     img, detpar, false);

    DetectEyesAndMouth(detpar_roi,            // use OpenCV eye and mouth detectors
                       face_roi, cascade);

    // Some face detectors return the face rotation, some don't (in
    // the call to NextFace_ just made via NextStartShapeAndRoi).
    // If we don't have the rotation, then estimate it from the eye
    // angle, if the eyes are available.
    if (!Valid(detpar.rot)) // don't have the face rotation?
    {
        detpar_roi.rot = EstRotFromEyeAngle(detpar_roi);
        PossiblySetRotToZero(detpar_roi.rot);
        detpar.rot = detpar_roi.rot;
        if (detpar.rot != 0)
        {
            // face is rotated: rotate ROI and re-get the eyes and mouth
            // TODO: Prevent bogus OpenCV assert fail face_roi.data == img.data.
            face_roi = Image(0,0);

            FaceRoiAndDetPar(face_roi, detpar_roi,
                             img, detpar, false);

            DetectEyesAndMouth(detpar_roi,    // use OpenCV eye and mouth detectors
                               face_roi, cascade);
        }
    }
    if (trace_g)
        lprintf("%-6.6s yaw %3.0f rot %3.0f ",
            EyawAsString(detpar_roi.eyaw), detpar_roi.yaw, detpar_roi.rot);
    else
        logprintf("%-6.6s yaw %3.0f rot %3.0f ",
            EyawAsString(detpar_roi.eyaw), detpar_roi.yaw, detpar_roi.rot);

    // select an ASM model based on the face's yaw
    const Mod* mod = mods[ABS(EyawAsModIndex(detpar_roi.eyaw, mods))];

    const ESTART estart = mod->Estart_();
    CV_Assert(estart == ESTART_EYES || estart == ESTART_EYE_AND_MOUTH);

    startshape = StartShapeFromDetPar(detpar_roi,
                                      face_roi, mod->MeanShape_(), estart);

    if (IsLeftFacing(detpar_roi.eyaw))
        FlipImgInPlace(face_roi);

    JitterPointsAt00(startshape);

}

// Get the start shape for the next face in the image, and the ROI around it.
// The returned shape is wrt the ROI frame.
//
// Note that we we previously called the face detector, and the face
// rectangle(s) were saved privately in facedet, and are now ready for
// immediate retrieval by NextFace_.
//
// The following comment applies for three-quarter models (not for frontal
// models): If the three-quarter face is left-facing, we flip the ROI so
// the returned face is right-facing.  This is because our three-quarter
// ASM models are for right-facing faces.  For frontal faces (the yaw00
// model), faces are not flipped.
bool NextStartShapeAndRoi(     // use face detector results to estimate start shape
    Shape&         startshape, // out: the start shape
    Image&         face_roi,   // out: ROI around face, possibly rotated upright
    DetPar&        detpar_roi, // out: detpar wrt to face_roi
    DetPar&        detpar,     // out: detpar wrt to img
    const Image&   img,        // in:  the image (grayscale)
    const vec_Mod& mods,       // in:  a vector of models, one for each yaw range
                               //       (use only estart, and meanshape)
    FaceDet&       facedet,    // io:  the face detector (internal face index bumped)
    StasmCascadeClassifier cascade)
{
    detpar = facedet.NextFace_();  // get next face's detpar from the face det
    if (Valid(detpar.x))           // NextFace_ returned a face?
        StartShapeAndRoi(startshape, face_roi, detpar_roi, detpar, img, mods, cascade);

    return Valid(detpar.x);
}

} // namespace stasm