detection.cpp 15.1 KB
Newer Older
Matteo's avatar
Matteo committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
#include "detection.hpp"

#include <string>

namespace videoanalyser {
namespace detection {
using namespace cv;

namespace {
/**
 * @brief Builds a cv::RotatedRect in full-frame coordinates from a
 * Generalized Hough match vector.
 *
 * @param positions match vector: [0]/[1] = centre x/y relative to the
 * processed sub-image, [2] = detected scale, [3] = detected angle in degrees
 * @param width template width in pixels
 * @param height template height in pixels
 * @param offsetX x offset of the processed sub-image inside the frame
 * @param offsetY y offset of the processed sub-image inside the frame
 * @param processingScale factor mapping processing coordinates back to the
 * full-resolution frame (e.g. 2 when detection ran on a half-size image)
 * @return cv::RotatedRect the match rectangle in full-frame coordinates
 */
cv::RotatedRect get_rectangle_from_match(const cv::Vec4f& positions, int width, int height, int offsetX, int offsetY,
                                         float processingScale) {
    cv::RotatedRect rr;

    // Translate the match centre from sub-image to frame coordinates.
    cv::Point2f pos(positions[0] + offsetX, positions[1] + offsetY);
    float scale = positions[2];
    float angle = positions[3];

    rr.center = pos * processingScale;
    rr.size = cv::Size2f(width * scale * processingScale, height * scale * processingScale);
    rr.angle = angle;

    // NOTE: a previous version also extracted the four corner points via
    // rr.points() into a local array that was never read; that dead call has
    // been removed.

    return rr;
}

using ShapeMatch = std::tuple<std::vector<cv::Vec4f>, std::vector<cv::Vec4f>, cv::Mat, cv::Mat>;
ShapeMatch detect_shape(cv::Ptr<cv::GeneralizedHoughGuil> alg, int pos_thresh, cv::Mat processing_area) {
    cv::Mat positive_votes, negative_votes;
    std::vector<cv::Vec4f> positive_positions, negative_positions;

    alg->setPosThresh(pos_thresh);

    int num_prev_matches = 0;
    int threshold_increment = 0;
    int max_match_score = 0;

    // Process shapes with positive angles
    alg->setMinAngle(0);
    alg->setMaxAngle(3);

    while (true) {
        alg->detect(processing_area, positive_positions, positive_votes);
        int current_matches = positive_positions.size();
        if (current_matches == 1 || (current_matches == 0 && num_prev_matches == 0)) {
            // We detected the most interesting shape
            // Impossible to find with these parameters
            break;
        } else if (current_matches == 0 && num_prev_matches > 0) {
            // It is not possible to detect only one shape with the current
            // parameters
            alg->setPosThresh(pos_thresh + threshold_increment - 1);  // Decrease position value
            alg->detect(processing_area, positive_positions,
                        positive_votes);  // Detect all available shapes
            break;
        }
        num_prev_matches = current_matches;
        // Find maximum vote
        for (int j = 0; j < positive_votes.cols / 3; j++) {
            if (positive_votes.at<int>(3 * j) > max_match_score) max_match_score = positive_votes.at<int>(3 * j);
        }

        if (current_matches > 10) {
            threshold_increment += 5;  // To speed up computation when there are too many matches
        } else if (max_match_score - (pos_thresh + threshold_increment) > 100) {
            threshold_increment += 100;  // To speed up computation when there are few super high
                                         // matches
        } else {
            threshold_increment++;
        }
        alg->setPosThresh(pos_thresh + threshold_increment);
    }

    // Reset incremental position value
    threshold_increment = 0;
    num_prev_matches = 0;
    max_match_score = 0;
    // Process shapes with negative angles
    alg->setMinAngle(357);
    alg->setMaxAngle(360);
    while (true) {
        alg->detect(processing_area, negative_positions, negative_votes);
        int current_matches = negative_positions.size();
        if (current_matches == 1 || (current_matches == 0 && num_prev_matches == 0)) {
            // We detected the most interesting shape
            // Impossible to found with these parameters
            break;
        } else if (current_matches == 0 && num_prev_matches > 0) {
            // It is not possible to detect only one shape with the current
            // parameters
            alg->setPosThresh(pos_thresh + threshold_increment - 1);  // Decrease position value
            alg->detect(processing_area, negative_positions,
                        negative_votes);  // Detect all available shapes
            break;
        }
        num_prev_matches = current_matches;

        // Find maximum vote
        for (int j = 0; j < positive_votes.cols / 3; j++) {
            if (positive_votes.at<int>(3 * j) > max_match_score) max_match_score = positive_votes.at<int>(3 * j);
        }

        if (current_matches > 10) {
            threshold_increment += 5;  // To speed up computation when there are too many matches
        } else if (max_match_score - (pos_thresh + threshold_increment) > 100) {
            threshold_increment += 100;  // To speed up computation when there are few super high
                                         // matches
        } else {
            threshold_increment++;
        }
        alg->setPosThresh(pos_thresh + threshold_increment);
    }

    return std::make_tuple(positive_positions, negative_positions, positive_votes, negative_votes);
}

/**
 * @brief Loads the grayscale template image associated with the given
 * element type from the input directory.
 *
 * @param element_type the kind of scene element to load a template for
 * @return Result<core::Frame> the loaded template, or an Error when the
 * element type is unknown or the image file cannot be read
 */
Result<core::Frame> get_template_image(ElementType element_type) {
    std::string path;
    switch (element_type) {
        case ElementType::TAPE:
            path = "input/readingHead.png";
            break;
        case ElementType::CAPSTAN:
            path = "input/capstanBERIO058prova.png";
            break;
        default:
            return Error("Invalid element type");
    }
    // cv::imread does not throw on failure: it returns an empty Mat, which
    // the previous version silently wrapped in a Frame. Surface it as an
    // Error instead.
    cv::Mat template_image = cv::imread(path, cv::IMREAD_GRAYSCALE);
    if (template_image.empty()) {
        return Error("Could not load template image: " + path);
    }
    return core::Frame(template_image);
}
/**
 * @fn Result<Roi> find_roi_ght(core::Frame image, SceneElement element_to_find)
 * @brief Find the template of the given scene element in the image using the
 * Generalized Hough Transform and return the region of interest of the best
 * match. The best matches for positive and negative angles are computed
 * separately. If more than one shape is found, the one with the highest
 * score is chosen; if several shapes share the same highest score, the last
 * one is arbitrarily chosen.
 *
 * For information about the Generalized Hough Guil usage see the tutorial
 * at https://docs.opencv.org/4.7.0/da/ddc/tutorial_generalized_hough_ballard_guil.html
 *
 * @param image the frame to search in
 * @param element_to_find the SceneElement struct containing the parameters
 * for the Generalized Hough Transform
 * @return Result<Roi> the region of interest of the best match, or an Error
 * when no match is found
 */
Result<Roi> find_roi_ght(core::Frame image, SceneElement element_to_find) {
    // Save a grayscale version of image in gray_image
    core::Frame gray_image = core::Frame(image).convert_color(cv::COLOR_BGR2GRAY);
    // downsample the frame in half pixels for performance reasons
    core::Frame halved_gray_image = core::Frame(gray_image).clone().downsample(2);
    // Get input shape in grayscale and downsample it in half pixels
    Result<core::Frame> template_image_result = get_template_image(element_to_find.type);
    if (std::holds_alternative<Error>(template_image_result)) {
        return Error("Error while loading template image:" + std::get<Error>(template_image_result));
    }
    core::Frame template_image = std::get<core::Frame>(template_image_result).downsample(2);

    // Configure the Generalized Hough Guil detector. The distance and the
    // angle/scale/position thresholds come from the element description; the
    // remaining values (levels, dp, Canny thresholds, scale range) are fixed
    // tuning constants.
    cv::Ptr<cv::GeneralizedHoughGuil> ght = cv::createGeneralizedHoughGuil();
    ght->setMinDist(element_to_find.min_dist);
    ght->setLevels(360);
    ght->setDp(2);
    ght->setMaxBufferSize(1000);
    ght->setAngleStep(1);
    ght->setAngleThresh(element_to_find.threshold.angle);
    ght->setMinScale(0.9);
    ght->setMaxScale(1.1);
    ght->setScaleStep(0.01);
    ght->setScaleThresh(element_to_find.threshold.scale);
    ght->setCannyLowThresh(150);
    ght->setCannyHighThresh(240);
    ght->setTemplate(template_image);

    // Restrict the search to the region where the element is expected.
    // TAPE: bottom-central half of the half-size image (x from cols/4, width
    // cols/2, lower half in y). CAPSTAN: bottom-right region of the
    // full-size image (x from 3/4 of the width, lower half in y). Note the
    // two branches work in different coordinate scales (half vs full size);
    // the offsets/scale passed to get_rectangle_from_match below must match.
    cv::Rect processing_area;
    cv::Mat processing_image;
    if (element_to_find.type == ElementType::TAPE) {
        processing_area = cv::Rect(halved_gray_image.cols / 4, halved_gray_image.rows / 2, halved_gray_image.cols / 2,
                                   halved_gray_image.rows / 2);
        processing_image = halved_gray_image(processing_area);
    } else if (element_to_find.type == ElementType::CAPSTAN) {
        processing_area = cv::Rect(image.cols * 3 / 4, image.rows / 2, image.cols / 4, image.rows / 2);
        processing_image = gray_image(processing_area);
    }

    auto [positive_positions, negative_positions, posPos, posNeg] =
        detect_shape(ght, element_to_find.threshold.pos, processing_image);

    // Pick the index of the highest score among the positive-angle and
    // negative-angle candidates (>= keeps the last candidate on ties).
    // NOTE(review): detect_shape reads the vote Mats as int triplets
    // (at<int>(3 * j)), while these loops scan every entry with at<int>(i) —
    // confirm the intended vote layout; if votes are triplets this compares
    // all three components, not just the position score.
    double max_score_for_positive_match = 0, max_score_for_negative_match = 0;
    int index_max_positive_score = 0, index_max_negative_score = 0;
    cv::Mat positive_matches_scores = posPos;
    cv::Mat negative_matches_scores = posNeg;

    for (int i = 0; i < positive_matches_scores.size().width; i++) {
        if (positive_matches_scores.at<int>(i) >= max_score_for_positive_match) {
            max_score_for_positive_match = positive_matches_scores.at<int>(i);
            index_max_positive_score = i;
        }
    }

    for (int i = 0; i < negative_matches_scores.size().width; i++) {
        if (negative_matches_scores.at<int>(i) >= max_score_for_negative_match) {
            max_score_for_negative_match = negative_matches_scores.at<int>(i);
            index_max_negative_score = i;
        }
    }

    // Map the best positive and negative matches back to full-frame
    // coordinates. TAPE matches were found on the half-size image, so the
    // sub-image offsets are in half-size units and processingScale is 2.
    // CAPSTAN matches were found at full size (scale 1); the hard-coded
    // -22/-92 size and +11/+46 offset adjustments shrink the rectangle while
    // keeping it centred — presumably to trim the template's border; confirm
    // against the template images.
    cv::RotatedRect roi_pos;
    cv::RotatedRect roi_neg;
    if (element_to_find.type == ElementType::TAPE) {
        if (positive_positions.size() > 0) {
            roi_pos = get_rectangle_from_match(positive_positions[index_max_positive_score], template_image.cols,
                                               template_image.rows, halved_gray_image.cols / 4,
                                               halved_gray_image.rows / 2, 2);
        }
        if (negative_positions.size() > 0) {
            roi_neg = get_rectangle_from_match(negative_positions[index_max_negative_score], template_image.cols,
                                               template_image.rows, halved_gray_image.cols / 4,
                                               halved_gray_image.rows / 2, 2);
        }
    } else if (element_to_find.type == ElementType::CAPSTAN) {
        if (positive_positions.size() > 0) {
            roi_pos =
                get_rectangle_from_match(positive_positions[index_max_positive_score], template_image.cols - 22,
                                         template_image.rows - 92, image.cols * 3 / 4 + 11, image.rows / 2 + 46, 1);
        }
        if (negative_positions.size() > 0) {
            roi_neg =
                get_rectangle_from_match(negative_positions[index_max_negative_score], template_image.cols - 22,
                                         template_image.rows - 92, image.cols * 3 / 4 + 11, image.rows / 2 + 46, 1);
        }
    }

    // Keep whichever of the two candidate ROIs has the higher score; fail
    // only when neither angle range produced a match.
    cv::RotatedRect result;

    if (max_score_for_positive_match > 0) {
        if (max_score_for_negative_match > 0) {
            result = max_score_for_positive_match > max_score_for_negative_match ? roi_pos : roi_neg;
        } else {
            result = roi_pos;
        }
    } else if (max_score_for_negative_match > 0) {
        result = roi_neg;
    } else {
        return Error("No match found");
    }

    // For TAPE, derive the final ROI just below the matched shape: a strip
    // centred at the match x, shifted down by half the match height plus a
    // margin, with height proportional to the match width (the 20/50/200
    // constants scale relative to a 200 px nominal width — presumably tuned
    // for the reading-head template; confirm against the template image).
    if (element_to_find.type == ElementType::TAPE) {
        cv::Vec4f tape_position(result.center.x,
                                result.center.y + result.size.height / 2 + 20 * (result.size.width / 200), 1,
                                result.angle);
        result = get_rectangle_from_match(tape_position, result.size.width, 50 * (result.size.width / 200), 0, 0, 1);
    }
    return result;
}

/**
 * @brief Locates the template of the given scene element in the image using
 * SURF keypoint matching, FLANN descriptor matching with Lowe's ratio test,
 * and a RANSAC homography, and returns the region of interest centred on the
 * projected template.
 *
 * @param image the frame to search in
 * @param element_to_find the SceneElement describing which template to load
 * @return Result<Roi> the detected region of interest, or an Error when the
 * template cannot be loaded or too few reliable matches are found
 */
Result<Roi> find_roi_surf(core::Frame image, SceneElement element_to_find) {
    // Step 1: Detect the keypoints using SURF Detector, compute the
    // descriptors
    int min_hessian = 100;
    Ptr<xfeatures2d::SURF> detector = xfeatures2d::SURF::create(min_hessian);
    std::vector<cv::KeyPoint> keypoints_object, keypoints_scene;
    cv::Mat descriptors_object, descriptors_scene;

    // Save a grayscale version of image in gray_image
    core::Frame gray_image = core::Frame(image).convert_color(cv::COLOR_BGR2GRAY);

    Result<core::Frame> template_image_result = get_template_image(element_to_find.type);
    if (std::holds_alternative<Error>(template_image_result)) {
        return Error("Error while loading template image:" + std::get<Error>(template_image_result));
    }
    core::Frame template_image = std::get<core::Frame>(template_image_result);

    detector->detectAndCompute(template_image, cv::noArray(), keypoints_object, descriptors_object);
    detector->detectAndCompute(gray_image, cv::noArray(), keypoints_scene, descriptors_scene);

    // FLANN knnMatch cannot work with empty descriptor sets.
    if (descriptors_object.empty() || descriptors_scene.empty()) {
        return Error("No SURF descriptors found in template or scene");
    }

    // Step 2: Matching descriptor vectors with a FLANN based matcher
    // Since SURF is a floating-point descriptor NORM_L2 is used
    cv::Ptr<cv::DescriptorMatcher> matcher = cv::DescriptorMatcher::create(cv::DescriptorMatcher::FLANNBASED);
    std::vector<std::vector<cv::DMatch>> knn_matches;
    matcher->knnMatch(descriptors_object, descriptors_scene, knn_matches, 2);
    //-- Filter matches using the Lowe's ratio test; entries may have fewer
    // than two neighbours, so guard the [1] access.
    const float RATIO_THRESH = 0.75f;
    std::vector<cv::DMatch> good_matches;
    for (size_t i = 0; i < knn_matches.size(); i++) {
        if (knn_matches[i].size() >= 2 && knn_matches[i][0].distance < RATIO_THRESH * knn_matches[i][1].distance) {
            good_matches.push_back(knn_matches[i][0]);
        }
    }

    // findHomography needs at least 4 point correspondences.
    if (good_matches.size() < 4) {
        return Error("Not enough good matches to estimate a homography");
    }

    // Localize the object
    std::vector<cv::Point2f> obj;
    std::vector<cv::Point2f> scene;
    for (size_t i = 0; i < good_matches.size(); i++) {
        // Get the keypoints from the good matches
        obj.push_back(keypoints_object[good_matches[i].queryIdx].pt);
        scene.push_back(keypoints_scene[good_matches[i].trainIdx].pt);
    }
    cv::Mat H = cv::findHomography(obj, scene, cv::RANSAC);
    // findHomography returns an empty Mat when RANSAC cannot fit a model;
    // passing that to perspectiveTransform would assert.
    if (H.empty()) {
        return Error("Homography estimation failed");
    }
    // Get the corners from the image_1 ( the object to be "detected" )
    std::vector<cv::Point2f> obj_corners(4);
    obj_corners[0] = cv::Point2f(0, 0);
    obj_corners[1] = cv::Point2f((float)template_image.cols, 0);
    obj_corners[2] = cv::Point2f((float)template_image.cols, (float)template_image.rows);
    obj_corners[3] = cv::Point2f(0, (float)template_image.rows);
    std::vector<cv::Point2f> scene_corners(4);
    cv::perspectiveTransform(obj_corners, scene_corners, H);

    // Find the centre of the projected quadrilateral by averaging its corners
    float capstanX = (scene_corners[0].x + scene_corners[1].x + scene_corners[2].x + scene_corners[3].x) / 4;
    float capstanY = (scene_corners[0].y + scene_corners[1].y + scene_corners[2].y + scene_corners[3].y) / 4;

    // In the following there are two alterations to cut the first 20
    // horizontal pixels and the first 90 vertical pixels from the found
    // rectangle: +10 in X for centering and -20 in width +45 in Y for
    // centering and -90 in height
    cv::Vec4f positionCapstan(capstanX + 10, capstanY + 45, 1, 0);
    return get_rectangle_from_match(positionCapstan, template_image.cols - 20, template_image.rows - 90, 0, 0, 1);
}
}  // anonymous namespace

/**
 * @brief Dispatches ROI detection to the implementation selected by the
 * requested algorithm.
 *
 * @param image the frame to search in
 * @param algorithm which detection strategy to use (GHT or SURF)
 * @param element_to_find the scene element to locate
 * @return Result<Roi> the detected region of interest, or an Error when the
 * algorithm is unknown or detection fails
 */
Result<Roi> find_roi(core::Frame image, Algorithm algorithm, SceneElement element_to_find) {
    if (algorithm == Algorithm::GHT) {
        return find_roi_ght(image, element_to_find);
    }
    if (algorithm == Algorithm::SURF) {
        return find_roi_surf(image, element_to_find);
    }
    return Error("Invalid algorithm");
}
}  // namespace detection
}  // namespace videoanalyser