#include "detection.hpp"

#include <tuple>
#include <variant>
#include <vector>

namespace videoanalyser {
namespace detection {

using namespace cv;

namespace {

/**
 * @brief Build a cv::RotatedRect describing a match in full-frame
 * coordinates.
 *
 * @param positions match vector: [0]=x, [1]=y, [2]=scale, [3]=angle (deg)
 * @param width template width in pixels (before the match scale is applied)
 * @param height template height in pixels (before the match scale is applied)
 * @param offsetX x origin of the processing sub-area inside the full frame
 * @param offsetY y origin of the processing sub-area inside the full frame
 * @param processingScale factor mapping processing-image coordinates back to
 *        full-frame coordinates (e.g. 2 when the frame was downsampled by 2)
 * @return the match rectangle expressed in full-frame coordinates
 */
cv::RotatedRect get_rectangle_from_match(const cv::Vec4f& positions, int width,
                                         int height, int offsetX, int offsetY,
                                         float processingScale) {
    cv::RotatedRect rr;
    const cv::Point2f pos(positions[0] + offsetX, positions[1] + offsetY);
    const float scale = positions[2];
    rr.center = pos * processingScale;
    rr.size = cv::Size2f(width * scale * processingScale,
                         height * scale * processingScale);
    rr.angle = positions[3];
    // NOTE: the original also filled a local corner array via rr.points()
    // and discarded it; that dead work has been removed.
    return rr;
}

// Matches for positive angles, matches for negative angles, and the
// corresponding vote matrices produced by GeneralizedHoughGuil::detect.
using ShapeMatch = std::tuple<std::vector<cv::Vec4f>, std::vector<cv::Vec4f>,
                              cv::Mat, cv::Mat>;

/**
 * @brief Run GHT detection over one angle range, adaptively raising the
 * position threshold until at most one match remains.
 *
 * Starting from pos_thresh, the threshold is incremented until either a
 * single match survives, no match was ever found, or the last increment
 * eliminated all matches (in which case the threshold is stepped back one
 * notch and every match found there is kept).
 *
 * @param alg configured Guil Hough detector (pos/min/max angle are set here)
 * @param pos_thresh initial position-votes threshold
 * @param processing_area grayscale sub-image to search
 * @param min_angle lower bound of the rotation range, in degrees
 * @param max_angle upper bound of the rotation range, in degrees
 * @param[out] positions detected matches (x, y, scale, angle)
 * @param[out] votes vote matrix matching @p positions
 */
void detect_in_angle_range(cv::Ptr<cv::GeneralizedHoughGuil> alg,
                           int pos_thresh, const cv::Mat& processing_area,
                           int min_angle, int max_angle,
                           std::vector<cv::Vec4f>& positions, cv::Mat& votes) {
    alg->setPosThresh(pos_thresh);
    alg->setMinAngle(min_angle);
    alg->setMaxAngle(max_angle);

    int num_prev_matches = 0;
    int threshold_increment = 0;
    int max_match_score = 0;

    while (true) {
        alg->detect(processing_area, positions, votes);
        const int current_matches = static_cast<int>(positions.size());

        if (current_matches == 1 ||
            (current_matches == 0 && num_prev_matches == 0)) {
            // Either we isolated the single most interesting shape, or no
            // shape is detectable at all with these parameters.
            break;
        } else if (current_matches == 0 && num_prev_matches > 0) {
            // The last increment went too far: step the threshold back one
            // notch and keep all shapes detectable there.
            alg->setPosThresh(pos_thresh + threshold_increment - 1);
            alg->detect(processing_area, positions, votes);
            break;
        }
        num_prev_matches = current_matches;

        // Track the highest vote seen so far.
        // NOTE(review): the vote matrix is read as 3 ints per match, as in
        // the original code -- confirm against the GeneralizedHoughGuil
        // votes layout.
        for (int j = 0; j < votes.cols / 3; j++) {
            if (votes.at<int>(3 * j) > max_match_score)
                max_match_score = votes.at<int>(3 * j);
        }

        if (current_matches > 10) {
            // Too many matches: take bigger steps to speed up computation.
            threshold_increment += 5;
        } else if (max_match_score - (pos_thresh + threshold_increment) >
                   100) {
            // A few very high-scoring matches dominate: jump straight up.
            threshold_increment += 100;
        } else {
            threshold_increment++;
        }
        alg->setPosThresh(pos_thresh + threshold_increment);
    }
}

/**
 * @brief Detect the template for both slightly positive ([0, 3] deg) and
 * slightly negative ([357, 360] deg) rotations.
 *
 * @param alg configured Guil Hough detector with the template already set
 * @param pos_thresh initial position-votes threshold
 * @param processing_area grayscale sub-image to search
 * @return positive matches, negative matches, and their vote matrices
 */
ShapeMatch detect_shape(cv::Ptr<cv::GeneralizedHoughGuil> alg, int pos_thresh,
                        cv::Mat processing_area) {
    cv::Mat positive_votes, negative_votes;
    std::vector<cv::Vec4f> positive_positions, negative_positions;

    detect_in_angle_range(alg, pos_thresh, processing_area, 0, 3,
                          positive_positions, positive_votes);
    // BUGFIX: the original negative-angle loop scanned positive_votes when
    // searching for the maximum score; the shared helper now reads the
    // correct vote matrix.
    detect_in_angle_range(alg, pos_thresh, processing_area, 357, 360,
                          negative_positions, negative_votes);

    return std::make_tuple(positive_positions, negative_positions,
                           positive_votes, negative_votes);
}

/**
 * @brief Load the grayscale template image for the given element type.
 *
 * @param element_type which scene element to load a template for
 * @return the template wrapped in a core::Frame, or an Error for an unknown
 *         element type
 */
Result<core::Frame> get_template_image(ElementType element_type) {
    switch (element_type) {
        case ElementType::TAPE:
            return core::Frame(
                cv::imread("input/readingHead.png", cv::IMREAD_GRAYSCALE));
        case ElementType::CAPSTAN:
            return core::Frame(cv::imread("input/capstanBERIO058prova.png",
                                          cv::IMREAD_GRAYSCALE));
        default:
            return Error("Invalid element type");
    }
}

/**
 * @brief Find the template in the scene using the Generalized Hough
 * Transform and return the best match.
 *
 * Detection is run separately for slightly positive and slightly negative
 * rotations. If more than one shape is found, the one with the highest vote
 * score is chosen; ties are broken by keeping the last one scanned.
 *
 * For information about Generalized Hough Guil usage see the tutorial at
 * https://docs.opencv.org/4.7.0/da/ddc/tutorial_generalized_hough_ballard_guil.html
 *
 * @param image the scene frame (BGR)
 * @param element_to_find scene-element descriptor holding the GHT parameters
 * @return the best-match rectangle in full-frame coordinates, or an Error
 *         when the template cannot be loaded or no match is found
 */
Result<cv::RotatedRect> find_roi_ght(core::Frame image,
                                     SceneElement element_to_find) {
    // Grayscale version of the frame.
    core::Frame gray_image =
        core::Frame(image).convert_color(cv::COLOR_BGR2GRAY);
    // Downsample the frame to half resolution for performance reasons.
    core::Frame halved_gray_image =
        core::Frame(gray_image).clone().downsample(2);

    // Load the template (grayscale) and downsample it to match the frame.
    Result<core::Frame> template_image_result =
        get_template_image(element_to_find.type);
    if (std::holds_alternative<Error>(template_image_result)) {
        return Error("Error while loading template image:" +
                     std::get<Error>(template_image_result));
    }
    core::Frame template_image =
        std::get<core::Frame>(template_image_result).downsample(2);

    cv::Ptr<cv::GeneralizedHoughGuil> ght = cv::createGeneralizedHoughGuil();
    ght->setMinDist(element_to_find.min_dist);
    ght->setLevels(360);
    ght->setDp(2);
    ght->setMaxBufferSize(1000);
    ght->setAngleStep(1);
    ght->setAngleThresh(element_to_find.threshold.angle);
    ght->setMinScale(0.9);
    ght->setMaxScale(1.1);
    ght->setScaleStep(0.01);
    ght->setScaleThresh(element_to_find.threshold.scale);
    ght->setCannyLowThresh(150);
    ght->setCannyHighThresh(240);
    ght->setTemplate(template_image);

    // Restrict the search to the frame region where the element lives: the
    // tape in the (half-resolution) bottom-centre area, the capstan in the
    // (full-resolution) bottom-right quarter.
    cv::Rect processing_area;
    cv::Mat processing_image;
    if (element_to_find.type == ElementType::TAPE) {
        processing_area =
            cv::Rect(halved_gray_image.cols / 4, halved_gray_image.rows / 2,
                     halved_gray_image.cols / 2, halved_gray_image.rows / 2);
        processing_image = halved_gray_image(processing_area);
    } else if (element_to_find.type == ElementType::CAPSTAN) {
        processing_area = cv::Rect(image.cols * 3 / 4, image.rows / 2,
                                   image.cols / 4, image.rows / 2);
        processing_image = gray_image(processing_area);
    }

    auto [positive_positions, negative_positions, positive_votes,
          negative_votes] =
        detect_shape(ght, element_to_find.threshold.pos, processing_image);

    // Pick the highest-scoring entry of each vote matrix. On ties the
    // latest entry wins (>=), as documented above.
    // NOTE(review): the winning index is reused to index the positions
    // vector, which assumes one vote entry per match -- confirm against the
    // GeneralizedHoughGuil votes layout.
    double max_score_for_positive_match = 0, max_score_for_negative_match = 0;
    int index_max_positive_score = 0, index_max_negative_score = 0;
    for (int i = 0; i < positive_votes.size().width; i++) {
        if (positive_votes.at<int>(i) >= max_score_for_positive_match) {
            max_score_for_positive_match = positive_votes.at<int>(i);
            index_max_positive_score = i;
        }
    }
    for (int i = 0; i < negative_votes.size().width; i++) {
        if (negative_votes.at<int>(i) >= max_score_for_negative_match) {
            max_score_for_negative_match = negative_votes.at<int>(i);
            index_max_negative_score = i;
        }
    }

    cv::RotatedRect roi_pos;
    cv::RotatedRect roi_neg;
    if (element_to_find.type == ElementType::TAPE) {
        // Matches come from the half-resolution sub-area: rescale by 2 and
        // shift by the sub-area origin.
        if (!positive_positions.empty()) {
            roi_pos = get_rectangle_from_match(
                positive_positions[index_max_positive_score],
                template_image.cols, template_image.rows,
                halved_gray_image.cols / 4, halved_gray_image.rows / 2, 2);
        }
        if (!negative_positions.empty()) {
            roi_neg = get_rectangle_from_match(
                negative_positions[index_max_negative_score],
                template_image.cols, template_image.rows,
                halved_gray_image.cols / 4, halved_gray_image.rows / 2, 2);
        }
    } else if (element_to_find.type == ElementType::CAPSTAN) {
        // The capstan template carries a 22x92 px border that is trimmed
        // off here (hence the +11/+46 centre shift).
        if (!positive_positions.empty()) {
            roi_pos = get_rectangle_from_match(
                positive_positions[index_max_positive_score],
                template_image.cols - 22, template_image.rows - 92,
                image.cols * 3 / 4 + 11, image.rows / 2 + 46, 1);
        }
        if (!negative_positions.empty()) {
            roi_neg = get_rectangle_from_match(
                negative_positions[index_max_negative_score],
                template_image.cols - 22, template_image.rows - 92,
                image.cols * 3 / 4 + 11, image.rows / 2 + 46, 1);
        }
    }

    // Keep whichever angle range produced the stronger match.
    cv::RotatedRect result;
    if (max_score_for_positive_match > 0) {
        if (max_score_for_negative_match > 0) {
            result =
                max_score_for_positive_match > max_score_for_negative_match
                    ? roi_pos
                    : roi_neg;
        } else {
            result = roi_pos;
        }
    } else if (max_score_for_negative_match > 0) {
        result = roi_neg;
    } else {
        return Error("No match found");
    }

    if (element_to_find.type == ElementType::TAPE) {
        // The template matches the reading head; the tape itself sits just
        // below it. Derive a rectangle under the matched head whose gap
        // (20 px) and height (50 px) scale with the match width relative to
        // a 200 px reference.
        cv::Vec4f tape_position(result.center.x,
                                result.center.y + result.size.height / 2 +
                                    20 * (result.size.width / 200),
                                1, result.angle);
        result = get_rectangle_from_match(tape_position, result.size.width,
                                          50 * (result.size.width / 200), 0,
                                          0, 1);
    }
    return result;
}

/**
 * @brief Find the template in the scene by matching SURF keypoints and
 * estimating a homography between template and scene.
 *
 * @param image the scene frame (BGR)
 * @param element_to_find scene-element descriptor (selects the template)
 * @return the located rectangle in scene coordinates, or an Error when the
 *         template cannot be loaded or too few good matches are found
 */
Result<cv::RotatedRect> find_roi_surf(core::Frame image,
                                      SceneElement element_to_find) {
    // Step 1: detect keypoints with the SURF detector and compute their
    // descriptors, on both the template and the (grayscale) scene.
    int min_hessian = 100;
    Ptr<xfeatures2d::SURF> detector = xfeatures2d::SURF::create(min_hessian);
    std::vector<cv::KeyPoint> keypoints_object, keypoints_scene;
    cv::Mat descriptors_object, descriptors_scene;

    core::Frame gray_image =
        core::Frame(image).convert_color(cv::COLOR_BGR2GRAY);

    Result<core::Frame> template_image_result =
        get_template_image(element_to_find.type);
    if (std::holds_alternative<Error>(template_image_result)) {
        return Error("Error while loading template image:" +
                     std::get<Error>(template_image_result));
    }
    core::Frame template_image = std::get<core::Frame>(template_image_result);

    detector->detectAndCompute(template_image, cv::noArray(),
                               keypoints_object, descriptors_object);
    detector->detectAndCompute(gray_image, cv::noArray(), keypoints_scene,
                               descriptors_scene);

    // Step 2: match descriptor vectors with a FLANN based matcher. Since
    // SURF is a floating-point descriptor, NORM_L2 is used.
    cv::Ptr<cv::DescriptorMatcher> matcher =
        cv::DescriptorMatcher::create(cv::DescriptorMatcher::FLANNBASED);
    std::vector<std::vector<cv::DMatch>> knn_matches;
    matcher->knnMatch(descriptors_object, descriptors_scene, knn_matches, 2);

    // Filter matches using Lowe's ratio test.
    const float RATIO_THRESH = 0.75f;
    std::vector<cv::DMatch> good_matches;
    for (size_t i = 0; i < knn_matches.size(); i++) {
        if (knn_matches[i][0].distance <
            RATIO_THRESH * knn_matches[i][1].distance) {
            good_matches.push_back(knn_matches[i][0]);
        }
    }
    // BUGFIX: cv::findHomography requires at least 4 point pairs and throws
    // on fewer; fail gracefully instead. (The original's unused drawMatches
    // debug visualisation has been removed as dead code.)
    if (good_matches.size() < 4) {
        return Error("No match found");
    }

    // Localize the object: collect the matched point pairs...
    std::vector<cv::Point2f> obj;
    std::vector<cv::Point2f> scene;
    for (const cv::DMatch& match : good_matches) {
        obj.push_back(keypoints_object[match.queryIdx].pt);
        scene.push_back(keypoints_scene[match.trainIdx].pt);
    }
    cv::Mat H = cv::findHomography(obj, scene, cv::RANSAC);
    // BUGFIX: findHomography can return an empty matrix when RANSAC fails;
    // perspectiveTransform would then throw.
    if (H.empty()) {
        return Error("No match found");
    }

    // ...and project the template corners into the scene.
    std::vector<cv::Point2f> obj_corners(4);
    obj_corners[0] = cv::Point2f(0, 0);
    obj_corners[1] = cv::Point2f((float)template_image.cols, 0);
    obj_corners[2] =
        cv::Point2f((float)template_image.cols, (float)template_image.rows);
    obj_corners[3] = cv::Point2f(0, (float)template_image.rows);
    std::vector<cv::Point2f> scene_corners(4);
    cv::perspectiveTransform(obj_corners, scene_corners, H);

    // Centre of the projected quadrilateral.
    float capstanX = (scene_corners[0].x + scene_corners[1].x +
                      scene_corners[2].x + scene_corners[3].x) /
                     4;
    float capstanY = (scene_corners[0].y + scene_corners[1].y +
                      scene_corners[2].y + scene_corners[3].y) /
                     4;

    // Two alterations cut the first 20 horizontal and 90 vertical pixels
    // from the found rectangle: +10 in X for centering and -20 in width,
    // +45 in Y for centering and -90 in height.
    cv::Vec4f positionCapstan(capstanX + 10, capstanY + 45, 1, 0);
    return get_rectangle_from_match(positionCapstan, template_image.cols - 20,
                                    template_image.rows - 90, 0, 0, 1);
}

}  // anonymous namespace

/**
 * @brief Locate the requested scene element with the chosen algorithm.
 *
 * @param image the scene frame (BGR)
 * @param algorithm which detector to use (GHT or SURF)
 * @param element_to_find scene-element descriptor
 * @return the located rectangle, or an Error on failure / unknown algorithm
 */
Result<cv::RotatedRect> find_roi(core::Frame image, Algorithm algorithm,
                                 SceneElement element_to_find) {
    switch (algorithm) {
        case Algorithm::GHT:
            return find_roi_ght(image, element_to_find);
        case Algorithm::SURF:
            return find_roi_surf(image, element_to_find);
        default:
            return Error("Invalid algorithm");
    }
}

}  // namespace detection
}  // namespace videoanalyser