detection.cpp 15.1 KB
Newer Older
Matteo's avatar
Matteo committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
#include "detection.hpp"

#include <string>

namespace videoanalyser {
namespace detection {
using namespace cv;

namespace {
/**
 * @brief Builds a cv::RotatedRect in full-frame coordinates from a
 * Generalized Hough match vector.
 *
 * @param positions match vector: [0]/[1] = centre x/y relative to the
 * processed sub-image, [2] = detected scale, [3] = detected angle in degrees
 * @param width template width in pixels
 * @param height template height in pixels
 * @param offsetX x offset of the processed sub-image inside the frame
 * @param offsetY y offset of the processed sub-image inside the frame
 * @param processingScale factor mapping processing coordinates back to the
 * full-resolution frame (e.g. 2 when detection ran on a half-size image)
 * @return cv::RotatedRect the match rectangle in full-frame coordinates
 */
cv::RotatedRect get_rectangle_from_match(const cv::Vec4f& positions, int width, int height, int offsetX, int offsetY,
                                         float processingScale) {
    cv::RotatedRect rr;

    // Translate the match centre from sub-image to frame coordinates.
    cv::Point2f pos(positions[0] + offsetX, positions[1] + offsetY);
    float scale = positions[2];
    float angle = positions[3];

    rr.center = pos * processingScale;
    rr.size = cv::Size2f(width * scale * processingScale, height * scale * processingScale);
    rr.angle = angle;

    // NOTE: a previous version also extracted the four corner points via
    // rr.points() into a local array that was never read; that dead call has
    // been removed.

    return rr;
}

using ShapeMatch = std::tuple<std::vector<cv::Vec4f>, std::vector<cv::Vec4f>, cv::Mat, cv::Mat>;
ShapeMatch detect_shape(cv::Ptr<cv::GeneralizedHoughGuil> alg, int pos_thresh, cv::Mat processing_area) {
    cv::Mat positive_votes, negative_votes;
    std::vector<cv::Vec4f> positive_positions, negative_positions;

    alg->setPosThresh(pos_thresh);

    int num_prev_matches = 0;
    int threshold_increment = 0;
    int max_match_score = 0;

    // Process shapes with positive angles
    alg->setMinAngle(0);
    alg->setMaxAngle(3);

    while (true) {
        alg->detect(processing_area, positive_positions, positive_votes);
        int current_matches = positive_positions.size();
        if (current_matches == 1 || (current_matches == 0 && num_prev_matches == 0)) {
            // We detected the most interesting shape
            // Impossible to find with these parameters
            break;
        } else if (current_matches == 0 && num_prev_matches > 0) {
            // It is not possible to detect only one shape with the current
            // parameters
            alg->setPosThresh(pos_thresh + threshold_increment - 1);  // Decrease position value
            alg->detect(processing_area, positive_positions,
                        positive_votes);  // Detect all available shapes
            break;
        }
        num_prev_matches = current_matches;
        // Find maximum vote
        for (int j = 0; j < positive_votes.cols / 3; j++) {
            if (positive_votes.at<int>(3 * j) > max_match_score) max_match_score = positive_votes.at<int>(3 * j);
        }

        if (current_matches > 10) {
            threshold_increment += 5;  // To speed up computation when there are too many matches
        } else if (max_match_score - (pos_thresh + threshold_increment) > 100) {
            threshold_increment += 100;  // To speed up computation when there are few super high
                                         // matches
        } else {
            threshold_increment++;
        }
        alg->setPosThresh(pos_thresh + threshold_increment);
    }

    // Reset incremental position value
    threshold_increment = 0;
    num_prev_matches = 0;
    max_match_score = 0;
    // Process shapes with negative angles
    alg->setMinAngle(357);
    alg->setMaxAngle(360);
    while (true) {
        alg->detect(processing_area, negative_positions, negative_votes);
        int current_matches = negative_positions.size();
        if (current_matches == 1 || (current_matches == 0 && num_prev_matches == 0)) {
            // We detected the most interesting shape
            // Impossible to found with these parameters
            break;
        } else if (current_matches == 0 && num_prev_matches > 0) {
            // It is not possible to detect only one shape with the current
            // parameters
            alg->setPosThresh(pos_thresh + threshold_increment - 1);  // Decrease position value
            alg->detect(processing_area, negative_positions,
                        negative_votes);  // Detect all available shapes
            break;
        }
        num_prev_matches = current_matches;

        // Find maximum vote
        for (int j = 0; j < positive_votes.cols / 3; j++) {
            if (positive_votes.at<int>(3 * j) > max_match_score) max_match_score = positive_votes.at<int>(3 * j);
        }

        if (current_matches > 10) {
            threshold_increment += 5;  // To speed up computation when there are too many matches
        } else if (max_match_score - (pos_thresh + threshold_increment) > 100) {
            threshold_increment += 100;  // To speed up computation when there are few super high
                                         // matches
        } else {
            threshold_increment++;
        }
        alg->setPosThresh(pos_thresh + threshold_increment);
    }

    return std::make_tuple(positive_positions, negative_positions, positive_votes, negative_votes);
}

/**
 * @brief Loads the grayscale template image associated with the given
 * element type from the input directory.
 *
 * @param element_type the kind of scene element to load a template for
 * @return Result<core::Frame> the loaded template, or an Error when the
 * element type is unknown or the image file cannot be read
 */
Result<core::Frame> get_template_image(ElementType element_type) {
    std::string path;
    switch (element_type) {
        case ElementType::TAPE:
            path = "input/readingHead.png";
            break;
        case ElementType::CAPSTAN:
            path = "input/capstanBERIO058prova.png";
            break;
        default:
            return Error("Invalid element type");
    }
    // cv::imread does not throw on failure: it returns an empty Mat, which
    // the previous version silently wrapped in a Frame. Surface it as an
    // Error instead.
    cv::Mat template_image = cv::imread(path, cv::IMREAD_GRAYSCALE);
    if (template_image.empty()) {
        return Error("Could not load template image: " + path);
    }
    return core::Frame(template_image);
}
/**
 * @fn Result<Roi> find_roi_ght(core::Frame image, SceneElement element_to_find)
 * @brief Find the template of the given scene element in the image using the
 * Generalized Hough Transform and return the region of interest of the best
 * match. The best matches for positive and negative angles are computed
 * separately. If more than one shape is found, the one with the highest
 * score is chosen; if several shapes share the same highest score, the last
 * one is arbitrarily chosen.
 *
 * For information about the Generalized Hough Guil usage see the tutorial
 * at https://docs.opencv.org/4.7.0/da/ddc/tutorial_generalized_hough_ballard_guil.html
 *
 * @param image the frame to search in
 * @param element_to_find the SceneElement struct containing the parameters
 * for the Generalized Hough Transform
 * @return Result<Roi> the region of interest of the best match, or an Error
 * when no match is found
 */
Result<Roi> find_roi_ght(core::Frame image, SceneElement element_to_find) {
    // Save a grayscale version of image in gray_image
    core::Frame gray_image = core::Frame(image).convert_color(cv::COLOR_BGR2GRAY);
    // downsample the frame in half pixels for performance reasons
    core::Frame halved_gray_image = core::Frame(gray_image).clone().downsample(2);
    // Get input shape in grayscale and downsample it in half pixels
    Result<core::Frame> template_image_result = get_template_image(element_to_find.type);
    if (std::holds_alternative<Error>(template_image_result)) {
        return Error("Error while loading template image:" + std::get<Error>(template_image_result));
    }
    core::Frame template_image = std::get<core::Frame>(template_image_result).downsample(2);

    // Configure the Generalized Hough Guil detector. The distance and the
    // angle/scale/position thresholds come from the element description; the
    // remaining values (levels, dp, Canny thresholds, scale range) are fixed
    // tuning constants.
    cv::Ptr<cv::GeneralizedHoughGuil> ght = cv::createGeneralizedHoughGuil();
    ght->setMinDist(element_to_find.min_dist);
    ght->setLevels(360);
    ght->setDp(2);
    ght->setMaxBufferSize(1000);
    ght->setAngleStep(1);
    ght->setAngleThresh(element_to_find.threshold.angle);
    ght->setMinScale(0.9);
    ght->setMaxScale(1.1);
    ght->setScaleStep(0.01);
    ght->setScaleThresh(element_to_find.threshold.scale);
    ght->setCannyLowThresh(150);
    ght->setCannyHighThresh(240);
    ght->setTemplate(template_image);

    // Restrict the search to the region where the element is expected.
    // TAPE: bottom-central half of the half-size image (x from cols/4, width
    // cols/2, lower half in y). CAPSTAN: bottom-right region of the
    // full-size image (x from 3/4 of the width, lower half in y). Note the
    // two branches work in different coordinate scales (half vs full size);
    // the offsets/scale passed to get_rectangle_from_match below must match.
    cv::Rect processing_area;
    cv::Mat processing_image;
    if (element_to_find.type == ElementType::TAPE) {
        processing_area = cv::Rect(halved_gray_image.cols / 4, halved_gray_image.rows / 2, halved_gray_image.cols / 2,
                                   halved_gray_image.rows / 2);
        processing_image = halved_gray_image(processing_area);
    } else if (element_to_find.type == ElementType::CAPSTAN) {
        processing_area = cv::Rect(image.cols * 3 / 4, image.rows / 2, image.cols / 4, image.rows / 2);
        processing_image = gray_image(processing_area);
    }

    auto [positive_positions, negative_positions, posPos, posNeg] =
        detect_shape(ght, element_to_find.threshold.pos, processing_image);

    // Pick the index of the highest score among the positive-angle and
    // negative-angle candidates (>= keeps the last candidate on ties).
    // NOTE(review): detect_shape reads the vote Mats as int triplets
    // (at<int>(3 * j)), while these loops scan every entry with at<int>(i) —
    // confirm the intended vote layout; if votes are triplets this compares
    // all three components, not just the position score.
    double max_score_for_positive_match = 0, max_score_for_negative_match = 0;
    int index_max_positive_score = 0, index_max_negative_score = 0;
    cv::Mat positive_matches_scores = posPos;
    cv::Mat negative_matches_scores = posNeg;

    for (int i = 0; i < positive_matches_scores.size().width; i++) {
        if (positive_matches_scores.at<int>(i) >= max_score_for_positive_match) {
            max_score_for_positive_match = positive_matches_scores.at<int>(i);
            index_max_positive_score = i;
        }
    }

    for (int i = 0; i < negative_matches_scores.size().width; i++) {
        if (negative_matches_scores.at<int>(i) >= max_score_for_negative_match) {
            max_score_for_negative_match = negative_matches_scores.at<int>(i);
            index_max_negative_score = i;
        }
    }

    // Map the best positive and negative matches back to full-frame
    // coordinates. TAPE matches were found on the half-size image, so the
    // sub-image offsets are in half-size units and processingScale is 2.
    // CAPSTAN matches were found at full size (scale 1); the hard-coded
    // -22/-92 size and +11/+46 offset adjustments shrink the rectangle while
    // keeping it centred — presumably to trim the template's border; confirm
    // against the template images.
    cv::RotatedRect roi_pos;
    cv::RotatedRect roi_neg;
    if (element_to_find.type == ElementType::TAPE) {
        if (positive_positions.size() > 0) {
            roi_pos = get_rectangle_from_match(positive_positions[index_max_positive_score], template_image.cols,
                                               template_image.rows, halved_gray_image.cols / 4,
                                               halved_gray_image.rows / 2, 2);
        }
        if (negative_positions.size() > 0) {
            roi_neg = get_rectangle_from_match(negative_positions[index_max_negative_score], template_image.cols,
                                               template_image.rows, halved_gray_image.cols / 4,
                                               halved_gray_image.rows / 2, 2);
        }
    } else if (element_to_find.type == ElementType::CAPSTAN) {
        if (positive_positions.size() > 0) {
            roi_pos =
                get_rectangle_from_match(positive_positions[index_max_positive_score], template_image.cols - 22,
                                         template_image.rows - 92, image.cols * 3 / 4 + 11, image.rows / 2 + 46, 1);
        }
        if (negative_positions.size() > 0) {
            roi_neg =
                get_rectangle_from_match(negative_positions[index_max_negative_score], template_image.cols - 22,
                                         template_image.rows - 92, image.cols * 3 / 4 + 11, image.rows / 2 + 46, 1);
        }
    }

    // Keep whichever of the two candidate ROIs has the higher score; fail
    // only when neither angle range produced a match.
    cv::RotatedRect result;

    if (max_score_for_positive_match > 0) {
        if (max_score_for_negative_match > 0) {
            result = max_score_for_positive_match > max_score_for_negative_match ? roi_pos : roi_neg;
        } else {
            result = roi_pos;
        }
    } else if (max_score_for_negative_match > 0) {
        result = roi_neg;
    } else {
        return Error("No match found");
    }

    // For TAPE, derive the final ROI just below the matched shape: a strip
    // centred at the match x, shifted down by half the match height plus a
    // margin, with height proportional to the match width (the 20/50/200
    // constants scale relative to a 200 px nominal width — presumably tuned
    // for the reading-head template; confirm against the template image).
    if (element_to_find.type == ElementType::TAPE) {
        cv::Vec4f tape_position(result.center.x,
                                result.center.y + result.size.height / 2 + 20 * (result.size.width / 200), 1,
                                result.angle);
        result = get_rectangle_from_match(tape_position, result.size.width, 50 * (result.size.width / 200), 0, 0, 1);
    }
    return result;
}

/**
 * @brief Locates the template of the given scene element in the image using
 * SURF keypoint matching, FLANN descriptor matching with Lowe's ratio test,
 * and a RANSAC homography, and returns the region of interest centred on the
 * projected template.
 *
 * @param image the frame to search in
 * @param element_to_find the SceneElement describing which template to load
 * @return Result<Roi> the detected region of interest, or an Error when the
 * template cannot be loaded or too few reliable matches are found
 */
Result<Roi> find_roi_surf(core::Frame image, SceneElement element_to_find) {
    // Step 1: Detect the keypoints using SURF Detector, compute the
    // descriptors
    int min_hessian = 100;
    Ptr<xfeatures2d::SURF> detector = xfeatures2d::SURF::create(min_hessian);
    std::vector<cv::KeyPoint> keypoints_object, keypoints_scene;
    cv::Mat descriptors_object, descriptors_scene;

    // Save a grayscale version of image in gray_image
    core::Frame gray_image = core::Frame(image).convert_color(cv::COLOR_BGR2GRAY);

    Result<core::Frame> template_image_result = get_template_image(element_to_find.type);
    if (std::holds_alternative<Error>(template_image_result)) {
        return Error("Error while loading template image:" + std::get<Error>(template_image_result));
    }
    core::Frame template_image = std::get<core::Frame>(template_image_result);

    detector->detectAndCompute(template_image, cv::noArray(), keypoints_object, descriptors_object);
    detector->detectAndCompute(gray_image, cv::noArray(), keypoints_scene, descriptors_scene);

    // FLANN knnMatch cannot work with empty descriptor sets.
    if (descriptors_object.empty() || descriptors_scene.empty()) {
        return Error("No SURF descriptors found in template or scene");
    }

    // Step 2: Matching descriptor vectors with a FLANN based matcher
    // Since SURF is a floating-point descriptor NORM_L2 is used
    cv::Ptr<cv::DescriptorMatcher> matcher = cv::DescriptorMatcher::create(cv::DescriptorMatcher::FLANNBASED);
    std::vector<std::vector<cv::DMatch>> knn_matches;
    matcher->knnMatch(descriptors_object, descriptors_scene, knn_matches, 2);
    //-- Filter matches using the Lowe's ratio test; entries may have fewer
    // than two neighbours, so guard the [1] access.
    const float RATIO_THRESH = 0.75f;
    std::vector<cv::DMatch> good_matches;
    for (size_t i = 0; i < knn_matches.size(); i++) {
        if (knn_matches[i].size() >= 2 && knn_matches[i][0].distance < RATIO_THRESH * knn_matches[i][1].distance) {
            good_matches.push_back(knn_matches[i][0]);
        }
    }

    // findHomography needs at least 4 point correspondences.
    if (good_matches.size() < 4) {
        return Error("Not enough good matches to estimate a homography");
    }

    // Localize the object
    std::vector<cv::Point2f> obj;
    std::vector<cv::Point2f> scene;
    for (size_t i = 0; i < good_matches.size(); i++) {
        // Get the keypoints from the good matches
        obj.push_back(keypoints_object[good_matches[i].queryIdx].pt);
        scene.push_back(keypoints_scene[good_matches[i].trainIdx].pt);
    }
    cv::Mat H = cv::findHomography(obj, scene, cv::RANSAC);
    // findHomography returns an empty Mat when RANSAC cannot fit a model;
    // passing that to perspectiveTransform would assert.
    if (H.empty()) {
        return Error("Homography estimation failed");
    }
    // Get the corners from the image_1 ( the object to be "detected" )
    std::vector<cv::Point2f> obj_corners(4);
    obj_corners[0] = cv::Point2f(0, 0);
    obj_corners[1] = cv::Point2f((float)template_image.cols, 0);
    obj_corners[2] = cv::Point2f((float)template_image.cols, (float)template_image.rows);
    obj_corners[3] = cv::Point2f(0, (float)template_image.rows);
    std::vector<cv::Point2f> scene_corners(4);
    cv::perspectiveTransform(obj_corners, scene_corners, H);

    // Find the centre of the projected quadrilateral by averaging its corners
    float capstanX = (scene_corners[0].x + scene_corners[1].x + scene_corners[2].x + scene_corners[3].x) / 4;
    float capstanY = (scene_corners[0].y + scene_corners[1].y + scene_corners[2].y + scene_corners[3].y) / 4;

    // In the following there are two alterations to cut the first 20
    // horizontal pixels and the first 90 vertical pixels from the found
    // rectangle: +10 in X for centering and -20 in width +45 in Y for
    // centering and -90 in height
    cv::Vec4f positionCapstan(capstanX + 10, capstanY + 45, 1, 0);
    return get_rectangle_from_match(positionCapstan, template_image.cols - 20, template_image.rows - 90, 0, 0, 1);
}
}  // anonymous namespace

/**
 * @brief Dispatches ROI detection to the implementation selected by the
 * requested algorithm.
 *
 * @param image the frame to search in
 * @param algorithm which detection strategy to use (GHT or SURF)
 * @param element_to_find the scene element to locate
 * @return Result<Roi> the detected region of interest, or an Error when the
 * algorithm is unknown or detection fails
 */
Result<Roi> find_roi(core::Frame image, Algorithm algorithm, SceneElement element_to_find) {
    if (algorithm == Algorithm::GHT) {
        return find_roi_ght(image, element_to_find);
    }
    if (algorithm == Algorithm::SURF) {
        return find_roi_surf(image, element_to_find);
    }
    return Error("Invalid algorithm");
}
}  // namespace detection
}  // namespace videoanalyser