/**
 * @mainpage MPAI CAE-ARP Video Analyser
 * @file main.cpp
 *  MPAI CAE-ARP Video Analyser.
 *
 *	Implements the MPAI CAE-ARP Video Analyser Technical Specification.
 *	It identifies Irregularities on the Preservation Audio-Visual File,
 *	providing:
 *	- Irregularity Files;
 *	- Irregularity Images.
 *
 * @warning Currently, this program is only compatible with the Studer A810
 * and with videos recorded in the PAL standard.
 *
 * @todo
 *  - A resize function for the entire video should be implemented if it does
 *    not conform to the PAL standard (currently taken for granted).
 *  - Progressive videos, which do not require deinterlacing, should be handled
 *    (several steps in the code assume interlaced input).
 *
 *  @author Nadir Dalla Pozza <nadir.dallapozza@unipd.it>
 *  @author Matteo Spanio <dev2@audioinnova.com>
 *	@copyright 2023, Audio Innova S.r.l.
 *	@credits Niccolò Pretto, Nadir Dalla Pozza, Sergio Canazza
 *	@license GPL v3.0
 *	@version 1.1.2
 *	@status Production
 */
#include <stdlib.h>
#include <sys/timeb.h>

#include <boost/lexical_cast.hpp>
#include <boost/program_options.hpp>
#include <boost/uuid/uuid.hpp>             // uuid class
#include <boost/uuid/uuid_generators.hpp>  // generators
#include <boost/uuid/uuid_io.hpp>          // streaming operators etc.
#include <filesystem>
#include <fstream>
#include <iostream>
#include <nlohmann/json.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <ranges>

#include "forAudioAnalyser.h"
#include "lib/Irregularity.h"
#include "lib/IrregularityFile.h"
#include "lib/colors.h"
#include "lib/files.h"
#include "lib/time.h"
#include "opencv2/calib3d.hpp"
#include "opencv2/core.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/xfeatures2d.hpp"
#include "utility.h"

#define A_IRREG_FILE_1 "AudioAnalyser_IrregularityFileOutput1.json"
#define V_IRREG_FILE_1 "VideoAnalyser_IrregularityFileOutput1.json"
#define V_IRREG_FILE_2 "VideoAnalyser_IrregularityFileOutput2.json"

using namespace cv;
using namespace std;
using utility::Frame;
using json = nlohmann::json;
namespace fs = std::filesystem;
namespace po = boost::program_options;

/**
 * @const bool useSURF
 * @brief If true, SURF is used for capstan detection, otherwise GHT is used.
 *
 * For capstan detection, there are two alternative approaches:
 * 1. Generalized Hough Transform
 * 2. SURF.
 */
bool useSURF = true;

bool savingPinchRoller = false;
bool pinchRollerRect = false;
bool savingBrand = false;
bool endTapeSaved = false;
float mediaPrevFrame = 0;

/**
 * @var bool firstBrand
 * @brief The first frame containing brands on tape must be saved
 */
bool firstBrand = true;
float firstInstant = 0;
string fileName, extension;

// Path variables
static fs::path outputPath{};
static fs::path irregularityImagesPath{};
// JSON files
static json configurationFile{};
static json irregularityFileOutput1{};
static json irregularityFileOutput2{};
// RotatedRect identifying the processing area
RotatedRect rect, rectTape, rectCapstan;

/**
 * @fn void pprint(string text, string color)
 * @brief Prints text in a given color.
 *
 * @param text the text to print
 * @param color the color escape code to prepend (e.g. a constant from lib/colors.h)
 */
void pprint(string text, string color) { cout << color << text << END << endl; }

struct Args {
    fs::path
        workingPath;  /**< The working path where all input files are stored and where all output files will be saved */
    string filesName; /**< The name of the preservation files to be considered */
    bool brands;      /**< True if the tape presents brands on its surface */
    float speed;      /**< The speed at which the tape was read */

    Args(fs::path workingPath, string filesName, bool brands, float speed) {
        if (speed != 7.5 && speed != 15) throw invalid_argument("Speed must be 7.5 or 15");
        this->workingPath = workingPath;
        this->filesName = filesName;
        this->brands = brands;
        this->speed = speed;
    }
    ~Args() {}

    static Args from_file(fs::path path) {
        ifstream iConfig(path);
        json j;
        iConfig >> j;
        return Args(fs::path(string(j["WorkingPath"])), j["FilesName"], j["Brands"], j["Speed"]);
    }

    static Args from_cli(int argc, char** argv) {
        po::variables_map vm;
        try {
            po::options_description desc(
                "A tool that implements the MPAI CAE-ARP Video Analyser Technical "
                "Specification.\n"
                "By default, the configuration parameters are loaded from the "
                "config/config.json file,\n"
                "but, alternatively, you can pass command line arguments to "
                "replace them");
            desc.add_options()("help,h", "Display this help message")(
                "working-path,w", po::value<string>()->required(),
                "Specify the Working Path, where all input files are stored")(
                "files-name,f", po::value<string>()->required(),
                "Specify the name of the Preservation files (without "
                "extension)")("brands,b", po::value<bool>()->required(),
                              "Specify if the tape presents brands on its surface")(
                "speed,s", po::value<float>()->required(), "Specify the speed at which the tape was read");
            po::store(po::command_line_parser(argc, argv).options(desc).run(), vm);
            if (vm.count("help")) {
                cout << desc << "\n";
                std::exit(EXIT_SUCCESS);
            }
            po::notify(vm);
        } catch (po::invalid_command_line_syntax& e) {
            pprint("The command line syntax is invalid: " + string(e.what()), RED + BOLD);
            std::exit(EXIT_FAILURE);
        } catch (po::required_option& e) {
            cerr << "Error: " << e.what() << endl;
            std::exit(EXIT_FAILURE);
        } catch (nlohmann::detail::type_error& e) {
            pprint("config.json error! " + string(e.what()), RED);
            std::exit(EXIT_FAILURE);
        }

        return Args(fs::path(vm["working-path"].as<string>()), vm["files-name"].as<string>(), vm["brands"].as<bool>(),
                    vm["speed"].as<float>());
    }
};
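
/*
 * Illustrative usage of Args (a sketch, not normative): the JSON keys below are the ones
 * read by Args::from_file and the option names are the ones declared in Args::from_cli;
 * the executable name and the values are only examples, and the real config.json also
 * holds the SceneObject parameters that are read elsewhere.
 *
 *   config/config.json:
 *     {
 *       "WorkingPath": "/data/arp",
 *       "FilesName": "BERIO058",
 *       "Brands": true,
 *       "Speed": 7.5
 *     }
 *
 *   Equivalent command line:
 *     ./VideoAnalyser --working-path /data/arp --files-name BERIO058 --brands 1 --speed 7.5
 */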

// Constant paths
static const string READING_HEAD_IMG = "input/readingHead.png";
static const string CAPSTAN_TEMPLATE_IMG = "input/capstanBERIO058prova.png";
static const string CONFIG_FILE = "config/config.json";

/**
 * @brief Get the next frame object
 *
 * Whenever we find an Irregularity, we want to skip a length equal to the
 * Studer reading head (3 cm = 1.18 inches).
 * Note the following considerations:
 * - since we are analysing video at 25 fps, a frame occurs every 40 ms
 * - at 15 ips we cross 3 cm of tape in 79 ms (2 frames)
 * - at 7.5 ips we cross 3 cm of tape in 157 ms (4 frames)
 * The considered lengths are the widths of the tape areas.
 * This is a valid approach as long as the tape areas always have widths equal
 * to the reading head.
 *
 * @param cap VideoCapture object
 * @param speed tape reading speed
 * @param skip if true, skip ahead by the reading-head length before grabbing the frame
 * @return Frame
 */
Frame get_next_frame(VideoCapture& cap, float speed, bool skip = false) {

    if (skip) {
        int ms_to_skip = speed == 15 ? 79 : 157;
        cap.set(CAP_PROP_POS_MSEC, cap.get(CAP_PROP_POS_MSEC) + ms_to_skip);
    }

    Frame frame;
    cap >> frame;
    return frame;
}
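
/*
 * Worked example of the skip lengths used above (illustrative arithmetic only):
 *   at 15 ips the tape moves at 38.1 cm/s, so 3 cm take 3 / 38.1 = 0.079 s, about 79 ms,
 *   i.e. roughly 2 frames at 25 fps (40 ms per frame);
 *   at 7.5 ips the tape moves at 19.05 cm/s, so 3 cm take about 157 ms, roughly 4 frames.
 */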

double rotatedRectArea(RotatedRect rect) { return rect.size.width * rect.size.height; }

/**
 * @fn std::tuple<int, int, double, double, vector<Vec4f>, vector<Vec4f>>
 * findObject(Mat model, SceneObject object, Mat processing_area)
 * @brief Find the model in the scene using the Generalized Hough Transform.
 * It returns the best matches for positive and negative angles. If more than
 * one shape is found, the one with the highest score is chosen; if several
 * share the same highest score, the last one found is arbitrarily chosen.
 *
 * @param model the template image to be searched with the Generalized Hough
 * Transform
 * @param object the SceneObject struct containing the parameters for the
 * Generalized Hough Transform
 * @param processing_area the portion of the scene in which the template is searched
 * @return std::tuple<int, int, double, double, vector<Vec4f>, vector<Vec4f>> a
 * tuple containing the best matches for positive and negative angles
 */
std::tuple<int, int, double, double, vector<Vec4f>, vector<Vec4f>> findObject(Mat model, SceneObject object,
                                                                              Mat processing_area) {
    // Algorithm and parameters.
    // For information about the Generalized Hough Guil usage, see the tutorial at
    // https://docs.opencv.org/4.7.0/da/ddc/tutorial_generalized_hough_ballard_guil.html
    Ptr<GeneralizedHoughGuil> alg = createGeneralizedHoughGuil();

    vector<Vec4f> positionsPos, positionsNeg;
    Mat votesPos, votesNeg;

    double maxValPos = 0, maxValNeg = 0;
    int indexPos = 0, indexNeg = 0;

    alg->setMinDist(object.minDist);
    alg->setLevels(360);
    alg->setDp(2);
    alg->setMaxBufferSize(1000);

    alg->setAngleStep(1);
    alg->setAngleThresh(object.threshold.angle);

    alg->setMinScale(0.9);
    alg->setMaxScale(1.1);
    alg->setScaleStep(0.01);
    alg->setScaleThresh(object.threshold.scale);

    alg->setPosThresh(object.threshold.pos);

    alg->setCannyLowThresh(150);   // Old: 100
    alg->setCannyHighThresh(240);  // Old: 300

    alg->setTemplate(model);

    utility::detectShape(alg, model, object.threshold.pos, positionsPos, votesPos, positionsNeg, votesNeg,
                         processing_area);

    for (int i = 0; i < votesPos.size().width; i++) {
        if (votesPos.at<int>(i) >= maxValPos) {
            maxValPos = votesPos.at<int>(i);
            indexPos = i;
        }
    }

    for (int i = 0; i < votesNeg.size().width; i++) {
        if (votesNeg.at<int>(i) >= maxValNeg) {
            maxValNeg = votesNeg.at<int>(i);
            indexNeg = i;
        }
    }

    return {indexPos, indexNeg, maxValPos, maxValNeg, positionsPos, positionsNeg};
}
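
/*
 * Illustrative call site (a sketch mirroring how findProcessingAreas() below uses the
 * function; the variable names here are placeholders):
 *
 *   auto [idxPos, idxNeg, bestPos, bestNeg, candidatesPos, candidatesNeg] =
 *       findObject(templateImage, sceneObject, processingArea);
 *
 * candidatesPos[idxPos] and candidatesNeg[idxNeg] are the best candidates for the two
 * angle signs, and bestPos / bestNeg are their vote counts (0 means no detection).
 */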

/**
 * @fn bool findProcessingAreas(Mat myFrame, SceneObject tape, SceneObject capstan)
 * @brief Identifies the Regions Of Interest (ROIs) on the video,
 * which are:
 * - The reading head;
 * - The tape area under the tape head (computed on the basis of the detected
 * reading head);
 * - The capstan.
 * @param myFrame The current frame of the video.
 * @param tape the tape SceneObject;
 * @param capstan the capstan SceneObject.
 * @return true if some areas have been detected;
 * @return false otherwise.
 */
bool findProcessingAreas(Mat myFrame, SceneObject tape, SceneObject capstan) {
    /*********************************************************************************************/
    /*********************************** READING HEAD DETECTION *********************************/
    /*********************************************************************************************/

    // Save a grayscale version of myFrame in gray_current_frame and downsample it
    // to half size for performance reasons
    Frame gray_current_frame = Frame(myFrame).convertColor(COLOR_BGR2GRAY);

    Frame halved_gray_current_frame = gray_current_frame.clone().downsample(2);

    // Get the template shape in grayscale and downsample it to half size
    Frame reading_head_template = Frame(cv::imread(READING_HEAD_IMG, IMREAD_GRAYSCALE)).downsample(2);

    // Process only the bottom-central portion of the input video -> best
    // results with our videos
    Rect readingHeadProcessingAreaRect(halved_gray_current_frame.cols / 4, halved_gray_current_frame.rows / 2,
                                       halved_gray_current_frame.cols / 2, halved_gray_current_frame.rows / 2);
    Mat processingImage = halved_gray_current_frame(readingHeadProcessingAreaRect);

    RotatedRect rectPos, rectNeg;
    auto [indexPos, indexNeg, maxValPos, maxValNeg, positionsPos, positionsNeg] =
        findObject(reading_head_template, tape, processingImage);

    // The color is progressively darkened to emphasize that the algorithm found
    // more than one shape
    if (positionsPos.size() > 0)
        rectPos = utility::drawShapes(myFrame, positionsPos[indexPos], Scalar(0, 0, 255 - indexPos * 64),
                                      reading_head_template.cols, reading_head_template.rows,
                                      halved_gray_current_frame.cols / 4, halved_gray_current_frame.rows / 2, 2);
    if (positionsNeg.size() > 0)
        rectNeg = utility::drawShapes(myFrame, positionsNeg[indexNeg], Scalar(128, 128, 255 - indexNeg * 64),
                                      reading_head_template.cols, reading_head_template.rows,
                                      halved_gray_current_frame.cols / 4, halved_gray_current_frame.rows / 2, 2);

    if (maxValPos > 0)
        if (maxValNeg > 0)
            if (maxValPos > maxValNeg) {
                rect = rectPos;
            } else {
                rect = rectNeg;
            }
        else {
            rect = rectPos;
        }
    else if (maxValNeg > 0) {
        rect = rectNeg;
    } else {
        return false;
    }

    /************************************ TAPE AREA DETECTION ****************/

    // Compute the area based on the reading head detection
    Vec4f positionTape(rect.center.x, rect.center.y + rect.size.height / 2 + 20 * (rect.size.width / 200), 1,
                       rect.angle);
    rectTape = utility::drawShapes(myFrame, positionTape, Scalar(0, 255 - indexPos * 64, 0), rect.size.width,
                                   50 * (rect.size.width / 200), 0, 0, 1);
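
    /*
     * Illustrative numbers (not used at runtime, and assuming utility::drawShapes takes
     * the shape width and height as its 4th and 5th arguments, as the calls above suggest):
     * if the detected reading head is 200 px wide, the factor (rect.size.width / 200) is 1,
     * so the tape area is a 200 x 50 px strip centred 20 px below the bottom edge of the
     * reading head and tilted by the same angle as the head.
     */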

    /************************************* CAPSTAN DETECTION ******************/

    // Read the template image - it is smaller than the reading-head template, so
    // there is no need to downsample it
    Mat templateShape = cv::imread(CAPSTAN_TEMPLATE_IMG, IMREAD_GRAYSCALE);

    if (useSURF) {
        // Step 1: Detect the keypoints using SURF Detector, compute the
        // descriptors
        int minHessian = 100;
        Ptr<xfeatures2d::SURF> detector = xfeatures2d::SURF::create(minHessian);
        vector<KeyPoint> keypoints_object, keypoints_scene;
        Mat descriptors_object, descriptors_scene;

        detector->detectAndCompute(templateShape, noArray(), keypoints_object, descriptors_object);
        detector->detectAndCompute(gray_current_frame, noArray(), keypoints_scene, descriptors_scene);

        // Step 2: Matching descriptor vectors with a FLANN based matcher
        // Since SURF is a floating-point descriptor NORM_L2 is used
        Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create(DescriptorMatcher::FLANNBASED);
        vector<vector<DMatch>> knn_matches;
        matcher->knnMatch(descriptors_object, descriptors_scene, knn_matches, 2);
        //-- Filter matches using the Lowe's ratio test
        const float ratio_thresh = 0.75f;
        vector<DMatch> good_matches;
        for (size_t i = 0; i < knn_matches.size(); i++) {
            if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
                good_matches.push_back(knn_matches[i][0]);
            }
        }
        // Draw matches
        Mat img_matches;
        cv::drawMatches(templateShape, keypoints_object, halved_gray_current_frame, keypoints_scene, good_matches,
                    img_matches, Scalar::all(-1), Scalar::all(-1), vector<char>(),
                    DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
        // Localize the object
        vector<Point2f> obj;
        vector<Point2f> scene;
        for (size_t i = 0; i < good_matches.size(); i++) {
            // Get the keypoints from the good matches
            obj.push_back(keypoints_object[good_matches[i].queryIdx].pt);
            scene.push_back(keypoints_scene[good_matches[i].trainIdx].pt);
        }
        Mat H = cv::findHomography(obj, scene, RANSAC);
        // Get the corners from the image_1 ( the object to be "detected" )
        vector<Point2f> obj_corners(4);
        obj_corners[0] = Point2f(0, 0);
        obj_corners[1] = Point2f((float)templateShape.cols, 0);
        obj_corners[2] = Point2f((float)templateShape.cols, (float)templateShape.rows);
        obj_corners[3] = Point2f(0, (float)templateShape.rows);
        vector<Point2f> scene_corners(4);
        cv::perspectiveTransform(obj_corners, scene_corners, H);

        // Find average
        float capstanX = (scene_corners[0].x + scene_corners[1].x + scene_corners[2].x + scene_corners[3].x) / 4;
        float capstanY = (scene_corners[0].y + scene_corners[1].y + scene_corners[2].y + scene_corners[3].y) / 4;

        // The found rectangle is shrunk by 20 px horizontally and 90 px vertically,
        // keeping it centred: +10 in X and -20 in width, +45 in Y and -90 in height
        Vec4f positionCapstan(capstanX + 10, capstanY + 45, 1, 0);
        rectCapstan = utility::drawShapes(myFrame, positionCapstan, Scalar(255 - indexPos * 64, 0, 0),
                                          templateShape.cols - 20, templateShape.rows - 90, 0, 0, 1);

    } else {
        // Process only the right portion of the image, where the capstan always
        // appears
        int capstanProcessingAreaRectX = myFrame.cols * 3 / 4;
        int capstanProcessingAreaRectY = myFrame.rows / 2;
        int capstanProcessingAreaRectWidth = myFrame.cols / 4;
        int capstanProcessingAreaRectHeight = myFrame.rows / 2;
        Rect capstanProcessingAreaRect(capstanProcessingAreaRectX, capstanProcessingAreaRectY,
                                       capstanProcessingAreaRectWidth, capstanProcessingAreaRectHeight);
        Mat capstanProcessingAreaGrayscale = gray_current_frame(capstanProcessingAreaRect);
        // Reset algorithm and set parameters

        auto [indexPos, indexNeg, maxValPos, maxValNeg, positionsC1Pos, positionsC1Neg] =
            findObject(templateShape, capstan, capstanProcessingAreaGrayscale);

        RotatedRect rectCapstanPos, rectCapstanNeg;
        if (positionsC1Pos.size() > 0)
            rectCapstanPos = utility::drawShapes(myFrame, positionsC1Pos[indexPos], Scalar(255 - indexPos * 64, 0, 0),
                                                 templateShape.cols - 22, templateShape.rows - 92,
                                                 capstanProcessingAreaRectX + 11, capstanProcessingAreaRectY + 46, 1);
        if (positionsC1Neg.size() > 0)
            rectCapstanNeg = utility::drawShapes(myFrame, positionsC1Neg[indexNeg], Scalar(255 - indexNeg * 64, 128, 0),
                                                 templateShape.cols - 22, templateShape.rows - 92,
                                                 capstanProcessingAreaRectX + 11, capstanProcessingAreaRectY + 46, 1);

        if (maxValPos > 0)
            if (maxValNeg > 0)
                if (maxValPos > maxValNeg) {
                    rectCapstan = rectCapstanPos;
                } else {
                    rectCapstan = rectCapstanNeg;
                }
            else {
                rectCapstan = rectCapstanPos;
            }
        else if (maxValNeg > 0) {
            rectCapstan = rectCapstanNeg;
        } else {
            return false;
        }
    }

    cout << endl;

    // Save the image containing the detected areas
    cv::imwrite(outputPath.string() + "/tapeAreas.jpg", myFrame);

    return true;
}

/**
 * @fn RotatedRect check_skew(RotatedRect roi)
 * @brief Check if the region of interest is skewed and correct it
 *
 * @param roi the region of interest
 * @return RotatedRect the corrected region of interest
 */
RotatedRect check_skew(RotatedRect roi) {
    // get angle and size from the bounding box
    // thanks to http://felix.abecassis.me/2011/10/opencv-rotation-deskewing/
    cv::Size rect_size = roi.size;
    float angle = roi.angle;

    if (roi.angle < -45.) {
        angle += 90.0;
        swap(rect_size.width, rect_size.height);
    }

    return RotatedRect(roi.center, rect_size, angle);
}
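
/*
 * Worked example (illustrative): a RotatedRect with angle -60 degrees and size 100 x 40 px
 * is returned as angle 30 degrees with size 40 x 100 px, so the subsequent deskewing
 * rotation uses the smaller equivalent angle.
 */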

/**
 * @fn Frame get_difference_for_roi(Frame previous, Frame current, RotatedRect
 * roi)
 * @brief Look for differences between two consecutive frames in a specific region of
 * interest
 *
 * @param previous the reference frame
 * @param current the frame to compare with the reference
 * @param roi the region of interest
 * @return Frame the difference matrix between the two frames
 */
Frame get_difference_for_roi(Frame previous, Frame current, RotatedRect roi) {
    cv::Mat rotation_matrix = getRotationMatrix2D(roi.center, roi.angle, 1.0);

    return previous.warp(rotation_matrix)
        .crop(roi.size, roi.center)
        .difference(current.warp(rotation_matrix).crop(roi.size, roi.center));
}

/**
 * @fn bool frameDifference(cv::Mat prevFrame, cv::Mat currentFrame, int msToEnd,
 * SceneObject capstan, SceneObject tape, Args args)
 * @brief Compares two consecutive video frames and establishes whether there is
 * potentially an Irregularity. The comparison is pixel-wise and based on
 * threshold values set in the config.json file.
 *
 * @param prevFrame the frame before the current one;
 * @param currentFrame the current frame;
 * @param msToEnd the number of milliseconds left before the end of the video.
 * Useful for capstan analysis.
 * @param capstan the capstan SceneObject;
 * @param tape the tape SceneObject;
 * @param args the parsed input arguments.
 * @return true if a potential Irregularity has been found;
 * @return false otherwise.
 */
bool frameDifference(cv::Mat prevFrame, cv::Mat currentFrame, int msToEnd, SceneObject capstan, SceneObject tape,
                     Args args) {
    bool result = false;

    /*********************** Capstan analysis ************************/

    // In the last minute of the video, check for pinchRoller position for
    // endTape event
    if (!endTapeSaved && msToEnd < 60000) {
        // Capstan area
        int capstanAreaPixels = rectCapstan.size.width * rectCapstan.size.height;
        float capstanDifferentPixelsThreshold = capstanAreaPixels * capstan.threshold.percentual / 100;

        RotatedRect corrected_capstan_roi = check_skew(rectCapstan);
        Frame difference_frame = get_difference_for_roi(Frame(prevFrame), Frame(currentFrame), corrected_capstan_roi);

        int blackPixelsCapstan = 0;

        for (int i = 0; i < difference_frame.rows; i++) {
            for (int j = 0; j < difference_frame.cols; j++) {
                if (difference_frame.at<cv::Vec3b>(i, j)[0] == 0) {
                    // There is a black pixel, then there is a difference
                    // between previous and current frames
                    blackPixelsCapstan++;
                }
            }
        }

        if (blackPixelsCapstan > capstanDifferentPixelsThreshold) {
            savingPinchRoller = true;
            endTapeSaved = true;  // Never check again for end tape instant
            return true;
        }
    }
    // savingPinchRoller is already false before the last minute of the video.
    // After the capstan frame has been saved, reset the flag so it is not
    // saved again on later frames
    savingPinchRoller = false;

    /********************* Tape analysis *********************/

    // Tape area
    int tapeAreaPixels = rotatedRectArea(rectTape);
    float tapeDifferentPixelsThreshold = tapeAreaPixels * tape.threshold.percentual / 100;
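
    /*
     * Illustrative numbers (not part of the computation): with a 200 x 50 px tape area
     * (10000 px) and a ThresholdPercentual of 20, more than 2000 pixels must differ
     * between the two frames before the decision stage below can flag an Irregularity.
     */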

    RotatedRect corrected_tape_roi = check_skew(rectTape);

    Frame croppedCurrentFrame = Frame(currentFrame)
                                    .warp(getRotationMatrix2D(corrected_tape_roi.center, corrected_tape_roi.angle, 1.0))
                                    .crop(corrected_tape_roi.size, corrected_tape_roi.center);

    Frame difference_frame = get_difference_for_roi(Frame(prevFrame), Frame(currentFrame), corrected_tape_roi);

    int decEnd = (msToEnd % 1000) / 100;
    int secEnd = (msToEnd - (msToEnd % 1000)) / 1000;
    int minEnd = secEnd / 60;
    secEnd = secEnd % 60;

    /********************** Segment analysis ************************/

    int blackPixels = 0;
    float mediaCurrFrame;
    int totColoreCF = 0;

    for (int i = 0; i < croppedCurrentFrame.rows; i++) {
        for (int j = 0; j < croppedCurrentFrame.cols; j++) {
            totColoreCF += croppedCurrentFrame.at<cv::Vec3b>(i, j)[0] + croppedCurrentFrame.at<cv::Vec3b>(i, j)[1] +
                           croppedCurrentFrame.at<cv::Vec3b>(i, j)[2];
            if (difference_frame.at<cv::Vec3b>(i, j)[0] == 0) {
                blackPixels++;
            }
        }
    }
    mediaCurrFrame = totColoreCF / tapeAreaPixels;

    /*********************** Decision stage ************************/

    if (blackPixels > tapeDifferentPixelsThreshold) {  // The threshold must be passed

        /***** AVERAGE_COLOR-BASED DECISION *****/
        if (mediaPrevFrame > (mediaCurrFrame + 7) ||
            mediaPrevFrame < (mediaCurrFrame - 7)) {  // They are not similar for color average
            result = true;
        }

        /***** BRANDS MANAGEMENT *****/
        if (args.brands) {
            // At the beginning of the video, wait at least 5 seconds before the
            // next Irregularity to consider it as a brand. It is not guaranteed
            // that it will be the first brand, but it is generally a safe
            // approach to have a correct image
            if (firstBrand) {
                if (firstInstant - msToEnd > 5000) {
                    firstBrand = false;
                    savingBrand = true;
                    result = true;
                }
                // In the following iterations reset savingBrand, since we are
                // no longer interested in brands.
            } else
                savingBrand = false;
        }
    }

    // Update mediaPrevFrame
    mediaPrevFrame = mediaCurrFrame;

    return result;
}
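
/*
 * Illustrative example of the average-colour check above (the numbers are made up):
 * mediaCurrFrame is the per-pixel sum of the B, G and R channels averaged over the tape
 * area (range 0-765), so if the previous frame averaged 300 and the current one 310, the
 * difference of 10 exceeds the +/-7 band and the frames are considered dissimilar.
 */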

/**
 * @fn void processing(cv::VideoCapture videoCapture, SceneObject capstan,
 * SceneObject tape, Args args)
 * @brief video processing phase, where each frame is analysed.
 * It saves the IrregularityImages and updates the IrregularityFiles if an
 * Irregularity is found
 *
 * @note To be able to work with the "old" neural network (by Ilenya),
 * the output images should correspond to the old "whole tape" where, from the
 * frame judged as interesting, an area corresponding to the height of the tape
 * was extracted (so about the height of the current rectangle) and as wide as
 * the original frame (so 720px). This area will then have to be resized to
 * 224x224 as in the past. If instead you decide to use the new neural network,
 * no changes are needed.
 *
 * @param videoCapture the input Preservation Audio-Visual File;
 * @param capstan the capstan SceneObject;
 * @param tape the tape SceneObject;
 * @param args the command line arguments.
 */
void processing(cv::VideoCapture videoCapture, SceneObject capstan, SceneObject tape, Args args) {
    const int video_length_ms = ((float)videoCapture.get(CAP_PROP_FRAME_COUNT) / videoCapture.get(CAP_PROP_FPS)) * 1000;
    int video_current_ms = videoCapture.get(CAP_PROP_POS_MSEC);
    // counters
    int savedFrames = 0;
    float lastSaved = -160;
    /* Whenever we find an Irregularity, we want to skip a length equal to the
     * Studer reading head (3 cm = 1.18 inches).
     * Note the following considerations:
     * - since we are analysing video at 25 fps, a frame occurs every 40 ms
     * - at 15 ips we cross 3 cm of tape in 79 ms (2 frames)
     * - at 7.5 ips we cross 3 cm of tape in 157 ms (4 frames)
     * The considered lengths are the widths of the tape areas.
     * This is a valid approach as long as the tape areas always have widths
     * equal to the reading head.
     */
    int savingRate = args.speed == 7.5 ? 157 : 79;  // [ms]

    // The first frame of the video won't be processed
    cv::Mat prevFrame;
    videoCapture >> prevFrame;
    firstInstant = video_length_ms - video_current_ms;

    while (videoCapture.isOpened()) {

        Frame currentFrame = get_next_frame(videoCapture, args.speed);
        video_current_ms = videoCapture.get(CAP_PROP_POS_MSEC);

        Frame frame;
        videoCapture >> frame;
        video_current_ms = videoCapture.get(CAP_PROP_POS_MSEC);

        if (frame.empty()) {
            cout << endl << "Empty frame!" << endl;
            videoCapture.release();
            return;
        }

        int msToEnd = video_length_ms - video_current_ms;
        if (video_current_ms == 0)  // With OpenCV library, this happens at the last few frames of
                                    // the video before realising that "frame" is empty.
            return;

        // Display program status
        int secToEnd = msToEnd / 1000;
        int minToEnd = (secToEnd / 60) % 60;
        secToEnd = secToEnd % 60;
        string secStrToEnd = secToEnd < 10 ? "0" + to_string(secToEnd) : to_string(secToEnd);
        string minStrToEnd = minToEnd < 10 ? "0" + to_string(minToEnd) : to_string(minToEnd);

        cout << "\rIrregularities: " << savedFrames << ".   ";
        cout << "Remaining video time [mm:ss]: " << minStrToEnd << ":" << secStrToEnd << flush;

        if ((video_current_ms - lastSaved > savingRate) &&
            frameDifference(prevFrame, frame, msToEnd, capstan, tape, args)) {
            // An Irregularity has been found!
            auto [odd_frame, even_frame] = frame.deinterlace();

            string timeLabel = getTimeLabel(video_current_ms, ":");
            string safeTimeLabel = getTimeLabel(video_current_ms, "-");

            string irregularityImageFilename = to_string(savedFrames) + "_" + safeTimeLabel + ".jpg";
            cv::imwrite(irregularityImagesPath / irregularityImageFilename, odd_frame);

            // Append Irregularity information to JSON
            Irregularity irreg = Irregularity(Source::Video, timeLabel);
            irregularityFileOutput1["Irregularities"] += irreg.to_JSON();
            irregularityFileOutput2["Irregularities"] +=
                irreg.set_image_URI(irregularityImagesPath.string() + "/" + irregularityImageFilename).to_JSON();

            lastSaved = video_current_ms;
            savedFrames++;
        }

        prevFrame = frame;
    }
}

/**
 * @fn int main(int argc, char** argv)
 * @brief main program, organised as:
 * - Get input from command line or config.json file;
 * - Check input parameters;
 * - Creation of output directories;
 * - Regions Of Interest (ROIs) detection;
 * - Irregularities detection;
 * - Saving of output IrregularityFiles.
 *
 * @todo The main function should be split into 2 steps, and each step should be callable from the command line, so
 * that the user can choose to run only the first step, only the second step, or both:
 * - First step: generate irregularity file output 1;
 * - Second step: generate irregularity file output 2.
 *
 * @param argc Command line arguments count;
 * @param argv Command line arguments.
 * @return int program status.
 */
int main(int argc, char** argv) {
    const Args args = argc > 1 ? Args::from_cli(argc, argv) : Args::from_file(CONFIG_FILE);
    SceneObject capstan = SceneObject::from_file(CONFIG_FILE, Object::CAPSTAN);
    SceneObject tape = SceneObject::from_file(CONFIG_FILE, Object::TAPE);

    json irregularityFileInput;
    cv::Mat myFrame;

    const fs::path VIDEO_PATH = args.workingPath / "PreservationAudioVisualFile" / args.filesName;

    if (files::findFileName(VIDEO_PATH, fileName, extension) == -1) {
        cerr << RED << BOLD << "Input error!" << END << endl
             << RED << VIDEO_PATH.string() << " cannot be found or opened." << END << endl;
        std::exit(EXIT_FAILURE);
    }

    const fs::path irregularityFileInputPath = args.workingPath / "temp" / fileName / A_IRREG_FILE_1;

    // Input JSON check
    ifstream iJSON(irregularityFileInputPath);
    if (iJSON.fail()) {
        cerr << RED << BOLD << "Input error!" << END << endl
             << RED << irregularityFileInputPath.string() << " cannot be found or opened." << END << endl;
        std::exit(EXIT_FAILURE);
    }

    // Read input JSON
    iJSON >> irregularityFileInput;

    // Adjust input parameters (the given values are assumed to refer to a reading speed of 7.5 ips)
    if (args.brands) {
        if (args.speed == 15) tape.threshold.percentual += 6;
    } else if (args.speed == 15)
        tape.threshold.percentual += 20;
    else
        tape.threshold.percentual += 21;
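
    /*
     * Summary of the adjustment above (the offsets follow directly from the branches;
     * the base ThresholdPercentual comes from config.json):
     *   brands  && speed == 15  -> base + 6
     *   brands  && speed == 7.5 -> base (unchanged)
     *   !brands && speed == 15  -> base + 20
     *   !brands && speed == 7.5 -> base + 21
     */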

    cout << endl;
    cout << "Parameters:" << endl;
    cout << "    Brands: " << args.brands << endl;
    cout << "    Speed: " << args.speed << endl;
    cout << "    ThresholdPercentual: " << tape.threshold.percentual << endl;
    cout << "    ThresholdPercentualCapstan: " << capstan.threshold.percentual << endl;
    cout << endl;

    // Make directory with fileName name
    outputPath = args.workingPath / "temp" / fileName;
    fs::create_directory(outputPath);

    irregularityImagesPath = outputPath / "IrregularityImages";
    fs::create_directory(irregularityImagesPath);

    /************************************** AREAS DETECTION *********************************/

    cv::VideoCapture videoCapture(VIDEO_PATH);
    if (!videoCapture.isOpened()) {
        pprint("Video unreadable.", RED + BOLD);
        std::exit(EXIT_FAILURE);
    }

    int frames_number = videoCapture.get(CAP_PROP_FRAME_COUNT);
    // Set frame position to half video length
    videoCapture.set(CAP_PROP_POS_FRAMES, frames_number / 2);
    // Get frame
    videoCapture >> myFrame;

    cout << "Video resolution: " << myFrame.cols << "x" << myFrame.rows << endl;

    bool found = findProcessingAreas(myFrame, tape, capstan);

    // Reset frame position
    videoCapture.set(CAP_PROP_POS_FRAMES, 0);

    if (!found) {
        pprint("Processing area not found. Try changing JSON parameters.", RED);
        std::exit(EXIT_FAILURE);
    }

    /**************************************** PROCESSING **************************/

    pprint("Processing...", CYAN);

    // Processing timer
    time_t startTimer, endTimer;
    startTimer = time(NULL);

    processing(videoCapture, capstan, tape, args);

    endTimer = time(NULL);
    float min = (endTimer - startTimer) / 60;
    float sec = (endTimer - startTimer) % 60;

    string result("Processing elapsed time: " + to_string((int)min) + ":" + to_string((int)sec));
    cout << endl << result << endl;

    /************************************* IRREGULARITY FILES *****************************/

    files::saveFile(outputPath / V_IRREG_FILE_1, irregularityFileOutput1.dump(4), false);

    // Irregularities to extract for the AudioAnalyser and for the TapeIrregularityClassifier
    extractIrregularityImagesForAudio(outputPath, VIDEO_PATH, irregularityFileInput, irregularityFileOutput2);

    files::saveFile(outputPath / V_IRREG_FILE_2, irregularityFileOutput2.dump(4), false);

    return EXIT_SUCCESS;
}