/**
 *  MPAI CAE-ARP Video Analyser.
 *
 *	Implements the MPAI CAE-ARP Video Analyser Technical Specification.
 *	It identifies Irregularities on the Preservation Audio-Visual File, providing:
 *	- Irregularity Files;
 *	- Irregularity Images.
 *
 *	WARNING:
 *	Currently, this program is only compatible with the Studer A810 and videos recorded in PAL standard.
 *
 *  @author Nadir Dalla Pozza
 *	@copyright 2022, Audio Innova S.r.l.
 *	@credits Niccolò Pretto, Nadir Dalla Pozza, Sergio Canazza
 *	@license GPL v3.0
 *	@version 1.0.1
 *	@maintainer Nadir Dalla Pozza
 *	@email nadir.dallapozza@unipd.it
 *	@status Production
 */
#include <filesystem>
#include <fstream>
#include <iostream>
#include <stdlib.h>
#include <sys/timeb.h>

#include <boost/program_options.hpp>

#include <boost/uuid/uuid.hpp>            // uuid class
#include <boost/uuid/uuid_generators.hpp> // generators
#include <boost/uuid/uuid_io.hpp>         // streaming operators etc.
#include <boost/lexical_cast.hpp>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

#include <nlohmann/json.hpp>

#include "opencv2/core.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/xfeatures2d.hpp"

#include "utility.h"
#include "forAudioAnalyser.h"

using namespace cv;
using namespace std;
using json = nlohmann::json;

namespace fs = std::filesystem;
namespace po = boost::program_options;



// For capstan detection, there are two alternative approaches:
// Generalized Hough Transform and SURF.
bool useSURF = true;

bool savingPinchRoller = false, pinchRollerRect = false;
bool savingBrand = false;
bool endTapeSaved = false;

cv::Mat myFrame;
float mediaPrevFrame = 0;
bool firstBrand = true;	// The first frame containing brands on tape must be saved
float firstInstant = 0;
string fileName, extension;

// config.json parameters
fs::path workingPath;
string filesName;
bool brands;
float speed, tapeThresholdPercentual, capstanThresholdPercentual;
int minDist, angleThresh, scaleThresh, posThresh, minDistCapstan, angleThreshCapstan, scaleThreshCapstan, posThreshCapstan;

// Path variables
fs::path outputPath;
fs::path irregularityImagesPath;
fs::path videoPath;
fs::path irregularityFileInputPath;

// JSON files
json configurationFile;
json irregularityFileInput;
json irregularityFileOutput1;
json irregularityFileOutput2;
// RotatedRect identifying the processing area
RotatedRect rect, rectTape, rectCapstan;


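// ANSI escape sequences used to colour the terminal output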
string PURPLE = "\033[95m";
string CYAN = "\033[96m";
string DARK_CYAN = "\033[36m";
string BLUE = "\033[94m";
string GREEN = "\033[92m";
string YELLOW = "\033[93m";
string RED = "\033[91m";
string BOLD = "\033[1m";
string UNDERLINE = "\033[4m";
string END = "\033[0m";


/*************************************************************************************************/
/**************************************** SUPPORT METHODS ****************************************/
/*************************************************************************************************/

/**
 * @brief Get operation arguments from command line or config.json file.
 *
 * @param argc Command line arguments count;
 * @param argv Command line arguments.
 * @return true if input configuration is valid;
 * @return false otherwise.
 */
bool getArguments(int argc, char** argv) {
	// Read configuration file
	ifstream iConfig("config/config.json");
	iConfig >> configurationFile;

	if (argc == 1) {
		// Read from JSON file
		string wp = configurationFile["WorkingPath"];
		workingPath = fs::path(wp);
		filesName = configurationFile["FilesName"];
		brands = configurationFile["Brands"];
		speed = configurationFile["Speed"];
	} else {
		// Get from command line
		try {
			po::options_description desc(
				"A tool that implements MPAI CAE-ARP Video Analyser Technical Specification.\n"
				"By default, the configuration parameters are loaded from the config/config.json file,\n"
				"but, alternatively, you can pass command line arguments to replace them"
			);
			desc.add_options()
				("help,h", "Display this help message")
				("working-path,w", po::value<string>()->required(), "Specify the Working Path, where all input files are stored")
				("files-name,f", po::value<string>()->required(), "Specify the name of the Preservation files (without extension)")
				("brands,b", po::value<bool>()->required(), "Specify if the tape presents brands on its surface")
				("speed,s", po::value<float>()->required(), "Specify the speed at which the tape was read");
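			// Example invocation (binary name and argument values are purely illustrative):
			//   ./VideoAnalyser --working-path /path/to/WorkingPath --files-name BERIO058 --brands 0 --speed 7.5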
			po::variables_map vm;
			po::store(po::command_line_parser(argc, argv).options(desc).run(), vm);
			if (vm.count("help")) {
				cout << desc << "\n";
				return false;
			}
			po::notify(vm);
		} catch (po::invalid_command_line_syntax& e) {
			cerr << RED << BOLD << "The command line syntax is invalid: " << END << RED << e.what() << END << endl;
			return false;
		} catch (po::required_option& e) {
			cerr << "Error: " << e.what() << endl;
			return false;
		}
	}

	tapeThresholdPercentual = configurationFile["TapeThresholdPercentual"];
	capstanThresholdPercentual = configurationFile["CapstanThresholdPercentual"];
	minDist = configurationFile["MinDist"];
	angleThresh = configurationFile["AngleThresh"];
	scaleThresh = configurationFile["ScaleThresh"];
	posThresh = configurationFile["PosThresh"];
	minDistCapstan = configurationFile["MinDistCapstan"];
	angleThreshCapstan = configurationFile["AngleThreshCapstan"];
	scaleThreshCapstan = configurationFile["ScaleThreshCapstan"];
	posThreshCapstan = configurationFile["PosThreshCapstan"];

	return true;
}
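
/*
 * Illustrative sketch of a config/config.json accepted by getArguments(). The keys below are
 * exactly the ones read above; the values are placeholders and must be tuned for the actual
 * digitisation setup:
 *
 * {
 *     "WorkingPath": "/path/to/WorkingPath",
 *     "FilesName": "BERIO058",
 *     "Brands": false,
 *     "Speed": 7.5,
 *     "TapeThresholdPercentual": 2.0,
 *     "CapstanThresholdPercentual": 2.0,
 *     "MinDist": 100,
 *     "AngleThresh": 10000,
 *     "ScaleThresh": 1000,
 *     "PosThresh": 100,
 *     "MinDistCapstan": 100,
 *     "AngleThreshCapstan": 10000,
 *     "ScaleThreshCapstan": 1000,
 *     "PosThreshCapstan": 100
 * }
 */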


/**
 * @brief Identifies the Regions Of Interest (ROIs) on the video,
 * which are:
 * - The reading head;
 * - The tape area under the tape head (computed on the basis of the detected reading head);
 * - The capstan.
 *
 * @param configurationFile the config.json containing working parameters.
 * @return true if some areas have been detected;
 * @return false otherwise.
 */
bool findProcessingAreas(json configurationFile) {

	/*********************************************************************************************/
	/*********************************** READING HEAD DETECTION **********************************/
	/*********************************************************************************************/

	// Obtain grayscale version of myFrame
	Mat myFrameGrayscale;
	cvtColor(myFrame, myFrameGrayscale, COLOR_BGR2GRAY);
	// Get input shape in grayscale
	Mat templateImage = imread("input/readingHead.png", IMREAD_GRAYSCALE);
	// Downsample myFrameGrayscale in half pixels for performance reasons
	Mat myFrameGrayscaleHalf;
	pyrDown(myFrameGrayscale, myFrameGrayscaleHalf, Size(myFrame.cols/2, myFrame.rows/2));
	// Downsample tapeShape in half pixels
	Mat templateImageHalf;
	pyrDown(templateImage, templateImageHalf, Size(templateImage.cols/2, templateImage.rows/2));

	// Process only the bottom-central portion of the input video -> best results with our videos
	Rect readingHeadProcessingAreaRect(myFrameGrayscaleHalf.cols/4, myFrameGrayscaleHalf.rows/2, myFrameGrayscaleHalf.cols/2, myFrameGrayscaleHalf.rows/2);
	Mat processingImage = myFrameGrayscaleHalf(readingHeadProcessingAreaRect);
	// Select the template to be detected
	Mat templateShape = templateImageHalf;

	// Algorithm and parameters
	Ptr<GeneralizedHoughGuil> alg = createGeneralizedHoughGuil();

	vector<Vec4f> positionsPos, positionsNeg;
	Mat votesPos, votesNeg;
	TickMeter tm;
	int oldPosThresh = posThresh;
	RotatedRect rectPos, rectNeg;
	ofstream myFile;
	Point2f pts[4];

	// Find the best matches for positive and negative angles
	// If more than one shape is found, choose the one with the highest score
	// If several share the same highest score, arbitrarily choose the latest
	double maxValPos = 0, maxValNeg = 0;
	int indexPos = 0, indexNeg = 0;

	alg -> setMinDist(minDist);
	alg -> setLevels(360);
	alg -> setDp(2);
	alg -> setMaxBufferSize(1000);

	alg -> setAngleStep(1);
	alg -> setAngleThresh(angleThresh);

	alg -> setMinScale(0.9);
	alg -> setMaxScale(1.1);
	alg -> setScaleStep(0.01);
	alg -> setScaleThresh(scaleThresh);

	alg -> setPosThresh(posThresh);

	alg -> setCannyLowThresh(150); // Old: 100
	alg -> setCannyHighThresh(240); // Old: 300

	alg -> setTemplate(templateShape);

	cout << DARK_CYAN << "Reading head" << END << endl;
	tm.start();
	// Invoke utility.h function
	detectShape(alg, templateShape, posThresh, positionsPos, votesPos, positionsNeg, votesNeg, processingImage);
	tm.stop();
	cout << "Reading head detection time: " << tm.getTimeMilli() << " ms" << endl;

	for (int i = 0; i < votesPos.size().width; i++) {
		if (votesPos.at<int>(i) >= maxValPos) {
			maxValPos = votesPos.at<int>(i);
			indexPos = i;
		}
	}

	for (int i = 0; i < votesNeg.size().width; i++) {
		if (votesNeg.at<int>(i) >= maxValNeg) {
			maxValNeg = votesNeg.at<int>(i);
			indexNeg = i;
		}
	}

	// The color is progressively darkened to emphasize that the algorithm found more than one shape
	if (positionsPos.size() > 0)
		rectPos = drawShapes(myFrame, positionsPos[indexPos], Scalar(0, 0, 255-indexPos*64), templateImageHalf.cols, templateImageHalf.rows, myFrameGrayscaleHalf.cols/4, myFrameGrayscaleHalf.rows/2, 2);
	if (positionsNeg.size() > 0)
		rectNeg = drawShapes(myFrame, positionsNeg[indexNeg], Scalar(128, 128, 255-indexNeg*64), templateImageHalf.cols, templateImageHalf.rows, myFrameGrayscaleHalf.cols/4, myFrameGrayscaleHalf.rows/2, 2);

	myFile.open("log.txt", ios::app);

	if (maxValPos > 0)
		if (maxValNeg > 0)
			if (maxValPos > maxValNeg) {
				myFile << "READING HEAD: Positive angle is best, match number: " << indexPos << endl;
				rect = rectPos;
			} else {
				myFile << "READING HEAD: Negative angle is best, match number: " << indexNeg << endl;
				rect = rectNeg;
			}
		else {
			myFile << "READING HEAD: Positive angle is the only choice, match number: " << indexPos << endl;
			rect = rectPos;
		}
	else if (maxValNeg > 0) {
		myFile << "READING HEAD: Negative angle is the only choice, match number: " << indexNeg << endl;
		rect = rectNeg;
	} else {
		myFile.close();
		return false;
	}
	cout << endl;

	rect.points(pts);

	/*********************************************************************************************/
	/************************************ TAPE AREA DETECTION ************************************/
	/*********************************************************************************************/

	// Compute the tape area based on the reading head detection
	Vec4f positionTape( rect.center.x, rect.center.y + rect.size.height / 2 + 20 * (rect.size.width / 200), 1, rect.angle );
	rectTape = drawShapes(myFrame, positionTape, Scalar(0, 255-indexPos*64, 0), rect.size.width, 50 * (rect.size.width / 200), 0, 0, 1);

	myFile << "Tape area:" << endl;
	myFile << "  Center (x, y): (" << rectTape.center.x << ", " << rectTape.center.y << ")" << endl;
	myFile << "  Size (w, h): (" << rectTape.size.width << ", " << rectTape.size.height << ")" << endl;
	myFile << "  Angle (deg): (" << rectTape.angle << ")" << endl;

	json autoJSON;
	autoJSON["PreservationAudioVisualFile"] = fileName;
	autoJSON["RotatedRect"] = {
		{
			"CenterX", rectTape.center.x
		}, {
			"CenterY", rectTape.center.y
		}, {
			"Width", rectTape.size.width
		}, {
			"Height", rectTape.size.height
		}, {
			"Angle", rectTape.angle
		}
	};

	ofstream outputFile;
	string outputFileName = "/Users/nadir/Documents/MPAI-CAE/AreaJSONs/Auto/" + fileName + ".json";
	outputFile.open(outputFileName);
	outputFile << autoJSON << endl;
	outputFile.close();

	/*********************************************************************************************/
	/************************************* CAPSTAN DETECTION *************************************/
	/*********************************************************************************************/

	// Read template image - it is smaller than before, therefore there is no need to downsample
	templateShape = imread("input/capstanBERIO058prova.png", IMREAD_GRAYSCALE); // WORKING
	// templateShape = imread("../input/capstanBERIO058.png", IMREAD_GRAYSCALE);

	cout << DARK_CYAN << "Capstan" << END << endl;

	if (useSURF) {

		// Step 1: Detect the keypoints using SURF Detector, compute the descriptors
		int minHessian = 100;
		Ptr<xfeatures2d::SURF> detector = xfeatures2d::SURF::create(minHessian);
		vector<KeyPoint> keypoints_object, keypoints_scene;
		Mat descriptors_object, descriptors_scene;

		tm.reset();
		tm.start();
		detector->detectAndCompute(templateShape, noArray(), keypoints_object, descriptors_object);
		detector->detectAndCompute(myFrameGrayscale, noArray(), keypoints_scene, descriptors_scene);
		tm.stop();
		cout << "Capstan detection time: " << tm.getTimeMilli() << " ms" << endl;

		// Step 2: Matching descriptor vectors with a FLANN based matcher
		// Since SURF is a floating-point descriptor, NORM_L2 is used
		Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create(DescriptorMatcher::FLANNBASED);
		vector<vector<DMatch>> knn_matches;
		matcher->knnMatch(descriptors_object, descriptors_scene, knn_matches, 2);
		//-- Filter matches using Lowe's ratio test
		const float ratio_thresh = 0.75f;
		vector<DMatch> good_matches;
		for (size_t i = 0; i < knn_matches.size(); i++) {
			if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
				good_matches.push_back(knn_matches[i][0]);
			}
		}
		// Draw matches
		Mat img_matches;
		drawMatches(templateShape, keypoints_object, myFrameGrayscale, keypoints_scene, good_matches, img_matches, Scalar::all(-1), Scalar::all(-1), vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
		// Localize the object
		vector<Point2f> obj;
		vector<Point2f> scene;
		for (size_t i = 0; i < good_matches.size(); i++) {
			// Get the keypoints from the good matches
			obj.push_back(keypoints_object[good_matches[i].queryIdx].pt);
			scene.push_back(keypoints_scene[good_matches[i].trainIdx].pt);
		}
		Mat H = findHomography(obj, scene, RANSAC);
		// Get the corners from the image_1 ( the object to be "detected" )
		vector<Point2f> obj_corners(4);
		obj_corners[0] = Point2f(0, 0);
		obj_corners[1] = Point2f((float)templateShape.cols, 0);
		obj_corners[2] = Point2f((float)templateShape.cols, (float)templateShape.rows);
		obj_corners[3] = Point2f(0, (float)templateShape.rows);
		vector<Point2f> scene_corners(4);
		perspectiveTransform( obj_corners, scene_corners, H);

		// Find average
		float capstanX = (scene_corners[0].x + scene_corners[1].x + scene_corners[2].x + scene_corners[3].x) / 4;
		float capstanY = (scene_corners[0].y + scene_corners[1].y + scene_corners[2].y + scene_corners[3].y) / 4;

		// The following two adjustments trim the first 20 horizontal pixels and the first 90 vertical pixels from the detected rectangle:
		// +10 in X for centering and -20 in width
		// +45 in Y for centering and -90 in height
		Vec4f positionCapstan(capstanX + 10, capstanY + 45, 1, 0);
		rectCapstan = drawShapes(myFrame, positionCapstan, Scalar(255-indexPos*64, 0, 0), templateShape.cols - 20, templateShape.rows - 90, 0, 0, 1);

	} else {

		// Process only the right portion of the image, where the capstan always appears
		int capstanProcessingAreaRectX = myFrame.cols*3/4;
		int capstanProcessingAreaRectY = myFrame.rows/2;
		int capstanProcessingAreaRectWidth = myFrame.cols/4;
		int capstanProcessingAreaRectHeight = myFrame.rows/2;
		Rect capstanProcessingAreaRect(capstanProcessingAreaRectX, capstanProcessingAreaRectY, capstanProcessingAreaRectWidth, capstanProcessingAreaRectHeight);
		Mat capstanProcessingAreaGrayscale = myFrameGrayscale(capstanProcessingAreaRect);
		// Reset algorithm and set parameters
		alg = createGeneralizedHoughGuil();

		alg -> setMinDist(minDistCapstan);
		alg -> setLevels(360);
		alg -> setDp(2);
		alg -> setMaxBufferSize(1000);

		alg -> setAngleStep(1);
		alg -> setAngleThresh(angleThreshCapstan);

		alg -> setMinScale(0.9);
		alg -> setMaxScale(1.1);
		alg -> setScaleStep(0.01);
		alg -> setScaleThresh(scaleThreshCapstan);

		alg -> setPosThresh(posThreshCapstan);

		alg -> setCannyLowThresh(150);
		alg -> setCannyHighThresh(240);

		alg -> setTemplate(templateShape);

		oldPosThresh = posThreshCapstan;

		vector<Vec4f> positionsC1Pos, positionsC1Neg;
		Mat votesC1Pos, votesC1Neg;

		tm.reset();
		tm.start();
		detectShape(alg, templateShape, posThreshCapstan, positionsC1Pos, votesC1Pos, positionsC1Neg, votesC1Neg, capstanProcessingAreaGrayscale);
		tm.stop();
		cout << "Capstan detection time: " << tm.getTimeMilli() << " ms" << endl;

		// Find the best matches for positive and negative angles
		// If more than one shape is found, choose the one with the highest score
		// If several share the same highest score, choose the latest
		maxValPos = 0, maxValNeg = 0, indexPos = 0, indexNeg = 0;

		for (int i = 0; i < votesC1Pos.size().width; i++) {
			if (votesC1Pos.at<int>(i) >= maxValPos) {
				maxValPos = votesC1Pos.at<int>(i);
				indexPos = i;
			}
		}

		for (int i = 0; i < votesC1Neg.size().width; i++) {
			if (votesC1Neg.at<int>(i) >= maxValNeg) {
				maxValNeg = votesC1Neg.at<int>(i);
				indexNeg = i;
			}
		}

		RotatedRect rectCapstanPos, rectCapstanNeg;
		if (positionsC1Pos.size() > 0)
			rectCapstanPos = drawShapes(myFrame, positionsC1Pos[indexPos], Scalar(255-indexPos*64, 0, 0), templateShape.cols-22, templateShape.rows-92, capstanProcessingAreaRectX+11, capstanProcessingAreaRectY+46, 1);
		if (positionsC1Neg.size() > 0)
			rectCapstanNeg = drawShapes(myFrame, positionsC1Neg[indexNeg], Scalar(255-indexNeg*64, 128, 0), templateShape.cols-22, templateShape.rows-92, capstanProcessingAreaRectX+11, capstanProcessingAreaRectY+46, 1);

		if (maxValPos > 0)
			if (maxValNeg > 0)
				if (maxValPos > maxValNeg) {
					myFile << "CAPSTAN: Positive is best, match number: " << indexPos << endl;
					rectCapstan = rectCapstanPos;
				} else {
					myFile << "CAPSTAN: Negative is best, match number: " << indexNeg << endl;
					rectCapstan = rectCapstanNeg;
				}
			else {
				myFile << "CAPSTAN: Positive is the only choice, match number: " << indexPos << endl;
				rectCapstan = rectCapstanPos;
			}
		else if (maxValNeg > 0) {
			myFile << "CAPSTAN: Negative is the only choice, match number: " << indexNeg << endl;
			rectCapstan = rectCapstanNeg;
		} else {
			myFile.close();
			return false;
		}

	}

	myFile << "Capstan ROI:" << endl;
	myFile << "  Center (x, y): (" << rectCapstan.center.x << ", " << rectCapstan.center.y << ")" << endl;
	myFile << "  Size (w, h): (" << rectCapstan.size.width << ", " << rectCapstan.size.height << ")" << endl;
	myFile << "  Angle (deg): (" << rectCapstan.angle << ")" << endl;
	myFile.close();
	cout << endl;

	// Save the image containing the detected areas
	cv::imwrite(outputPath.string() + "/tapeAreas.jpg", myFrame);

	return true;
}


/**
 * @brief Compares two consecutive video frames and establishes whether there is potentially an Irregularity.
 * The comparison is pixel-wise and based on threshold values set in the config.json file.
 *
 * @param prevFrame the frame before the current one;
 * @param currentFrame the current frame;
 * @param msToEnd the number of milliseconds left before the end of the video. Useful for capstan analysis.
 * @return true if a potential Irregularity has been found;
 * @return false otherwise.
 */
bool frameDifference(cv::Mat prevFrame, cv::Mat currentFrame, int msToEnd) {

	/*********************************************************************************************/
	/********************************** Capstan analysis *****************************************/
	/*********************************************************************************************/

	// In the last minute of the video, check the pinch roller position for the endTape event
	if (!endTapeSaved && msToEnd < 60000) {

		// Capstan area
		int capstanAreaPixels = rectCapstan.size.width * rectCapstan.size.height;
		float capstanDifferentPixelsThreshold = capstanAreaPixels * capstanThresholdPercentual / 100;

		// Extract matrices corresponding to the processing area
		// CODE FROM https://answers.opencv.org/question/497/extract-a-rotatedrect-area/

		// matrices we'll use
		Mat M, rotatedPrevFrame, croppedPrevFrame, rotatedCurrentFrame, croppedCurrentFrame;
		// get angle and size from the bounding box
		float angle = rectCapstan.angle;
		Size rect_size = rectCapstan.size;
		// thanks to http://felix.abecassis.me/2011/10/opencv-rotation-deskewing/
		if (rectCapstan.angle < -45.) {
			angle += 90.0;
			swap(rect_size.width, rect_size.height);
		}
		// get the rotation matrix
		M = getRotationMatrix2D(rectCapstan.center, angle, 1.0);
		// perform the affine transformation
		cv::warpAffine(prevFrame, rotatedPrevFrame, M, prevFrame.size(), INTER_CUBIC);
		cv::warpAffine(currentFrame, rotatedCurrentFrame, M, currentFrame.size(), INTER_CUBIC);
		// crop the resulting image
		cv::getRectSubPix(rotatedPrevFrame, rect_size, rectCapstan.center, croppedPrevFrame);
		cv::getRectSubPix(rotatedCurrentFrame, rect_size, rectCapstan.center, croppedCurrentFrame);

		// END CODE FROM https://answers.opencv.org/question/497/extract-a-rotatedrect-area/

		cv::Mat differenceFrame = difference(croppedPrevFrame, croppedCurrentFrame);

		int blackPixelsCapstan = 0;

		for (int i = 0; i < croppedCurrentFrame.rows; i++) {
			for (int j = 0; j < croppedCurrentFrame.cols; j++) {
				if (differenceFrame.at<cv::Vec3b>(i, j)[0] == 0) {
					// A black pixel means the previous and current frames differ at this position
					blackPixelsCapstan++;
				}
			}
		}

		if (blackPixelsCapstan > capstanDifferentPixelsThreshold) {
			savingPinchRoller = true;
			endTapeSaved = true; // Never check again for end tape instant
			return true;
		} else {
			savingPinchRoller = false;
		}
	} else {
		savingPinchRoller = false; // Already false before the last minute of the video; after the capstan has been saved, reset the flag so it is not saved again
	}

	/*********************************************************************************************/
	/************************************ Tape analysis ******************************************/
	/*********************************************************************************************/

	// Tape area
    int tapeAreaPixels = rectTape.size.width * rectTape.size.height;
	float tapeDifferentPixelsThreshold = tapeAreaPixels * tapeThresholdPercentual / 100;

	// Extract matrices corresponding to the processing area
	// CODE FROM https://answers.opencv.org/question/497/extract-a-rotatedrect-area/

	// matrices we'll use
	Mat M, rotatedPrevFrame, croppedPrevFrame, rotatedCurrentFrame, croppedCurrentFrame;
	// get angle and size from the bounding box
	float angle = rectTape.angle;
	Size rect_size = rectTape.size;
	// thanks to http://felix.abecassis.me/2011/10/opencv-rotation-deskewing/
	if (rectTape.angle < -45.) {
		angle += 90.0;
		swap(rect_size.width, rect_size.height);
	}
	// get the rotation matrix
	M = getRotationMatrix2D(rectTape.center, angle, 1.0);
	// perform the affine transformation
	cv::warpAffine(prevFrame, rotatedPrevFrame, M, prevFrame.size(), INTER_CUBIC);
	cv::warpAffine(currentFrame, rotatedCurrentFrame, M, currentFrame.size(), INTER_CUBIC);
	// crop the resulting image
	cv::getRectSubPix(rotatedPrevFrame, rect_size, rectTape.center, croppedPrevFrame);
	cv::getRectSubPix(rotatedCurrentFrame, rect_size, rectTape.center, croppedCurrentFrame);

	// END CODE FROM https://answers.opencv.org/question/497/extract-a-rotatedrect-area/

	cv::Mat differenceFrame = difference(croppedPrevFrame, croppedCurrentFrame);

	int decEnd = (msToEnd % 1000) / 100;
	int secEnd = (msToEnd - (msToEnd % 1000)) / 1000;
	int minEnd = secEnd / 60;
	secEnd = secEnd % 60;


	/************************************* Segment analysis **************************************/

  	int blackPixels = 0;
	float mediaCurrFrame;
	int totColoreCF = 0;

	for (int i = 0; i < croppedCurrentFrame.rows; i++) {
		for (int j = 0; j < croppedCurrentFrame.cols; j++) {
			totColoreCF += croppedCurrentFrame.at<cv::Vec3b>(i, j)[0] + croppedCurrentFrame.at<cv::Vec3b>(i, j)[1] + croppedCurrentFrame.at<cv::Vec3b>(i, j)[2];
			if (differenceFrame.at<cv::Vec3b>(i, j)[0] == 0) {
				blackPixels++;
			}
		}
	}
	mediaCurrFrame = totColoreCF/tapeAreaPixels;

	/************************************* Decision stage ****************************************/

	bool isIrregularity = false;

	if (blackPixels > tapeDifferentPixelsThreshold) { // The threshold must be exceeded

		/***** AVERAGE_COLOR-BASED DECISION *****/
		if (mediaPrevFrame > (mediaCurrFrame + 7) || mediaPrevFrame < (mediaCurrFrame - 7)) { // The average colours are not similar
			isIrregularity = true;
		}

		/***** BRANDS MANAGEMENT *****/
		if (brands) {
			// At the beginning of the video, wait at least 5 seconds before the next Irregularity to consider it as a brand.
			// It is not guaranteed to be the first brand, but it is generally a safe approach for obtaining a correct image
			if (firstBrand) {
				if (firstInstant - msToEnd > 5000) {
					firstBrand = false;
					savingBrand = true;
					isIrregularity = true;
				}
			// In the following iterations reset savingBrand, since we are no longer interested in brands.
			} else
				savingBrand = false;
		}

	}

	// Update mediaPrevFrame
	mediaPrevFrame = mediaCurrFrame;

	return isIrregularity;
}


/**
 * @brief Video processing phase, where each frame is analysed.
 * It saves the Irregularity Images and updates the Irregularity Files if an Irregularity is found.
 *
 * @param videoCapture the input Preservation Audio-Visual File.
 */
void processing(cv::VideoCapture videoCapture) {

	// Video duration
	int frameNumbers_v = videoCapture.get(CAP_PROP_FRAME_COUNT);
	float fps_v = videoCapture.get(CAP_PROP_FPS); // FPS can be non-integers!!!
	float videoLength = (float) frameNumbers_v / fps_v; // [s]
	int videoLength_ms = videoLength * 1000;

	int savedFrames = 0, unsavedFrames = 0;
	float lastSaved = -160;
	// Whenever we find an Irregularity, we want to skip a length equal to the Studer reading head (3 cm = 1.18 inches).
	int savingRate = 79; // [ms]. Time taken to cross 3 cm at 15 ips, or 1.5 cm at 7.5 ips. The considered lengths are the widths of the tape areas.
	// The following condition is a valid approach as long as the tape areas always have widths equal to the reading head's
	if (speed == 7.5)
		savingRate = 157; // Time taken to cross 3 cm at 7.5 ips

	// The first frame of the video won't be processed
	cv::Mat prevFrame;
	videoCapture >> prevFrame;
	firstInstant = videoLength_ms - videoCapture.get(CAP_PROP_POS_MSEC);

    while (videoCapture.isOpened()) {

		cv::Mat frame;
        videoCapture >> frame;

        if (!frame.empty()) {

			int ms = videoCapture.get(CAP_PROP_POS_MSEC);
			int msToEnd = videoLength_ms - ms;
			if (ms == 0) // With the OpenCV library, this can happen on the last few frames, before "frame" is detected as empty.
				break;

			// Variables to display program status
			int secToEnd = msToEnd / 1000;
			int minToEnd = (secToEnd / 60) % 60;
			secToEnd = secToEnd % 60;

			string secStrToEnd = to_string(secToEnd), minStrToEnd = to_string(minToEnd);
			if (minToEnd < 10)
				minStrToEnd = "0" + minStrToEnd;
			if (secToEnd < 10)
				secStrToEnd = "0" + secStrToEnd;

			// Display program status
			cout << "\rIrregularities: " << savedFrames << ".   ";
			cout << "Remaining video time [mm:ss]: " << minStrToEnd << ":" << secStrToEnd << flush;

			if ((ms - lastSaved > savingRate) && frameDifference(prevFrame, frame, msToEnd)) {

				// An Irregularity has been found!

				// De-interlacing frame
				cv::Mat oddFrame(frame.rows/2, frame.cols, CV_8UC3);
				cv::Mat evenFrame(frame.rows/2, frame.cols, CV_8UC3);
				separateFrame(frame, oddFrame, evenFrame);

				// Extract the image corresponding to the ROIs
				Point2f pts[4];
				if (savingPinchRoller)
					rectCapstan.points(pts);
				else
					rectTape.points(pts);
				cv::Mat subImage(frame, cv::Rect(100, min(pts[1].y, pts[2].y), frame.cols - 100, static_cast<int>(rectTape.size.height)));

				// De-interlacing
				cv::Mat oddSubImage(subImage.rows/2, subImage.cols, CV_8UC3);
				int evenSubImageRows = subImage.rows/2;
				if (subImage.rows % 2 != 0) // If the found rectangle has an odd height, evenSubImage must be one row taller, otherwise we get a segmentation fault!
					evenSubImageRows += 1;
				cv::Mat evenSubImage(evenSubImageRows, subImage.cols, CV_8UC3);
				separateFrame(subImage, oddSubImage, evenSubImage);

				string timeLabel = getTimeLabel(ms);
				string safeTimeLabel = getSafeTimeLabel(ms);

				string irregularityImageFilename = to_string(savedFrames) + "_" + safeTimeLabel + ".jpg";
				cv::imwrite(irregularityImagesPath / irregularityImageFilename, oddFrame);

				// Append Irregularity information to JSON
				boost::uuids::uuid uuid = boost::uuids::random_generator()();
				irregularityFileOutput1["Irregularities"] += {
					{
						"IrregularityID", boost::lexical_cast<string>(uuid)
					}, {
						"Source", "v"
					}, {
						"TimeLabel", timeLabel
					}
				};
				irregularityFileOutput2["Irregularities"] += {
					{
						"IrregularityID", boost::lexical_cast<string>(uuid)
					}, {
						"Source", "v"
					}, {
						"TimeLabel", timeLabel
					}, {
						"ImageURI", irregularityImagesPath.string() + "/" + irregularityImageFilename
					}
				};

				lastSaved = ms;
				savedFrames++;

			} else {
				unsavedFrames++;
			}

			prevFrame = frame;

	    } else {
			cout << endl << "Empty frame!" << endl;
	    	videoCapture.release();
	    	break;
	    }
	}

	ofstream myFile;
	myFile.open("log.txt", ios::app);
	myFile << "Saved frames are: " << savedFrames << endl;
	myFile.close();

}


/*************************************************************************************************/
/********************************************* MAIN **********************************************/
/*************************************************************************************************/

/**
 * @brief main program, organised as:
 * - Get input from command line or config.json file;
 * - Check input parameters;
 * - Creation of output directories;
 * - Regions Of Interest (ROIs) detection;
 * - Irregularities detection;
 * - Saving of output IrregularityFiles.
 *
 * @param argc Command line arguments count;
 * @param argv Command line arguments.
 * @return int program status.
 */
int main(int argc, char** argv) {

	/*********************************************************************************************/
	/*************************************** CONFIGURATION ***************************************/
	/*********************************************************************************************/

	// Get the input from config.json or command line
	try {
		bool continueExecution = getArguments(argc, argv);
		if (!continueExecution) {
			return 0;
		}
	} catch (const nlohmann::detail::type_error& e) {
		cerr << RED << "config.json error!" << endl << e.what() << END << endl;
		return -1;
	}

	videoPath = workingPath / "PreservationAudioVisualFile" / filesName;
    if (findFileName(videoPath, fileName, extension) == -1) {
        cerr << RED << BOLD << "config.json error!" << END << endl << RED << videoPath.string() << " cannot be found or opened." << END << endl;
        return -1;
    }

	irregularityFileInputPath = workingPath / "temp" / fileName / "AudioAnalyser_IrregularityFileOutput1.json";

	// Input JSON check
	ifstream iJSON(irregularityFileInputPath);
	if (iJSON.fail()) {
		cerr << RED << BOLD << "config.json error!" << END << endl << RED << irregularityFileInputPath.string() << " cannot be found or opened." << END << endl;
		return -1;
	}
	if (speed != 7.5 && speed != 15) {
		cerr << RED << BOLD << "config.json error!" << END << endl << RED << "Speed parameter must be 7.5 or 15 ips." << END << endl;
		return -1;
	}
	if (tapeThresholdPercentual < 0 || tapeThresholdPercentual > 100) {
		cerr << RED << BOLD << "config.json error!" << END << endl << RED << "TapeThresholdPercentual parameter must be a percentage value." << END << endl;
		return -1;
	}
	if (capstanThresholdPercentual < 0 || capstanThresholdPercentual > 100) {
		cerr << RED << BOLD << "config.json error!" << END << endl << RED << "CapstanThresholdPercentual parameter must be a percentage value." << END << endl;
		return -1;
	}

	// Adjust input parameters (the given values are taken as referring to a reference speed of 7.5 ips)
	if (brands) {
		if (speed == 15)
			tapeThresholdPercentual += 6;
	} else
		if (speed == 15)
			tapeThresholdPercentual += 20;
		else
			tapeThresholdPercentual += 21;

    cout << endl;
	cout << "Parameters:" << endl;
	cout << "    Brands: " << brands << endl;
	cout << "    Speed: " << speed << endl;
    cout << "    ThresholdPercentual: " << tapeThresholdPercentual << endl;
	cout << "    ThresholdPercentualCapstan: " << capstanThresholdPercentual << endl;
	cout << endl;

	// Read input JSON
	iJSON >> irregularityFileInput;

	/*********************************************************************************************/
	/*********************************** MAKE OUTPUT DIRECTORY ***********************************/
	/*********************************************************************************************/

	// Create the output directory named after fileName
	outputPath = workingPath / "temp" / fileName;
	int outputFileNameDirectory = create_directory(outputPath);
	// Get the current time
	time_t t = chrono::system_clock::to_time_t(chrono::system_clock::now());
    string ts = ctime(&t);
	// Write useful info to log file
	ofstream myFile;
	myFile.open(outputPath / "log.txt", ios::app);
	myFile << endl << fileName << endl;
	myFile << "tsh: " << tapeThresholdPercentual << "   tshp: " << capstanThresholdPercentual << endl;
	myFile << ts; // ctime() already appends a newline, so no endl here (it would leave a blank line).
	myFile.close();

	/*********************************************************************************************/
	/************************************** AREAS DETECTION **************************************/
	/*********************************************************************************************/

	cv::VideoCapture videoCapture(videoPath);
    if (!videoCapture.isOpened()) {
        cerr << RED << BOLD << "Video unreadable." << END << endl;
        return -1;
    }

	// Get total number of frames
	int totalFrames = videoCapture.get(CAP_PROP_FRAME_COUNT);
	// Set frame position to half video length
	videoCapture.set(CAP_PROP_POS_FRAMES, totalFrames/2);
	// Get frame
	videoCapture >> myFrame;

	cout << "Video resolution: " << myFrame.cols << "x" << myFrame.rows << endl << endl;

	// Find the processing area corresponding to the tape area over the reading head
	bool found = findProcessingAreas(configurationFile);

	// Reset frame position
	videoCapture.set(CAP_PROP_POS_FRAMES, 0);

	// Write useful information to log file
	myFile.open("log.txt", ios::app);
	if (found) {
		cout << "Processing areas found!" << endl;
		myFile << "Processing areas found!" << endl;
		myFile.close();
	} else {
		cout << "Processing area not found. Try changing JSON parameters." << endl;
		myFile << "Processing area not found." << endl;
		myFile.close();
		return -1; // Program terminated early
	}

	/*********************************************************************************************/
	/***************************** MAKE ADDITIONAL OUTPUT DIRECTORIES ****************************/
	/*********************************************************************************************/

	irregularityImagesPath = outputPath / "IrregularityImages";
	int fullFrameDirectory = fs::create_directory(irregularityImagesPath);

	/*********************************************************************************************/
	/**************************************** PROCESSING *****************************************/
	/*********************************************************************************************/

	cout << endl << CYAN << "Starting processing..." << END << endl;

	// Processing timer
	time_t startTimer, endTimer;
	startTimer = time(NULL);

	processing(videoCapture);

	endTimer = time(NULL);
	float min = (endTimer - startTimer) / 60;
	float sec = (endTimer - startTimer) % 60;

	string result("Processing elapsed time: " + to_string((int)min) + ":" + to_string((int)sec));
	cout << endl << result << endl;

	myFile.open("log.txt", ios::app);
	myFile << result << endl << endl;
	myFile.close();

	/*********************************************************************************************/
	/************************************* IRREGULARITY FILES ************************************/
	/*********************************************************************************************/
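	// Illustrative sketch of one entry appended to the "Irregularities" array during processing()
	// (UUID and time label values are placeholders; entries in the second file also carry an "ImageURI"):
	//   { "IrregularityID": "123e4567-e89b-12d3-a456-426614174000", "Source": "v", "TimeLabel": "00:12:34.567" }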

	ofstream outputFile1;
	fs::path outputFile1Name = outputPath / "VideoAnalyser_IrregularityFileOutput1.json";
	outputFile1.open(outputFile1Name);
	outputFile1 << irregularityFileOutput1 << endl;

	// Irregularities to extract for the AudioAnalyser and to pass to the TapeIrregularityClassifier
	extractIrregularityImagesForAudio(outputPath, videoPath, irregularityFileInput, irregularityFileOutput2);

	ofstream outputFile2;
	fs::path outputFile2Name = outputPath / "VideoAnalyser_IrregularityFileOutput2.json";
	outputFile2.open(outputFile2Name);
	outputFile2 << irregularityFileOutput2 << endl;

    return 0;

}