/*
    Questo script esegue l'analisi di un video fornito per rilevare le discontinuità
    che vengono trovate.

    Tutte le informazioni necessarie all'algoritmo si possono individuare nei file XML
    all'interno della cartella config.

    @author Nadir Dalla Pozza
    @version 3.0
    @date 18-02-2023
*/
#include <stdlib.h>
#include <sys/timeb.h>

#include <algorithm>   // std::min
#include <filesystem>
#include <fstream>
#include <iostream>
#include <utility>     // std::swap

#include <boost/lexical_cast.hpp>
#include <boost/program_options.hpp>
#include <boost/uuid/uuid.hpp>            // uuid class
#include <boost/uuid/uuid_generators.hpp> // generators
#include <boost/uuid/uuid_io.hpp>         // streaming operators etc.

#include <nlohmann/json.hpp>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

#include "opencv2/core.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/xfeatures2d.hpp"

#include "utility.h"
#include "forAudioAnalyser.h"

using namespace cv;
using namespace std;
using json = nlohmann::json;

namespace fs = std::filesystem;
namespace po = boost::program_options;


/*
------------------------------------------------------------------------------
VARIABLES
------------------------------------------------------------------------------
*/

54
55
56
57
// There are two alternative approaches:
// Generalized Hough Transform and SURF.
bool useSURF = true;

Nadir Dalla Pozza's avatar
Nadir Dalla Pozza committed
58
bool savingPinchRoller = false, pinchRollerRect = false;
59
bool savingBrand = false;
60
bool endTapeSaved = false;
61
62
63
cv::Mat myFrame;
float mediaPrevFrame = 0;
bool firstBrand = true;	// The first frame containing brands on tape must be saved
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
64
float firstInstant = 0;
65
string fileName, extension;
66
67

// config.json parameters
68
69
fs::path workingPath;
string filesName;
70
bool brands;
71
72
73
74
75
76
77
float speed, tapeThresholdPercentual, capstanThresholdPercentual;
int minDist, angleThresh, scaleThresh, posThresh, minDistTape, angleThreshTape, scaleThreshTape, posThreshTape, minDistCapstan, angleThreshCapstan, scaleThreshCapstan, posThreshCapstan;
// Path variables
fs::path outputPath;
fs::path irregularityImagesPath;
fs::path videoPath;
fs::path irregularityFileInputPath;
78
79
80
81
82
83
// JSON files
json configurationFile;
json irregularityFileInput;
json irregularityFileOutput1;
json irregularityFileOutput2;
// RotatedRect identifying the processing area
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
84
RotatedRect rect, rectTape, rectCapstan;
85
86


87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
string PURPLE = "\033[95m";
string CYAN = "\033[96m";
string DARK_CYAN = "\033[36m";
string BLUE = "\033[94m";
string GREEN = "\033[92m";
string YELLOW = "\033[93m";
string RED = "\033[91m";
string BOLD = "\033[1m";
string UNDERLINE = "\033[4m";
string END = "\033[0m";



bool getArguments(int argc, char** argv) {
	// Read configuration file
	ifstream iConfig("../config/config.json");
	iConfig >> configurationFile;

	if (argc == 1) {
		// Read from JSON file
		string wp = configurationFile["WorkingPath"];
		workingPath = fs::path(wp);
		filesName = configurationFile["FilesName"];
		brands = configurationFile["Brands"];
		speed = configurationFile["Speed"];
	} else {
		// Get from command line
		try {
			po::options_description desc(
				"A tool that implements MPAI CAE-ARP Video Analyser Technical Specification.\n"
				"By default, the configuartion parameters are loaded from ./config.json file,\n"
				"but, alternately, you can pass command line arguments to replace them"
			);
			desc.add_options()
				("help,h", "Display this help message")
				("working-path,w", po::value<string>()->required(), "Specify the Working Path, where all input files are stored")
				("files-name,f", po::value<string>()->required(), "Specify the name of the Preservation files (without extension)")
				("brands,b", po::value<bool>()->required(), "Specify if the tape presents brands on its surface")
				("speed,s", po::value<float>()->required(), "Specify the speed at which the tape was read");
			po::variables_map vm;
			po::store(po::command_line_parser(argc, argv).options(desc).run(), vm);
			if (vm.count("help")) {
				cout << desc << "\n";
				return false;
			}
			po::notify(vm);
		} catch (po::invalid_command_line_syntax& e) {
			cerr << RED << BOLD << "The command line syntax is invalid: " << END << RED << e.what() << END << endl;
			return false;
		} catch (po::required_option& e) {
			cerr << "Error: " << e.what() << endl;
			return false;
		}
	}

	tapeThresholdPercentual = configurationFile["TapeThresholdPercentual"];
	capstanThresholdPercentual = configurationFile["CapstanThresholdPercentual"];
	minDist = configurationFile["MinDist"];
	angleThresh = configurationFile["AngleThresh"];
	scaleThresh = configurationFile["ScaleThresh"];
	posThresh = configurationFile["PosThresh"];
	minDistTape = configurationFile["MinDistTape"];
	angleThreshTape = configurationFile["AngleThreshTape"];
	scaleThreshTape = configurationFile["ScaleThreshTape"];
	posThreshTape = configurationFile["PosThreshTape"];
	minDistCapstan = configurationFile["MinDistCapstan"];
	angleThreshCapstan = configurationFile["AngleThreshCapstan"];
	scaleThreshCapstan = configurationFile["ScaleThreshCapstan"];
	posThreshCapstan = configurationFile["PosThreshCapstan"];

	return true;
}



bool findProcessingAreas(json configurationFile) {

	/*********************************************************************************************/
	/*********************************** READING HEAD DETECTION **********************************/
	/*********************************************************************************************/

	// Obtain grayscale version of myFrame
	Mat myFrameGrayscale;
	cvtColor(myFrame, myFrameGrayscale, COLOR_BGR2GRAY);
	// Get input shape in grayscale
	Mat templateImage = imread("../input/readingHead.png", IMREAD_GRAYSCALE);
	// Downsample myFrameGrayscale in half pixels for performance reasons
	Mat myFrameGrayscaleHalf;
	pyrDown(myFrameGrayscale, myFrameGrayscaleHalf, Size(myFrame.cols/2, myFrame.rows/2));
	// Downsample tapeShape in half pixels
	Mat templateImageHalf;
	pyrDown(templateImage, templateImageHalf, Size(templateImage.cols/2, templateImage.rows/2));

	// Process only the bottom-central portion of the input video -> best results with our videos
	Rect readingHeadProcessingAreaRect(myFrameGrayscaleHalf.cols/4, myFrameGrayscaleHalf.rows/2, myFrameGrayscaleHalf.cols/2, myFrameGrayscaleHalf.rows/2);
	Mat processingImage = myFrameGrayscaleHalf(readingHeadProcessingAreaRect);
	// Select the template to be detected
	Mat templateShape = templateImageHalf;

	// Algorithm and parameters
	Ptr<GeneralizedHoughGuil> alg = createGeneralizedHoughGuil();

	vector<Vec4f> positionsPos, positionsNeg;
	Mat votesPos, votesNeg;
	TickMeter tm;
	int oldPosThresh = posThresh;
	RotatedRect rectPos, rectNeg;
	ofstream myFile;
	Point2f pts[4];

	// Find the best matches for positive and negative angles
	// If there are more than one shapes, then choose the one with the highest score
	// If there are more than one with the same highest score, then arbitrarily choose the latest
	double maxValPos = 0, maxValNeg = 0;
	int indexPos = 0, indexNeg = 0;

	alg -> setMinDist(minDist);
	alg -> setLevels(360);
	alg -> setDp(2);
	alg -> setMaxBufferSize(1000);

	alg -> setAngleStep(1);
	alg -> setAngleThresh(angleThresh);

	alg -> setMinScale(0.9);
	alg -> setMaxScale(1.1);
	alg -> setScaleStep(0.01);
	alg -> setScaleThresh(scaleThresh);

	alg -> setPosThresh(posThresh);

	alg -> setCannyLowThresh(150); // Old: 100
	alg -> setCannyHighThresh(240); // Old: 300

	alg -> setTemplate(templateShape);

	cout << DARK_CYAN << "Reading head" << END << endl;
	tm.start();
	// Invoke utility.h function
	detectShape(alg, templateShape, posThresh, positionsPos, votesPos, positionsNeg, votesNeg, processingImage);
	tm.stop();
	cout << "Reading head detection time: " << tm.getTimeMilli() << " ms" << endl;

	for (int i = 0; i < votesPos.size().width; i++) {
		if (votesPos.at<int>(i) >= maxValPos) {
			maxValPos = votesPos.at<int>(i);
			indexPos = i;
		}
	}

	for (int i = 0; i < votesNeg.size().width; i++) {
		if (votesNeg.at<int>(i) >= maxValNeg) {
			maxValNeg = votesNeg.at<int>(i);
			indexNeg = i;
		}
	}

	// The color is progressively darkened to emphasize that the algorithm found more than one shape
	if (positionsPos.size() > 0)
		rectPos = drawShapes(myFrame, positionsPos[indexPos], Scalar(0, 0, 255-indexPos*64), templateImageHalf.cols, templateImageHalf.rows, myFrameGrayscaleHalf.cols/4, myFrameGrayscaleHalf.rows/2, 2);
	if (positionsNeg.size() > 0)
		rectNeg = drawShapes(myFrame, positionsNeg[indexNeg], Scalar(128, 128, 255-indexNeg*64), templateImageHalf.cols, templateImageHalf.rows, myFrameGrayscaleHalf.cols/4, myFrameGrayscaleHalf.rows/2, 2);

	myFile.open("log.txt", ios::app);

	if (maxValPos > 0)
		if (maxValNeg > 0)
			if (maxValPos > maxValNeg) {
				myFile << "READING HEAD: Positive angle is best, match number: " << indexPos << endl;
				rect = rectPos;
			} else {
				myFile << "READING HEAD: Negative angle is best, match number: " << indexNeg << endl;
				rect = rectNeg;
			}
		else {
			myFile << "READING HEAD: Positive angle is the only choice, match number: " << indexPos << endl;
			rect = rectPos;
		}
	else if (maxValNeg > 0) {
		myFile << "READING HEAD: Negative angle is the only choice, match number: " << indexNeg << endl;
		rect = rectNeg;
	} else {
		myFile.close();
		return false;
	}
	cout << endl;

	rect.points(pts);

	/*********************************************************************************************/
	/************************************ TAPE AREA DETECTION ************************************/
	/*********************************************************************************************/

	// Compute area basing on reading head detection
	Vec4f positionTape( rect.center.x, rect.center.y + rect.size.height / 2 + 20 * (rect.size.width / 200), 1, rect.angle );
	rectTape = drawShapes(myFrame, positionTape, Scalar(0, 255-indexPos*64, 0), rect.size.width, 50 * (rect.size.width / 200), 0, 0, 1);

	myFile << "Tape area:" << endl;
	myFile << "  Center (x, y): (" << rectTape.center.x << ", " << rectTape.center.y << ")" << endl;
	myFile << "  Size (w, h): (" << rectTape.size.width << ", " << rectTape.size.height << ")" << endl;
	myFile << "  Angle (deg): (" << rectTape.angle << ")" << endl;

	json autoJSON;
	autoJSON["PreservationAudioVisualFile"] = fileName;
	autoJSON["RotatedRect"] = {
		{
			"CenterX", rectTape.center.x
		}, {
			"CenterY", rectTape.center.y
		}, {
			"Width", rectTape.size.width
		}, {
			"Height", rectTape.size.height
		}, {
			"Angle", rectTape.angle
		}
	};

	ofstream outputFile;
	string outputFileName = "/Users/nadir/Documents/MPAI-CAE/AreaJSONs/Auto/" + fileName + ".json";
	outputFile.open(outputFileName);
	outputFile << autoJSON << endl;
	outputFile.close();

	/*********************************************************************************************/
	/************************************* CAPSTAN DETECTION *************************************/
	/*********************************************************************************************/

	// Read template image - it is smaller than before, therefore there is no need to downsample
	templateShape = imread("../input/capstanBERIO058prova.png", IMREAD_GRAYSCALE); // WORKING
	// templateShape = imread("../input/capstanBERIO058.png", IMREAD_GRAYSCALE);

	cout << DARK_CYAN << "Capstan" << END << endl;

	if (useSURF) {

		// Step 1: Detect the keypoints using SURF Detector, compute the descriptors
		int minHessian = 100;
		Ptr<xfeatures2d::SURF> detector = xfeatures2d::SURF::create(minHessian);
		vector<KeyPoint> keypoints_object, keypoints_scene;
		Mat descriptors_object, descriptors_scene;

		tm.reset();
		tm.start();
		detector->detectAndCompute(templateShape, noArray(), keypoints_object, descriptors_object);
		detector->detectAndCompute(myFrameGrayscale, noArray(), keypoints_scene, descriptors_scene);
		tm.stop();
		cout << "Capstan detection time: " << tm.getTimeMilli() << " ms" << endl;

		// Step 2: Matching descriptor vectors with a FLANN based matcher
		// Since SURF is a floating-point descriptor NORM_L2 is used
		Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create(DescriptorMatcher::FLANNBASED);
		vector<vector<DMatch>> knn_matches;
		matcher->knnMatch(descriptors_object, descriptors_scene, knn_matches, 2);
		//-- Filter matches using the Lowe's ratio test
		const float ratio_thresh = 0.75f;
		vector<DMatch> good_matches;
		for (size_t i = 0; i < knn_matches.size(); i++) {
			if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
				good_matches.push_back(knn_matches[i][0]);
			}
		}
		// Draw matches
		Mat img_matches;
		drawMatches(templateShape, keypoints_object, myFrameGrayscale, keypoints_scene, good_matches, img_matches, Scalar::all(-1), Scalar::all(-1), vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
		// Localize the object
		vector<Point2f> obj;
		vector<Point2f> scene;
		for( size_t i = 0; i < good_matches.size(); i++ )
		{
			// Get the keypoints from the good matches
			obj.push_back(keypoints_object[good_matches[i].queryIdx].pt);
			scene.push_back(keypoints_scene[good_matches[i].trainIdx].pt);
		}
		Mat H = findHomography(obj, scene, RANSAC);
		// Get the corners from the image_1 ( the object to be "detected" )
		vector<Point2f> obj_corners(4);
		obj_corners[0] = Point2f(0, 0);
		obj_corners[1] = Point2f((float)templateShape.cols, 0);
		obj_corners[2] = Point2f((float)templateShape.cols, (float)templateShape.rows);
		obj_corners[3] = Point2f(0, (float)templateShape.rows);
		vector<Point2f> scene_corners(4);
		perspectiveTransform( obj_corners, scene_corners, H);

		// Find average
		float capstanX = (scene_corners[0].x + scene_corners[1].x + scene_corners[2].x + scene_corners[3].x) / 4;
		float capstanY = (scene_corners[0].y + scene_corners[1].y + scene_corners[2].y + scene_corners[3].y) / 4;

		// In the following there are two alterations to cut the first 20 horizontal pixels and the first 90 vertical pixels from the found rectangle:
		// +10 in X for centering and -20 in width
		// +45 in Y for centering and -90 in height
		Vec4f positionCapstan( capstanX + 10, capstanY + 45, 1, 0 );
		rectCapstan = drawShapes(myFrame, positionCapstan, Scalar(255-indexPos*64, 0, 0), templateShape.cols - 20, templateShape.rows - 90, 0, 0, 1);

	} else {

		// Process only right portion of the image, wherw the capstain always appears
		int capstanProcessingAreaRectX = myFrame.cols*3/4;
		int capstanProcessingAreaRectY = myFrame.rows/2;
		int capstanProcessingAreaRectWidth = myFrame.cols/4;
		int capstanProcessingAreaRectHeight = myFrame.rows/2;
		Rect capstanProcessingAreaRect(capstanProcessingAreaRectX, capstanProcessingAreaRectY, capstanProcessingAreaRectWidth, capstanProcessingAreaRectHeight);
		Mat capstanProcessingAreaGrayscale = myFrameGrayscale(capstanProcessingAreaRect);
		// Reset algorithm and set parameters
		alg = createGeneralizedHoughGuil();

		alg -> setMinDist(minDistCapstan);
		alg -> setLevels(360);
		alg -> setDp(2);
		alg -> setMaxBufferSize(1000);

		alg -> setAngleStep(1);
		alg -> setAngleThresh(angleThreshCapstan);

		alg -> setMinScale(0.9);
		alg -> setMaxScale(1.1);
		alg -> setScaleStep(0.01);
		alg -> setScaleThresh(scaleThreshCapstan);

		alg -> setPosThresh(posThreshCapstan);

		alg -> setCannyLowThresh(150);
		alg -> setCannyHighThresh(240);

		alg -> setTemplate(templateShape);

		oldPosThresh = posThreshCapstan;

		vector<Vec4f> positionsC1Pos, positionsC1Neg;
		Mat votesC1Pos, votesC1Neg;

		tm.reset();
		tm.start();
		detectShape(alg, templateShape, posThreshCapstan, positionsC1Pos, votesC1Pos, positionsC1Neg, votesC1Neg, capstanProcessingAreaGrayscale);
		tm.stop();
		cout << "Capstan detection time: " << tm.getTimeMilli() << " ms" << endl;

		// Find the best matches for positive and negative angles
		// If there are more than one shapes, then choose the one with the highest score
		// If there are more than one with the same highest score, then choose the latest
		maxValPos = 0, maxValNeg = 0, indexPos = 0, indexNeg = 0;

		for (int i = 0; i < votesC1Pos.size().width; i++) {
			if (votesC1Pos.at<int>(i) >= maxValPos) {
				maxValPos = votesC1Pos.at<int>(i);
				indexPos = i;
			}
		}

		for (int i = 0; i < votesC1Neg.size().width; i++) {
			if (votesC1Neg.at<int>(i) >= maxValNeg) {
				maxValNeg = votesC1Neg.at<int>(i);
				indexNeg = i;
			}
		}

		RotatedRect rectCapstanPos, rectCapstanNeg;
		if (positionsC1Pos.size() > 0)
			rectCapstanPos = drawShapes(myFrame, positionsC1Pos[indexPos], Scalar(255-indexPos*64, 0, 0), templateShape.cols-22, templateShape.rows-92, capstanProcessingAreaRectX+11, capstanProcessingAreaRectY+46, 1);
		if (positionsC1Neg.size() > 0)
			rectCapstanNeg = drawShapes(myFrame, positionsC1Neg[indexNeg], Scalar(255-indexNeg*64, 128, 0), templateShape.cols-22, templateShape.rows-92, capstanProcessingAreaRectX+11, capstanProcessingAreaRectY+46, 1);

		if (maxValPos > 0)
			if (maxValNeg > 0)
				if (maxValPos > maxValNeg) {
					myFile << "CAPSTAN: Positive is best, match number: " << indexPos << endl;
					rectCapstan = rectCapstanPos;
				} else {
					myFile << "CAPSTAN: Negative is best, match number: " << indexNeg << endl;
					rectCapstan = rectCapstanNeg;
				}
			else {
				myFile << "CAPSTAN: Positive is the only choice, match number: " << indexPos << endl;
				rectCapstan = rectCapstanPos;
			}
		else if (maxValNeg > 0) {
			myFile << "CAPSTAN: Negative is the only choice, match number: " << indexNeg << endl;
			rectCapstan = rectCapstanNeg;
		} else {
			myFile.close();
			return false;
		}

	}

	myFile << "Capstan ROI:" << endl;
	myFile << "  Center (x, y): (" << rectCapstan.center.x << ", " << rectCapstan.center.y << ")" << endl;
	myFile << "  Size (w, h): (" << rectCapstan.size.width << ", " << rectCapstan.size.height << ")" << endl;
	myFile << "  Angle (deg): (" << rectCapstan.angle << ")" << endl;
	myFile.close();
	cout << endl;

	// Save the image containing the detected areas
	cv::imwrite(outputPath.string() + "/tapeAreas.jpg", myFrame);

	return true;
}


486
487
488

bool frameDifference(cv::Mat prevFrame, cv::Mat currentFrame, int msToEnd) {

489
	/*********************************************************************************************/
490
	/********************************** Capstan analysis *****************************************/
491
	/*********************************************************************************************/
492
493

	// In the last minute of the video, check for pinchRoller position for endTape event
494
	if (!endTapeSaved && msToEnd < 60000) {
495
496
497
498

		// Capstan area
		int capstanAreaPixels = rectCapstan.size.width * rectCapstan.size.height;
		float capstanDifferentPixelsThreshold = capstanAreaPixels * capstanThresholdPercentual / 100;
499

500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
		// CODE FROM https://answers.opencv.org/question/497/extract-a-rotatedrect-area/

		// matrices we'll use
		Mat M, rotatedPrevFrame, croppedPrevFrame, rotatedCurrentFrame, croppedCurrentFrame;
		// get angle and size from the bounding box
		float angle = rectCapstan.angle;
		Size rect_size = rectCapstan.size;
		// thanks to http://felix.abecassis.me/2011/10/opencv-rotation-deskewing/
		if (rectCapstan.angle < -45.) {
			angle += 90.0;
			swap(rect_size.width, rect_size.height);
		}
		// get the rotation matrix
		M = getRotationMatrix2D(rectCapstan.center, angle, 1.0);
		// perform the affine transformation
515
516
		cv::warpAffine(prevFrame, rotatedPrevFrame, M, prevFrame.size(), INTER_CUBIC);
		cv::warpAffine(currentFrame, rotatedCurrentFrame, M, currentFrame.size(), INTER_CUBIC);
517
		// crop the resulting image
518
519
		cv::getRectSubPix(rotatedPrevFrame, rect_size, rectCapstan.center, croppedPrevFrame);
		cv::getRectSubPix(rotatedCurrentFrame, rect_size, rectCapstan.center, croppedCurrentFrame);
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541

		// imshow("Current frame", currentFrame);
		// imshow("Cropped Current Frame", croppedCurrentFrame);
		// waitKey();

		// END CODE FROM https://answers.opencv.org/question/497/extract-a-rotatedrect-area/

		cv::Mat differenceFrame = difference(croppedPrevFrame, croppedCurrentFrame);

		int blackPixelsCapstan = 0;

		for (int i = 0; i < croppedCurrentFrame.rows; i++) {
			for (int j = 0; j < croppedCurrentFrame.cols; j++) {
				if (differenceFrame.at<cv::Vec3b>(i, j)[0] == 0) {
					// There is a black pixel, then there is a difference between previous and current frames
					blackPixelsCapstan++;
				}
			}
		}

		if (blackPixelsCapstan > capstanDifferentPixelsThreshold) {
			savingPinchRoller = true;
542
			endTapeSaved = true; // Never check again for end tape instant
543
544
545
546
			return true;
		} else {
			savingPinchRoller = false;
		}
547
548
	} else {
		savingPinchRoller = false; // It will already be false before the last minute of the video. After having saved the capstan, the next time reset the variable to not save again
549
	}
550
551

	/*********************************************************************************************/
552
	/************************************ Tape analysis ******************************************/
553
	/*********************************************************************************************/
554
555
556
557

	// Tape area
    int tapeAreaPixels = rectTape.size.width * rectTape.size.height;
	float tapeDifferentPixelsThreshold = tapeAreaPixels * tapeThresholdPercentual / 100;
558
559

	/***************** Extract matrices corresponding to the processing area *********************/
560
561

	// Tape area
562
563
564
565
566
	// CODE FROM https://answers.opencv.org/question/497/extract-a-rotatedrect-area/

	// matrices we'll use
	Mat M, rotatedPrevFrame, croppedPrevFrame, rotatedCurrentFrame, croppedCurrentFrame;
	// get angle and size from the bounding box
567
568
	float angle = rectTape.angle;
	Size rect_size = rectTape.size;
569
	// thanks to http://felix.abecassis.me/2011/10/opencv-rotation-deskewing/
570
	if (rectTape.angle < -45.) {
571
572
573
574
		angle += 90.0;
		swap(rect_size.width, rect_size.height);
	}
	// get the rotation matrix
575
	M = getRotationMatrix2D(rectTape.center, angle, 1.0);
576
	// perform the affine transformation
577
578
	cv::warpAffine(prevFrame, rotatedPrevFrame, M, prevFrame.size(), INTER_CUBIC);
	cv::warpAffine(currentFrame, rotatedCurrentFrame, M, currentFrame.size(), INTER_CUBIC);
579
	// crop the resulting image
580
581
	cv::getRectSubPix(rotatedPrevFrame, rect_size, rectTape.center, croppedPrevFrame);
	cv::getRectSubPix(rotatedCurrentFrame, rect_size, rectTape.center, croppedCurrentFrame);
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597

	// imshow("Current frame", currentFrame);
	// imshow("Cropped Current Frame", croppedCurrentFrame);
	// waitKey();

	// END CODE FROM https://answers.opencv.org/question/497/extract-a-rotatedrect-area/

	cv::Mat differenceFrame = difference(croppedPrevFrame, croppedCurrentFrame);

	int decEnd = (msToEnd % 1000) / 100;
	int secEnd = (msToEnd - (msToEnd % 1000)) / 1000;
	int minEnd = secEnd / 60;
	secEnd = secEnd % 60;


	/****************************** Segment analysis ****************************************/
598

599
600
601
602
603
604
605
606
607
608
609
610
  	int blackPixels = 0;
	float mediaCurrFrame;
	int totColoreCF = 0;

	for (int i = 0; i < croppedCurrentFrame.rows; i++) {
		for (int j = 0; j < croppedCurrentFrame.cols; j++) {
			totColoreCF += croppedCurrentFrame.at<cv::Vec3b>(i, j)[0] + croppedCurrentFrame.at<cv::Vec3b>(i, j)[1] + croppedCurrentFrame.at<cv::Vec3b>(i, j)[2];
			if (differenceFrame.at<cv::Vec3b>(i, j)[0] == 0) {
				blackPixels++;
			}
		}
	}
611
	mediaCurrFrame = totColoreCF/tapeAreaPixels;
612

Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
613
614
	/************************************* Decision stage ****************************************/

615
	bool irregularity = false;
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
616

617
	if (blackPixels > tapeDifferentPixelsThreshold) { // The threshold must be passed
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
618

619
620
621
622
		/***** AVERAGE_COLOR-BASED DECISION *****/
		if (mediaPrevFrame > (mediaCurrFrame + 7) || mediaPrevFrame < (mediaCurrFrame - 7)) { // They are not similar for color average
			irregularity = true;
		}
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
623

624
625
626
		/***** BRANDS MANAGEMENT *****/
		if (brands) {
			// At the beginning of the video, wait at least 5 seconds before the next Irregularity to consider it as a brand.
627
			// It is not guaranteed that it will be the first brand, but it is generally a safe approach to have a correct image
628
629
630
631
632
633
			if (firstBrand) {
				if (firstInstant - msToEnd > 5000) {
					firstBrand = false;
					savingBrand = true;
					irregularity = true;
				}
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
634
			// In the following iterations reset savingBrand, since we are no longer interested in brands.
635
			} else
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
636
637
				savingBrand = false;

638
		}
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
639

640
	}
641
642
643
644
645

	// Update mediaPrevFrame
	mediaPrevFrame = mediaCurrFrame;

	return irregularity;
646
647
648
}


649
650

int processing(cv::VideoCapture videoCapture) {
651
652
653
654
655
656

	// Video duration
	int frameNumbers_v = videoCapture.get(CAP_PROP_FRAME_COUNT);
	float fps_v = videoCapture.get(CAP_PROP_FPS); // FPS can be non-integers!!!
	float videoLength = (float) frameNumbers_v / fps_v; // [s]
	int videoLength_ms = videoLength * 1000;
657

658
659
    int savedFrames = 0, unsavedFrames = 0;
	float lastSaved = -160;
660
661
662
	// Whenever we find an Irregularity, we want to skip a lenght equal to the reading head (3 cm = 1.18 inches).
	int savingRate = 79; // [ms]. Time taken to cross 3 cm at 15 ips, or 1.5 cm at 7.5 ips. The considered lengths are the widths of the tape areas.
	// The following condition constitutes a valid approach if the tape areas have widths always equal to the reading head
663
664
665
666
667
668
	if (speed == 7.5)
		savingRate = 157; // Time taken to cross 3 cm at 7.5 ips

	// The first frame of the video won't be processed
    cv::Mat prevFrame;
	videoCapture >> prevFrame;
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
669
	firstInstant = videoLength_ms - videoCapture.get(CAP_PROP_POS_MSEC);
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685

    while (videoCapture.isOpened()) {

		cv::Mat frame;
        videoCapture >> frame;

        if (!frame.empty()) {

			int ms = videoCapture.get(CAP_PROP_POS_MSEC);
			int msToEnd = videoLength_ms - ms;
			if (ms == 0) // With OpenCV library, this happens at the last few frames of the video before realising that "frame" is empty.
				break;
			int secToEnd = msToEnd / 1000;
			int minToEnd = (secToEnd / 60) % 60;
			secToEnd = secToEnd % 60;

686
			string secStrToEnd = to_string(secToEnd), minStrToEnd = to_string(minToEnd);
687
688
689
690
691
			if (minToEnd < 10)
				minStrToEnd = "0" + minStrToEnd;
			if (secToEnd < 10)
				secStrToEnd = "0" + secStrToEnd;

692
693
			cout << "\rIrregularities: " << savedFrames << ".   ";
			cout << "Remaining video time [mm:ss]: " << minStrToEnd << ":" << secStrToEnd << flush;
694
695

			if ((ms - lastSaved > savingRate) && frameDifference(prevFrame, frame, msToEnd)) {
696

697
698
699
700
701
702
703
704
705
				// An Irregularity is found!

				// De-interlacing frame
				cv::Mat oddFrame(frame.rows/2, frame.cols, CV_8UC3);
				cv::Mat evenFrame(frame.rows/2, frame.cols, CV_8UC3);
				separateFrame(frame, oddFrame, evenFrame);

				// Finding an image containing the whole tape
				Point2f pts[4];
706
707
708
709
710
				if (savingPinchRoller)
					rectCapstan.points(pts);
				else
					rectTape.points(pts);
				cv::Mat subImage(frame, cv::Rect(100, min(pts[1].y, pts[2].y), frame.cols - 100, static_cast<int>(rectTape.size.height)));
711
712

				// De-interlacing the image with the whole tape
713
714
715
				cv::Mat oddSubImage(subImage.rows/2, subImage.cols, CV_8UC3);
				int evenSubImageRows = subImage.rows/2;
				if (subImage.rows % 2 != 0) // If the found rectangle is of odd height, we must increase evenSubImage height by 1, otherwise we have segmentation_fault!!!
716
					evenSubImageRows += 1;
717
718
				cv::Mat evenSubImage(evenSubImageRows, subImage.cols, CV_8UC3);
				separateFrame(subImage, oddSubImage, evenSubImage);
719

720
721
				string timeLabel = getTimeLabel(ms);
				string safeTimeLabel = getSafeTimeLabel(ms);
722

723
724
				string irregularityImageFilename = to_string(savedFrames) + "_" + safeTimeLabel + ".jpg";
				cv::imwrite(irregularityImagesPath / irregularityImageFilename, oddFrame);
725
726
727

				// Append Irregularity information to JSON
				boost::uuids::uuid uuid = boost::uuids::random_generator()();
728
729
730
				irregularityFileOutput1["Irregularities"] += {
					{
						"IrregularityID", boost::lexical_cast<string>(uuid)
731
732
733
734
735
736
					}, {
						"Source", "v"
					}, {
						"TimeLabel", timeLabel
					}
				};
737
738
739
				irregularityFileOutput2["Irregularities"] += {
					{
						"IrregularityID", boost::lexical_cast<string>(uuid)
740
741
742
743
744
					}, {
						"Source", "v"
					}, {
						"TimeLabel", timeLabel
					}, {
745
						"ImageURI", irregularityImagesPath.string() + "/" + irregularityImageFilename
746
747
748
749
750
751
752
753
754
755
756
757
758
					}
				};

				lastSaved = ms;
				savedFrames++;

			} else {
				unsavedFrames++;
			}

			prevFrame = frame;

	    } else {
759
			cout << endl << "Empty frame!" << endl;
760
761
762
763
764
765
766
	    	videoCapture.release();
	    	break;
	    }
	}

	ofstream myFile;
	myFile.open("log.txt", ios::app);
767
	myFile << "Saved frames are: " << savedFrames << endl;
768
769
770
771
772
773
774
775
776
777
	myFile.close();

    return 0;

}



int main(int argc, char** argv) {

778
779
780
	/*********************************************************************************************/
	/*************************************** CONFIGURATION ***************************************/
	/*********************************************************************************************/
781

782
	// Get the input from config.json or command line
783
	try {
784
785
786
		bool continueExecution = getArguments(argc, argv);
		if (!continueExecution) {
			return 0;
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
787
		}
788
	} catch (nlohmann::detail::type_error e) {
789
		cerr << RED << "config.json error!" << endl << e.what() << END << endl;
790
791
		return -1;
	}
792
793
794
795
796
797
798
799
800

	videoPath = workingPath / "PreservationAudioVisualFile" / filesName;
    if (findFileName(videoPath, fileName, extension) == -1) {
        cerr << RED << BOLD << "config.json error!" << END << endl << RED << videoPath.string() << " cannot be found or opened." << END << endl;
        return -1;
    }

	irregularityFileInputPath = workingPath / "temp" / fileName / "AudioAnalyser_IrregularityFileOutput1.json";

801
	// Input JSON check
802
	ifstream iJSON(irregularityFileInputPath);
803
	if (iJSON.fail()) {
804
		cerr << RED << BOLD << "config.json error!" << END << endl << RED << irregularityFileInputPath.string() << " cannot be found or opened." << END << endl;
805
806
807
		return -1;
	}
	if (speed != 7.5 && speed != 15) {
808
		cerr << RED << BOLD << "config.json error!" << END << endl << RED << "Speed parameter must be 7.5 or 15 ips." << END << endl;
809
810
		return -1;
	}
811
	if (tapeThresholdPercentual < 0 || tapeThresholdPercentual > 100) {
812
		cerr << RED << BOLD << "config.json error!" << END << endl << RED << "TapeThresholdPercentual parameter must be a percentage value." << END << endl;
813
814
		return -1;
	}
815
	if (capstanThresholdPercentual < 0 || capstanThresholdPercentual > 100) {
816
		cerr << RED << BOLD << "config.json error!" << END << endl << RED << "CapstanThresholdPercentual parameter must be a percentage value." << END << endl;
817
818
819
		return -1;
	}

820
	// Adjust input paramenters (considering given ones as pertinent to a speed reference = 7.5)
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
821
822
823
824
825
826
827
828
829
	if (brands) {
		if (speed == 15)
			tapeThresholdPercentual += 6;
	} else
		if (speed == 15)
			tapeThresholdPercentual += 20;
		else
			tapeThresholdPercentual += 21;

830
831
832
833
834
835
836
    cout << endl;
	cout << "Parameters:" << endl;
	cout << "    Brands: " << brands << endl;
	cout << "    Speed: " << speed << endl;
    cout << "    ThresholdPercentual: " << tapeThresholdPercentual << endl;
	cout << "    ThresholdPercentualCapstan: " << capstanThresholdPercentual << endl;
	cout << endl;
837
838
839
840

	// Read input JSON
	iJSON >> irregularityFileInput;

841
842
843
	/*********************************************************************************************/
	/*********************************** MAKE OUTPUT DIRECTORY ***********************************/
	/*********************************************************************************************/
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
844
845

	// Make directory with fileName name
846
847
	outputPath = workingPath / "temp" / fileName;
	int outputFileNameDirectory = create_directory(outputPath);
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
848
	// Get now time
849
850
851
	time_t t = chrono::system_clock::to_time_t(chrono::system_clock::now());
    string ts = ctime(&t);
	// Write useful info to log file
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
852
	ofstream myFile;
853
	myFile.open(outputPath / "log.txt", ios::app);
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
854
	myFile << endl << fileName << endl;
855
	myFile << "tsh: " << tapeThresholdPercentual << "   tshp: " << capstanThresholdPercentual << endl;
Nadir Dalla Pozza's avatar
Update.    
Nadir Dalla Pozza committed
856
857
858
	myFile << ts; // No endline character for avoiding middle blank line.
	myFile.close();

859
860
861
	/*********************************************************************************************/
	/************************************** AREAS DETECTION **************************************/
	/*********************************************************************************************/
862
863
864

	cv::VideoCapture videoCapture(videoPath);
    if (!videoCapture.isOpened()) {
865
        cerr << RED << BOLD << "Video unreadable." << END << endl;
866
867
868
869
870
871
872
        return -1;
    }

	// Get total number of frames
	int totalFrames = videoCapture.get(CAP_PROP_FRAME_COUNT);
	// Set frame position to half video length
	videoCapture.set(CAP_PROP_POS_FRAMES, totalFrames/2);
873
	// Get frame
874
	videoCapture >> myFrame;
875
876
877

	cout << "Video resolution: " << myFrame.cols << "x" << myFrame.rows << endl << endl;

878
	// Find the processing area corresponding to the tape area over the reading head
Nadir Dalla Pozza's avatar
Nadir Dalla Pozza committed
879
	bool found = findProcessingAreas(configurationFile);
880
881
882
883

	// Reset frame position
	videoCapture.set(CAP_PROP_POS_FRAMES, 0);

884
	// Write useful information to log file
885
886
	myFile.open("log.txt", ios::app);
	if (found) {
887
		cout << "Processing areas found!" << endl;
888
		myFile << "Processing areas found!" << endl;
889
890
		myFile.close();
	} else {
891
		cout << "Processing area not found. Try changing JSON parameters." << endl;
892
893
		myFile << "Processing area not found." << endl;
		myFile.close();
894
		return -1; // Program terminated early
895
896
	}

897
898
899
	/*********************************************************************************************/
	/***************************** MAKE ADDITIONAL OUTPUT DIRECTORIES ****************************/
	/*********************************************************************************************/
900

901
902
903
904
	irregularityImagesPath = outputPath / "IrregularityImages";
	int fullFrameDirectory = fs::create_directory(irregularityImagesPath);

	/*********************************************************************************************/
905
	/**************************************** PROCESSING *****************************************/
906
	/*********************************************************************************************/
907

908
	cout << endl << CYAN << "Starting processing..." << END << endl;
909
910
911
912
913

	// Processing timer
	time_t startTimer, endTimer;
	startTimer = time(NULL);

914
	processing(videoCapture);
915
916
917
918
919

	endTimer = time(NULL);
	float min = (endTimer - startTimer) / 60;
	float sec = (endTimer - startTimer) % 60;

920
921
	string result("Processing elapsed time: " + to_string((int)min) + ":" + to_string((int)sec));
	cout << endl << result << endl;
922
923

	myFile.open("log.txt", ios::app);
924
	myFile << result << endl << endl;
925
926
	myFile.close();

927
928
929
	/*********************************************************************************************/
	/************************************* IRREGULARITY FILES ************************************/
	/*********************************************************************************************/
930

931
932
	ofstream outputFile1;
	fs::path outputFile1Name = outputPath / "VideoAnalyser_IrregularityFileOutput1.json";
933
	outputFile1.open(outputFile1Name);
934
	outputFile1 << irregularityFileOutput1 << endl;
935
936
937
938

	// Irregularities to extract for the AudioAnalyser and to the TapeIrregularityClassifier
	extractIrregularityImagesForAudio(outputPath, videoPath, irregularityFileInput, irregularityFileOutput2);

939
940
	ofstream outputFile2;
	fs::path outputFile2Name = outputPath / "VideoAnalyser_IrregularityFileOutput2.json";
941
	outputFile2.open(outputFile2Name);
942
943
	outputFile2 << irregularityFileOutput2 << endl;

944
945
946
    return 0;

}