/**
 * @mainpage MPAI CAE-ARP Video Analyser
 * @file main.cpp
 *  MPAI CAE-ARP Video Analyser.
 *
 *	Implements the MPAI CAE-ARP Video Analyser Technical Specification.
 *	It identifies Irregularities on the Preservation Audio-Visual File,
 *	providing:
 *	- Irregularity Files;
 *	- Irregularity Images.
 *
 * @warning Currently, this program is only compatible with the Studer A810
 * and with videos recorded in the PAL standard.
 *
 * @todo
 *  - A resize function for the entire video should be implemented if it does
 *    not conform to the PAL standard (currently taken for granted).
 *  - Progressive videos, which do not require deinterlacing, should be handled
 *    (several steps in the code assume interlaced input).
 *
 *  @author Nadir Dalla Pozza <nadir.dallapozza@unipd.it>
 *  @author Matteo Spanio <dev2@audioinnova.com>
 *	@copyright 2023, Audio Innova S.r.l.
 *	@credits Niccolò Pretto, Nadir Dalla Pozza, Sergio Canazza
 *	@license GPL v3.0
 *	@version 1.1.2
 *	@status Production
 */
#include <stdlib.h>
#include <sys/timeb.h>

#include <boost/lexical_cast.hpp>
#include <boost/program_options.hpp>
#include <boost/uuid/uuid.hpp>             // uuid class
#include <boost/uuid/uuid_generators.hpp>  // generators
#include <boost/uuid/uuid_io.hpp>          // streaming operators etc.
#include <filesystem>
#include <fstream>
#include <iostream>
#include <nlohmann/json.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <ranges>

#include "forAudioAnalyser.h"
#include "lib/Irregularity.h"
#include "lib/IrregularityFile.h"
#include "lib/colors.h"
#include "lib/files.h"
#include "lib/time.h"
#include "opencv2/calib3d.hpp"
#include "opencv2/core.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/xfeatures2d.hpp"
#include "utility.h"

#define A_IRREG_FILE_1 "AudioAnalyser_IrregularityFileOutput1.json"
#define V_IRREG_FILE_1 "VideoAnalyser_IrregularityFileOutput1.json"
#define V_IRREG_FILE_2 "VideoAnalyser_IrregularityFileOutput2.json"

using namespace cv;
using namespace std;
using utility::Frame;
using json = nlohmann::json;
namespace fs = std::filesystem;
namespace po = boost::program_options;

/**
 * @const bool useSURF
 * @brief If true, SURF is used for capstan detection, otherwise GHT is used.
 *
 * For capstan detection, there are two alternative approaches:
 * 1. Generalized Hough Transform
 * 2. SURF.
 */
bool useSURF = true;

bool savingPinchRoller = false;
bool pinchRollerRect = false;
bool savingBrand = false;
bool endTapeSaved = false;
float mediaPrevFrame = 0;

/**
 * @var bool firstBrand
 * @brief The first frame containing brands on tape must be saved
 */
bool firstBrand = true;
float firstInstant = 0;
string fileName, extension;

// Path variables
static fs::path outputPath{};
static fs::path irregularityImagesPath{};
// JSON files
static json configurationFile{};
static json irregularityFileOutput1{};
static json irregularityFileOutput2{};
// RotatedRect identifying the processing area
RotatedRect rect, rectTape, rectCapstan;

/**
 * @fn void pprint(string text, string color)
 * @brief Prints text in a given color.
 *
 * @param text the text to print
 * @param color the color escape code to prepend (e.g. a constant from lib/colors.h)
 */
void pprint(string text, string color) { cout << color << text << END << endl; }

struct Args {
    fs::path
        workingPath;  /**< The working path where all input files are stored and where all output files will be saved */
    string filesName; /**< The name of the preservation files to be considered */
    bool brands;      /**< True if the tape presents brands on its surface */
    float speed;      /**< The speed at which the tape was read */

    Args(fs::path workingPath, string filesName, bool brands, float speed) {
        if (speed != 7.5 && speed != 15) throw invalid_argument("Speed must be 7.5 or 15");
        this->workingPath = workingPath;
        this->filesName = filesName;
        this->brands = brands;
        this->speed = speed;
    }
    ~Args() {}

    static Args from_file(fs::path path) {
        ifstream iConfig(path);
        json j;
        iConfig >> j;
        return Args(fs::path(string(j["WorkingPath"])), j["FilesName"], j["Brands"], j["Speed"]);
    }

    static Args from_cli(int argc, char** argv) {
        po::variables_map vm;
        try {
            po::options_description desc(
                "A tool that implements the MPAI CAE-ARP Video Analyser Technical "
                "Specification.\n"
                "By default, the configuration parameters are loaded from the "
                "config/config.json file,\n"
                "but, alternatively, you can pass command line arguments to "
                "replace them");
            desc.add_options()("help,h", "Display this help message")(
                "working-path,w", po::value<string>()->required(),
                "Specify the Working Path, where all input files are stored")(
                "files-name,f", po::value<string>()->required(),
                "Specify the name of the Preservation files (without "
                "extension)")("brands,b", po::value<bool>()->required(),
                              "Specify if the tape presents brands on its surface")(
                "speed,s", po::value<float>()->required(), "Specify the speed at which the tape was read");
            po::store(po::command_line_parser(argc, argv).options(desc).run(), vm);
            if (vm.count("help")) {
                cout << desc << "\n";
                std::exit(EXIT_SUCCESS);
            }
            po::notify(vm);
        } catch (po::invalid_command_line_syntax& e) {
            pprint("The command line syntax is invalid: " + string(e.what()), RED + BOLD);
            std::exit(EXIT_FAILURE);
        } catch (po::required_option& e) {
            cerr << "Error: " << e.what() << endl;
            std::exit(EXIT_FAILURE);
        } catch (nlohmann::detail::type_error& e) {
            pprint("config.json error! " + string(e.what()), RED);
            std::exit(EXIT_FAILURE);
        }

        return Args(fs::path(vm["working-path"].as<string>()), vm["files-name"].as<string>(), vm["brands"].as<bool>(),
                    vm["speed"].as<float>());
    }
};
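
/*
 * Illustrative usage of Args (a sketch, not normative): the JSON keys below are the ones
 * read by Args::from_file and the option names are the ones declared in Args::from_cli;
 * the executable name and the values are only examples, and the real config.json also
 * holds the SceneObject parameters that are read elsewhere.
 *
 *   config/config.json:
 *     {
 *       "WorkingPath": "/data/arp",
 *       "FilesName": "BERIO058",
 *       "Brands": true,
 *       "Speed": 7.5
 *     }
 *
 *   Equivalent command line:
 *     ./VideoAnalyser --working-path /data/arp --files-name BERIO058 --brands 1 --speed 7.5
 */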

// Constant paths
static const string READING_HEAD_IMG = "input/readingHead.png";
static const string CAPSTAN_TEMPLATE_IMG = "input/capstanBERIO058prova.png";
static const string CONFIG_FILE = "config/config.json";

/**
 * @brief Get the next frame object
 *
 * Whenever we find an Irregularity, we want to skip a length equal to the
 * Studer reading head (3 cm = 1.18 inches).
 * Note the following considerations:
 * - since we are analysing video at 25 fps, a frame occurs every 40 ms
 * - at 15 ips we cross 3 cm of tape in 79 ms (2 frames)
 * - at 7.5 ips we cross 3 cm of tape in 157 ms (4 frames)
 * The considered lengths are the widths of the tape areas.
 * This is a valid approach as long as the tape areas always have widths equal
 * to the reading head.
 *
 * @param cap VideoCapture object
 * @param speed tape reading speed
 * @param skip if true, skip ahead by the reading-head length before grabbing the frame
 * @return Frame
 */
Frame get_next_frame(VideoCapture& cap, float speed, bool skip = false) {

    if (skip) {
        int ms_to_skip = speed == 15 ? 79 : 157;
        cap.set(CAP_PROP_POS_MSEC, cap.get(CAP_PROP_POS_MSEC) + ms_to_skip);
    }

    Frame frame;
    cap >> frame;
    return frame;
}
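
/*
 * Worked example of the skip lengths used above (illustrative arithmetic only):
 *   at 15 ips the tape moves at 38.1 cm/s, so 3 cm take 3 / 38.1 = 0.079 s, about 79 ms,
 *   i.e. roughly 2 frames at 25 fps (40 ms per frame);
 *   at 7.5 ips the tape moves at 19.05 cm/s, so 3 cm take about 157 ms, roughly 4 frames.
 */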

double rotatedRectArea(RotatedRect rect) { return rect.size.width * rect.size.height; }

/**
 * @fn std::tuple<int, int, double, double, vector<Vec4f>, vector<Vec4f>>
 * findObject(Mat model, SceneObject object, Mat processing_area)
 * @brief Find the model in the scene using the Generalized Hough Transform.
 * It returns the best matches for positive and negative angles. If more than
 * one shape is found, the one with the highest score is chosen; if several
 * share the same highest score, the last one found is arbitrarily chosen.
 *
 * @param model the template image to be searched with the Generalized Hough
 * Transform
 * @param object the SceneObject struct containing the parameters for the
 * Generalized Hough Transform
 * @param processing_area the portion of the scene in which the template is searched
 * @return std::tuple<int, int, double, double, vector<Vec4f>, vector<Vec4f>> a
 * tuple containing the best matches for positive and negative angles
 */
std::tuple<int, int, double, double, vector<Vec4f>, vector<Vec4f>> findObject(Mat model, SceneObject object,
                                                                              Mat processing_area) {
    // Algorithm and parameters.
    // For information about the Generalized Hough Guil usage, see the tutorial at
    // https://docs.opencv.org/4.7.0/da/ddc/tutorial_generalized_hough_ballard_guil.html
    Ptr<GeneralizedHoughGuil> alg = createGeneralizedHoughGuil();

    vector<Vec4f> positionsPos, positionsNeg;
    Mat votesPos, votesNeg;

    double maxValPos = 0, maxValNeg = 0;
    int indexPos = 0, indexNeg = 0;

    alg->setMinDist(object.minDist);
    alg->setLevels(360);
    alg->setDp(2);
    alg->setMaxBufferSize(1000);

    alg->setAngleStep(1);
    alg->setAngleThresh(object.threshold.angle);

    alg->setMinScale(0.9);
    alg->setMaxScale(1.1);
    alg->setScaleStep(0.01);
    alg->setScaleThresh(object.threshold.scale);

    alg->setPosThresh(object.threshold.pos);

    alg->setCannyLowThresh(150);   // Old: 100
    alg->setCannyHighThresh(240);  // Old: 300

    alg->setTemplate(model);

    utility::detectShape(alg, model, object.threshold.pos, positionsPos, votesPos, positionsNeg, votesNeg,
                         processing_area);

    for (int i = 0; i < votesPos.size().width; i++) {
        if (votesPos.at<int>(i) >= maxValPos) {
            maxValPos = votesPos.at<int>(i);
            indexPos = i;
        }
    }

    for (int i = 0; i < votesNeg.size().width; i++) {
        if (votesNeg.at<int>(i) >= maxValNeg) {
            maxValNeg = votesNeg.at<int>(i);
            indexNeg = i;
        }
    }

    return {indexPos, indexNeg, maxValPos, maxValNeg, positionsPos, positionsNeg};
}
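
/*
 * Illustrative call site (a sketch mirroring how findProcessingAreas() below uses the
 * function; the variable names here are placeholders):
 *
 *   auto [idxPos, idxNeg, bestPos, bestNeg, candidatesPos, candidatesNeg] =
 *       findObject(templateImage, sceneObject, processingArea);
 *
 * candidatesPos[idxPos] and candidatesNeg[idxNeg] are the best candidates for the two
 * angle signs, and bestPos / bestNeg are their vote counts (0 means no detection).
 */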

/**
 * @fn bool findProcessingAreas(Mat myFrame, SceneObject tape, SceneObject capstan)
 * @brief Identifies the Regions Of Interest (ROIs) on the video,
 * which are:
 * - The reading head;
 * - The tape area under the tape head (computed on the basis of the detected
 * reading head);
 * - The capstan.
 * @param myFrame The current frame of the video.
 * @param tape the tape SceneObject;
 * @param capstan the capstan SceneObject.
 * @return true if some areas have been detected;
 * @return false otherwise.
 */
bool findProcessingAreas(Mat myFrame, SceneObject tape, SceneObject capstan) {
    /*********************************************************************************************/
    /*********************************** READING HEAD DETECTION *********************************/
    /*********************************************************************************************/

    // Save a grayscale version of myFrame in gray_current_frame and downsample it
    // to half size for performance reasons
    Frame gray_current_frame = Frame(myFrame).convertColor(COLOR_BGR2GRAY);

    Frame halved_gray_current_frame = gray_current_frame.clone().downsample(2);

    // Get the template shape in grayscale and downsample it to half size
    Frame reading_head_template = Frame(cv::imread(READING_HEAD_IMG, IMREAD_GRAYSCALE)).downsample(2);

    // Process only the bottom-central portion of the input video -> best
    // results with our videos
    Rect readingHeadProcessingAreaRect(halved_gray_current_frame.cols / 4, halved_gray_current_frame.rows / 2,
                                       halved_gray_current_frame.cols / 2, halved_gray_current_frame.rows / 2);
    Mat processingImage = halved_gray_current_frame(readingHeadProcessingAreaRect);

    RotatedRect rectPos, rectNeg;
    auto [indexPos, indexNeg, maxValPos, maxValNeg, positionsPos, positionsNeg] =
        findObject(reading_head_template, tape, processingImage);

    // The color is progressively darkened to emphasize that the algorithm found
    // more than one shape
    if (positionsPos.size() > 0)
        rectPos = utility::drawShapes(myFrame, positionsPos[indexPos], Scalar(0, 0, 255 - indexPos * 64),
                                      reading_head_template.cols, reading_head_template.rows,
                                      halved_gray_current_frame.cols / 4, halved_gray_current_frame.rows / 2, 2);
    if (positionsNeg.size() > 0)
        rectNeg = utility::drawShapes(myFrame, positionsNeg[indexNeg], Scalar(128, 128, 255 - indexNeg * 64),
                                      reading_head_template.cols, reading_head_template.rows,
                                      halved_gray_current_frame.cols / 4, halved_gray_current_frame.rows / 2, 2);

    if (maxValPos > 0)
        if (maxValNeg > 0)
            if (maxValPos > maxValNeg) {
                rect = rectPos;
            } else {
                rect = rectNeg;
            }
        else {
            rect = rectPos;
        }
    else if (maxValNeg > 0) {
        rect = rectNeg;
    } else {
        return false;
    }

    /************************************ TAPE AREA DETECTION ****************/

    // Compute the area based on the reading head detection
    Vec4f positionTape(rect.center.x, rect.center.y + rect.size.height / 2 + 20 * (rect.size.width / 200), 1,
                       rect.angle);
    rectTape = utility::drawShapes(myFrame, positionTape, Scalar(0, 255 - indexPos * 64, 0), rect.size.width,
                                   50 * (rect.size.width / 200), 0, 0, 1);
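
    /*
     * Illustrative numbers (not used at runtime, and assuming utility::drawShapes takes
     * the shape width and height as its 4th and 5th arguments, as the calls above suggest):
     * if the detected reading head is 200 px wide, the factor (rect.size.width / 200) is 1,
     * so the tape area is a 200 x 50 px strip centred 20 px below the bottom edge of the
     * reading head and tilted by the same angle as the head.
     */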

    /************************************* CAPSTAN DETECTION ******************/

    // Read the template image - it is smaller than the reading-head template, so
    // there is no need to downsample it
    Mat templateShape = cv::imread(CAPSTAN_TEMPLATE_IMG, IMREAD_GRAYSCALE);

    if (useSURF) {
        // Step 1: Detect the keypoints using SURF Detector, compute the
        // descriptors
        int minHessian = 100;
        Ptr<xfeatures2d::SURF> detector = xfeatures2d::SURF::create(minHessian);
        vector<KeyPoint> keypoints_object, keypoints_scene;
        Mat descriptors_object, descriptors_scene;

        detector->detectAndCompute(templateShape, noArray(), keypoints_object, descriptors_object);
        detector->detectAndCompute(gray_current_frame, noArray(), keypoints_scene, descriptors_scene);

        // Step 2: Matching descriptor vectors with a FLANN based matcher
        // Since SURF is a floating-point descriptor NORM_L2 is used
        Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create(DescriptorMatcher::FLANNBASED);
        vector<vector<DMatch>> knn_matches;
        matcher->knnMatch(descriptors_object, descriptors_scene, knn_matches, 2);
        //-- Filter matches using the Lowe's ratio test
        const float ratio_thresh = 0.75f;
        vector<DMatch> good_matches;
        for (size_t i = 0; i < knn_matches.size(); i++) {
            if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
                good_matches.push_back(knn_matches[i][0]);
            }
        }
        // Draw matches
        Mat img_matches;
        cv::drawMatches(templateShape, keypoints_object, halved_gray_current_frame, keypoints_scene, good_matches,
                    img_matches, Scalar::all(-1), Scalar::all(-1), vector<char>(),
                    DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
        // Localize the object
        vector<Point2f> obj;
        vector<Point2f> scene;
        for (size_t i = 0; i < good_matches.size(); i++) {
            // Get the keypoints from the good matches
            obj.push_back(keypoints_object[good_matches[i].queryIdx].pt);
            scene.push_back(keypoints_scene[good_matches[i].trainIdx].pt);
        }
        Mat H = cv::findHomography(obj, scene, RANSAC);
        // Get the corners from the image_1 ( the object to be "detected" )
        vector<Point2f> obj_corners(4);
        obj_corners[0] = Point2f(0, 0);
        obj_corners[1] = Point2f((float)templateShape.cols, 0);
        obj_corners[2] = Point2f((float)templateShape.cols, (float)templateShape.rows);
        obj_corners[3] = Point2f(0, (float)templateShape.rows);
        vector<Point2f> scene_corners(4);
        cv::perspectiveTransform(obj_corners, scene_corners, H);

        // Find average
        float capstanX = (scene_corners[0].x + scene_corners[1].x + scene_corners[2].x + scene_corners[3].x) / 4;
        float capstanY = (scene_corners[0].y + scene_corners[1].y + scene_corners[2].y + scene_corners[3].y) / 4;

        // The found rectangle is shrunk by 20 px horizontally and 90 px vertically,
        // keeping it centred: +10 in X and -20 in width, +45 in Y and -90 in height
        Vec4f positionCapstan(capstanX + 10, capstanY + 45, 1, 0);
        rectCapstan = utility::drawShapes(myFrame, positionCapstan, Scalar(255 - indexPos * 64, 0, 0),
                                          templateShape.cols - 20, templateShape.rows - 90, 0, 0, 1);

    } else {
        // Process only the right portion of the image, where the capstan always
        // appears
        int capstanProcessingAreaRectX = myFrame.cols * 3 / 4;
        int capstanProcessingAreaRectY = myFrame.rows / 2;
        int capstanProcessingAreaRectWidth = myFrame.cols / 4;
        int capstanProcessingAreaRectHeight = myFrame.rows / 2;
        Rect capstanProcessingAreaRect(capstanProcessingAreaRectX, capstanProcessingAreaRectY,
                                       capstanProcessingAreaRectWidth, capstanProcessingAreaRectHeight);
        Mat capstanProcessingAreaGrayscale = gray_current_frame(capstanProcessingAreaRect);
        // Reset algorithm and set parameters

        auto [indexPos, indexNeg, maxValPos, maxValNeg, positionsC1Pos, positionsC1Neg] =
            findObject(templateShape, capstan, capstanProcessingAreaGrayscale);

        RotatedRect rectCapstanPos, rectCapstanNeg;
        if (positionsC1Pos.size() > 0)
            rectCapstanPos = utility::drawShapes(myFrame, positionsC1Pos[indexPos], Scalar(255 - indexPos * 64, 0, 0),
                                                 templateShape.cols - 22, templateShape.rows - 92,
                                                 capstanProcessingAreaRectX + 11, capstanProcessingAreaRectY + 46, 1);
        if (positionsC1Neg.size() > 0)
            rectCapstanNeg = utility::drawShapes(myFrame, positionsC1Neg[indexNeg], Scalar(255 - indexNeg * 64, 128, 0),
                                                 templateShape.cols - 22, templateShape.rows - 92,
                                                 capstanProcessingAreaRectX + 11, capstanProcessingAreaRectY + 46, 1);

        if (maxValPos > 0)
            if (maxValNeg > 0)
                if (maxValPos > maxValNeg) {
                    rectCapstan = rectCapstanPos;
                } else {
                    rectCapstan = rectCapstanNeg;
                }
            else {
                rectCapstan = rectCapstanPos;
            }
        else if (maxValNeg > 0) {
            rectCapstan = rectCapstanNeg;
        } else {
            return false;
        }
    }

    cout << endl;

    // Save the image containing the detected areas
    cv::imwrite(outputPath.string() + "/tapeAreas.jpg", myFrame);

    return true;
}

/**
 * @fn RotatedRect check_skew(RotatedRect roi)
 * @brief Check if the region of interest is skewed and correct it
 *
 * @param roi the region of interest
 * @return RotatedRect the corrected region of interest
 */
RotatedRect check_skew(RotatedRect roi) {
    // get angle and size from the bounding box
    // thanks to http://felix.abecassis.me/2011/10/opencv-rotation-deskewing/
    cv::Size rect_size = roi.size;
    float angle = roi.angle;

    if (roi.angle < -45.) {
        angle += 90.0;
        swap(rect_size.width, rect_size.height);
    }

    return RotatedRect(roi.center, rect_size, angle);
}
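
/*
 * Worked example (illustrative): a RotatedRect with angle -60 degrees and size 100 x 40 px
 * is returned as angle 30 degrees with size 40 x 100 px, so the subsequent deskewing
 * rotation uses the smaller equivalent angle.
 */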

/**
 * @fn Frame get_difference_for_roi(Frame previous, Frame current, RotatedRect
 * roi)
 * @brief Look for differences between two consecutive frames in a specific region of
 * interest
 *
 * @param previous the reference frame
 * @param current the frame to compare with the reference
 * @param roi the region of interest
 * @return Frame the difference matrix between the two frames
 */
Frame get_difference_for_roi(Frame previous, Frame current, RotatedRect roi) {
    cv::Mat rotation_matrix = getRotationMatrix2D(roi.center, roi.angle, 1.0);

    return previous.warp(rotation_matrix)
        .crop(roi.size, roi.center)
        .difference(current.warp(rotation_matrix).crop(roi.size, roi.center));
}

/**
 * @fn bool frameDifference(cv::Mat prevFrame, cv::Mat currentFrame, int msToEnd,
 * SceneObject capstan, SceneObject tape, Args args)
 * @brief Compares two consecutive video frames and establishes whether there is
 * potentially an Irregularity. The comparison is pixel-wise and based on
 * threshold values set in the config.json file.
 *
 * @param prevFrame the frame before the current one;
 * @param currentFrame the current frame;
 * @param msToEnd the number of milliseconds left before the end of the video.
 * Useful for capstan analysis.
 * @param capstan the capstan SceneObject;
 * @param tape the tape SceneObject;
 * @param args the parsed input arguments.
 * @return true if a potential Irregularity has been found;
 * @return false otherwise.
 */
bool frameDifference(cv::Mat prevFrame, cv::Mat currentFrame, int msToEnd, SceneObject capstan, SceneObject tape,
                     Args args) {
    bool result = false;

    /*********************** Capstan analysis ************************/

    // In the last minute of the video, check for pinchRoller position for
    // endTape event
    if (!endTapeSaved && msToEnd < 60000) {
        // Capstan area
        int capstanAreaPixels = rectCapstan.size.width * rectCapstan.size.height;
        float capstanDifferentPixelsThreshold = capstanAreaPixels * capstan.threshold.percentual / 100;

        RotatedRect corrected_capstan_roi = check_skew(rectCapstan);
        Frame difference_frame = get_difference_for_roi(Frame(prevFrame), Frame(currentFrame), corrected_capstan_roi);

        int blackPixelsCapstan = 0;

        for (int i = 0; i < difference_frame.rows; i++) {
            for (int j = 0; j < difference_frame.cols; j++) {
                if (difference_frame.at<cv::Vec3b>(i, j)[0] == 0) {
                    // There is a black pixel, then there is a difference
                    // between previous and current frames
                    blackPixelsCapstan++;
                }
            }
        }

        if (blackPixelsCapstan > capstanDifferentPixelsThreshold) {
            savingPinchRoller = true;
            endTapeSaved = true;  // Never check again for end tape instant
            return true;
        }
    }
    // savingPinchRoller is already false before the last minute of the video.
    // After the capstan frame has been saved, reset the flag so it is not
    // saved again on later frames
    savingPinchRoller = false;

    /********************* Tape analysis *********************/

    // Tape area
    int tapeAreaPixels = rotatedRectArea(rectTape);
    float tapeDifferentPixelsThreshold = tapeAreaPixels * tape.threshold.percentual / 100;
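
    /*
     * Illustrative numbers (not part of the computation): with a 200 x 50 px tape area
     * (10000 px) and a ThresholdPercentual of 20, more than 2000 pixels must differ
     * between the two frames before the decision stage below can flag an Irregularity.
     */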

    RotatedRect corrected_tape_roi = check_skew(rectTape);

    Frame croppedCurrentFrame = Frame(currentFrame)
                                    .warp(getRotationMatrix2D(corrected_tape_roi.center, corrected_tape_roi.angle, 1.0))
                                    .crop(corrected_tape_roi.size, corrected_tape_roi.center);

    Frame difference_frame = get_difference_for_roi(Frame(prevFrame), Frame(currentFrame), corrected_tape_roi);

    int decEnd = (msToEnd % 1000) / 100;
    int secEnd = (msToEnd - (msToEnd % 1000)) / 1000;
    int minEnd = secEnd / 60;
    secEnd = secEnd % 60;

    /********************** Segment analysis ************************/

    int blackPixels = 0;
    float mediaCurrFrame;
    int totColoreCF = 0;

    for (int i = 0; i < croppedCurrentFrame.rows; i++) {
        for (int j = 0; j < croppedCurrentFrame.cols; j++) {
            totColoreCF += croppedCurrentFrame.at<cv::Vec3b>(i, j)[0] + croppedCurrentFrame.at<cv::Vec3b>(i, j)[1] +
                           croppedCurrentFrame.at<cv::Vec3b>(i, j)[2];
            if (difference_frame.at<cv::Vec3b>(i, j)[0] == 0) {
                blackPixels++;
            }
        }
    }
    mediaCurrFrame = totColoreCF / tapeAreaPixels;

    /*********************** Decision stage ************************/

    if (blackPixels > tapeDifferentPixelsThreshold) {  // The threshold must be passed

        /***** AVERAGE_COLOR-BASED DECISION *****/
        if (mediaPrevFrame > (mediaCurrFrame + 7) ||
            mediaPrevFrame < (mediaCurrFrame - 7)) {  // They are not similar for color average
            result = true;
        }

        /***** BRANDS MANAGEMENT *****/
        if (args.brands) {
            // At the beginning of the video, wait at least 5 seconds before the
            // next Irregularity to consider it as a brand. It is not guaranteed
            // that it will be the first brand, but it is generally a safe
            // approach to have a correct image
            if (firstBrand) {
                if (firstInstant - msToEnd > 5000) {
                    firstBrand = false;
                    savingBrand = true;
                    result = true;
                }
                // In the following iterations reset savingBrand, since we are
                // no longer interested in brands.
            } else
                savingBrand = false;
        }
    }

    // Update mediaPrevFrame
    mediaPrevFrame = mediaCurrFrame;

    return result;
}
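
/*
 * Illustrative example of the average-colour check above (the numbers are made up):
 * mediaCurrFrame is the per-pixel sum of the B, G and R channels averaged over the tape
 * area (range 0-765), so if the previous frame averaged 300 and the current one 310, the
 * difference of 10 exceeds the +/-7 band and the frames are considered dissimilar.
 */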

/**
 * @fn void processing(cv::VideoCapture videoCapture, SceneObject capstan,
 * SceneObject tape, Args args)
 * @brief video processing phase, where each frame is analysed.
 * It saves the IrregularityImages and updates the IrregularityFiles if an
 * Irregularity is found
 *
 * @note To be able to work with the "old" neural network (by Ilenya),
 * the output images should correspond to the old "whole tape" where, from the
 * frame judged as interesting, an area corresponding to the height of the tape
 * was extracted (so about the height of the current rectangle) and as wide as
 * the original frame (so 720px). This area will then have to be resized to
 * 224x224 as in the past. If instead you decide to use the new neural network,
 * no changes are needed.
 *
 * @param videoCapture the input Preservation Audio-Visual File;
 * @param capstan the capstan SceneObject;
 * @param tape the tape SceneObject;
 * @param args the command line arguments.
 */
void processing(cv::VideoCapture videoCapture, SceneObject capstan, SceneObject tape, Args args) {
    const int video_length_ms = ((float)videoCapture.get(CAP_PROP_FRAME_COUNT) / videoCapture.get(CAP_PROP_FPS)) * 1000;
    int video_current_ms = videoCapture.get(CAP_PROP_POS_MSEC);
    // counters
    int savedFrames = 0;
    float lastSaved = -160;
    /* Whenever we find an Irregularity, we want to skip a length equal to the
     * Studer reading head (3 cm = 1.18 inches).
     * Note the following considerations:
     * - since we are analysing video at 25 fps, a frame occurs every 40 ms
     * - at 15 ips we cross 3 cm of tape in 79 ms (2 frames)
     * - at 7.5 ips we cross 3 cm of tape in 157 ms (4 frames)
     * The considered lengths are the widths of the tape areas.
     * This is a valid approach as long as the tape areas always have widths
     * equal to the reading head.
     */
    int savingRate = args.speed == 7.5 ? 157 : 79;  // [ms]

    // The first frame of the video won't be processed
    cv::Mat prevFrame;
    videoCapture >> prevFrame;
    firstInstant = video_length_ms - video_current_ms;

    while (videoCapture.isOpened()) {

        Frame currentFrame = get_next_frame(videoCapture, args.speed);
        video_current_ms = videoCapture.get(CAP_PROP_POS_MSEC);

        Frame frame;
        videoCapture >> frame;
        video_current_ms = videoCapture.get(CAP_PROP_POS_MSEC);

        if (frame.empty()) {
            cout << endl << "Empty frame!" << endl;
            videoCapture.release();
            return;
        }

        int msToEnd = video_length_ms - video_current_ms;
        if (video_current_ms == 0)  // With OpenCV library, this happens at the last few frames of
                                    // the video before realising that "frame" is empty.
            return;

        // Display program status
        int secToEnd = msToEnd / 1000;
        int minToEnd = (secToEnd / 60) % 60;
        secToEnd = secToEnd % 60;
        string secStrToEnd = secToEnd < 10 ? "0" + to_string(secToEnd) : to_string(secToEnd);
        string minStrToEnd = minToEnd < 10 ? "0" + to_string(minToEnd) : to_string(minToEnd);

        cout << "\rIrregularities: " << savedFrames << ".   ";
        cout << "Remaining video time [mm:ss]: " << minStrToEnd << ":" << secStrToEnd << flush;

        if ((video_current_ms - lastSaved > savingRate) &&
            frameDifference(prevFrame, frame, msToEnd, capstan, tape, args)) {
            // An Irregularity has been found!
            auto [odd_frame, even_frame] = frame.deinterlace();

            string timeLabel = getTimeLabel(video_current_ms, ":");
            string safeTimeLabel = getTimeLabel(video_current_ms, "-");

            string irregularityImageFilename = to_string(savedFrames) + "_" + safeTimeLabel + ".jpg";
            cv::imwrite(irregularityImagesPath / irregularityImageFilename, odd_frame);

            // Append Irregularity information to JSON
            Irregularity irreg = Irregularity(Source::Video, timeLabel);
            irregularityFileOutput1["Irregularities"] += irreg.to_JSON();
            irregularityFileOutput2["Irregularities"] +=
                irreg.set_image_URI(irregularityImagesPath.string() + "/" + irregularityImageFilename).to_JSON();

            lastSaved = video_current_ms;
            savedFrames++;
        }

        prevFrame = frame;
    }
}

/**
 * @fn int main(int argc, char** argv)
 * @brief main program, organised as:
 * - Get input from command line or config.json file;
 * - Check input parameters;
 * - Creation of output directories;
 * - Regions Of Interest (ROIs) detection;
 * - Irregularities detection;
 * - Saving of output IrregularityFiles.
 *
 * @todo The main function should be split into 2 steps, and each step should be callable from the command line, so
 * that the user can choose to run only the first step, only the second step, or both:
 * - First step: generate irregularity file output 1;
 * - Second step: generate irregularity file output 2.
 *
 * @param argc Command line arguments count;
 * @param argv Command line arguments.
 * @return int program status.
 */
int main(int argc, char** argv) {
    const Args args = argc > 1 ? Args::from_cli(argc, argv) : Args::from_file(CONFIG_FILE);
    SceneObject capstan = SceneObject::from_file(CONFIG_FILE, Object::CAPSTAN);
    SceneObject tape = SceneObject::from_file(CONFIG_FILE, Object::TAPE);

    json irregularityFileInput;
    cv::Mat myFrame;

    const fs::path VIDEO_PATH = args.workingPath / "PreservationAudioVisualFile" / args.filesName;

    if (files::findFileName(VIDEO_PATH, fileName, extension) == -1) {
        cerr << RED << BOLD << "Input error!" << END << endl
             << RED << VIDEO_PATH.string() << " cannot be found or opened." << END << endl;
        std::exit(EXIT_FAILURE);
    }

    const fs::path irregularityFileInputPath = args.workingPath / "temp" / fileName / A_IRREG_FILE_1;

    // Input JSON check
    ifstream iJSON(irregularityFileInputPath);
    if (iJSON.fail()) {
        cerr << RED << BOLD << "Input error!" << END << endl
             << RED << irregularityFileInputPath.string() << " cannot be found or opened." << END << endl;
        std::exit(EXIT_FAILURE);
    }

    // Read input JSON
    iJSON >> irregularityFileInput;

    // Adjust input parameters (the given values are assumed to refer to a reading speed of 7.5 ips)
    if (args.brands) {
        if (args.speed == 15) tape.threshold.percentual += 6;
    } else if (args.speed == 15)
        tape.threshold.percentual += 20;
    else
        tape.threshold.percentual += 21;
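
    /*
     * Summary of the adjustment above (the offsets follow directly from the branches;
     * the base ThresholdPercentual comes from config.json):
     *   brands  && speed == 15  -> base + 6
     *   brands  && speed == 7.5 -> base (unchanged)
     *   !brands && speed == 15  -> base + 20
     *   !brands && speed == 7.5 -> base + 21
     */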

    cout << endl;
    cout << "Parameters:" << endl;
    cout << "    Brands: " << args.brands << endl;
    cout << "    Speed: " << args.speed << endl;
    cout << "    ThresholdPercentual: " << tape.threshold.percentual << endl;
    cout << "    ThresholdPercentualCapstan: " << capstan.threshold.percentual << endl;
    cout << endl;

    // Make directory with fileName name
    outputPath = args.workingPath / "temp" / fileName;
    fs::create_directory(outputPath);

    irregularityImagesPath = outputPath / "IrregularityImages";
    fs::create_directory(irregularityImagesPath);

    /************************************** AREAS DETECTION *********************************/

    cv::VideoCapture videoCapture(VIDEO_PATH);
    if (!videoCapture.isOpened()) {
        pprint("Video unreadable.", RED + BOLD);
        std::exit(EXIT_FAILURE);
    }

    int frames_number = videoCapture.get(CAP_PROP_FRAME_COUNT);
    // Set frame position to half video length
    videoCapture.set(CAP_PROP_POS_FRAMES, frames_number / 2);
    // Get frame
    videoCapture >> myFrame;

    cout << "Video resolution: " << myFrame.cols << "x" << myFrame.rows << endl;

    bool found = findProcessingAreas(myFrame, tape, capstan);

    // Reset frame position
    videoCapture.set(CAP_PROP_POS_FRAMES, 0);

    if (!found) {
        pprint("Processing area not found. Try changing JSON parameters.", RED);
        std::exit(EXIT_FAILURE);
    }

    /**************************************** PROCESSING **************************/

    pprint("Processing...", CYAN);

    // Processing timer
    time_t startTimer, endTimer;
    startTimer = time(NULL);

    processing(videoCapture, capstan, tape, args);

    endTimer = time(NULL);
    float min = (endTimer - startTimer) / 60;
    float sec = (endTimer - startTimer) % 60;

    string result("Processing elapsed time: " + to_string((int)min) + ":" + to_string((int)sec));
    cout << endl << result << endl;

    /************************************* IRREGULARITY FILES *****************************/

    files::saveFile(outputPath / V_IRREG_FILE_1, irregularityFileOutput1.dump(4), false);

    // Irregularities to extract for the AudioAnalyser and for the TapeIrregularityClassifier
    extractIrregularityImagesForAudio(outputPath, VIDEO_PATH, irregularityFileInput, irregularityFileOutput2);

    files::saveFile(outputPath / V_IRREG_FILE_2, irregularityFileOutput2.dump(4), false);

    return EXIT_SUCCESS;
}