/** * @mainpage MPAI CAE-ARP Video Analyser * @file main.cpp * MPAI CAE-ARP Video Analyser. * * Implements MPAI CAE-ARP Video Analyser Technical Specification. * It identifies Irregularities on the Preservation Audio-Visual File, *providing: * - Irregularity Files; * - Irregularity Images. * * @warning Currently, this program is only compatible with the Studer A810 *and videos recorded in PAL standard. * * @todo * - A resize function of the entire video should be implemented if it does not *conform to the PAL standard (currently taken for granted). * - Progressive videos, which do not require deinterlacing, should be managed *(in the code there are several steps that operate considering this property). * * @author Nadir Dalla Pozza * @author Matteo Spanio * @copyright 2023, Audio Innova S.r.l. * @credits Niccolò Pretto, Nadir Dalla Pozza, Sergio Canazza * @license GPL v3.0 * @version 1.2 * @status Production */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "forAudioAnalyser.h" #include "lib/Irregularity.hpp" #include "lib/IrregularityFile.hpp" #include "lib/colors.hpp" #include "lib/core.hpp" #include "lib/detection.hpp" #include "lib/files.hpp" #include "lib/io.hpp" #include "lib/time.hpp" #include "utility.hpp" #define BLACK_PIXEL 0 using namespace std; using namespace colors; using json = nlohmann::json; using videoanalyser::core::Frame; using videoanalyser::io::pprint; using videoanalyser::io::print_error_and_exit; namespace fs = std::filesystem; namespace po = boost::program_options; namespace va = videoanalyser; bool g_end_tape_saved = false; bool g_first_brand = true; // The first frame containing brands on tape must be saved float g_first_instant = 0; float g_mean_prev_frame_color = 0; // Average frame color static fs::path g_output_path{}; static fs::path g_irregularity_images_path{}; static json g_irregularity_file_1{}; static json g_irregularity_file_2{}; struct Args { fs::path working_path; /**< The working path where all input files are stored and where all output files will be saved */ string files_name; /**< The name of the preservation files to be considered */ bool brands; /**< True if tape presents brands on its surface */ float speed; /**< The speed at which the tape was read */ Args(fs::path working_path, string files_name, bool brands, float speed) { if (speed != 7.5 && speed != 15) throw invalid_argument("Speed must be 7.5 or 15"); this->working_path = working_path; this->files_name = files_name; this->brands = brands; this->speed = speed; } ~Args() {} static Args from_file(fs::path path) { ifstream iConfig(path); json j; iConfig >> j; return Args(fs::path(string(j["WorkingPath"])), j["FilesName"], j["Brands"], j["Speed"]); } static Args from_cli(int argc, char** argv) { po::variables_map vm; try { po::options_description desc( "A tool that implements MPAI CAE-ARP Video Analyser Technical " "Specification.\n" "By default, the configuartion parameters are loaded from " "config/config.json file,\n" "but, alternately, you can pass command line arguments to " "replace them"); desc.add_options()("help,h", "Display this help message")( "working-path,w", po::value()->required(), "Specify the Working Path, where all input files are stored")( "files-name,f", po::value()->required(), "Specify the name of the Preservation files (without " "extension)")("brands,b", po::value()->required(), "Specify if the tape presents brands on its surface")( "speed,s", po::value()->required(), "Specify the speed at which the tape was read"); po::store(po::command_line_parser(argc, argv).options(desc).run(), vm); if (vm.count("help")) { std::cout << desc << "\n"; std::exit(EXIT_SUCCESS); } po::notify(vm); } catch (po::invalid_command_line_syntax& e) { print_error_and_exit("Invalid command line syntax: " + string(e.what())); } catch (po::required_option& e) { print_error_and_exit("Missing required option: " + string(e.what())); } catch (nlohmann::detail::type_error e) { print_error_and_exit("config.json error: " + string(e.what())); } return Args(fs::path(vm["working-path"].as()), vm["files-name"].as(), vm["brands"].as(), vm["speed"].as()); } }; /** * @brief Get the next frame object. * * Whenever we find an Irregularity, we want to skip a lenght equal to the * Studer reading head (3 cm = 1.18 inches). * * Note the following considerations: * - since we are analysing video at 25 fps a frame occurs every 40 ms * - at 15 ips we cross 3 cm of tape in 79 ms (2 frames) * - at 7.5 ips we cross 3 cm of tape in 157 ms (4 frames) * * The considered lengths are the widths of the tape areas. * The following condition constitutes a valid approach if the tape areas * have widths always equal to the reading head * * @param cap VideoCapture object * @param speed tape reading speed * @return Frame */ Frame get_next_frame(VideoCapture& cap, float speed, bool skip = false) { if (skip) { int ms_to_skip = speed == 15 ? 79 : 157; cap.set(CAP_PROP_POS_MSEC, cap.get(CAP_PROP_POS_MSEC) + ms_to_skip); } Frame frame; cap >> frame; return frame; } float rotated_rect_area(RotatedRect rect) { return rect.size.width * rect.size.height; } /** * @fn bool find_processing_areas(Frame frame, SceneObject tape, SceneObject capstan) * @brief Identifies the Regions Of Interest (ROIs) on the video, * which are: * - The reading head; * - The tape area under the tape head (computed on the basis of the detected * reading head); * - The capstan. * * * * @param frame the frame to be analysed * @param tape the tape object * @param capstan the capstan object * @return true if some areas have been detected; * @return false otherwise. */ va::Result> find_processing_areas(Frame frame, SceneObject tape, SceneObject capstan) { va::detection::SceneElement tape_element{ va::detection::ElementType::TAPE, tape.minDist, {tape.threshold.percentual, tape.threshold.angle, tape.threshold.scale, tape.threshold.pos}}; auto tape_roi_result = va::detection::find_roi(frame, va::detection::Algorithm::GHT, tape_element); if (std::holds_alternative(tape_roi_result)) return va::Error("Error while finding tape roi: " + std::get(tape_roi_result)); auto rect_tape = std::get(tape_roi_result); va::detection::SceneElement capstan_element{ va::detection::ElementType::CAPSTAN, capstan.minDist, {capstan.threshold.percentual, capstan.threshold.angle, capstan.threshold.scale, capstan.threshold.pos}}; auto capstan_roi_result = va::detection::find_roi(frame, va::detection::Algorithm::SURF, capstan_element); if (std::holds_alternative(capstan_roi_result)) return va::Error("Error while finding capstan roi: " + std::get(capstan_roi_result)); auto rect_capstan = std::get(capstan_roi_result); // save detected areas to file cv::rectangle(frame, rect_tape.boundingRect(), cv::Scalar(0, 255, 0), 2); cv::rectangle(frame, rect_capstan.boundingRect(), cv::Scalar(255, 0, 0), 2); cv::imwrite(g_output_path.string() + "/my_tape_areas.jpg", frame); return make_pair(rect_tape, rect_capstan); } /** * @fn RotatedRect check_skew(RotatedRect roi) * @brief Check if the region of interest is skewed and correct it * * @param roi the region of interest * @return RotatedRect the corrected region of interest */ RotatedRect check_skew(RotatedRect roi) { // get angle and size from the bounding box // thanks to http://felix.abecassis.me/2011/10/opencv-rotation-deskewing/ cv::Size rect_size = roi.size; float angle = roi.angle; if (roi.angle < -45.) { angle += 90.0; std::swap(rect_size.width, rect_size.height); } return RotatedRect(roi.center, rect_size, angle); } /** * @fn Frame get_difference_for_roi(Frame previous, Frame current, RotatedRect * roi) * @brief Look for differences in two consecutive frames in a specific region of * interest * * @param previous the reference frame * @param current the frame to compare with the reference * @param roi the region of interest * @return Frame the difference matrix between the two frames */ Frame get_difference_for_roi(Frame previous, Frame current, RotatedRect roi) { cv::Mat rotation_matrix = cv::getRotationMatrix2D(roi.center, roi.angle, 1.0); return previous.warp(rotation_matrix) .crop(roi.size, roi.center) .difference(current.warp(rotation_matrix).crop(roi.size, roi.center)); } /** * @fn bool is_frame_different(cv::Mat prev_frame, cv::Mat current_frame, int * ms_to_end, SceneObject capstan, SceneObject tape, Args args, va::detection::Roi rect_tape, va::detection::Roi * rect_capstan) * @brief Compares two consecutive video frames and establish if there * potentially is an Irregularity. The comparison is pixel-wise and based on * threshold values set on config.json file. * * @param prev_frame the frame before the current one; * @param current_frame the current frame; * @param ms_to_end the number of milliseconds left before the end of the video. * Useful for capstan analysis. * @param capstan the capstan object; * @param tape the tape object; * @param args the command line arguments; * @param rect_tape the tape area under the tape head; * @param rect_capstan the capstan area. * @return true if a potential Irregularity has been found; * @return false otherwise. */ bool is_frame_different(cv::Mat prev_frame, cv::Mat current_frame, int ms_to_end, SceneObject capstan, SceneObject tape, Args args, va::detection::Roi rect_tape, va::detection::Roi rect_capstan) { bool result = false; int num_different_pixels = 0; /*********************** Capstan analysis ************************/ // In the last minute of the video, check for pinchRoller position for // endTape event if (!g_end_tape_saved && ms_to_end < 60000) { RotatedRect corrected_capstan_roi = check_skew(rect_capstan); Frame difference_frame = get_difference_for_roi(Frame(prev_frame), Frame(current_frame), corrected_capstan_roi); for (int i = 0; i < difference_frame.rows; i++) { for (int j = 0; j < difference_frame.cols; j++) { if (difference_frame.at(i, j)[0] == BLACK_PIXEL) { // There is a black pixel, then there is a difference // between previous and current frames num_different_pixels++; } } } float capstan_pixel_threshold = rotated_rect_area(rect_capstan) * capstan.threshold.percentual / 100; if (num_different_pixels > capstan_pixel_threshold) { g_end_tape_saved = true; // Never check again for end tape instant return true; } } /********************* Tape analysis *********************/ RotatedRect corrected_tape_roi = check_skew(rect_tape); Frame difference_frame = get_difference_for_roi(Frame(prev_frame), Frame(current_frame), corrected_tape_roi); /********************** Segment analysis ************************/ Frame cropped_current_frame = Frame(current_frame) .warp(cv::getRotationMatrix2D(corrected_tape_roi.center, corrected_tape_roi.angle, 1.0)) .crop(corrected_tape_roi.size, corrected_tape_roi.center); num_different_pixels = 0; float mean_current_frame_color; int current_frame_color_sum = 0; for (int i = 0; i < cropped_current_frame.rows; i++) { for (int j = 0; j < cropped_current_frame.cols; j++) { current_frame_color_sum += cropped_current_frame.at(i, j)[0] + cropped_current_frame.at(i, j)[1] + cropped_current_frame.at(i, j)[2]; if (difference_frame.at(i, j)[0] == BLACK_PIXEL) { num_different_pixels++; } } } float tape_area_pixels_sq = rotated_rect_area(rect_tape); mean_current_frame_color = current_frame_color_sum / tape_area_pixels_sq; /*********************** Decision stage ************************/ float tape_pixel_threshold = tape_area_pixels_sq * tape.threshold.percentual / 100; if (num_different_pixels > tape_pixel_threshold) { // The threshold must be passed /***** AVERAGE_COLOR-BASED DECISION *****/ if (g_mean_prev_frame_color > (mean_current_frame_color + 7) || g_mean_prev_frame_color < (mean_current_frame_color - 7)) { // They are not similar for color average result = true; } /***** BRANDS MANAGEMENT *****/ // At the beginning of the video, wait at least 5 seconds before the // next Irregularity to consider it as a brand. It is not guaranteed // that it will be the first brand, but it is generally a safe // approach to have a correct image if (args.brands && g_first_brand && g_first_instant - ms_to_end > 5000) { g_first_brand = false; result = true; } } g_mean_prev_frame_color = mean_current_frame_color; return result; } /** * @fn void processing(cv::VideoCapture video_capture, SceneObject capstan, * SceneObject tape, Args args, va::detection::Roi rect_tape, va::detection::Roi rect_capstan) * @brief video processing phase, where each frame is analysed. * It saves the IrregularityImages and updates the IrregularityFiles if an * Irregularity is found * * @note To be able to work with the "old" neural network (by Ilenya), * the output images should correspond to the old "whole tape" where, from the * frame judged as interesting, an area corresponding to the height of the tape * was extracted (so about the height of the current rectangle) and as wide as * the original frame (so 720px). This area will then have to be resized to * 224x224 as in the past. If instead you decide to use the new neural network, * no changes are needed. * * @param video_capture the input Preservation Audio-Visual File; * @param capstan the capstan SceneObject; * @param tape the tape SceneObject; * @param args the command line arguments. * @param rect_tape the tape Roi; * @param rect_capstan the capstan Roi. */ void processing(cv::VideoCapture video_capture, SceneObject capstan, SceneObject tape, Args args, va::detection::Roi rect_tape, va::detection::Roi rect_capstan) { const int video_length_ms = ((float)video_capture.get(CAP_PROP_FRAME_COUNT) / video_capture.get(CAP_PROP_FPS)) * 1000; int video_current_ms = video_capture.get(CAP_PROP_POS_MSEC); int num_saved_frames = 0; bool irregularity_found = false; // The first frame of the video won't be processed cv::Mat prev_frame = get_next_frame(video_capture, args.speed, irregularity_found); g_first_instant = video_length_ms - video_current_ms; while (video_capture.isOpened()) { Frame frame = get_next_frame(video_capture, args.speed, irregularity_found); video_current_ms = video_capture.get(CAP_PROP_POS_MSEC); if (frame.empty()) { std::cout << endl << "Empty frame!" << endl; video_capture.release(); return; } int ms_to_end = video_length_ms - video_current_ms; if (video_current_ms == 0) // With OpenCV library, this happens at the last few frames of // the video before realising that "frame" is empty. return; // Display program status int sec_to_end = ms_to_end / 1000; int min_to_end = (sec_to_end / 60) % 60; sec_to_end = sec_to_end % 60; string sec_str_to_end = (sec_to_end < 10 ? "0" : "") + to_string(sec_to_end); string min_str_to_end = (min_to_end < 10 ? "0" : "") + to_string(min_to_end); std::cout << "\rIrregularities: " << num_saved_frames << ". "; std::cout << "Remaining video time [mm:ss]: " << min_str_to_end << ":" << sec_str_to_end << flush; irregularity_found = is_frame_different(prev_frame, frame, ms_to_end, capstan, tape, args, rect_tape, rect_capstan); if (irregularity_found) { auto [odd_frame, _] = frame.deinterlace(); string irregularityImageFilename = to_string(num_saved_frames) + "_" + getTimeLabel(video_current_ms, "-") + ".jpg"; cv::imwrite(g_irregularity_images_path / irregularityImageFilename, odd_frame); // Append Irregularity information to JSON Irregularity irreg = Irregularity(Source::Video, getTimeLabel(video_current_ms, ":")); g_irregularity_file_1["Irregularities"] += irreg.to_JSON(); g_irregularity_file_2["Irregularities"] += irreg.set_image_URI(g_irregularity_images_path.string() + "/" + irregularityImageFilename).to_JSON(); num_saved_frames++; } prev_frame = frame; } } /** * @fn int main(int argc, char** argv) * @brief main program, organised as: * - Get input from command line or config.json file; * - Check input parameters; * - Creation of output directories; * - Regions Of Interest (ROIs) detection; * - Irregularities detection; * - Saving of output IrregularityFiles. * * @todo The main function should be splitted into 2 steps, and each step should be callable from the command line, so * that the user can choose to run only the first step, only the second step, or both: * - First step: generate irregularity file output 1; * - Second step: generate irregularity file output 2. * * @param argc Command line arguments count; * @param argv Command line arguments. * @return int program status. */ int main(int argc, char** argv) { const string CONFIG_FILE = "config/config.json"; const string A_IRREG_FILE_1 = "AudioAnalyser_IrregularityFileOutput1.json"; const string V_IRREG_FILE_1 = "VideoAnalyser_IrregularityFileOutput1.json"; const string V_IRREG_FILE_2 = "VideoAnalyser_IrregularityFileOutput2.json"; Args args = argc > 1 ? Args::from_cli(argc, argv) : Args::from_file(CONFIG_FILE); SceneObject capstan = SceneObject::from_file(CONFIG_FILE, ROI::CAPSTAN); SceneObject tape = SceneObject::from_file(CONFIG_FILE, ROI::TAPE); const fs::path VIDEO_PATH = args.working_path / "PreservationAudioVisualFile" / args.files_name; const auto [FILE_NAME, FILE_FORMAT] = files::get_filename_and_extension(VIDEO_PATH); const fs::path AUDIO_IRR_FILE_PATH = args.working_path / "temp" / FILE_NAME / A_IRREG_FILE_1; std::cout << "Video to be analysed: " << endl; std::cout << "\tFile name: " << FILE_NAME << endl; std::cout << "\tExtension: " << FILE_FORMAT << endl; if (FILE_FORMAT.compare("avi") != 0 && FILE_FORMAT.compare("mp4") != 0 && FILE_FORMAT.compare("mov") != 0) print_error_and_exit("Input error: The input file must be an AVI, MP4 or MOV file."); ifstream iJSON(AUDIO_IRR_FILE_PATH); if (iJSON.fail()) print_error_and_exit("config.json error" + AUDIO_IRR_FILE_PATH.string() + " cannot be found or opened."); json audio_irr_file; iJSON >> audio_irr_file; // Adjust input paramenters (considering given ones as pertinent to a speed reference = 7.5) if (args.speed == 15) { tape.threshold.percentual += args.brands ? 6 : 20; } else if (!args.brands) tape.threshold.percentual += 21; g_output_path = args.working_path / "temp" / FILE_NAME; fs::create_directory(g_output_path); g_irregularity_images_path = g_output_path / "IrregularityImages"; fs::create_directory(g_irregularity_images_path); cv::VideoCapture video_capture(VIDEO_PATH); // Open video file if (!video_capture.isOpened()) print_error_and_exit("Video error: Video file cannot be opened."); video_capture.set(CAP_PROP_POS_FRAMES, video_capture.get(CAP_PROP_FRAME_COUNT) / 2); // Set frame position to half video length cv::Mat middle_frame = get_next_frame(video_capture, args.speed); video_capture.set(CAP_PROP_POS_FRAMES, 0); // Reset frame position std::cout << "\tResolution: " << middle_frame.cols << "x" << middle_frame.rows << "\n\n"; auto processing_areas = find_processing_areas(middle_frame, tape, capstan); if (std::holds_alternative(processing_areas)) print_error_and_exit("Processing area not found: Try changing JSON parameters."); auto [rect_tape, rect_capstan] = std::get>(processing_areas); pprint("Processing...", CYAN); processing(video_capture, capstan, tape, args, rect_tape, rect_capstan); files::save_file(g_output_path / V_IRREG_FILE_1, g_irregularity_file_1.dump(4)); // Irregularities to extract for the AudioAnalyser and to the TapeIrregularityClassifier extract_irregularity_images_for_audio(g_output_path, VIDEO_PATH, audio_irr_file, g_irregularity_file_2); files::save_file(g_output_path / V_IRREG_FILE_2, g_irregularity_file_2.dump(4)); return EXIT_SUCCESS; }