Commit 8f5a80ed authored by Matteo's avatar Matteo
Browse files

major refactoring

parent df88fb26
# CMAKE_CXX_STANDARD 23 is only understood by CMake >= 3.20; with the old
# minimum of 3.2 an older CMake would silently build without -std=c++23.
cmake_minimum_required(VERSION 3.20)
project(video_analyser LANGUAGES CXX)

# Keep executables in <source>/bin (modern spelling of EXECUTABLE_OUTPUT_PATH).
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin)

set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Opt-in test build; replaces the commented-out googletest scaffolding.
option(VIDEO_ANALYSER_BUILD_TESTS "Build the googletest-based test suite" OFF)

# Resolve dependencies before declaring targets. Their imported targets carry
# include paths and library locations, which makes the previous
# LINK_DIRECTORIES(/usr/local/lib) and INCLUDE_DIRECTORIES calls unnecessary.
find_package(OpenCV REQUIRED)
find_package(nlohmann_json 3.2.0 REQUIRED)
find_package(Boost REQUIRED COMPONENTS program_options)

add_library(analyser_lib
    src/lib/colors.h
    src/lib/time.cpp
    src/lib/time.h
    src/lib/enums.h
    src/lib/enums.cpp
    src/lib/Irregularity.h
    src/lib/Irregularity.cpp
    src/lib/IrregularityFile.h
    src/lib/IrregularityFile.cpp
    src/lib/TimeLabel.h
    src/lib/TimeLabel.cpp
    src/utility.h
    src/utility.cpp
)
# analyser_lib's own sources use OpenCV, nlohmann::json and Boost, so it must
# link them itself (previously only the executable did). PUBLIC propagates the
# usage requirements (includes, defines, libs) to every consumer.
target_include_directories(analyser_lib PUBLIC ${OpenCV_INCLUDE_DIRS})
target_link_libraries(analyser_lib
    PUBLIC
        ${OpenCV_LIBRARIES}
        nlohmann_json::nlohmann_json
        Boost::program_options
)

add_library(files
    src/lib/files.h
    src/lib/files.cpp
)

add_executable(video_analyser src/main.cpp)
# Third-party usage requirements arrive transitively through analyser_lib.
target_link_libraries(video_analyser
    PRIVATE
        analyser_lib
        files
)

if(VIDEO_ANALYSER_BUILD_TESTS)
    include(FetchContent)
    FetchContent_Declare(
        googletest
        URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
    )
    # For Windows: prevent overriding the parent project's compiler/linker settings.
    set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
    FetchContent_MakeAvailable(googletest)

    enable_testing()
    add_executable(test_suite
        tests/irregularity_test.cpp
        tests/enums_test.cpp
    )
    target_link_libraries(test_suite
        PRIVATE
            GTest::gtest_main
            analyser_lib
    )
    include(GoogleTest)
    gtest_discover_tests(test_suite)
endif()
# CMAKE_CXX_STANDARD 20 requires CMake >= 3.12 and FetchContent_MakeAvailable
# requires >= 3.14, so the previous minimum of 3.2 was too low.
cmake_minimum_required(VERSION 3.16)
project(video_analyser LANGUAGES CXX)

# Keep executables in <source>/bin (modern spelling of EXECUTABLE_OUTPUT_PATH).
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Opt-in test build; replaces the commented-out googletest scaffolding.
option(VIDEO_ANALYSER_BUILD_TESTS "Build the googletest-based test suite" OFF)

# Resolve dependencies before declaring targets. Their imported targets carry
# include paths and library locations, which makes the previous
# LINK_DIRECTORIES(/usr/local/lib) and INCLUDE_DIRECTORIES calls unnecessary.
find_package(OpenCV REQUIRED)
find_package(nlohmann_json 3.2.0 REQUIRED)
find_package(Boost REQUIRED COMPONENTS program_options)

add_library(analyser_lib
    src/lib/colors.hpp
    src/lib/core.hpp
    src/lib/core.cpp
    src/lib/detection.hpp
    src/lib/detection.cpp
    src/lib/io.hpp
    src/lib/io.cpp
    src/lib/time.cpp
    src/lib/time.hpp
    src/lib/enums.hpp
    src/lib/enums.cpp
    src/lib/Irregularity.hpp
    src/lib/Irregularity.cpp
    src/lib/IrregularityFile.hpp
    src/lib/IrregularityFile.cpp
    src/lib/TimeLabel.hpp
    src/lib/TimeLabel.cpp
    src/utility.hpp
    src/utility.cpp
)
# PUBLIC (instead of the keyword-less signature) propagates these usage
# requirements to consumers such as video_analyser and test_suite.
target_include_directories(analyser_lib PUBLIC ${OpenCV_INCLUDE_DIRS})
target_link_libraries(analyser_lib
    PUBLIC
        ${OpenCV_LIBRARIES}
        nlohmann_json::nlohmann_json
        Boost::program_options
)

add_library(files
    src/lib/files.hpp
    src/lib/files.cpp
)

add_executable(video_analyser src/main.cpp)
# Third-party usage requirements arrive transitively through analyser_lib.
target_link_libraries(video_analyser
    PRIVATE
        analyser_lib
        files
)

if(VIDEO_ANALYSER_BUILD_TESTS)
    include(FetchContent)
    FetchContent_Declare(
        googletest
        URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
    )
    # For Windows: prevent overriding the parent project's compiler/linker settings.
    set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
    FetchContent_MakeAvailable(googletest)

    enable_testing()
    add_executable(test_suite
        tests/irregularity_test.cpp
        tests/enums_test.cpp
    )
    target_link_libraries(test_suite
        PRIVATE
            GTest::gtest_main
            analyser_lib
    )
    include(GoogleTest)
    gtest_discover_tests(test_suite)
endif()
......@@ -30,7 +30,7 @@ clean:
rm -rf docs/latex
format:
$(FORMATTER) $(FORMAT_OPT) src/*.cpp src/*.h src/lib/*.cpp src/lib/*.h tests/*.cpp
$(FORMATTER) $(FORMAT_OPT) src/*.cpp src/*.h src/lib/*.cpp src/lib/*.hpp tests/*.cpp src/*.hpp
docs:
$(DOCS_GEN) docs/Doxyfile && $(OPEN) docs/html/index.html
......
......@@ -6,59 +6,59 @@
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include "lib/time.hpp"
using namespace cv;
using namespace std;
using json = nlohmann::json;
namespace fs = std::filesystem;
void extractIrregularityImagesForAudio(std::string outputPath, const std::string videoPath, json irregularityFileInput,
json &irregularityFileOutput2) {
const string G_IMG_FOLDER_PATH = "fromAudioAnalyser";
void extract_irregularity_images_for_audio(std::string output_path, const std::string video_path,
json irregularity_file_input, json &irregularity_file_output) {
// Make fromAudioAnalyser folder
int capsDirectory = fs::create_directory(outputPath + "fromAudioAnalyser/");
int caps_directory = fs::create_directory(output_path + G_IMG_FOLDER_PATH + "/");
// Open video
cv::VideoCapture videoCapture(videoPath);
cv::VideoCapture videoCapture(video_path);
// Compute video length in milliseconds
int frameCount = videoCapture.get(CAP_PROP_FRAME_COUNT);
int fps = videoCapture.get(CAP_PROP_FPS);
int videoLenghtMS = (frameCount / fps) * 1000 + std::round((float)((frameCount % fps) * 1000) / fps);
for (int i = 0; i < irregularityFileInput["Irregularities"].size(); i++) {
for (int i = 0; i < irregularity_file_input["Irregularities"].size(); i++) {
// Declare output image frame
cv::Mat frame;
std::string framePath;
std::string frame_path;
// Extract TimeLabel from input JSON
std::string timeLabel = irregularityFileInput["Irregularities"][i]["TimeLabel"];
std::string time_label = irregularity_file_input["Irregularities"][i]["TimeLabel"];
// Obtain time measures from JSON
int h = stoi(timeLabel.substr(0, 2));
int min = stoi(timeLabel.substr(3, 2));
int sec = stoi(timeLabel.substr(6, 2));
int ms = stoi(timeLabel.substr(9, 3));
std::string safeTimeLabel = timeLabel;
safeTimeLabel[2] = '-';
safeTimeLabel[5] = '-';
safeTimeLabel[8] = '-';
int h = stoi(time_label.substr(0, 2));
int min = stoi(time_label.substr(3, 2));
int sec = stoi(time_label.substr(6, 2));
int ms = stoi(time_label.substr(9, 3));
// Compute the Irregularity instant in milliseconds
int irrInstMS = ms + sec * 1000 + min * 60000 + h * 3600000;
int irr_time_in_ms = ms + sec * 1000 + min * 60000 + h * 3600000;
std::string safe_time_label = getTimeLabel(irr_time_in_ms, "-");
// Compute the frame number corresponding to the Irregularity
int irrFrame = std::round((float)(irrInstMS / 1000) * fps);
int irr_frame = std::round((float)(irr_time_in_ms / 1000) * fps);
try {
framePath = outputPath + "fromAudioAnalyser/AudioIrregularity_" + safeTimeLabel + ".jpg";
videoCapture.set(CAP_PROP_POS_FRAMES, irrFrame);
frame_path = output_path + G_IMG_FOLDER_PATH + "/AudioIrregularity_" + safe_time_label + ".jpg";
videoCapture.set(CAP_PROP_POS_FRAMES, irr_frame);
videoCapture >> frame;
cv::imwrite(framePath, frame);
cv::imwrite(frame_path, frame);
// Append Irregularity information to JSON
boost::uuids::uuid uuid = boost::uuids::random_generator()();
irregularityFileOutput2["Irregularities"] +=
{{"IrregularityID", irregularityFileInput["Irregularities"][i]["IrregularityID"]},
irregularity_file_output["Irregularities"] +=
{{"IrregularityID", irregularity_file_input["Irregularities"][i]["IrregularityID"]},
{"Source", "a"},
{"TimeLabel", timeLabel},
{"ImageURI", framePath}};
{"TimeLabel", time_label},
{"ImageURI", frame_path}};
} catch (cv::Exception e) {
std::cout << "\033[0;31mTimeLabel error for Audio Analyser Irregularity " << i << "." << std::endl;
}
......
#include "Irregularity.h"
#include <boost/lexical_cast.hpp>
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include <boost/uuid/uuid_io.hpp>
#include "Irregularity.hpp"
Irregularity::Irregularity(const Irregularity& other)
: id(other.id), source(other.source), time_label(other.time_label), type(other.type) {}
......
/**
* @file Irregularity.h
* @file Irregularity.hpp
* @author Matteo Spanio (dev2@audioinnova.com)
* @brief Header file containing the Irregularity class
* @version 1.0
......@@ -10,10 +10,13 @@
*/
#ifndef IRREGULARITY_H
#define IRREGULARITY_H
#include <boost/lexical_cast.hpp>
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <nlohmann/json.hpp>
#include "enums.h"
#include "enums.hpp"
using std::string;
using json = nlohmann::json;
......
#include "IrregularityFile.h"
#include <algorithm>
#include <exception>
#include <iterator>
#include <memory>
#include "IrregularityFile.hpp"
IrregularityFile::IrregularityFile(std::optional<uint16_t> offset) : offset_(offset) {}
......
/**
* @file IrregularityFile.h
* @file IrregularityFile.hpp
* @author Matteo Spanio (dev2@audioinnova.com)
* @brief Header file containing the IrregularityFile class
* @version 1.0
......@@ -11,11 +11,15 @@
#ifndef IRREGULARITY_FILE_H
#define IRREGULARITY_FILE_H
#include <algorithm>
#include <exception>
#include <iterator>
#include <memory>
#include <nlohmann/json.hpp>
#include <optional>
#include <vector>
#include "Irregularity.h"
#include "Irregularity.hpp"
using json = nlohmann::json;
......
#include "TimeLabel.h"
#include "TimeLabel.hpp"
TimeLabel::TimeLabel(/* args */) {}
......
/**
* @file colors.h
* @file colors.hpp
* @author Matteo Spanio (dev2@audioinnova.com)
* @brief Header file containing a set of ANSI escape codes to print colored
* text in the terminal.
......@@ -17,20 +17,20 @@
#ifndef COLORS_H
#define COLORS_H
#include <stdlib.h>
#include <string>
namespace colors {
using Color = const char*;
constexpr Color PURPLE = "\033[95m";
constexpr Color CYAN = "\033[96m";
constexpr Color DARK_CYAN = "\033[36m";
constexpr Color BLUE = "\033[94m";
constexpr Color GREEN = "\033[92m";
constexpr Color YELLOW = "\033[93m";
constexpr Color RED = "\033[91m";
constexpr Color WHITE = "\033[97m";
constexpr Color BOLD = "\033[1m";
constexpr Color UNDERLINE = "\033[4m";
constexpr Color END = "\033[0m";
} // namespace colors
using std::string;
string PURPLE = "\033[95m";
string CYAN = "\033[96m";
string DARK_CYAN = "\033[36m";
string BLUE = "\033[94m";
string GREEN = "\033[92m";
string YELLOW = "\033[93m";
string RED = "\033[91m";
string BOLD = "\033[1m";
string UNDERLINE = "\033[4m";
string END = "\033[0m";
#endif // COLORS_H
\ No newline at end of file
// Implementation of videoanalyser::core::Frame, a thin cv::Mat wrapper with
// chainable image-processing helpers (interface documented in core.hpp).
#include "core.hpp"
namespace videoanalyser {
namespace core {
// Constructors forward to the matching cv::Mat constructor. Note that cv::Mat
// copies are shallow: the new Frame shares the pixel buffer with the source.
Frame::Frame() : cv::Mat() {}
Frame::Frame(const cv::Mat& m) : cv::Mat(m) {}
Frame::Frame(const Frame& f) : cv::Mat(f) {}
// Assignment reuses cv::Mat's shallow (shared-buffer) assignment.
Frame& Frame::operator=(const Mat& m) {
Mat::operator=(m);
return *this;
}
Frame& Frame::operator=(const Frame& f) {
Mat::operator=(f);
return *this;
}
// Deep copy: unlike the copy constructor, this duplicates the pixel data.
Frame Frame::clone() const { return Frame(cv::Mat::clone()); }
// Shrink the image in place via one Gaussian pyramid step.
// NOTE(review): cv::pyrDown only supports halving; a factor other than 2 makes
// the requested size inconsistent and pyrDown will throw — confirm callers
// only ever pass 2 (detection.cpp does).
Frame& Frame::downsample(int factor) {
cv::pyrDown(*this, *this, cv::Size(size().width / factor, size().height / factor));
return *this;
}
// In-place color-space conversion; code is a cv::COLOR_* constant.
Frame& Frame::convert_color(int code) {
cv::cvtColor(*this, *this, code);
return *this;
}
// Per-pixel comparison against f: channel 0 of the returned frame is 0 where
// any of the three channels differ and 255 where all three match.
// NOTE(review): channels 1 and 2 keep the cloned source values, so the result
// is not strictly black-and-white — confirm downstream code reads channel 0
// only. Assumes both frames are CV_8UC3 and identically sized (TODO confirm;
// otherwise f.at<> reads out of range).
Frame Frame::difference(Frame& f) {
Frame diff = this->clone();
for (int i = 0; i < this->rows; i++) {
for (int j = 0; j < this->cols; j++) {
if (f.at<cv::Vec3b>(i, j)[0] != this->at<cv::Vec3b>(i, j)[0] ||
f.at<cv::Vec3b>(i, j)[1] != this->at<cv::Vec3b>(i, j)[1] ||
f.at<cv::Vec3b>(i, j)[2] != this->at<cv::Vec3b>(i, j)[2]) {
// Different pixels
diff.at<cv::Vec3b>(i, j)[0] = 0;
} else {
// Identical pixels
diff.at<cv::Vec3b>(i, j)[0] = 255;
}
}
}
return diff;
}
// In-place crop: extract a rect_size patch centered at center (sub-pixel
// accurate via cv::getRectSubPix).
Frame& Frame::crop(cv::Size rect_size, cv::Point2f center) {
cv::getRectSubPix(*this, rect_size, center, *this);
return *this;
}
// In-place affine warp with cubic interpolation, keeping the original size.
Frame& Frame::warp(cv::Mat rotationMatrix) {
cv::warpAffine(*this, *this, rotationMatrix, this->size(), cv::INTER_CUBIC);
return *this;
}
// Split an interlaced frame into its two fields: pair.first holds the odd
// rows, pair.second the even rows; each field has rows/2 rows.
// NOTE(review): assumes CV_8UC3 data and an even row count — with an odd
// number of rows the final even row writes one past the field's last row.
std::pair<Frame, Frame> Frame::deinterlace() const {
Frame odd_frame(cv::Mat(this->rows / 2, this->cols, CV_8UC3));
Frame even_frame(cv::Mat(this->rows / 2, this->cols, CV_8UC3));
int i_odd_frame = 0;
int i_even_frame = 0;
for (int i = 0; i < this->rows; i++) {
for (int j = 0; j < this->cols; j++) {
if (i % 2 == 0) {
even_frame.at<cv::Vec3b>(i_even_frame, j)[0] = this->at<cv::Vec3b>(i, j)[0];
even_frame.at<cv::Vec3b>(i_even_frame, j)[1] = this->at<cv::Vec3b>(i, j)[1];
even_frame.at<cv::Vec3b>(i_even_frame, j)[2] = this->at<cv::Vec3b>(i, j)[2];
} else {
odd_frame.at<cv::Vec3b>(i_odd_frame, j)[0] = this->at<cv::Vec3b>(i, j)[0];
odd_frame.at<cv::Vec3b>(i_odd_frame, j)[1] = this->at<cv::Vec3b>(i, j)[1];
odd_frame.at<cv::Vec3b>(i_odd_frame, j)[2] = this->at<cv::Vec3b>(i, j)[2];
}
}
// Advance the row cursor of whichever field row i was copied into.
if (i % 2 == 0) {
i_even_frame++;
} else {
i_odd_frame++;
}
}
return std::make_pair(odd_frame, even_frame);
}
} // namespace core
} // namespace videoanalyser
\ No newline at end of file
/**
 * @file core.hpp
 * @author Matteo Spanio (dev2@audioinnova.com)
 * @brief This file contains the core functionalities of the project.
 * @version 1.2
 * @date 2023-06-03
 *
 * @copyright Copyright (c) 2023
 *
 */
#ifndef CORE_H
#define CORE_H
#include <opencv2/calib3d.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/features2d.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/xfeatures2d.hpp>
#include <optional>
#include <string>
#include <tuple>
#include <variant>
#include <vector>
namespace videoanalyser {
// Poor-man's expected<T, E>: a Result holds either the computed T or an
// Error message string. Inspect with std::holds_alternative / std::get.
using Error = std::string;
template <typename T>
using Result = std::variant<T, Error>;
namespace core {
/**
 * @class Frame
 * @brief Class that extends the OpenCV Mat class, adding some useful methods
 * frequently used in the project.
 *
 * NOTE(review): Frame inherits publicly from cv::Mat, whose destructor is
 * non-virtual — avoid deleting a Frame through a cv::Mat pointer.
 */
class Frame : public cv::Mat {
public:
// Constructors and assignment are shallow, like cv::Mat (shared pixel buffer).
Frame();
Frame(const cv::Mat& m);
Frame(const Frame& f);
Frame& operator=(const cv::Mat& m);
Frame& operator=(const Frame& f);
// Deep copy; shadows cv::Mat::clone() to return a Frame instead of a cv::Mat.
Frame clone() const;
/**
 * @brief Downsample the image by a given factor.
 *
 * @param factor The factor by which the image will be downsampled.
 * @return Frame& The downsampled image.
 */
Frame& downsample(int factor);
/**
 * @brief Convert the image to a given color space.
 *
 * @param code The code of the color space to which the image will be
 * converted.
 * @return Frame& The converted image.
 */
Frame& convert_color(int code);
/**
 * @brief Compute the number of different pixels between two frames.
 *
 * @param f The frame to compare with.
 * @return A black and white frame, where black pixels represent a
 * difference, while white pixels represent an equality.
 */
Frame difference(Frame& f);
/**
 * @brief Crop the image to a given size, centered in a given point.
 *
 * @param rect_size The size of the cropped image.
 * @param center The center of the cropped image.
 * @return Frame& The cropped image.
 */
Frame& crop(cv::Size rect_size, cv::Point2f center);
/**
 * @brief Warp the image using a given rotation matrix.
 *
 * @param rotationMatrix The rotation matrix used to warp the image.
 * @return Frame& The warped image.
 */
Frame& warp(cv::Mat rotationMatrix);
/**
 * @brief Deinterlace the image, returning two images, one containing the
 * odd lines and the other containing the even lines.
 *
 * @return std::pair<Frame, Frame> The two images containing the odd and
 * even lines (first = odd rows, second = even rows).
 */
std::pair<Frame, Frame> deinterlace() const;
};
} // namespace core
} // namespace videoanalyser
#endif // CORE_H
\ No newline at end of file
#include "detection.hpp"
namespace videoanalyser {
namespace detection {
using namespace cv;
namespace {
// Convert a GeneralizedHough match vector (x, y, scale, angle) into a
// RotatedRect in full-image coordinates. offsetX/offsetY shift the match back
// from a processing sub-area to the whole frame; processingScale undoes any
// prior downsampling (e.g. 2 when detection ran on a half-sized image).
cv::RotatedRect get_rectangle_from_match(const cv::Vec4f& positions, int width, int height, int offsetX, int offsetY,
float processingScale) {
cv::RotatedRect rr;
// NOTE(review): rrpts is filled by rr.points() below but never read —
// presumably leftover from debugging; confirm before removing.
cv::Point2f rrpts[4];
cv::Point2f pos(positions[0] + offsetX, positions[1] + offsetY);
float scale = positions[2];
float angle = positions[3];
rr.center = pos * processingScale;
rr.size = cv::Size2f(width * scale * processingScale, height * scale * processingScale);
rr.angle = angle;
rr.points(rrpts);
return rr;
}
using ShapeMatch = std::tuple<std::vector<cv::Vec4f>, std::vector<cv::Vec4f>, cv::Mat, cv::Mat>;
/**
 * @brief Run the Generalized Hough detector twice — once for small positive
 * rotations (0..3 degrees) and once for small negative rotations (expressed
 * as 357..360 degrees) — adaptively raising the position threshold until at
 * most one shape remains, or until no shape can be found at all.
 *
 * BUG FIX vs. the previous version: the negative-angle pass scanned
 * positive_votes when tracking the maximum match score, so its adaptive
 * threshold was driven by the positive pass' results; it also started from
 * whatever threshold the positive pass had left on the algorithm even though
 * its counters were reset. Both passes now run the same self-contained loop.
 *
 * @param alg Configured GeneralizedHoughGuil instance (template already set).
 * @param pos_thresh Starting position-votes threshold.
 * @param processing_area Grayscale sub-image to search.
 * @return ShapeMatch tuple: (positive positions, negative positions,
 *         positive votes, negative votes).
 */
ShapeMatch detect_shape(cv::Ptr<cv::GeneralizedHoughGuil> alg, int pos_thresh, cv::Mat processing_area) {
// One adaptive detection pass over [min_angle, max_angle]; results are
// written into the caller-provided positions/votes buffers.
auto detect_in_angle_range = [&](double min_angle, double max_angle, std::vector<cv::Vec4f>& positions,
cv::Mat& votes) {
alg->setMinAngle(min_angle);
alg->setMaxAngle(max_angle);
alg->setPosThresh(pos_thresh);  // each pass starts from the caller's threshold
int num_prev_matches = 0;
int threshold_increment = 0;
int max_match_score = 0;
while (true) {
alg->detect(processing_area, positions, votes);
int current_matches = positions.size();
if (current_matches == 1 || (current_matches == 0 && num_prev_matches == 0)) {
// Either the single most interesting shape was isolated, or no
// shape can be found with these parameters at all.
break;
} else if (current_matches == 0 && num_prev_matches > 0) {
// The last increment overshot: step the threshold back by one and
// keep every shape detectable at that level.
alg->setPosThresh(pos_thresh + threshold_increment - 1);
alg->detect(processing_area, positions, votes);
break;
}
num_prev_matches = current_matches;
// Track the highest position vote seen so far (votes are packed in
// triples: position, scale, angle).
for (int j = 0; j < votes.cols / 3; j++) {
if (votes.at<int>(3 * j) > max_match_score) max_match_score = votes.at<int>(3 * j);
}
if (current_matches > 10) {
threshold_increment += 5;  // speed up when there are too many matches
} else if (max_match_score - (pos_thresh + threshold_increment) > 100) {
threshold_increment += 100;  // jump when a few matches score far above the threshold
} else {
threshold_increment++;
}
alg->setPosThresh(pos_thresh + threshold_increment);
}
};
cv::Mat positive_votes, negative_votes;
std::vector<cv::Vec4f> positive_positions, negative_positions;
// Process shapes with positive angles
detect_in_angle_range(0, 3, positive_positions, positive_votes);
// Process shapes with negative angles
detect_in_angle_range(357, 360, negative_positions, negative_votes);
return std::make_tuple(positive_positions, negative_positions, positive_votes, negative_votes);
}
// Load the grayscale model image that corresponds to a scene element type.
// Returns an Error for element types without a template on disk.
Result<core::Frame> get_template_image(ElementType element_type) {
if (element_type == ElementType::TAPE) {
return core::Frame(cv::imread("input/readingHead.png", cv::IMREAD_GRAYSCALE));
}
if (element_type == ElementType::CAPSTAN) {
return core::Frame(cv::imread("input/capstanBERIO058prova.png", cv::IMREAD_GRAYSCALE));
}
return Error("Invalid element type");
}
/**
 * @fn Result<Roi> find_roi_ght(core::Frame image, SceneElement element_to_find)
 * @brief Find the model in the scene using the Generalized Hough Transform.
 * It returns the best match. The best matches for positive and negative
 * angles are computed separately; if there is more than one shape, the one
 * with the highest score is chosen. If several share the same highest score,
 * the latest is arbitrarily chosen.
 *
 * For information about the Generalized Hough Guil usage see the tutorial
 * at https://docs.opencv.org/4.7.0/da/ddc/tutorial_generalized_hough_ballard_guil.html
 *
 * @param image the frame in which the template will be searched with the
 * Generalized Hough Transform
 * @param element_to_find the SceneElement struct containing the parameters
 * for the Generalized Hough Transform
 * @return Result<Roi> the best-matching ROI, or an Error when the template
 * cannot be loaded or no match is found
 */
Result<Roi> find_roi_ght(core::Frame image, SceneElement element_to_find) {
// Save a grayscale version of image in gray_image
core::Frame gray_image = core::Frame(image).convert_color(cv::COLOR_BGR2GRAY);
// downsample the frame in half pixels for performance reasons
core::Frame halved_gray_image = core::Frame(gray_image).clone().downsample(2);
// Get input shape in grayscale and downsample it in half pixels
Result<core::Frame> template_image_result = get_template_image(element_to_find.type);
if (std::holds_alternative<Error>(template_image_result)) {
return Error("Error while loading template image:" + std::get<Error>(template_image_result));
}
core::Frame template_image = std::get<core::Frame>(template_image_result).downsample(2);
// Configure the detector; scale is allowed to vary +-10% around the
// template, angles are restricted inside detect_shape.
cv::Ptr<cv::GeneralizedHoughGuil> ght = cv::createGeneralizedHoughGuil();
ght->setMinDist(element_to_find.min_dist);
ght->setLevels(360);
ght->setDp(2);
ght->setMaxBufferSize(1000);
ght->setAngleStep(1);
ght->setAngleThresh(element_to_find.threshold.angle);
ght->setMinScale(0.9);
ght->setMaxScale(1.1);
ght->setScaleStep(0.01);
ght->setScaleThresh(element_to_find.threshold.scale);
ght->setCannyLowThresh(150);
ght->setCannyHighThresh(240);
ght->setTemplate(template_image);
// Restrict the search to the region where the element is expected:
// TAPE uses the central lower half of the HALVED image, CAPSTAN uses the
// lower-right quarter of the FULL-SIZE image — note the different scales,
// which get_rectangle_from_match compensates with processingScale below.
cv::Rect processing_area;
cv::Mat processing_image;
if (element_to_find.type == ElementType::TAPE) {
processing_area = cv::Rect(halved_gray_image.cols / 4, halved_gray_image.rows / 2, halved_gray_image.cols / 2,
halved_gray_image.rows / 2);
processing_image = halved_gray_image(processing_area);
} else if (element_to_find.type == ElementType::CAPSTAN) {
processing_area = cv::Rect(image.cols * 3 / 4, image.rows / 2, image.cols / 4, image.rows / 2);
processing_image = gray_image(processing_area);
}
auto [positive_positions, negative_positions, posPos, posNeg] =
detect_shape(ght, element_to_find.threshold.pos, processing_image);
// Pick the highest-scoring match in each of the two vote buffers.
// NOTE(review): detect_shape reads votes with stride 3 (at<int>(3 * j))
// while these loops scan every element — confirm which layout is intended.
double max_score_for_positive_match = 0, max_score_for_negative_match = 0;
int index_max_positive_score = 0, index_max_negative_score = 0;
cv::Mat positive_matches_scores = posPos;
cv::Mat negative_matches_scores = posNeg;
for (int i = 0; i < positive_matches_scores.size().width; i++) {
if (positive_matches_scores.at<int>(i) >= max_score_for_positive_match) {
max_score_for_positive_match = positive_matches_scores.at<int>(i);
index_max_positive_score = i;
}
}
for (int i = 0; i < negative_matches_scores.size().width; i++) {
if (negative_matches_scores.at<int>(i) >= max_score_for_negative_match) {
max_score_for_negative_match = negative_matches_scores.at<int>(i);
index_max_negative_score = i;
}
}
// Build a full-image RotatedRect for the best positive and negative match.
// The offsets/scale mirror the processing_area choices above; the CAPSTAN
// constants (-22/-92, +11/+46) trim the template's border from the match.
cv::RotatedRect roi_pos;
cv::RotatedRect roi_neg;
if (element_to_find.type == ElementType::TAPE) {
if (positive_positions.size() > 0) {
roi_pos = get_rectangle_from_match(positive_positions[index_max_positive_score], template_image.cols,
template_image.rows, halved_gray_image.cols / 4,
halved_gray_image.rows / 2, 2);
}
if (negative_positions.size() > 0) {
roi_neg = get_rectangle_from_match(negative_positions[index_max_negative_score], template_image.cols,
template_image.rows, halved_gray_image.cols / 4,
halved_gray_image.rows / 2, 2);
}
} else if (element_to_find.type == ElementType::CAPSTAN) {
if (positive_positions.size() > 0) {
roi_pos =
get_rectangle_from_match(positive_positions[index_max_positive_score], template_image.cols - 22,
template_image.rows - 92, image.cols * 3 / 4 + 11, image.rows / 2 + 46, 1);
}
if (negative_positions.size() > 0) {
roi_neg =
get_rectangle_from_match(negative_positions[index_max_negative_score], template_image.cols - 22,
template_image.rows - 92, image.cols * 3 / 4 + 11, image.rows / 2 + 46, 1);
}
}
// Keep whichever of the two candidate ROIs scored higher; fail when neither
// pass produced a match.
cv::RotatedRect result;
if (max_score_for_positive_match > 0) {
if (max_score_for_negative_match > 0) {
result = max_score_for_positive_match > max_score_for_negative_match ? roi_pos : roi_neg;
} else {
result = roi_pos;
}
} else if (max_score_for_negative_match > 0) {
result = roi_neg;
} else {
return Error("No match found");
}
// For TAPE the detected shape is the reading head: shift the ROI just below
// it and size it proportionally to the match width, where the tape runs.
if (element_to_find.type == ElementType::TAPE) {
cv::Vec4f tape_position(result.center.x,
result.center.y + result.size.height / 2 + 20 * (result.size.width / 200), 1,
result.angle);
result = get_rectangle_from_match(tape_position, result.size.width, 50 * (result.size.width / 200), 0, 0, 1);
}
return result;
}
// Locate element_to_find in image with SURF keypoint matching + homography,
// returning a fixed-scale, unrotated ROI centered on the matched template.
// NOTE(review): only the CAPSTAN trim constants (+10/+45, -20/-90) are
// applied, so TAPE support through this path looks untested — confirm.
Result<Roi> find_roi_surf(core::Frame image, SceneElement element_to_find) {
// Step 1: Detect the keypoints using SURF Detector, compute the
// descriptors
int min_hessian = 100;
Ptr<xfeatures2d::SURF> detector = xfeatures2d::SURF::create(min_hessian);
std::vector<cv::KeyPoint> keypoints_object, keypoints_scene;
cv::Mat descriptors_object, descriptors_scene;
// Save a grayscale version of image in gray_image
core::Frame gray_image = core::Frame(image).convert_color(cv::COLOR_BGR2GRAY);
// downsample the frame in half pixels for performance reasons
// NOTE(review): halved_gray_image is only used by drawMatches below, while
// keypoints_scene come from the full-size gray_image — the debug rendering
// mixes coordinate scales; confirm this is intentional.
core::Frame halved_gray_image = core::Frame(gray_image).clone().downsample(2);
Result<core::Frame> template_image_result = get_template_image(element_to_find.type);
if (std::holds_alternative<Error>(template_image_result)) {
return Error("Error while loading template image:" + std::get<Error>(template_image_result));
}
core::Frame template_image = std::get<core::Frame>(template_image_result);
detector->detectAndCompute(template_image, cv::noArray(), keypoints_object, descriptors_object);
detector->detectAndCompute(gray_image, cv::noArray(), keypoints_scene, descriptors_scene);
// Step 2: Matching descriptor vectors with a FLANN based matcher
// Since SURF is a floating-point descriptor NORM_L2 is used
cv::Ptr<cv::DescriptorMatcher> matcher = cv::DescriptorMatcher::create(cv::DescriptorMatcher::FLANNBASED);
std::vector<std::vector<cv::DMatch>> knn_matches;
matcher->knnMatch(descriptors_object, descriptors_scene, knn_matches, 2);
//-- Filter matches using the Lowe's ratio test
const float RATIO_THRESH = 0.75f;
std::vector<cv::DMatch> good_matches;
for (size_t i = 0; i < knn_matches.size(); i++) {
if (knn_matches[i][0].distance < RATIO_THRESH * knn_matches[i][1].distance) {
good_matches.push_back(knn_matches[i][0]);
}
}
// Draw matches (debug visualization; img_matches is never displayed or
// returned — NOTE(review): candidate for removal, confirm it is unused)
cv::Mat img_matches;
cv::drawMatches(template_image, keypoints_object, halved_gray_image, keypoints_scene, good_matches, img_matches,
cv::Scalar::all(-1), cv::Scalar::all(-1), std::vector<char>(),
cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
// Localize the object
std::vector<cv::Point2f> obj;
std::vector<cv::Point2f> scene;
for (size_t i = 0; i < good_matches.size(); i++) {
// Get the keypoints from the good matches
obj.push_back(keypoints_object[good_matches[i].queryIdx].pt);
scene.push_back(keypoints_scene[good_matches[i].trainIdx].pt);
}
// NOTE(review): cv::findHomography requires at least 4 point pairs; with
// fewer good matches this throws / returns an empty matrix instead of a
// clean Error — consider guarding.
cv::Mat H = cv::findHomography(obj, scene, cv::RANSAC);
// Get the corners from the image_1 ( the object to be "detected" )
std::vector<cv::Point2f> obj_corners(4);
obj_corners[0] = cv::Point2f(0, 0);
obj_corners[1] = cv::Point2f((float)template_image.cols, 0);
obj_corners[2] = cv::Point2f((float)template_image.cols, (float)template_image.rows);
obj_corners[3] = cv::Point2f(0, (float)template_image.rows);
std::vector<cv::Point2f> scene_corners(4);
cv::perspectiveTransform(obj_corners, scene_corners, H);
// Find the center of the projected template as the average of its corners
float capstanX = (scene_corners[0].x + scene_corners[1].x + scene_corners[2].x + scene_corners[3].x) / 4;
float capstanY = (scene_corners[0].y + scene_corners[1].y + scene_corners[2].y + scene_corners[3].y) / 4;
// In the following there are two alterations to cut the first 20
// horizontal pixels and the first 90 vertical pixels from the found
// rectangle: +10 in X for centering and -20 in width +45 in Y for
// centering and -90 in height
cv::Vec4f positionCapstan(capstanX + 10, capstanY + 45, 1, 0);
return get_rectangle_from_match(positionCapstan, template_image.cols - 20, template_image.rows - 90, 0, 0, 1);
}
} // anonymous namespace
// Public entry point: dispatch the ROI search to the requested backend.
Result<Roi> find_roi(core::Frame image, Algorithm algorithm, SceneElement element_to_find) {
if (algorithm == Algorithm::GHT) {
return find_roi_ght(image, element_to_find);
}
if (algorithm == Algorithm::SURF) {
return find_roi_surf(image, element_to_find);
}
return Error("Invalid algorithm");
}
} // namespace detection
} // namespace videoanalyser
\ No newline at end of file
/**
 * @file detection.hpp
 * @author Matteo Spanio (dev2@audioinnova.com)
 * @brief This file contains the functions used to detect the objects in a scene.
 * @version 1.2
 * @date 2023-06-04
 *
 * @copyright Copyright (c) 2023
 *
 */
#ifndef VIDEOANALYSER_DETECTION_H
#define VIDEOANALYSER_DETECTION_H
#include <opencv2/calib3d.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/features2d.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/xfeatures2d.hpp>
#include <variant>
#include <vector>
#include "core.hpp"
namespace videoanalyser {
namespace detection {
// Kinds of scene elements the detector can look for.
// NOTE(review): READING_HEAD has no template in the implementation and is
// rejected as an invalid element type — confirm whether it is still planned.
enum class ElementType { TAPE, CAPSTAN, READING_HEAD };
// Available detection backends: Generalized Hough Transform or SURF features.
enum class Algorithm { GHT, SURF };
/**
 * @struct DetectionThreshold
 * @brief Struct containing the threshold values used to detect a shape.
 */
struct DetectionThreshold {
float percentual; /**< The minimum percentage of different pixels for
considering the current frame under the ROI as a
potential Irregularity */
int angle; /**< The angle votes threshold for the detection of the object */
int scale; /**< The scale votes threshold for the detection of the object */
int pos; /**< The position votes threshold for the detection of the object
*/
};
/**
 * @typedef Roi
 * @brief The region of interest of a scene, expressed as a rotated rectangle
 * (center, size and rotation angle) in full-image coordinates.
 */
using Roi = cv::RotatedRect;
/**
 * @struct SceneElement
 * @brief A scene element is an object that can be detected in a scene, such as a
 * tape or a capstan.
 *
 */
struct SceneElement {
ElementType type; /**< The type of the object */
int min_dist; /**< The minimum distance between the centers of the detected
objects for the detection of the reading head */
DetectionThreshold threshold; /**< the threshold values used to detect the object */
};
/**
 * @fn Result<Roi> find_roi(core::Frame image, Algorithm algorithm, SceneElement element_to_find)
 * @brief Looks for a shape in a frame.
 *
 * Find a shape in a frame using the specified algorithm.
 *
 * @param image The frame in which the object will be searched.
 * @param algorithm The algorithm to use for the detection.
 * @param element_to_find The object to find.
 * @return Result<Roi> A Result object containing the ROI of the object if found, otherwise an error.
 */
Result<Roi> find_roi(core::Frame image, Algorithm algorithm, SceneElement element_to_find);
} // namespace detection
} // namespace videoanalyser
#endif // VIDEOANALYSER_DETECTION_H
\ No newline at end of file
#include "enums.h"
#include <stdexcept>
#include "enums.hpp"
/**
 * @brief Convert a Source value to its single-letter string code.
 *
 * @param source The source to convert.
 * @return "a" for Audio, "v" for Video, "b" for Both.
 * @throws std::invalid_argument if source is not a known enumerator.
 */
std::string sourceToString(Source source) {
    switch (source) {
    case Source::Audio:
        return "a";
    case Source::Video:
        return "v";
    case Source::Both:
        return "b";
    default:
        throw std::invalid_argument("Invalid Source");
    }
}
/**
 * @brief Parse a single-letter string code into a Source value.
 *
 * Inverse of sourceToString().
 *
 * @param source "a" (Audio), "v" (Video) or "b" (Both).
 * @return The corresponding Source enumerator.
 * @throws std::invalid_argument for any other string.
 */
Source sourceFromString(std::string source) {
    if (source == "a")
        return Source::Audio;
    else if (source == "v")
        return Source::Video;
    else if (source == "b")
        return Source::Both;
    else
        throw std::invalid_argument("Invalid Source");
}
/**
 * @brief Convert an IrregularityType to its short string code.
 *
 * @param type The irregularity type to convert.
 * @return The short code used in reports/labels (e.g. "sp" for SPLICE).
 * @throws std::invalid_argument if type is not a known enumerator.
 *
 * NOTE(review): both SPEED and SPEED_AND_EQUALIZATION map to "ssv", so this
 * mapping is not invertible — irregularityTypeFromString("ssv") always yields
 * SPEED. Confirm the intended code for SPEED_AND_EQUALIZATION.
 */
std::string irregularityTypeToString(IrregularityType type) {
    switch (type) {
    case IrregularityType::BRANDS_ON_TAPE:
        return "b";
    case IrregularityType::SPLICE:
        return "sp";
    case IrregularityType::START_OF_TAPE:
        return "sot";
    case IrregularityType::ENDS_OF_TAPE:
        return "eot";
    case IrregularityType::DAMAGED_TAPE:
        return "da";
    case IrregularityType::DIRT:
        return "di";
    case IrregularityType::MARKS:
        return "m";
    case IrregularityType::SHADOWS:
        return "s";
    case IrregularityType::WOW_AND_FLUTTER:
        return "wf";
    case IrregularityType::PLAY_PAUSE_STOP:
        return "pps";
    case IrregularityType::SPEED:
        return "ssv";
    case IrregularityType::EQUALIZATION:
        return "esv";
    case IrregularityType::SPEED_AND_EQUALIZATION:
        return "ssv";
    case IrregularityType::BACKWARD:
        return "sb";
    default:
        throw std::invalid_argument("Invalid IrregularityType");
    }
}
/**
 * @brief Parse a short string code into an IrregularityType.
 *
 * Inverse of irregularityTypeToString() (except for the "ssv" collision,
 * see below).
 *
 * @param type The short code, e.g. "sp" for SPLICE.
 * @return The corresponding IrregularityType enumerator.
 * @throws std::invalid_argument for any unknown code.
 */
IrregularityType irregularityTypeFromString(std::string type) {
    if (type == "b")
        return IrregularityType::BRANDS_ON_TAPE;
    else if (type == "sp")
        return IrregularityType::SPLICE;
    else if (type == "sot")
        return IrregularityType::START_OF_TAPE;
    else if (type == "eot")
        return IrregularityType::ENDS_OF_TAPE;
    else if (type == "da")
        return IrregularityType::DAMAGED_TAPE;
    else if (type == "di")
        return IrregularityType::DIRT;
    else if (type == "m")
        return IrregularityType::MARKS;
    else if (type == "s")
        return IrregularityType::SHADOWS;
    else if (type == "wf")
        return IrregularityType::WOW_AND_FLUTTER;
    else if (type == "pps")
        return IrregularityType::PLAY_PAUSE_STOP;
    else if (type == "ssv")
        return IrregularityType::SPEED;
    else if (type == "esv")
        return IrregularityType::EQUALIZATION;
    // NOTE(review): unreachable — "ssv" is already matched above and returns
    // SPEED, so SPEED_AND_EQUALIZATION can never be produced. Needs a
    // distinct code in irregularityTypeToString to fix.
    else if (type == "ssv")
        return IrregularityType::SPEED_AND_EQUALIZATION;
    else if (type == "sb")
        return IrregularityType::BACKWARD;
    else
        throw std::invalid_argument("Invalid IrregularityType");
}
\ No newline at end of file
/**
 * @file enums.hpp
* @author Matteo Spanio (dev2@audioinnova.com)
* @brief A collection of enums and functions to handle them.
* @version 1.0
......@@ -19,6 +19,7 @@
*/
#ifndef ENUMS_H
#define ENUMS_H
#include <stdexcept>
#include <string>
/**
......@@ -29,7 +30,7 @@
* both.
*
*/
/** @brief Whether an irregularity concerns the audio, the video, or both. */
enum class Source { Audio, Video, Both };
/**
* @enum IrregularityType
......@@ -57,7 +58,7 @@ enum Source { Audio, Video, Both };
* analyser.
*
*/
enum IrregularityType {
enum class IrregularityType {
BRANDS_ON_TAPE,
SPLICE,
START_OF_TAPE,
......
#include "files.h"
#include <iostream>
#include "files.hpp"
using std::cout, std::endl, std::cerr, std::ofstream, std::ios;
......
/**
 * @file files.hpp
* @author Matteo Spanio (dev2@audioinnova.com)
* @brief A collection of functions to handle files.
* @version 1.0
......
#include "io.hpp"
namespace videoanalyser {
namespace io {
void pprint(std::string msg, Color color) { std::cout << color << msg << END << std::endl; }
// Report `msg` on stderr in red, then terminate the whole process with a
// failure status. Never returns.
void print_error_and_exit(std::string msg) {
    std::cerr << RED << msg << END << std::endl;
    exit(EXIT_FAILURE);
}
} // namespace io
} // namespace videoanalyser
\ No newline at end of file
/**
 * @file io.hpp
 * @author Matteo Spanio (dev2@audioinnova.com)
 * @brief Header file containing a set of functions related to the input/output
 * @version 1.2
 * @date 2023-06-03
 *
 * @copyright Copyright (c) 2023
 *
 */
#ifndef PRETTYIO_H
#define PRETTYIO_H
#include <iostream>
#include <string>
#include "colors.hpp"
// NOTE(review): a using-directive in a header injects every name from the
// `colors` namespace into all translation units that include this file.
// Prefer qualifying Color/WHITE explicitly instead — but confirm before
// changing, since includers may already rely on the unqualified names.
using namespace colors;
namespace videoanalyser {
/**
 * @namespace io
 * @brief Namespace containing all the functions related to the input/output
 * operations.
 */
namespace io {
/**
 * @fn void pprint(std::string msg, Color color = WHITE)
 * @brief Print a colored message on stdout in the terminal.
 *
 * @param msg The message to print.
 * @param color The color of the message. Default is WHITE.
 */
void pprint(std::string msg, Color color = WHITE);
/**
 * @fn void print_error_and_exit(std::string msg)
 * @brief Print a message in red on stderr and exit the program with
 * EXIT_FAILURE. Never returns.
 *
 * @param msg The message to print.
 */
void print_error_and_exit(std::string msg);
} // namespace io
} // namespace videoanalyser
#endif // PRETTYIO_H
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment