# EditorConfig is awesome: https://EditorConfig.org
# top-most EditorConfig file
root = true
[*]
indent_style = space
indent_size = 4
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = false
insert_final_newline = false
\ No newline at end of file
@@ -6,4 +6,6 @@ bin/
build/
log.txt
venv/
__pycache__/
\ No newline at end of file
__pycache__/
docs/html
docs/latex
\ No newline at end of file
[submodule "docs/doxygen-awesome-css"]
path = docs/doxygen-awesome-css
url = https://github.com/jothepro/doxygen-awesome-css.git
CMAKE_MINIMUM_REQUIRED(VERSION 3.2)
PROJECT(video_analyser)
SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
SET(CMAKE_CXX_STANDARD 23)
# include(FetchContent)
# FetchContent_Declare(
# googletest
# URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
# )
# # For Windows: Prevent overriding the parent project's compiler/linker settings
# SET(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
# FetchContent_MakeAvailable(googletest)
LINK_DIRECTORIES(/usr/local/lib)
add_library(analyser_lib
src/lib/colors.h
src/lib/time.cpp
src/lib/time.h
src/lib/enums.h
src/lib/enums.cpp
src/lib/Irregularity.h
src/lib/Irregularity.cpp
src/lib/IrregularityFile.h
src/lib/IrregularityFile.cpp
src/lib/TimeLabel.h
src/lib/TimeLabel.cpp
src/utility.h
src/utility.cpp
)
add_library(files
src/lib/files.h
src/lib/files.cpp
)
FIND_PACKAGE(OpenCV REQUIRED)
FIND_PACKAGE(nlohmann_json 3.2.0 REQUIRED)
FIND_PACKAGE(Boost COMPONENTS program_options REQUIRED)
INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIRS})
INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIR})
ADD_EXECUTABLE(video_analyser ./src/main.cpp)
TARGET_LINK_LIBRARIES(video_analyser
${OpenCV_LIBRARIES}
nlohmann_json::nlohmann_json
${Boost_PROGRAM_OPTIONS_LIBRARY}
analyser_lib
files
)
# enable_testing()
# ADD_EXECUTABLE(
# test_suite
# tests/irregularity_test.cpp
# tests/enums_test.cpp
# )
# TARGET_LINK_LIBRARIES(
# test_suite
# GTest::gtest_main
# analyser_lib
# ${OpenCV_LIBRARIES}
# ${Boost_PROGRAM_OPTIONS_LIBRARY}
# nlohmann_json::nlohmann_json
# )
# include(GoogleTest)
# gtest_discover_tests(test_suite)
CMAKE_MINIMUM_REQUIRED(VERSION 3.2)
PROJECT(video_analyser)
SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
SET(CMAKE_CXX_STANDARD 20)
include(FetchContent)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
SET(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
LINK_DIRECTORIES(/usr/local/lib)
add_library(analyser_lib
src/lib/colors.hpp
src/lib/core.hpp
src/lib/core.cpp
src/lib/detection.hpp
src/lib/detection.cpp
src/lib/io.hpp
src/lib/io.cpp
src/lib/time.cpp
src/lib/time.hpp
src/lib/enums.hpp
src/lib/enums.cpp
src/lib/Irregularity.hpp
src/lib/Irregularity.cpp
src/lib/IrregularityFile.hpp
src/lib/IrregularityFile.cpp
src/lib/TimeLabel.hpp
src/lib/TimeLabel.cpp
src/lib/files.hpp
src/lib/files.cpp
src/utility.hpp
src/utility.cpp
)
FIND_PACKAGE(OpenCV REQUIRED)
FIND_PACKAGE(nlohmann_json 3.2.0 REQUIRED)
FIND_PACKAGE(Boost COMPONENTS program_options REQUIRED)
INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIRS})
INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIR})
ADD_EXECUTABLE(video_analyser ./src/main.cpp)
TARGET_LINK_LIBRARIES(analyser_lib
${OpenCV_LIBRARIES}
nlohmann_json::nlohmann_json
${Boost_PROGRAM_OPTIONS_LIBRARY}
)
TARGET_LINK_LIBRARIES(video_analyser
${OpenCV_LIBRARIES}
nlohmann_json::nlohmann_json
${Boost_PROGRAM_OPTIONS_LIBRARY}
analyser_lib
)
enable_testing()
ADD_EXECUTABLE(
test_suite
tests/irregularity_test.cpp
tests/enums_test.cpp
tests/core_test.cpp
tests/files_test.cpp
)
TARGET_LINK_LIBRARIES(
test_suite
GTest::gtest_main
analyser_lib
${OpenCV_LIBRARIES}
${Boost_PROGRAM_OPTIONS_LIBRARY}
nlohmann_json::nlohmann_json
)
include(GoogleTest)
gtest_discover_tests(test_suite)
UNAME := $(shell uname)
FORMATTER = clang-format
DOCS_GEN = doxygen
FORMAT_OPT = -i -style="{BasedOnStyle: google,IndentWidth: 4,ColumnLimit: 120}"
TARGET = video_analyser
ifeq ($(UNAME), Linux)
OPEN = xdg-open
endif
ifeq ($(UNAME), Darwin)
OPEN = open
endif
ifeq ($(UNAME), Windows)
OPEN = start
endif
.PHONY: all clean test docs run build
build:
	cmake -S . -B build
	cmake --build build
test:
	cd build && ctest
	./bin/test_suite
clean:
	rm -rf build
	rm -rf bin
	rm -rf docs/html
	rm -rf docs/latex
format:
	$(FORMATTER) $(FORMAT_OPT) src/*.cpp src/*.h src/lib/*.cpp src/lib/*.hpp tests/*.cpp src/*.hpp
docs:
	$(DOCS_GEN) docs/Doxyfile && $(OPEN) docs/html/index.html
all:
	cd build && cmake .. && make
run:
	./bin/audio_analyser
	./bin/video_analyser
@@ -2,157 +2,57 @@
[![MPAI CAE-ARP](https://img.shields.io/badge/MPAI%20CAE--ARP-gray?style=for-the-badge&logo=AppleMusic&logoColor=cyan&link=https://mpai.community/standards/mpai-cae/about-mpai-cae/)](https://mpai.community/standards/mpai-cae/about-mpai-cae/)
[TOC]
## Description
Implements the *Video Analyser* AIM of the [MPAI CAE-ARP](https://mpai.community/standards/mpai-cae/about-mpai-cae/#Figure2) Technical Specification, providing:
* 2 Irregularity Files;
* Irregularity Images.
## Getting started
The *Video Analyser* is written in C++23. It relies on OpenCV to produce the Irregularity Images and on the Boost C++ Libraries to create the command line interface and generate UUIDs. The configuration file is read with [nlohmann/json](https://github.com/nlohmann/json).
## Installation
[Boost C++ Libraries](https://www.boost.org) are required for creating the command line interface (with [Boost.Program_options](https://www.boost.org/doc/libs/1_81_0/doc/html/program_options.html)) and generating UUIDs (with [Uuid](https://www.boost.org/doc/libs/1_81_0/libs/uuid/doc/uuid.html)).
You can install them following [official instructions](https://www.boost.org/doc/libs/1_81_0/more/getting_started/unix-variants.html) (Boost version 1.81.0).
The Boost `program_options` library must be built separately, following [these additional instructions](https://www.boost.org/doc/libs/1_81_0/more/getting_started/unix-variants.html#easy-build-and-install).
[OpenCV](https://docs.opencv.org/4.x/index.html) is required for producing the Irregularity Images. You can install it following the [official instructions](https://docs.opencv.org/3.4/d0/db2/tutorial_macos_install.html).
## Quick start
Finally, [nlohmann/json](https://github.com/nlohmann/json) is required for reading the configuration file.
Installation instructions are under the "Integration" section.
In the root folder there is a CMakeLists.txt file that specifies the configuration for CMake:
Clone the repository:
```
CMAKE_MINIMUM_REQUIRED(VERSION 3.2)
PROJECT(video_analyser)
SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
SET(CMAKE_CXX_STANDARD 23)
LINK_DIRECTORIES(/usr/local/lib)
FIND_PACKAGE(OpenCV REQUIRED)
FIND_PACKAGE(nlohmann_json 3.2.0 REQUIRED)
FIND_PACKAGE(Boost COMPONENTS program_options REQUIRED)
INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIRS})
INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIR})
ADD_EXECUTABLE(video_analyser ./src/script.cpp)
TARGET_LINK_LIBRARIES(video_analyser ${OpenCV_LIBRARIES} nlohmann_json::nlohmann_json ${Boost_PROGRAM_OPTIONS_LIBRARY})
git clone https://gitlab.dei.unipd.it/mpai/video-analyzer.git
```
`LINK_DIRECTORIES` specifies the path to the installed libraries.
Make sure that `${Boost_PROGRAM_OPTIONS_LIBRARY}` is listed under `TARGET_LINK_LIBRARIES`.
Once the libraries are installed, you can build the *Video Analyser* by moving to the `build` directory and invoking the CMake commands:
Install the dependencies:
- [Boost C++ Libraries](https://www.boost.org);
- [OpenCV](https://docs.opencv.org/4.x/index.html);
- [nlohmann/json](https://github.com/nlohmann/json).
Build the project from the root directory:
```
cd /path/to/video/analyser/build
cmake ..
make
make build
```
## Usage
Once the program is built, you should customise the configuration file `config.json`.
There are four required parameters of interest:
1. `WorkingPath` that specifies the working path where all input files are stored and where all output files will be saved;
2. `FilesName` that specifies the name of the preservation files to be considered;
3. `Brands` that specifies whether the tape presents brands on its surface;
4. `Speed` that specifies the speed at which the tape was read.
There are also other required parameters which deeply influence the behaviour of the *Video Analyser* and, therefore, ***should not be modified unless you know exactly what you are doing***. They are:
1. `TapeThresholdPercentual` that specifies the minimum percentage of different pixels for considering the current frame under the tape ROI as a potential Irregularity;
2. `CapstanThresholdPercentual` that specifies the minimum percentage of different pixels for considering the current frame under the capstan ROI as a potential Irregularity;
3. `MinDist` that specifies the minimum distance between the centers of the detected objects for the detection of the reading head;
4. `AngleThresh` that specifies the angle votes threshold for the detection of the reading head;
5. `ScaleThresh` that specifies the scale votes threshold for the detection of the reading head;
6. `PosThresh` that specifies the position votes threshold for the detection of the reading head;
7. `MinDistCapstan` that specifies the minimum distance between the centers of the detected objects for the detection of the capstan;
8. `AngleThreshCapstan` that specifies the angle votes threshold for the detection of the capstan;
9. `ScaleThreshCapstan` that specifies the scale votes threshold for the detection of the capstan;
10. `PosThreshCapstan` that specifies the position votes threshold for the detection of the capstan.
To execute the script without issues, the inner structure of the `WorkingPath` directory shall be as follows:
Add the Preservation Files to the `data` directory following this structure:
```
.
├── AccessCopyFiles
│ └── ...
data
├── PreservationAudioFile
│ ├── File1.wav
│ ├── File2.wav
│ └── ...
├── PreservationAudioVisualFile
│ ├── File1.mp4
│ ├── File2.mp4
│ └── ...
├── PreservationMasterFiles
│ └── ...
└── temp
├── File1
│ ├── AudioAnalyser_IrregularityFileOutput1.json
│ ├── AudioAnalyser_IrregularityFileOutput2.json
│ ├── AudioBlocks
│ │ ├── AudioBlock1.wav
│ │ ├── AudioBlock2.wav
│ │ └── ...
│ ├── EditingList.json
│ ├── IrregularityImages
│ │ ├── IrregularityImage1.jpg
│ │ ├── IrregularityImage2.jpg
│ │ └── ...
│ ├── RestoredAudioFiles
│ │ ├── RestoredAudioFile1.wav
│ │ ├── RestoredAudioFile2.wav
│ │ └── ...
│ ├── TapeIrregularityClassifier_IrregularityFileOutput1.json
│ ├── TapeIrregularityClassifier_IrregularityFileOutput2.json
│ ├── VideoAnalyser_IrregularityFileOutput1.json
│ └── VideoAnalyser_IrregularityFileOutput2.json
├── File2
│ ├── AudioAnalyser_IrregularityFileOutput1.json
│ ├── AudioAnalyser_IrregularityFileOutput2.json
│ ├── AudioBlocks
│ │ ├── AudioBlock1.wav
│ │ ├── AudioBlock2.wav
│ │ └── ...
│ ├── EditingList.json
│ ├── IrregularityImages
│ │ ├── IrregularityImage1.jpg
│ │ ├── IrregularityImage2.jpg
│ │ └── ...
│ ├── RestoredAudioFiles
│ │ ├── RestoredAudioFile1.wav
│ │ ├── RestoredAudioFile2.wav
│ │ └── ...
│ ├── TapeIrregularityClassifier_IrregularityFileOutput1.json
│ ├── TapeIrregularityClassifier_IrregularityFileOutput2.json
│ ├── VideoAnalyser_IrregularityFileOutput1.json
│ └── VideoAnalyser_IrregularityFileOutput2.json
└── PreservationAudioVisualFile
├── File1.mp4
├── File2.mp4
└── ...
```
The `PreservationAudioFile` and `PreservationAudioVisualFile` directories contain the input of the ARP Workflow, while the `AccessCopyFiles` and `PreservationMasterFiles` directories contain its output. The `temp` directory is used to store all files exchanged between the AIMs within the Workflow.
Please note that:
* Corresponding input files shall have the same name;
* The names of the Irregularity Files given above are ***mandatory***.
With this structure, the `FilesName` parameter could be set to `File1` or `File2`.
You can now launch the *Video Analyser* from the command line by moving to the `bin` directory:
Run the project from the root directory:
```
cd /path/to/video/analyser/bin
./video_analyser
make run
```
Useful log information will be displayed during execution.
To enable integration in more complex workflows, it is also possible to launch the *Video Analyser* with command line arguments:
```
./video_analyser [-h] -w WORKING_PATH -f FILES_NAME -b BRANDS -s SPEED
```
If you use the `-h` flag:
## Generate the documentation
Along with the source code, the documentation of the *Video Analyser* is provided in the `docs` folder. The documentation is generated with [Doxygen](https://www.doxygen.nl/index.html) and can be accessed by opening the `index.html` file in the `docs/html` folder with a browser.
To generate the documentation, run the following command from the root folder:
```
./video_analyser -h
make docs
```
all instructions will be displayed.
Note that Doxygen must be installed on your machine.
## Support
If you require additional information or have any problem, you can contact us at:
@@ -173,12 +73,4 @@ This project takes advantage of the following libraries:
Developed with the [Visual Studio Code](https://code.visualstudio.com) IDE.
## License
This project is licensed under the [GNU GPL v3.0](https://www.gnu.org/licenses/gpl-3.0.html).
# TODO
This section refers to the code delivered by February 2023.
- To work with the "old" neural network (by Ilenya), the output images should reproduce the old "whole tape" crop: from the frame judged as interesting, extract an area as tall as the tape (roughly the height of the current rectangle) and as wide as the original frame (720px), then resize it to 224x224 as was done in the past. If the new neural network is used instead, no changes are needed;
- A resize step for the entire video should be implemented for inputs that do not conform to the PAL standard (currently taken for granted);
- Progressive videos, which do not require deinterlacing, should be handled (several steps in the code currently assume the input is interlaced);
\ No newline at end of file
This project is licensed under the [GNU GPL v3.0](https://www.gnu.org/licenses/gpl-3.0.html).
\ No newline at end of file
Subproject commit df83fbf22cfff76b875c13d324baf584c74e96d0
# Documentation
[TOC]
> There is no worse software than undocumented software.
![Meme](../img/meme-documentation.jpg)
## Generate the documentation
Along with the source code, the documentation of the *Video Analyser* is provided in the `docs` folder. The documentation is generated with [Doxygen](https://www.doxygen.nl/index.html) and can be accessed by opening the `index.html` file in the `docs/html` folder with a browser.
To generate the documentation, run the following command from the root folder:
```
make docs
```
Note that Doxygen must be installed on your machine.
## Write the documentation
Doxygen is a tool for generating documentation from annotated C++ sources, as well as other popular programming languages like C, Objective-C, C#, PHP, Java, Python, IDL (Corba, Microsoft, and UNO/OpenOffice flavors), Fortran, VHDL, Tcl, and to some extent D.
To access Doxygen superpowers you need to add comments to your code. Doxygen supports two types of comments: single-line and multi-line comments. Single-line comments start with `///` or `//!` and multi-line comments start with `/**` and end with `*/`. The following example shows how to use both types of comments:
```
/// This is a single-line comment.
//! This is also a single-line comment.
/**
* This is a multi-line comment.
*/
```
A sample of Doxygen commented class looks like this:
```
/**
 * @class SampleClass
 * @brief This is a sample class.
 *
 * This class is used to show how to comment a class with Doxygen.
 */
class SampleClass {
   private:
    int field1; /**< This is a sample field. */

   public:
    /**
     * @brief This is a sample constructor.
     *
     * This constructor is used to show how to comment a constructor with Doxygen.
     */
    SampleClass();

    /**
     * @brief This is a sample destructor.
     *
     * This destructor is used to show how to comment a destructor with Doxygen.
     */
    ~SampleClass();

    /**
     * @brief This is a sample method.
     *
     * This method is used to show how to comment a method with Doxygen.
     *
     * @param[in] param1 This is a sample parameter.
     * @param[in] param2 This is another sample parameter.
     * @return This is a sample return value.
     */
    int sampleMethod(int param1, int param2);
};
```
For more information about Doxygen, please refer to the [official documentation](https://www.doxygen.nl/manual/index.html).
# Style guide
The code is written following the C++20 standard, to ensure readability and maintainability. The code is formatted using [clang-format](https://clang.llvm.org/docs/ClangFormat.html) with the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html) as a reference.
To format the code, run the following command from the root folder:
```
make format
```
Note that clang-format must be installed on your machine.
In addition to the Google C++ Style Guide, the following rules are applied:
- when returning multiple values, use `std::tuple` or `std::pair` instead of passing by reference;
- when dealing with nullable values, use `std::optional`;
- avoid manipulating global variables; if you need to share data between different parts of the code, use dependency injection and pass the data as a parameter (a short sketch illustrating these rules follows).
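A minimal, self-contained sketch of these three rules; every name in it (`count_and_mean`, `find_label`, `Config`, `analyse`) is illustrative and not part of the project:
```cpp
#include <map>
#include <optional>
#include <string>
#include <tuple>
#include <vector>

// Return multiple values with std::tuple instead of output parameters.
std::tuple<int, double> count_and_mean(const std::vector<double>& samples) {
    double sum = 0.0;
    for (double s : samples) sum += s;
    int n = static_cast<int>(samples.size());
    return std::make_tuple(n, n > 0 ? sum / n : 0.0);
}

// Express a possibly-missing value with std::optional instead of a sentinel.
std::optional<std::string> find_label(const std::map<int, std::string>& labels, int key) {
    auto it = labels.find(key);
    if (it == labels.end()) return std::nullopt;
    return it->second;
}

// Share data through parameters (dependency injection) rather than through globals.
struct Config {
    double tape_threshold;
};
void analyse(const Config& config, const std::string& video_path);
```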
## Naming conventions
In C++, there are several naming conventions that are widely followed to improve code readability and maintainability. Although there is no strict standard enforced by the language, the following conventions are commonly used (a combined example follows the list):
1. Variable Names:
- Use descriptive and meaningful names that reflect the purpose of the variable.
- Prefer lowercase letters for variable names.
- Use underscores (_) to separate words in multi-word variable names.
- Avoid using single uppercase letters as variable names, especially as standalone variables.
Example: `int num_items;`
2. Function Names:
- Use verbs or verb phrases to describe actions or operations performed by the function.
- Prefer lowercase letters for function names.
- Use underscores (_) to separate words in multi-word function names.
- Use parentheses () for function parameters, even if they are empty.
Example: `void calculate_average();`
3. Class/Struct Names:
- Use noun phrases or nouns to describe the purpose or nature of the class/struct.
- Use an uppercase letter for each word (known as "PascalCase", i.e. upper camel case).
- Avoid abbreviations unless they are widely recognized.
Example: `class CustomerData;`
4. Constant Names:
- Use uppercase letters for constants.
- Use underscores (_) to separate words in multi-word constant names.
- Prefer meaningful and self-explanatory names for constants.
Example: `const int MAX_SIZE = 100;`
5. Global Variable Names:
- Avoid using global variables whenever possible. However, if necessary, prefix them with `g_` or use a namespace to indicate their global nature.
Example: `int g_global_variable;` or `namespace globals { int global_variable; }`
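A short example combining the conventions above; all names are illustrative:
```cpp
#include <vector>

// Constant name: uppercase with underscores.
const int MAX_SIZE = 100;

// If a global is unavoidable, wrap it in a namespace (or prefix it with g_).
namespace globals {
int frame_counter = 0;
}

// Class name: PascalCase noun phrase.
class CustomerData {
   private:
    // Variable name: lowercase with underscores.
    std::vector<double> purchase_amounts;

   public:
    // Function name: verb phrase, lowercase with underscores.
    double calculate_average() const {
        if (purchase_amounts.empty()) return 0.0;
        double sum = 0.0;
        for (double amount : purchase_amounts) sum += amount;
        return sum / purchase_amounts.size();
    }
};
```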
# Getting started
The *Video Analyser* is written in C++20. It relies on OpenCV to produce the Irregularity Images and on the Boost C++ Libraries to create the command line interface and generate UUIDs. The JSON files are read with [nlohmann/json](https://github.com/nlohmann/json).
To clone the repository, run the following command:
```
git clone https://gitlab.dei.unipd.it/mpai/video-analyzer.git
```
Since the documentation theme is included as a git submodule, you should clone the repository with the `--recursive` option:
```
git clone --recursive https://gitlab.dei.unipd.it/mpai/video-analyzer.git
```
If you have already cloned the repository without the `--recursive` option, you can run the following command to clone the submodule:
```
git submodule update --init --recursive
```
[TOC]
## Installation
[Boost C++ Libraries](https://www.boost.org) are required for creating the command line interface (with [Boost.Program_options](https://www.boost.org/doc/libs/1_81_0/doc/html/program_options.html)) and generating UUIDs (with [Uuid](https://www.boost.org/doc/libs/1_81_0/libs/uuid/doc/uuid.html)).
You can install them following [official instructions](https://www.boost.org/doc/libs/1_81_0/more/getting_started/unix-variants.html) (Boost version 1.81.0).
The Boost `program_options` library must be built separately, following [these additional instructions](https://www.boost.org/doc/libs/1_81_0/more/getting_started/unix-variants.html#easy-build-and-install).
[OpenCV](https://docs.opencv.org/4.x/index.html) is required for producing the Irregularity Images. You can install it following the [official instructions](https://docs.opencv.org/3.4/d0/db2/tutorial_macos_install.html).
To install the Boost `program_options` library and the build dependencies for OpenCV on Ubuntu, run the following command:
```
sudo apt update && sudo apt install libboost-program-options-dev git build-essential cmake g++ wget unzip python3 python3-pip libgtk-3-dev libavcodec-dev libavformat-dev libswscale-dev libv4l-dev libjpeg-dev libpng-dev python3-dev libavdevice-dev libdc1394-dev libgstreamer-opencv1.0-0 libavutil-dev ffmpeg
```
To compile OpenCV from source with all the optional libraries, run the following commands:
```
mkdir opencv_source && cd ./opencv_source && wget -O opencv.zip https://github.com/opencv/opencv/archive/4.5.4.zip && wget -O opencv_contrib.zip https://github.com/opencv/opencv_contrib/archive/4.5.4.zip && unzip opencv.zip && unzip opencv_contrib.zip && mkdir -p build && cd ./build && cmake -D OPENCV_GENERATE_PKGCONFIG=YES -D WITH_FFMPEG=ON -D WITH_V4L=ON -D WITH_PNG=ON -D WITH_GSTREAMER=ON -D BUILD_opencv_video=ON -D BUILD_opencv_videoio=ON -D OPENCV_ENABLE_NONFREE=ON -DOPENCV_EXTRA_MODULES_PATH=../opencv_contrib-4.5.4/modules ../opencv-4.5.4 && make -j4 && make install
```
Finally, [nlohmann/json](https://github.com/nlohmann/json) is required for reading the configuration file.
Installation instructions are under the "Integration" section.
In the root folder there is a `CMakeLists.txt` file that specifies the configuration for CMake. It defines:
- the minimum required version of CMake;
- the project name;
- the C++ standard version;
- the source files;
- the include directories;
- the libraries to link.
Once the libraries are installed, you can build the *Video Analyser* by moving to the `build` directory and invoking the CMake commands:
```
cd /path/to/video/analyser/build
cmake ..
make
```
or just run `make build` from the root folder.
### Docker
A Dockerfile is provided to build a Docker image with the *Video Analyser*.
To build the image, run the following command from the root folder:
```
docker build -t mpai-video-analyzer .
```
To run the container, run the following command:
```
docker run -it --rm -v /path/to/video/analyser:/app -v /path/to/your/data:/data mpai-video-analyzer /bin/bash
```
where `/path/to/video/analyser` is the path to the *Video Analyser* folder and `/path/to/your/data` is the path to the folder containing your data.
This will mount the *Video Analyser* folder in the container and start a bash shell, in which you can build the *Video Analyser* as described in the previous section. The advantage of using Docker is that you don't have to install the dependencies on your machine; you can build the *Video Analyser* entirely inside the container.
## Usage
Once the program is built, you should customise the configuration file `config/config.json`.
There are four required parameters of interest (a short sketch showing how the configuration can be read follows the parameter lists):
1. `WorkingPath` that specifies the working path where all input files are stored and where all output files will be saved;
2. `FilesName` that specifies the name of the preservation files to be considered;
3. `Brands` that specifies whether the tape presents brands on its surface;
4. `Speed` that specifies the speed at which the tape was read.
There are also other required parameters which deeply influence the behaviour of the *Video Analyser* and, therefore, ***should not be modified unless you know exactly what you are doing***. They are:
1. `TapeThresholdPercentual` that specifies the minimum percentage of different pixels for considering the current frame under the tape ROI as a potential Irregularity;
2. `CapstanThresholdPercentual` that specifies the minimum percentage of different pixels for considering the current frame under the capstan ROI as a potential Irregularity;
4. `AngleThresh` that specifies the angle votes threshold for the detection of the reading head;
5. `ScaleThresh` that specifies the scale votes threshold for the detection of the reading head;
6. `PosThresh` that specifies the position votes threshold for the detection of the reading head;
7. `MinDistCapstan` that specifies the minimum distance between the centers of the detected objects for the detection of the capstan;
8. `AngleThreshCapstan` that specifies the angle votes threshold for the detection of the capstan;
9. `ScaleThreshCapstan` that specifies the scale votes threshold for the detection of the capstan;
10. `PosThreshCapstan` that specifies the position votes threshold for the detection of the capstan.
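The snippet below is a minimal sketch of how such a configuration could be read with [nlohmann/json](https://github.com/nlohmann/json); it is not the project's actual loading code, and the value types used here (for instance `Brands` as a boolean) are assumptions:
```cpp
#include <fstream>
#include <iostream>
#include <string>

#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    // Open the configuration file (path as documented above).
    std::ifstream config_stream("config/config.json");
    if (!config_stream) {
        std::cerr << "Cannot open config/config.json" << std::endl;
        return 1;
    }
    json config = json::parse(config_stream);

    // The four user-facing parameters described above.
    std::string working_path = config["WorkingPath"];
    std::string files_name = config["FilesName"];
    bool brands = config["Brands"];    // assumed boolean
    double speed = config["Speed"];    // assumed numeric

    // One of the tuning parameters, normally left at its default value.
    double tape_threshold = config["TapeThresholdPercentual"];

    std::cout << "Analysing " << files_name << " in " << working_path
              << " (brands: " << brands << ", speed: " << speed
              << ", tape threshold: " << tape_threshold << ")" << std::endl;
    return 0;
}
```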
To execute the script without issues, the inner structure of the `WorkingPath` directory shall be as follows:
```
.
├── PreservationAudioFile
│ ├── File1.wav
│ ├── File2.wav
│ └── ...
├── PreservationAudioVisualFile
│ ├── File1.mp4
│ ├── File2.mp4
│ └── ...
└── temp
├── File1
│ ├── AudioAnalyser_IrregularityFileOutput1.json
│ ├── AudioAnalyser_IrregularityFileOutput2.json
│ ├── AudioBlocks
│ │ ├── AudioBlock1.wav
│ │ ├── AudioBlock2.wav
│ │ └── ...
│ ├── EditingList.json
│ ├── IrregularityImages
│ │ ├── IrregularityImage1.jpg
│ │ ├── IrregularityImage2.jpg
│ │ └── ...
│ ├── RestoredAudioFiles
│ │ ├── RestoredAudioFile1.wav
│ │ ├── RestoredAudioFile2.wav
│ │ └── ...
│ ├── TapeIrregularityClassifier_IrregularityFileOutput1.json
│ ├── TapeIrregularityClassifier_IrregularityFileOutput2.json
│ ├── VideoAnalyser_IrregularityFileOutput1.json
│ └── VideoAnalyser_IrregularityFileOutput2.json
└── File2
├── AudioAnalyser_IrregularityFileOutput1.json
└── ...
```
The `PreservationAudioFile` and `PreservationAudioVisualFile` directories contain the input of the ARP Workflow. The `temp` directory is used to store all files exchanged between the AIMs within the Workflow.
Please note that:
* Corresponding input files shall have the same name;
* The names of the Irregularity Files given above are ***mandatory***.
With this structure, the `FilesName` parameter could be set to `File1` or `File2`.
You can now launch the *Video Analyser* from the command line by moving to the `bin` directory:
```
cd /path/to/video/analyser/bin
./video_analyser
```
Useful log information will be displayed during execution.
To enable integration in more complex workflows, it is also possible to launch the *Video Analyser* with command line arguments:
```
./video_analyser [-h] -w WORKING_PATH -f FILES_NAME -b BRANDS -s SPEED
```
If you use the `-h` flag:
```
./video_analyser -h
```
the usage instructions will be displayed.
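The command line interface is built with [Boost.Program_options](https://www.boost.org/doc/libs/1_81_0/doc/html/program_options.html). The following is only a rough sketch of how such an interface can be declared; the long option names and value types are assumptions, since only the short flags above are documented:
```cpp
#include <iostream>
#include <string>

#include <boost/program_options.hpp>

namespace po = boost::program_options;

int main(int argc, char* argv[]) {
    po::options_description desc("Allowed options");
    desc.add_options()
        ("help,h", "print the usage instructions")
        ("working-path,w", po::value<std::string>()->required(), "working path with input and output files")
        ("files-name,f", po::value<std::string>()->required(), "name of the preservation files to consider")
        ("brands,b", po::value<std::string>()->required(), "whether the tape presents brands on its surface")
        ("speed,s", po::value<double>()->required(), "speed at which the tape was read");

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);

    // Handle -h before notify(), so missing required options do not abort the help output.
    if (vm.count("help")) {
        std::cout << desc << std::endl;
        return 0;
    }
    po::notify(vm);  // throws if a required option is missing

    std::cout << "Working path: " << vm["working-path"].as<std::string>() << std::endl;
    return 0;
}
```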
# General Requirements
The main goal of the MPAI-CAE ARP software is to take the audio and video input of an open-reel tape, analyse them, and produce as output classifications and, where needed, restorations.
# The input
As already said, the input consists of a video file and an audio file of an open-reel tape. The video file contains the video of the tape played back on the recorder, with the camera pointing at the capstan, the pinch roller and the reading head. The audio file contains the audio of the tape played back on the recorder, captured from the headphone output.
Focusing on the video analysis, the software should be able to detect tape irregularities, such as:
- Splices
- Brands on tape
- Start of tape
- End of tape
- Damaged tape
- Dirt
- Marks
- Shadows
- Wow and flutter
Most brands consist of the full name of the tape manufacturer, a logo, or tape model codes. The brand varies in size, shape, and colour depending on the tape used.
# Software Requirements
Given as input the video of an open-reel tape, the software should produce as output two irregularity files listing the irregularities found in the video and the irregularities found in the audio.
Irregularity files are JSON files that contain a list of irregularities. Each irregularity is a JSON object structured as follows (a small construction example is given after the schema):
```js
{
    "type": "object",
    "properties": {
        "IrregularityID": {
            "type": "string",
            "format": "uuid"
        },
        "Source": {
            "enum": ["a", "v", "b"]
        },
        "TimeLabel": {
            "type": "string",
            "pattern": "[0-9]{2}:[0-5][0-9]:[0-5][0-9]\\.[0-9]{3}"
        },
        "IrregularityType": {
            "enum": ["sp", "b", "sot", "eot", "da", "di", "m", "s", "wf", "pps", "ssv", "esv", "sb"]
        },
        "IrregularityProperties": {
            "type": "object",
            "properties": {
                "ReadingSpeedStandard": {
                    "enum": [0.9375, 1.875, 3.75, 7.5, 15, 30]
                },
                "ReadingEqualisationStandard": {
                    "enum": ["IEC", "IEC1", "IEC2"]
                },
                "WritingSpeedStandard": {
                    "enum": [0.9375, 1.875, 3.75, 7.5, 15, 30]
                },
                "WritingEqualisationStandard": {
                    "enum": ["IEC", "IEC1", "IEC2"]
                }
            }
        },
        "ImageURI": {
            "type": "string",
            "format": "uri"
        },
        "AudioFileURI": {
            "type": "string",
            "format": "uri"
        }
    }
}
```
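To make the schema concrete, the following is a minimal sketch (not the project's actual classes) that builds one irregularity entry and wraps it in an irregularity file, using nlohmann/json and Boost.Uuid as the project does; the pairing of type codes with irregularity names (for example `"sp"` for a splice) is an assumption based on the two lists above:
```cpp
#include <iostream>
#include <string>

#include <boost/lexical_cast.hpp>
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    // Build one irregularity entry that matches the schema above.
    boost::uuids::uuid id = boost::uuids::random_generator()();

    json irregularity = {
        {"IrregularityID", boost::lexical_cast<std::string>(id)},
        {"Source", "v"},                // "a" = audio, "v" = video, "b" = both (assumed meaning)
        {"TimeLabel", "00:01:23.456"},  // HH:MM:SS.mmm, as required by the pattern
        {"IrregularityType", "sp"},     // assumed to denote a splice
        {"ImageURI", "temp/File1/IrregularityImages/IrregularityImage1.jpg"}
    };

    // An irregularity file is a JSON object holding a list of such entries.
    json irregularity_file;
    irregularity_file["Irregularities"] = json::array();
    irregularity_file["Irregularities"].push_back(irregularity);

    std::cout << irregularity_file.dump(2) << std::endl;
    return 0;
}
```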
Here follows a partial list of requirements and their implementation status:
| N° | Done | Name | Priority | Description |
|:-:|:-:|---|:--:|--|
| 1 | ✔️ | Analyze single preservation file | high | The app should take as input a single PreservationVisualFile and analyze it |
| 2 | ❌️ | Analyze a list of files | medium | The app should take as input a list of files and analyze them in batch. |
| 3 | ✔ | Output the analysis results | high | The app should create as output 2 irregularity files, one containing only the irregularities from the video and the other also containing the audio irregularities from the Audio Analyser |
| 4 | ❌ | Irregularity file on demand | high | The app should accept as input a number that specifies the irregularity file required: if 1, return only the irregularities from the video in the irregularity file; if 2, return an irregularity file with irregularities from both the video and the Audio Analyser |
✔️ = done | 🚧 = in progress | ❌ = todo
<!-- HTML footer for doxygen 1.9.1-->
<!-- start footer part -->
<!--BEGIN GENERATE_TREEVIEW-->
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
<ul>
$navpath
<li class="footer">$generatedby <a href="https://www.doxygen.org/index.html"><img class="footer" src="$relpath^doxygen.svg" width="104" height="31" alt="doxygen"/></a> $doxygenversion </li>
</ul>
</div>
<!--END GENERATE_TREEVIEW-->
<!--BEGIN !GENERATE_TREEVIEW-->
<hr class="footer"/><address class="footer"><small>
$generatedby&#160;<a href="https://www.doxygen.org/index.html"><img class="footer" src="$relpath^doxygen.svg" width="104" height="31" alt="doxygen"/></a> $doxygenversion
</small></address>
<!--END !GENERATE_TREEVIEW-->
</body>
</html>
<!-- HTML header for doxygen 1.9.1-->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen $doxygenversion"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<!--BEGIN PROJECT_NAME--><title>$projectname: $title</title><!--END PROJECT_NAME-->
<!--BEGIN !PROJECT_NAME--><title>$title</title><!--END !PROJECT_NAME-->
<link href="$relpath^tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="$relpath^jquery.js"></script>
<script type="text/javascript" src="$relpath^dynsections.js"></script>
$treeview
$search
$mathjax
<link href="$relpath^$stylesheet" rel="stylesheet" type="text/css" />
$extrastylesheet
<script src="https://cdn.jsdelivr.net/npm/mermaid@10.0.2/dist/add-html-label-6e56ed67.min.js"></script>
<script type="text/javascript" src="$relpath^../doxygen-awesome-css/doxygen-awesome-darkmode-toggle.js"></script>
<script type="text/javascript" src="$relpath^../doxygen-awesome-css/doxygen-awesome-fragment-copy-button.js"></script>
<script type="text/javascript" src="$relpath^../doxygen-awesome-css/doxygen-awesome-paragraph-link.js"></script>
<script type="text/javascript" src="$relpath^../doxygen-awesome-css/doxygen-awesome-interactive-toc.js"></script>
<script type="text/javascript">
DoxygenAwesomeDarkModeToggle.init()
DoxygenAwesomeFragmentCopyButton.init()
DoxygenAwesomeParagraphLink.init()
DoxygenAwesomeInteractiveToc.init()
DoxygenAwesomeInteractiveToc.topOffset = 45
</script>
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<!--BEGIN TITLEAREA-->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<!--BEGIN PROJECT_LOGO-->
<td id="projectlogo"><img alt="Logo" src="$relpath^$projectlogo"/></td>
<!--END PROJECT_LOGO-->
<!--BEGIN PROJECT_NAME-->
<td id="projectalign" style="padding-left: 0.5em;">
<div id="projectname">$projectname
<!--BEGIN PROJECT_NUMBER-->&#160;<span id="projectnumber">$projectnumber</span><!--END PROJECT_NUMBER-->
</div>
<!--BEGIN PROJECT_BRIEF--><div id="projectbrief">$projectbrief</div><!--END PROJECT_BRIEF-->
</td>
<!--END PROJECT_NAME-->
<!--BEGIN !PROJECT_NAME-->
<!--BEGIN PROJECT_BRIEF-->
<td style="padding-left: 0.5em;">
<div id="projectbrief">$projectbrief</div>
</td>
<!--END PROJECT_BRIEF-->
<!--END !PROJECT_NAME-->
<!--BEGIN DISABLE_INDEX-->
<!--BEGIN SEARCHENGINE-->
<td>$searchbox</td>
<!--END SEARCHENGINE-->
<!--END DISABLE_INDEX-->
</tr>
</tbody>
</table>
</div>
<!--END TITLEAREA-->
<!-- end header part -->
#ifndef FORAUDIOANALYSER_H
#define FORAUDIOANALYSER_H
#include <filesystem>
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <nlohmann/json.hpp>
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include "lib/time.hpp"
#include "lib/core.hpp"
using namespace cv;
using namespace std;
using json = nlohmann::json;
namespace fs = std::filesystem;
namespace va = videoanalyser;
const string G_IMG_FOLDER_PATH = "fromAudioAnalyser";
va::Result<int> extract_irregularity_images_for_audio(std::string output_path, const std::string video_path,
json irregularity_file_input, json &irregularity_file_output) {
// Make fromAudioAnalyser folder
int caps_directory = fs::create_directory(output_path + G_IMG_FOLDER_PATH + "/");
// Open video
cv::VideoCapture videoCapture(video_path);
// Get the video frame rate
int fps = videoCapture.get(CAP_PROP_FPS);
for (int i = 0; i < irregularity_file_input["Irregularities"].size(); i++) {
// Declare output image frame
cv::Mat frame;
std::string frame_path;
// Extract TimeLabel from input JSON
std::string time_label = irregularity_file_input["Irregularities"][i]["TimeLabel"];
int irr_time_in_ms = time_label_to_ms(time_label);
std::string safe_time_label = getTimeLabel(irr_time_in_ms, "-");
// Compute the frame number corresponding to the Irregularity
int irr_frame = std::round((float)irr_time_in_ms / 1000.0f * fps);  // avoid integer division so sub-second precision is kept
try {
frame_path = output_path + G_IMG_FOLDER_PATH + "/AudioIrregularity_" + safe_time_label + ".jpg";
videoCapture.set(CAP_PROP_POS_FRAMES, irr_frame);
videoCapture >> frame;
cv::imwrite(frame_path, frame);
void extractIrregularityImagesForAudio(std::string outputPath, std::string videoPath, json irregularityFileInput, json &irregularityFileOutput2) {
// Make fromAudioAnalyser folder
int capsDirectory = fs::create_directory(outputPath + "fromAudioAnalyser/");
// Open video
cv::VideoCapture videoCapture(videoPath);
// Compute video length in milliseconds
int frameCount = videoCapture.get(CAP_PROP_FRAME_COUNT);
int fps = videoCapture.get(CAP_PROP_FPS);
int videoLenghtMS = (frameCount / fps) * 1000 + std::round((float)((frameCount % fps) * 1000) / fps);
for (int i = 0; i < irregularityFileInput["Irregularities"].size(); i++) {
// Declare output image frame
cv::Mat frame;
std::string framePath;
// Extract TimeLabel from input JSON
std::string timeLabel = irregularityFileInput["Irregularities"][i]["TimeLabel"];
// Obtain time measures from JSON
int h = stoi(timeLabel.substr(0, 2));
int min = stoi(timeLabel.substr(3, 2));
int sec = stoi(timeLabel.substr(6, 2));
int ms = stoi(timeLabel.substr(9, 3));
std::string safeTimeLabel = timeLabel;
safeTimeLabel[2] = '-';
safeTimeLabel[5] = '-';
safeTimeLabel[8] = '-';
// Compute the Irregularity instant in milliseconds
int irrInstMS = ms + sec*1000 + min*60000 + h*3600000;
// Compute the frame number corresponding to the Irregularity
int irrFrame = std::round((float)(irrInstMS/1000)*fps);
try {
framePath = outputPath + "fromAudioAnalyser/AudioIrregularity_" + safeTimeLabel + ".jpg";
videoCapture.set(CAP_PROP_POS_FRAMES, irrFrame);
videoCapture >> frame;
cv::imwrite(framePath, frame);
// Append Irregularity information to JSON
boost::uuids::uuid uuid = boost::uuids::random_generator()();
irregularityFileOutput2["Irregularities"] += {{
"IrregularityID", irregularityFileInput["Irregularities"][i]["IrregularityID"]
}, {
"Source", "a"
}, {
"TimeLabel", timeLabel
}, {
"ImageURI", framePath
}
};
} catch (cv::Exception e) {
std::cout << "\033[0;31mTimeLabel error for Audio Analyser Irregularity " << i << "." << std::endl;
}
}
}
\ No newline at end of file
// Append Irregularity information to JSON
boost::uuids::uuid uuid = boost::uuids::random_generator()();
irregularity_file_output["Irregularities"] +=
{{"IrregularityID", irregularity_file_input["Irregularities"][i]["IrregularityID"]},
{"Source", "a"},
{"TimeLabel", time_label},
{"ImageURI", frame_path}};
} catch (const cv::Exception& e) {
return va::Error("TimeLabel error for Audio Analyser Irregularity " + std::to_string(i));
}
}
return va::Result<int>(0);
}
#endif // FORAUDIOANALYSER_H
\ No newline at end of file
#include <boost/lexical_cast.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include "Irregularity.h"
#include "Irregularity.hpp"
Irregularity::Irregularity(const Irregularity& other)
: id(other.id), source(other.source), time_label(other.time_label), type(other.type) {}
Irregularity::Irregularity(Irregularity&& other) noexcept
: id(std::move(other.id)),
source(other.source),
time_label(std::move(other.time_label)),
type(std::move(other.type)) {}
Irregularity::Irregularity(Source source, string time_label, IrregularityType type, string image_URI)
{
Irregularity::Irregularity(Source source, string time_label) {
this->id = boost::uuids::random_generator()();
this->source = source;
this->time_label = time_label;
this->type = type;
this->image_URI = image_URI;
this->type = std::nullopt;
}
Irregularity::~Irregularity() {}
Irregularity::Irregularity(Source source, string time_label, IrregularityType type) {
this->id = boost::uuids::random_generator()();
this->source = source;
this->time_label = time_label;
this->type = type;
}
json Irregularity::toJSON() {
json Irregularity::to_JSON() const {
json j;
j["IrregularityID"] = boost::lexical_cast<string>(this->id);
j["Source"] = sourceToString(this->source);
j["TimeLabel"] = this->time_label;
j["IrregularityType"] = irregularityTypeToString(this->type);
if (!this->image_URI.empty())
j["ImageURI"] = this->image_URI;
if (this->type.has_value()) j["IrregularityType"] = irregularityTypeToString(this->type.value());
if (this->image_URI.has_value()) j["ImageURI"] = this->image_URI.value();
if (this->audio_URI.has_value()) j["AudioURI"] = this->audio_URI.value();
return j;
}
Irregularity Irregularity::fromJSON(json j) {
Irregularity Irregularity::from_JSON(const json& j) {
Source source = sourceFromString(j["Source"]);
string time_label = j["TimeLabel"];
IrregularityType type = irregularityTypeFromString(j["IrregularityType"]);
return Irregularity(
sourceFromString(j["Source"]),
j["TimeLabel"],
irregularityTypeFromString(j["IrregularityType"]),
j["ImageURI"]
);
return Irregularity(source, time_label, type);
}
Source Irregularity::get_source() const { return this->source; }
string Irregularity::get_time_label() const { return this->time_label; }
std::optional<IrregularityType> Irregularity::get_type() const { return this->type; }
boost::uuids::uuid Irregularity::get_id() const { return this->id; }
std::optional<string> Irregularity::get_audio_URI() const { return this->audio_URI; }
Irregularity& Irregularity::set_audio_URI(string audio_URI) {
this->audio_URI = audio_URI;
return *this;
}
std::optional<string> Irregularity::get_image_URI() const { return this->image_URI; }
Irregularity& Irregularity::set_image_URI(string image_URI) {
this->image_URI = image_URI;
return *this;
}
#ifndef IRREGULARITY_H
#define IRREGULARITY_H
#include <boost/uuid/uuid.hpp>
#include <nlohmann/json.hpp>
#include "enums.h"
using std::string;
using json = nlohmann::json;
/**
* @brief an irregularity of the tape detected by the system
*
*/
struct Irregularity
{
boost::uuids::uuid id;
Source source;
string time_label;
IrregularityType type;
string image_URI;
Irregularity(Source source, string time_label, IrregularityType type, string image_URI);
~Irregularity();
json toJSON();
static Irregularity fromJSON(json j);
};
#endif // IRREGULARITY_H
\ No newline at end of file