update

fe415fcf · Matteo · 7f2bb3a1 · fe415fcf · fe415fcf · fe415fcf
Commit fe415fcf authored Apr 24, 2023 by Matteo
--- a/README.md
+++ b/README.md
 # Audio Analyzer

+[![MPAI CAE-ARP](https://img.shields.io/badge/MPAI%20CAE--ARP-gray?style=for-the-badge&logo=AppleMusic&logoColor=cyan&link=https://mpai.community/standards/mpai-cae/about-mpai-cae/)](https://mpai.community/standards/mpai-cae/about-mpai-cae/)
+
+Implements the Technical Specification of [MPAI CAE-ARP](https://mpai.community/standards/mpai-cae/about-mpai-cae/#Figure2) *Audio Analyser* AIM, providing:
+- 2 Irregularity Files
+- Audio Files
+
 # TODO

 - [x] calculate the video/audio offset

 - [ ] Read the input file(s?) and generate a list of audio files
- [ ] Split each file different channels
- [ ] extract silence from each channel
- [ ] generate an irregularity for each silence found
- [ ] save the list of irregularities as an irregularity file
+- [x] Split each file into different channels
+- [x] extract silence from each channel
+- [x] generate an irregularity for each silence found
+- [x] save the list of irregularities as an irregularity file

- [ ] get the irregularity file from video analyzer
+- [x] get the irregularity file from video analyzer

- [ ] merge the irregularity files
- [ ] extract the audio from every irregularity
+- [x] merge the irregularity files
+- [x] extract the audio from every irregularity
 - [ ] for each audio irregularity, make a classification
- [ ] save everything in a single irregularity file
+- [x] save everything in a single irregularity file

 Sample irregularityFile from Audio to Video Analyzer:
 ```json
@@ -30,12 +36,12 @@ Sample irregularityFile from Audio to Video Analyzer:
        {
            "IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925",
            "Source": "a",
-            "TimeLabel": "00:00:00.000",
+            "TimeLabel": "00:00:02.000"
        },
        {
            "IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925",
            "Source": "a",
-            "TimeLabel": "00:00:00.000",
+            "TimeLabel": "00:00:05.000"
        }
    ]
 }
@@ -48,17 +54,17 @@ Sample irregularityFile from Video to Audio Analyzer:
        {
            "IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925",
            "Source": "v",
-            "TimeLabel": "00:00:00.000",
+            "TimeLabel": "00:00:10.000"
        },
        {
            "IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925",
            "Source": "v",
-            "TimeLabel": "00:00:00.000",
+            "TimeLabel": "00:00:20.000"
        },
        {
            "IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925",
            "Source": "v",
-            "TimeLabel": "00:00:00.000",
+            "TimeLabel": "00:00:30.000"
        }
    ]
 }

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,6 +2,7 @@
 name = "audio-analyzer"
 version = "1.0.0"
 description = "MPAI CAE-ARP Audio Analyser"
+repository = "https://gitlab.dei.unipd.it/mpai/audio-analyzer.git"
 authors = ["Matteo Spanio <dev2@audioinnova.com>"]
 license = "GPLv3"
 readme = "README.md"

--- a/src/audio_analyzer/cli.py
+++ b/src/audio_analyzer/cli.py
@@ -7,8 +7,8 @@ from mpai_cae_arp.types.irregularity import IrregularityFile, Source
 from mpai_cae_arp.files import File, FileType
 from mpai_cae_arp.io import prettify, Style

-from . import segment_finder as sf
-from . import classifier as cl
+import audio_analyzer.segment_finder as sf
+import audio_analyzer.classifier as cl


 def get_args() -> tuple[str | None, str | None]:

--- a/src/audio_analyzer/client.py
+++ b/src/audio_analyzer/client.py
@@ -9,8 +9,8 @@ channels = {
    "AudioAnalyser": grpc.insecure_channel("[::]:50051"),
    "VideoAnalyser": grpc.insecure_channel("[::]:50052"),
    "TapeIrregularityClassifier": grpc.insecure_channel("[::]:50053"),
-    "TapeAudioRestoration": grpc.insecure_channel("[::]:50051/tape-audio-restoration"),
-    "Packager": grpc.insecure_channel("[::]:50051/packager"),
+    "TapeAudioRestoration": grpc.insecure_channel("[::]:50051"),
+    #"Packager": grpc.insecure_channel("[::]:50051/packager"),
 }

 def run(console: Console):
@@ -18,53 +18,64 @@ def run(console: Console):
    audio_analyser = arp_pb2_grpc.AIMStub(channels["AudioAnalyser"])
    video_analyser = arp_pb2_grpc.AIMStub(channels["VideoAnalyser"])
    tape_irreg_classifier = arp_pb2_grpc.AIMStub(channels["TapeIrregularityClassifier"])
+    tape_audio_restoration = arp_pb2_grpc.AIMStub(channels["TapeAudioRestoration"])

    request = arp_pb2.InfoRequest()
-    for analyser in [audio_analyser, video_analyser, tape_irreg_classifier]:
-        response = analyser.getInfo(request)
-        console.print("[bold]{}[/], v{}".format(response.title, response.version))
+    # for analyser in [audio_analyser, video_analyser, tape_irreg_classifier]:
+    #     response = analyser.getInfo(request)
+    #     console.print("[bold]{}[/], v{}".format(response.title, response.version))

    request = arp_pb2.JobRequest(
-        working_dir="/data",
+        working_dir="../data",
        files_name="BERIO100",
        index=1,
    )

-    with console.status("[bold]Computing AudioAnalyser IrregularityFile 1...", spinner="bouncingBall"):
-        for result in audio_analyser.work(request):
-            if result.status == "error":
-                console.print("[bold red]Error![/] :boom:")
-                console.print(f"[italic red]{result.message}")
-                for channel in channels.values():
-                    channel.close()
-                exit(os.EX_SOFTWARE)
-            console.print(result.message)
+    # with console.status("[bold]Computing AudioAnalyser IrregularityFile 1...", spinner="bouncingBall"):
+    #     for result in audio_analyser.work(request):
+    #         if result.status == "error":
+    #             console.print("[bold red]Error![/] :boom:")
+    #             console.print(f"[italic red]{result.message}")
+    #             for channel in channels.values():
+    #                 channel.close()
+    #             exit(os.EX_SOFTWARE)
+    #         console.print(result.message)

-    request.files_name = "BERIO100.mov"
-    with console.status("[bold]Computing VideoAnalyser IrregularityFiles...", spinner="bouncingBall"):
-        for result in video_analyser.work(request):
-            if result.status == "error":
-                console.print("[bold red]Error![/] :boom:")
-                console.print(f"[italic red]{result.message}")
-                for channel in channels.values():
-                    channel.close()
-                exit(os.EX_SOFTWARE)
-            console.print(result.message)
+    # request.files_name = "BERIO100.mov"
+    # with console.status("[bold]Computing VideoAnalyser IrregularityFiles...", spinner="bouncingBall"):
+    #     for result in video_analyser.work(request):
+    #         if result.status == "error":
+    #             console.print("[bold red]Error![/] :boom:")
+    #             console.print(f"[italic red]{result.message}")
+    #             for channel in channels.values():
+    #                 channel.close()
+    #             exit(os.EX_SOFTWARE)
+    #         console.print(result.message)

-    request.index = 2
-    request.files_name = "BERIO100"
-    with console.status("[bold]Computing AudioAnalyser IrregularityFile 2...", spinner="bouncingBall"):
-        for result in audio_analyser.work(request):
-            if result.status == "error":
-                console.print("[bold red]Error![/] :boom:")
-                console.print(f"[italic red]{result.message}")
-                for channel in channels.values():
-                    channel.close()
-                exit(os.EX_SOFTWARE)
-            console.print(result.message)
+    # request.index = 2
+    # request.files_name = "BERIO100"
+    # with console.status("[bold]Computing AudioAnalyser IrregularityFile 2...", spinner="bouncingBall"):
+    #     for result in audio_analyser.work(request):
+    #         if result.status == "error":
+    #             console.print("[bold red]Error![/] :boom:")
+    #             console.print(f"[italic red]{result.message}")
+    #             for channel in channels.values():
+    #                 channel.close()
+    #             exit(os.EX_SOFTWARE)
+    #         console.print(result.message)
+
+    # with console.status("[bold]Computing TapeIrregularityClassifier...", spinner="bouncingBall"):
+    #     for result in tape_irreg_classifier.work(request):
+    #         if result.status == "error":
+    #             console.print("[bold red]Error![/] :boom:")
+    #             console.print(f"[italic red]{result.message}")
+    #             for channel in channels.values():
+    #                 channel.close()
+    #             exit(os.EX_SOFTWARE)
+    #         console.print(result.message)

-    with console.status("[bold]Computing TapeIrregularityClassifier...", spinner="bouncingBall"):
-        for result in tape_irreg_classifier.work(request):
+    with console.status("[bold]Computing TapeAudioRestoration...", spinner="bouncingBall"):
+        for result in tape_audio_restoration.work(request):
            if result.status == "error":
                console.print("[bold red]Error![/] :boom:")
                console.print(f"[italic red]{result.message}")
@@ -76,6 +87,7 @@ def run(console: Console):
    channels["AudioAnalyser"].close()
    channels["VideoAnalyser"].close()
    channels["TapeIrregularityClassifier"].close()
+    channels["TapeAudioRestoration"].close()

    console.print("[bold green]Success![/] :tada:")


--- a/src/audio_analyzer/segment_finder.py
+++ b/src/audio_analyzer/segment_finder.py
@@ -19,6 +19,9 @@ TMP_CHANNELS_MAP = os.path.join(TMP_FOLDER, "channels_map.json")
 def calculate_offset(audio: AudioWave, video: AudioWave, interval: int = 10) -> int:
    """
    Calculates the offset between two audio files based on their cross-correlation.
+    Since the cross-correlation is a computationally expensive operation, the audio files are resampled to 1/4 of their original sampling rate.
+    In addition to that, only the specified time interval (starting at 15 seconds) is used for the cross-correlation,
+    assuming that after that time the audio and video contain portions of the same content.

    Parameters
    ----------
@@ -54,6 +57,23 @@ class BitDepth(Enum):
    PCM_S32LE = "pcm_s32le"

 def get_audio_from_video(video_src: str, samplerate: int, bit_depth: BitDepth) -> AudioWave:
+    """
+    Extracts the audio from a video file and returns it as an AudioWave object.
+    
+    Parameters
+    ----------
+    video_src : str
+        The path to the video file.
+    samplerate : int
+        The sampling rate of the audio output.
+    bit_depth : BitDepth
+        The bit depth of the audio output.
+        
+    Returns
+    -------
+    AudioWave
+        The extracted audio. The number of channels is always 2. The audio is saved as a temporary file.
+    """

    # ffmpeg -i video.mov -acodec pcm_s16le -ac 2 audio.wav
    extracted_audio_path = os.path.join(TMP_FOLDER, 'audio.wav')
@@ -118,6 +138,9 @@ def merge_irreg_files(
    file1: IrregularityFile,
    file2: IrregularityFile
 ) -> IrregularityFile:
+    """ 
+    Merge two IrregularityFiles into one. The offset of the new file is the maximum of the two offsets.
+    """

    match file1.offset, file2.offset:
        case None, _:
@@ -164,19 +187,3 @@ def extract_audio_irregularities(
    os.remove(TMP_CHANNELS_MAP)

    return irreg_file
-
-
-if __name__ == "__main__":
-
-    from rich.console import Console
-    
-    console = Console()
-    
-    with console.status("Reading PreservationAudioFile", spinner="dots"):
-        audio = AudioWave.from_file("../data/PreservationAudioFile/BERIO100.wav", bufferize=True)
-    with console.status("Extracting audio from PreservationAudioVisualFile", spinner="dots"):
-        video = get_audio_from_video("../data/PreservationAudioVisualFile/BERIO100.mov", audio.samplerate, BitDepth.PCM_S24LE)
-    with console.status("Calculating offset", spinner="dots"):
-        offset = calculate_offset(audio, video)
-
-    print(offset)
--- a/src/audio_analyzer/server.py
+++ b/src/audio_analyzer/server.py
@@ -16,8 +16,8 @@ from mpai_cae_arp.network.arp_pb2 import (
    License,
 )

-from . import segment_finder as sf
-from . import classifier as cl
+import audio_analyzer.segment_finder as sf
+import audio_analyzer.classifier as cl

 info = File('config.yml', FileType.YAML).get_content()


--- a/tests/test_segment_finder.py
+++ b/tests/test_segment_finder.py
@@ -6,7 +6,7 @@ import numpy as np
 from mpai_cae_arp.audio import AudioWave
 from mpai_cae_arp.types.irregularity import Irregularity, IrregularityFile, Source

-import segment_finder as sf
+import audio_analyzer.segment_finder as sf

 def test_calculate_offset():
    audio = AudioWave(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 24, 1, 8000)