Commit fe415fcf authored by Matteo
Browse files

update

parent 7f2bb3a1
# Audio Analyzer # Audio Analyzer
[![MPAI CAE-ARP](https://img.shields.io/badge/MPAI%20CAE--ARP-gray?style=for-the-badge&logo=AppleMusic&logoColor=cyan&link=https://mpai.community/standards/mpai-cae/about-mpai-cae/)](https://mpai.community/standards/mpai-cae/about-mpai-cae/)
Implements the Technical Specification of [MPAI CAE-ARP](https://mpai.community/standards/mpai-cae/about-mpai-cae/#Figure2) *Audio Analyser* AIM, providing:
- 2 Irregularity Files
- Audio Files
# TODO # TODO
- [x] calculate the video/audio offset - [x] calculate the video/audio offset
- [ ] Read the input file(s?) and generate a list of audio files - [ ] Read the input file(s?) and generate a list of audio files
- [ ] Split each file into different channels - [x] Split each file into different channels
- [ ] extract silence from each channel - [x] extract silence from each channel
- [ ] generate an irregularity for each silence found - [x] generate an irregularity for each silence found
- [ ] save the list of irregularities as an irregularity file - [x] save the list of irregularities as an irregularity file
- [ ] get the irregularity file from video analyzer - [x] get the irregularity file from video analyzer
- [ ] merge the irregularity files - [x] merge the irregularity files
- [ ] extract the audio from every irregularity - [x] extract the audio from every irregularity
- [ ] for each audio irregularity, make a classification - [ ] for each audio irregularity, make a classification
- [ ] save everything in a single irregularity file - [x] save everything in a single irregularity file
Sample irregularityFile from Audio to Video Analyzer: Sample irregularityFile from Audio to Video Analyzer:
```json ```json
...@@ -30,12 +36,12 @@ Sample irregularityFile from Audio to Video Analyzer: ...@@ -30,12 +36,12 @@ Sample irregularityFile from Audio to Video Analyzer:
{ {
"IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925", "IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925",
"Source": "a", "Source": "a",
"TimeLabel": "00:00:00.000" "TimeLabel": "00:00:02.000"
}, },
{ {
"IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925", "IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925",
"Source": "a", "Source": "a",
"TimeLabel": "00:00:00.000" "TimeLabel": "00:00:05.000"
} }
] ]
} }
...@@ -48,17 +54,17 @@ Sample irregularityFile from Video to Audio Analyzer: ...@@ -48,17 +54,17 @@ Sample irregularityFile from Video to Audio Analyzer:
{ {
"IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925", "IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925",
"Source": "v", "Source": "v",
"TimeLabel": "00:00:00.000" "TimeLabel": "00:00:10.000"
}, },
{ {
"IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925", "IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925",
"Source": "v", "Source": "v",
"TimeLabel": "00:00:00.000" "TimeLabel": "00:00:20.000"
}, },
{ {
"IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925", "IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925",
"Source": "v", "Source": "v",
"TimeLabel": "00:00:00.000" "TimeLabel": "00:00:30.000"
} }
] ]
} }
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
name = "audio-analyzer" name = "audio-analyzer"
version = "1.0.0" version = "1.0.0"
description = "MPAI CAE-ARP Audio Analyser" description = "MPAI CAE-ARP Audio Analyser"
repository = "https://gitlab.dei.unipd.it/mpai/audio-analyzer.git"
authors = ["Matteo Spanio <dev2@audioinnova.com>"] authors = ["Matteo Spanio <dev2@audioinnova.com>"]
license = "GPLv3" license = "GPLv3"
readme = "README.md" readme = "README.md"
......
...@@ -7,8 +7,8 @@ from mpai_cae_arp.types.irregularity import IrregularityFile, Source ...@@ -7,8 +7,8 @@ from mpai_cae_arp.types.irregularity import IrregularityFile, Source
from mpai_cae_arp.files import File, FileType from mpai_cae_arp.files import File, FileType
from mpai_cae_arp.io import prettify, Style from mpai_cae_arp.io import prettify, Style
from . import segment_finder as sf import audio_analyzer.segment_finder as sf
from . import classifier as cl import audio_analyzer.classifier as cl
def get_args() -> tuple[str | None, str | None]: def get_args() -> tuple[str | None, str | None]:
......
...@@ -9,8 +9,8 @@ channels = { ...@@ -9,8 +9,8 @@ channels = {
"AudioAnalyser": grpc.insecure_channel("[::]:50051"), "AudioAnalyser": grpc.insecure_channel("[::]:50051"),
"VideoAnalyser": grpc.insecure_channel("[::]:50052"), "VideoAnalyser": grpc.insecure_channel("[::]:50052"),
"TapeIrregularityClassifier": grpc.insecure_channel("[::]:50053"), "TapeIrregularityClassifier": grpc.insecure_channel("[::]:50053"),
"TapeAudioRestoration": grpc.insecure_channel("[::]:50051/tape-audio-restoration"), "TapeAudioRestoration": grpc.insecure_channel("[::]:50051"),
"Packager": grpc.insecure_channel("[::]:50051/packager"), #"Packager": grpc.insecure_channel("[::]:50051/packager"),
} }
def run(console: Console): def run(console: Console):
...@@ -18,53 +18,64 @@ def run(console: Console): ...@@ -18,53 +18,64 @@ def run(console: Console):
audio_analyser = arp_pb2_grpc.AIMStub(channels["AudioAnalyser"]) audio_analyser = arp_pb2_grpc.AIMStub(channels["AudioAnalyser"])
video_analyser = arp_pb2_grpc.AIMStub(channels["VideoAnalyser"]) video_analyser = arp_pb2_grpc.AIMStub(channels["VideoAnalyser"])
tape_irreg_classifier = arp_pb2_grpc.AIMStub(channels["TapeIrregularityClassifier"]) tape_irreg_classifier = arp_pb2_grpc.AIMStub(channels["TapeIrregularityClassifier"])
tape_audio_restoration = arp_pb2_grpc.AIMStub(channels["TapeAudioRestoration"])
request = arp_pb2.InfoRequest() request = arp_pb2.InfoRequest()
for analyser in [audio_analyser, video_analyser, tape_irreg_classifier]: # for analyser in [audio_analyser, video_analyser, tape_irreg_classifier]:
response = analyser.getInfo(request) # response = analyser.getInfo(request)
console.print("[bold]{}[/], v{}".format(response.title, response.version)) # console.print("[bold]{}[/], v{}".format(response.title, response.version))
request = arp_pb2.JobRequest( request = arp_pb2.JobRequest(
working_dir="/data", working_dir="../data",
files_name="BERIO100", files_name="BERIO100",
index=1, index=1,
) )
with console.status("[bold]Computing AudioAnalyser IrregularityFile 1...", spinner="bouncingBall"): # with console.status("[bold]Computing AudioAnalyser IrregularityFile 1...", spinner="bouncingBall"):
for result in audio_analyser.work(request): # for result in audio_analyser.work(request):
if result.status == "error": # if result.status == "error":
console.print("[bold red]Error![/] :boom:") # console.print("[bold red]Error![/] :boom:")
console.print(f"[italic red]{result.message}") # console.print(f"[italic red]{result.message}")
for channel in channels.values(): # for channel in channels.values():
channel.close() # channel.close()
exit(os.EX_SOFTWARE) # exit(os.EX_SOFTWARE)
console.print(result.message) # console.print(result.message)
request.files_name = "BERIO100.mov" # request.files_name = "BERIO100.mov"
with console.status("[bold]Computing VideoAnalyser IrregularityFiles...", spinner="bouncingBall"): # with console.status("[bold]Computing VideoAnalyser IrregularityFiles...", spinner="bouncingBall"):
for result in video_analyser.work(request): # for result in video_analyser.work(request):
if result.status == "error": # if result.status == "error":
console.print("[bold red]Error![/] :boom:") # console.print("[bold red]Error![/] :boom:")
console.print(f"[italic red]{result.message}") # console.print(f"[italic red]{result.message}")
for channel in channels.values(): # for channel in channels.values():
channel.close() # channel.close()
exit(os.EX_SOFTWARE) # exit(os.EX_SOFTWARE)
console.print(result.message) # console.print(result.message)
request.index = 2 # request.index = 2
request.files_name = "BERIO100" # request.files_name = "BERIO100"
with console.status("[bold]Computing AudioAnalyser IrregularityFile 2...", spinner="bouncingBall"): # with console.status("[bold]Computing AudioAnalyser IrregularityFile 2...", spinner="bouncingBall"):
for result in audio_analyser.work(request): # for result in audio_analyser.work(request):
if result.status == "error": # if result.status == "error":
console.print("[bold red]Error![/] :boom:") # console.print("[bold red]Error![/] :boom:")
console.print(f"[italic red]{result.message}") # console.print(f"[italic red]{result.message}")
for channel in channels.values(): # for channel in channels.values():
channel.close() # channel.close()
exit(os.EX_SOFTWARE) # exit(os.EX_SOFTWARE)
console.print(result.message) # console.print(result.message)
# with console.status("[bold]Computing TapeIrregularityClassifier...", spinner="bouncingBall"):
# for result in tape_irreg_classifier.work(request):
# if result.status == "error":
# console.print("[bold red]Error![/] :boom:")
# console.print(f"[italic red]{result.message}")
# for channel in channels.values():
# channel.close()
# exit(os.EX_SOFTWARE)
# console.print(result.message)
with console.status("[bold]Computing TapeIrregularityClassifier...", spinner="bouncingBall"): with console.status("[bold]Computing TapeAudioRestoration...", spinner="bouncingBall"):
for result in tape_irreg_classifier.work(request): for result in tape_audio_restoration.work(request):
if result.status == "error": if result.status == "error":
console.print("[bold red]Error![/] :boom:") console.print("[bold red]Error![/] :boom:")
console.print(f"[italic red]{result.message}") console.print(f"[italic red]{result.message}")
...@@ -76,6 +87,7 @@ def run(console: Console): ...@@ -76,6 +87,7 @@ def run(console: Console):
channels["AudioAnalyser"].close() channels["AudioAnalyser"].close()
channels["VideoAnalyser"].close() channels["VideoAnalyser"].close()
channels["TapeIrregularityClassifier"].close() channels["TapeIrregularityClassifier"].close()
channels["TapeAudioRestoration"].close()
console.print("[bold green]Success![/] :tada:") console.print("[bold green]Success![/] :tada:")
......
...@@ -19,6 +19,9 @@ TMP_CHANNELS_MAP = os.path.join(TMP_FOLDER, "channels_map.json") ...@@ -19,6 +19,9 @@ TMP_CHANNELS_MAP = os.path.join(TMP_FOLDER, "channels_map.json")
def calculate_offset(audio: AudioWave, video: AudioWave, interval: int = 10) -> int: def calculate_offset(audio: AudioWave, video: AudioWave, interval: int = 10) -> int:
""" """
Calculates the offset between two audio files based on their cross-correlation. Calculates the offset between two audio files based on their cross-correlation.
Since the cross-correlation is a computationally expensive operation, the audio files are resampled to 1/4 of their original sampling rate.
In addition to that, only the specified time interval (starting at 15 seconds) is used for the cross-correlation,
assuming that after that time the audio and video contain portions of the same content.
Parameters Parameters
---------- ----------
...@@ -54,6 +57,23 @@ class BitDepth(Enum): ...@@ -54,6 +57,23 @@ class BitDepth(Enum):
PCM_S32LE = "pcm_s32le" PCM_S32LE = "pcm_s32le"
def get_audio_from_video(video_src: str, samplerate: int, bit_depth: BitDepth) -> AudioWave: def get_audio_from_video(video_src: str, samplerate: int, bit_depth: BitDepth) -> AudioWave:
"""
Extracts the audio from a video file and returns it as an AudioWave object.
Parameters
----------
video_src : str
The path to the video file.
samplerate : int
The sampling rate of the audio output.
bit_depth : BitDepth
The bit depth of the audio output.
Returns
-------
AudioWave
The extracted audio. The number of channels is always 2. The audio is saved as a temporary file.
"""
# ffmpeg -i video.mov -acodec pcm_s16le -ac 2 audio.wav # ffmpeg -i video.mov -acodec pcm_s16le -ac 2 audio.wav
extracted_audio_path = os.path.join(TMP_FOLDER, 'audio.wav') extracted_audio_path = os.path.join(TMP_FOLDER, 'audio.wav')
...@@ -118,6 +138,9 @@ def merge_irreg_files( ...@@ -118,6 +138,9 @@ def merge_irreg_files(
file1: IrregularityFile, file1: IrregularityFile,
file2: IrregularityFile file2: IrregularityFile
) -> IrregularityFile: ) -> IrregularityFile:
"""
Merge two IrregularityFiles into one. The offset of the new file is the maximum of the two offsets.
"""
match file1.offset, file2.offset: match file1.offset, file2.offset:
case None, _: case None, _:
...@@ -164,19 +187,3 @@ def extract_audio_irregularities( ...@@ -164,19 +187,3 @@ def extract_audio_irregularities(
os.remove(TMP_CHANNELS_MAP) os.remove(TMP_CHANNELS_MAP)
return irreg_file return irreg_file
if __name__ == "__main__":
from rich.console import Console
console = Console()
with console.status("Reading PreservationAudioFile", spinner="dots"):
audio = AudioWave.from_file("../data/PreservationAudioFile/BERIO100.wav", bufferize=True)
with console.status("Extracting audio from PreservationAudioVisualFile", spinner="dots"):
video = get_audio_from_video("../data/PreservationAudioVisualFile/BERIO100.mov", audio.samplerate, BitDepth.PCM_S24LE)
with console.status("Calculating offset", spinner="dots"):
offset = calculate_offset(audio, video)
print(offset)
...@@ -16,8 +16,8 @@ from mpai_cae_arp.network.arp_pb2 import ( ...@@ -16,8 +16,8 @@ from mpai_cae_arp.network.arp_pb2 import (
License, License,
) )
from . import segment_finder as sf import audio_analyzer.segment_finder as sf
from . import classifier as cl import audio_analyzer.classifier as cl
info = File('config.yml', FileType.YAML).get_content() info = File('config.yml', FileType.YAML).get_content()
......
...@@ -6,7 +6,7 @@ import numpy as np ...@@ -6,7 +6,7 @@ import numpy as np
from mpai_cae_arp.audio import AudioWave from mpai_cae_arp.audio import AudioWave
from mpai_cae_arp.types.irregularity import Irregularity, IrregularityFile, Source from mpai_cae_arp.types.irregularity import Irregularity, IrregularityFile, Source
import segment_finder as sf import audio_analyzer.segment_finder as sf
def test_calculate_offset(): def test_calculate_offset():
audio = AudioWave(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 24, 1, 8000) audio = AudioWave(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 24, 1, 8000)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment