Commit fe415fcf authored by Matteo's avatar Matteo
Browse files

update

parent 7f2bb3a1
# Audio Analyzer
[![MPAI CAE-ARP](https://img.shields.io/badge/MPAI%20CAE--ARP-gray?style=for-the-badge&logo=AppleMusic&logoColor=cyan&link=https://mpai.community/standards/mpai-cae/about-mpai-cae/)](https://mpai.community/standards/mpai-cae/about-mpai-cae/)
Implements the Technical Specification of [MPAI CAE-ARP](https://mpai.community/standards/mpai-cae/about-mpai-cae/#Figure2) *Audio Analyser* AIM, providing:
- 2 Irregularity Files
- Audio Files
# TODO
- [x] calculate the video/audio offset
- [ ] Read the input file(s?) and generate a list of audio files
- [ ] Split each file into different channels
- [ ] extract silence from each channel
- [ ] generate an irregularity for each silence found
- [ ] save the list of irregularities as an irregularity file
- [x] Split each file into different channels
- [x] extract silence from each channel
- [x] generate an irregularity for each silence found
- [x] save the list of irregularities as an irregularity file
- [ ] get the irregularity file from video analyzer
- [x] get the irregularity file from video analyzer
- [ ] merge the irregularity files
- [ ] extract the audio from every irregularity
- [x] merge the irregularity files
- [x] extract the audio from every irregularity
- [ ] for each audio irregularity, make a classification
- [ ] save everything in a single irregularity file
- [x] save everything in a single irregularity file
Sample irregularityFile from Audio to Video Analyzer:
```json
......@@ -30,12 +36,12 @@ Sample irregularityFile from Audio to Video Analyzer:
{
"IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925",
"Source": "a",
"TimeLabel": "00:00:00.000",
"TimeLabel": "00:00:02.000",
},
{
"IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925",
"Source": "a",
"TimeLabel": "00:00:00.000",
"TimeLabel": "00:00:05.000",
}
]
}
......@@ -48,17 +54,17 @@ Sample irregularityFile from Video to Audio Analyzer:
{
"IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925",
"Source": "v",
"TimeLabel": "00:00:00.000",
"TimeLabel": "00:00:10.000",
},
{
"IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925",
"Source": "v",
"TimeLabel": "00:00:00.000",
"TimeLabel": "00:00:20.000",
},
{
"IrregularityID": "09859d16-3c73-4bb0-9c74-91b451e34925",
"Source": "v",
"TimeLabel": "00:00:00.000",
"TimeLabel": "00:00:30.000",
}
]
}
......
......@@ -2,6 +2,7 @@
name = "audio-analyzer"
version = "1.0.0"
description = "MPAI CAE-ARP Audio Analyser"
repository = "https://gitlab.dei.unipd.it/mpai/audio-analyzer.git"
authors = ["Matteo Spanio <dev2@audioinnova.com>"]
license = "GPLv3"
readme = "README.md"
......
......@@ -7,8 +7,8 @@ from mpai_cae_arp.types.irregularity import IrregularityFile, Source
from mpai_cae_arp.files import File, FileType
from mpai_cae_arp.io import prettify, Style
from . import segment_finder as sf
from . import classifier as cl
import audio_analyzer.segment_finder as sf
import audio_analyzer.classifier as cl
def get_args() -> tuple[str | None, str | None]:
......
......@@ -9,8 +9,8 @@ channels = {
"AudioAnalyser": grpc.insecure_channel("[::]:50051"),
"VideoAnalyser": grpc.insecure_channel("[::]:50052"),
"TapeIrregularityClassifier": grpc.insecure_channel("[::]:50053"),
"TapeAudioRestoration": grpc.insecure_channel("[::]:50051/tape-audio-restoration"),
"Packager": grpc.insecure_channel("[::]:50051/packager"),
"TapeAudioRestoration": grpc.insecure_channel("[::]:50051"),
#"Packager": grpc.insecure_channel("[::]:50051/packager"),
}
def run(console: Console):
......@@ -18,53 +18,64 @@ def run(console: Console):
audio_analyser = arp_pb2_grpc.AIMStub(channels["AudioAnalyser"])
video_analyser = arp_pb2_grpc.AIMStub(channels["VideoAnalyser"])
tape_irreg_classifier = arp_pb2_grpc.AIMStub(channels["TapeIrregularityClassifier"])
tape_audio_restoration = arp_pb2_grpc.AIMStub(channels["TapeAudioRestoration"])
request = arp_pb2.InfoRequest()
for analyser in [audio_analyser, video_analyser, tape_irreg_classifier]:
response = analyser.getInfo(request)
console.print("[bold]{}[/], v{}".format(response.title, response.version))
# for analyser in [audio_analyser, video_analyser, tape_irreg_classifier]:
# response = analyser.getInfo(request)
# console.print("[bold]{}[/], v{}".format(response.title, response.version))
request = arp_pb2.JobRequest(
working_dir="/data",
working_dir="../data",
files_name="BERIO100",
index=1,
)
with console.status("[bold]Computing AudioAnalyser IrregularityFile 1...", spinner="bouncingBall"):
for result in audio_analyser.work(request):
if result.status == "error":
console.print("[bold red]Error![/] :boom:")
console.print(f"[italic red]{result.message}")
for channel in channels.values():
channel.close()
exit(os.EX_SOFTWARE)
console.print(result.message)
# with console.status("[bold]Computing AudioAnalyser IrregularityFile 1...", spinner="bouncingBall"):
# for result in audio_analyser.work(request):
# if result.status == "error":
# console.print("[bold red]Error![/] :boom:")
# console.print(f"[italic red]{result.message}")
# for channel in channels.values():
# channel.close()
# exit(os.EX_SOFTWARE)
# console.print(result.message)
request.files_name = "BERIO100.mov"
with console.status("[bold]Computing VideoAnalyser IrregularityFiles...", spinner="bouncingBall"):
for result in video_analyser.work(request):
if result.status == "error":
console.print("[bold red]Error![/] :boom:")
console.print(f"[italic red]{result.message}")
for channel in channels.values():
channel.close()
exit(os.EX_SOFTWARE)
console.print(result.message)
# request.files_name = "BERIO100.mov"
# with console.status("[bold]Computing VideoAnalyser IrregularityFiles...", spinner="bouncingBall"):
# for result in video_analyser.work(request):
# if result.status == "error":
# console.print("[bold red]Error![/] :boom:")
# console.print(f"[italic red]{result.message}")
# for channel in channels.values():
# channel.close()
# exit(os.EX_SOFTWARE)
# console.print(result.message)
request.index = 2
request.files_name = "BERIO100"
with console.status("[bold]Computing AudioAnalyser IrregularityFile 2...", spinner="bouncingBall"):
for result in audio_analyser.work(request):
if result.status == "error":
console.print("[bold red]Error![/] :boom:")
console.print(f"[italic red]{result.message}")
for channel in channels.values():
channel.close()
exit(os.EX_SOFTWARE)
console.print(result.message)
# request.index = 2
# request.files_name = "BERIO100"
# with console.status("[bold]Computing AudioAnalyser IrregularityFile 2...", spinner="bouncingBall"):
# for result in audio_analyser.work(request):
# if result.status == "error":
# console.print("[bold red]Error![/] :boom:")
# console.print(f"[italic red]{result.message}")
# for channel in channels.values():
# channel.close()
# exit(os.EX_SOFTWARE)
# console.print(result.message)
# with console.status("[bold]Computing TapeIrregularityClassifier...", spinner="bouncingBall"):
# for result in tape_irreg_classifier.work(request):
# if result.status == "error":
# console.print("[bold red]Error![/] :boom:")
# console.print(f"[italic red]{result.message}")
# for channel in channels.values():
# channel.close()
# exit(os.EX_SOFTWARE)
# console.print(result.message)
with console.status("[bold]Computing TapeIrregularityClassifier...", spinner="bouncingBall"):
for result in tape_irreg_classifier.work(request):
with console.status("[bold]Computing TapeAudioRestoration...", spinner="bouncingBall"):
for result in tape_audio_restoration.work(request):
if result.status == "error":
console.print("[bold red]Error![/] :boom:")
console.print(f"[italic red]{result.message}")
......@@ -76,6 +87,7 @@ def run(console: Console):
channels["AudioAnalyser"].close()
channels["VideoAnalyser"].close()
channels["TapeIrregularityClassifier"].close()
channels["TapeAudioRestoration"].close()
console.print("[bold green]Success![/] :tada:")
......
......@@ -19,6 +19,9 @@ TMP_CHANNELS_MAP = os.path.join(TMP_FOLDER, "channels_map.json")
def calculate_offset(audio: AudioWave, video: AudioWave, interval: int = 10) -> int:
"""
Calculates the offset between two audio files based on their cross-correlation.
Because cross-correlation is computationally expensive, the audio files are resampled to 1/4 of their original sampling rate.
In addition, only the specified time interval (starting at 15 seconds) is used for the cross-correlation,
on the assumption that from that point on the audio and the video contain portions of the same content.
Parameters
----------
......@@ -54,6 +57,23 @@ class BitDepth(Enum):
PCM_S32LE = "pcm_s32le"
def get_audio_from_video(video_src: str, samplerate: int, bit_depth: BitDepth) -> AudioWave:
"""
Extracts the audio from a video file and returns it as an AudioWave object.
Parameters
----------
video_src : str
The path to the video file.
samplerate : int
The sampling rate of the audio output.
bit_depth : BitDepth
The bit depth of the audio output.
Returns
-------
AudioWave
The extracted audio. The number of channels is always 2. The audio is saved as a temporary file.
"""
# ffmpeg -i video.mov -acodec pcm_s16le -ac 2 audio.wav
extracted_audio_path = os.path.join(TMP_FOLDER, 'audio.wav')
......@@ -118,6 +138,9 @@ def merge_irreg_files(
file1: IrregularityFile,
file2: IrregularityFile
) -> IrregularityFile:
"""
Merge two IrregularityFiles into one. The offset of the new file is the maximum of the two offsets.
"""
match file1.offset, file2.offset:
case None, _:
......@@ -164,19 +187,3 @@ def extract_audio_irregularities(
os.remove(TMP_CHANNELS_MAP)
return irreg_file
if __name__ == "__main__":
from rich.console import Console
console = Console()
with console.status("Reading PreservationAudioFile", spinner="dots"):
audio = AudioWave.from_file("../data/PreservationAudioFile/BERIO100.wav", bufferize=True)
with console.status("Extracting audio from PreservationAudioVisualFile", spinner="dots"):
video = get_audio_from_video("../data/PreservationAudioVisualFile/BERIO100.mov", audio.samplerate, BitDepth.PCM_S24LE)
with console.status("Calculating offset", spinner="dots"):
offset = calculate_offset(audio, video)
print(offset)
......@@ -16,8 +16,8 @@ from mpai_cae_arp.network.arp_pb2 import (
License,
)
from . import segment_finder as sf
from . import classifier as cl
import audio_analyzer.segment_finder as sf
import audio_analyzer.classifier as cl
info = File('config.yml', FileType.YAML).get_content()
......
......@@ -6,7 +6,7 @@ import numpy as np
from mpai_cae_arp.audio import AudioWave
from mpai_cae_arp.types.irregularity import Irregularity, IrregularityFile, Source
import segment_finder as sf
import audio_analyzer.segment_finder as sf
def test_calculate_offset():
audio = AudioWave(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 24, 1, 8000)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment