segment_finder.py 6.2 KB
Newer Older
Matteo's avatar
update    
Matteo committed
1
from enum import Enum
Matteo's avatar
update    
Matteo committed
2
3
import os
import tempfile
Matteo's avatar
update  
Matteo committed
4
5
from uuid import uuid4

matteospanio's avatar
update    
matteospanio committed
6
import numpy as np
Matteo's avatar
update    
Matteo committed
7
8
import ffmpeg
import scipy
matteospanio's avatar
update    
matteospanio committed
9

Matteo's avatar
update  
Matteo committed
10
from mpai_cae_arp.audio import AudioWave, Noise
Matteo's avatar
update    
Matteo committed
11
from mpai_cae_arp.files import File, FileType
Matteo's avatar
update  
Matteo committed
12
from mpai_cae_arp.types.irregularity import Irregularity, IrregularityFile, Source
Matteo's avatar
update    
Matteo committed
13
from mpai_cae_arp.time import frames_to_seconds, seconds_to_frames, seconds_to_string, time_to_seconds
Matteo's avatar
update  
Matteo committed
14

Matteo's avatar
update    
Matteo committed
15
16
17
# Scratch directory under the system temp dir, shared by the functions in this
# module. It is created as a side effect of importing the module.
TMP_FOLDER = os.path.join(tempfile.gettempdir(), "mpai")
os.makedirs(TMP_FOLDER, exist_ok=True)
# JSON file mapping irregularity IDs to channel indices. It is written by
# get_irregularities_from_audio and read, then deleted, by
# extract_audio_irregularities.
TMP_CHANNELS_MAP = os.path.join(TMP_FOLDER, "channels_map.json")
Matteo's avatar
update  
Matteo committed
18

Matteo's avatar
update    
Matteo committed
19
def calculate_offset(audio: AudioWave, video: AudioWave, interval: int = 10) -> int:
    """
    Calculates the offset between two audio files based on their cross-correlation.

    Channel 0 of each input is windowed to ``interval`` seconds starting
    15 seconds into the recording, every fourth sample is kept, and the lag
    that maximises the cross-correlation of the two windows is converted to
    milliseconds.

    Parameters
    ----------
    audio : AudioWave
        The audio file to be used as reference.
    video : AudioWave
        The audio file to be used as target.
    interval : int, optional
        The interval in seconds to be used for the cross-correlation, by default 10

    Returns
    -------
    int
        The offset in milliseconds. Following the definition used by
        ``scipy.signal.correlate(audio, video)``, a positive value means the
        matching content occurs later in ``audio`` than in ``video``.
    """
    # Imported explicitly: a bare ``import scipy`` is not guaranteed to expose
    # the ``signal`` subpackage on every SciPy release.
    from scipy import signal

    skip_seconds = 15  # the first seconds of both recordings are ignored
    decimation = 4     # keep one sample out of four to cheapen the correlation

    # Read the rates before windowing so each signal is sliced with its OWN
    # sample rate (the original sliced the video with the audio's rate).
    audio_rate = audio.samplerate
    video_rate = video.samplerate

    audio_window = audio.get_channel(0)[
        audio_rate * skip_seconds:audio_rate * (skip_seconds + interval)
    ]
    video_window = video.get_channel(0)[
        video_rate * skip_seconds:video_rate * (skip_seconds + interval)
    ]

    resampled_audio = audio_window.array[::decimation]
    resampled_video = video_window.array[::decimation]

    corr = signal.correlate(resampled_audio, resampled_video, mode="full", method="auto")

    # In "full" mode, output index k corresponds to lag k - (len(in2) - 1),
    # so zero lag sits at index len(resampled_video) - 1.
    offset = int(np.argmax(corr)) - (len(resampled_video) - 1)

    # The lag is expressed in decimated samples; convert it to milliseconds.
    # NOTE(review): this assumes both inputs share the same sample rate, as is
    # the case when the video track is extracted at ``audio.samplerate``.
    offset_ms = offset / (audio_rate / decimation) * 1000

    return round(offset_ms)
matteospanio's avatar
update    
matteospanio committed
48
49


Matteo's avatar
update    
Matteo committed
50
51
52
53
54
55
56
class BitDepth(Enum):
    """Audio codec names selectable when extracting audio from a video.

    Each member's value is the string handed to ffmpeg as the ``acodec``
    output option by ``get_audio_from_video``.
    """

    PCM_S8 = "pcm_s8"
    PCM_S16LE = "pcm_s16le"
    PCM_S24LE = "pcm_s24le"
    PCM_S32LE = "pcm_s32le"

def get_audio_from_video(video_src: str, samplerate: int, bit_depth: BitDepth) -> AudioWave:
    """
    Extracts the audio track of a video file and loads it as an AudioWave.

    The track is transcoded by ffmpeg to a two-channel WAV file inside
    ``TMP_FOLDER``, read back into memory and then deleted.

    Parameters
    ----------
    video_src : str
        Path of the video file to read.
    samplerate : int
        Sample rate requested from ffmpeg for the extracted audio (``ar``).
    bit_depth : BitDepth
        Codec requested from ffmpeg for the extracted audio (``acodec``).

    Returns
    -------
    AudioWave
        The extracted two-channel audio.
    """
    # Imported explicitly: a bare ``import scipy`` is not guaranteed to expose
    # ``scipy.io.wavfile`` on every SciPy release.
    from scipy.io import wavfile

    # NOTE(review): the second positional argument of AudioWave is presumed to
    # be the sample width in bits (it was hard-coded to 24 while every visible
    # caller requests PCM_S24LE) -- confirm against the AudioWave constructor.
    bits_per_sample = {
        BitDepth.PCM_S8: 8,
        BitDepth.PCM_S16LE: 16,
        BitDepth.PCM_S24LE: 24,
        BitDepth.PCM_S32LE: 32,
    }[bit_depth]

    # ffmpeg -i video.mov -acodec pcm_s16le -ac 2 audio.wav
    # A unique file name keeps concurrent extractions from overwriting each
    # other's output in the shared temporary folder.
    extracted_audio_path = os.path.join(TMP_FOLDER, f"audio_{uuid4().hex}.wav")

    in_file = ffmpeg.input(video_src)
    out_file = ffmpeg.output(
        in_file.audio,
        extracted_audio_path,
        ac=2,
        ar=samplerate,
        acodec=bit_depth.value,
    )

    try:
        ffmpeg.run(out_file, quiet=True, overwrite_output=True)
        # With the default mmap=False the samples are fully loaded in memory,
        # so the file can be removed as soon as the read returns.
        rate, data = wavfile.read(extracted_audio_path)
    finally:
        try:
            os.remove(extracted_audio_path)
        except FileNotFoundError:
            # ffmpeg failed before producing any output; nothing to clean up.
            pass

    return AudioWave(data, bits_per_sample, 2, rate)
Matteo's avatar
update  
Matteo committed
68
69


Matteo's avatar
update    
Matteo committed
70
def get_irregularities_from_audio(audio_src: AudioWave) -> list[Irregularity]:
    """
    Builds one audio irregularity for every silence slice found in the input.

    A multi-channel input is analysed channel by channel; a single-channel
    input is analysed as is. The channel index of every irregularity created
    is stored, keyed by the irregularity ID, in the JSON file at
    ``TMP_CHANNELS_MAP``.

    Parameters
    ----------
    audio_src : AudioWave
        The audio to analyse.

    Returns
    -------
    list[Irregularity]
        The irregularities found, in detection order (not sorted by time).
    """
    if audio_src.channels > 1:
        input_channels: list[AudioWave] = [
            audio_src.get_channel(channel_idx)
            for channel_idx in range(audio_src.channels)
        ]
    else:
        input_channels = [audio_src]

    channels_map = {}
    irreg_list: list[Irregularity] = []

    for channel_idx, channel_audio in enumerate(input_channels):
        # NOTE(review): the Noise bounds look like dB levels and ``length``
        # like a duration in milliseconds -- confirm against mpai_cae_arp.audio.
        silence_slices = channel_audio.get_silence_slices(
            [
                Noise("A", -50, -63),
                Noise("B", -63, -69),
                Noise("C", -69, -72),
            ],
            length=500,
        )

        for _, noise_list in silence_slices.items():
            # Only the first element of each slice (its start frame) is used.
            for start, _ in noise_list:
                irreg_id = uuid4()
                start_seconds = frames_to_seconds(start, channel_audio.samplerate)
                irregularity = Irregularity(
                    irregularity_ID=irreg_id,
                    source=Source.AUDIO,
                    time_label=seconds_to_string(start_seconds),
                )
                irreg_list.append(irregularity)
                channels_map[str(irreg_id)] = channel_idx

    File(TMP_CHANNELS_MAP, FileType.JSON).write_content(channels_map)

    return irreg_list

Matteo's avatar
update    
Matteo committed
103
104
105
106

def create_irreg_file(audio_src: str, video_src: str) -> IrregularityFile:
    """
    Creates the irregularity file for an audio recording and its video.

    Parameters
    ----------
    audio_src : str
        Path of the audio file to analyse.
    video_src : str
        Path of the video file whose audio track is compared with the audio
        file to compute the offset.

    Returns
    -------
    IrregularityFile
        The audio irregularities sorted by time label, together with the
        offset between the audio file and the video's audio track.
    """
    audio = AudioWave.from_file(audio_src, bufferize=True)

    # The video's audio track is extracted at the audio file's sample rate.
    video_audio = get_audio_from_video(video_src, audio.samplerate, BitDepth.PCM_S24LE)
    sync_offset = calculate_offset(audio, video_audio)

    chronological = sorted(
        get_irregularities_from_audio(audio),
        key=lambda irreg: time_to_seconds(irreg.time_label),
    )

    return IrregularityFile(irregularities=chronological, offset=sync_offset)
Matteo's avatar
update    
Matteo committed
115
116
117
118


def merge_irreg_files(
    file1: IrregularityFile,
    file2: IrregularityFile
) -> IrregularityFile:
    """
    Merges two irregularity files into a new one.

    Parameters
    ----------
    file1 : IrregularityFile
        The first file to merge.
    file2 : IrregularityFile
        The second file to merge.

    Returns
    -------
    IrregularityFile
        A new file holding the irregularities of both inputs sorted by time
        label. Its offset is the larger of the two offsets, or the only one
        that is set when the other is ``None``.
    """
    first_offset = file1.offset
    second_offset = file2.offset

    if first_offset is None:
        merged_offset = file2.offset
    elif second_offset is None:
        merged_offset = file1.offset
    else:
        merged_offset = max(file1.offset, file2.offset)

    # Concatenation builds a new list, so neither input list is reordered.
    merged = sorted(
        file1.irregularities + file2.irregularities,
        key=lambda irreg: time_to_seconds(irreg.time_label),
    )

    return IrregularityFile(irregularities=merged, offset=merged_offset)


def extract_audio_irregularities(
    audio_src: str,
    irreg_file: IrregularityFile,
    path: str
) -> IrregularityFile:
    """
    Saves an audio block for every irregularity listed in the file.

    For each irregularity, the samples starting at its time label and
    spanning ``samplerate // 2`` frames are written to
    ``{path}/AudioBlocks/{irregularity_ID}.wav`` and that location is stored
    in the irregularity's ``audio_block_URI``. Irregularities listed in the
    channels map are cut from the mapped channel only; all the others are cut
    from the whole audio. The channels map file is deleted afterwards.

    Parameters
    ----------
    audio_src : str
        Path of the audio file the blocks are cut from.
    irreg_file : IrregularityFile
        The irregularities to extract; updated in place.
    path : str
        Directory in which the ``AudioBlocks`` folder is created.

    Returns
    -------
    IrregularityFile
        The same ``irreg_file`` object that was passed in.
    """
    channels_map = File(TMP_CHANNELS_MAP, FileType.JSON).get_content()
    os.makedirs(f"{path}/AudioBlocks", exist_ok=True)

    audio = AudioWave.from_file(audio_src, bufferize=True)

    for irreg in irreg_file.irregularities:
        channel_idx = channels_map.get(str(irreg.irregularity_ID))
        # Irregularities missing from the map are cut from the whole audio.
        source = audio if channel_idx is None else audio.get_channel(channel_idx)

        start_frame = seconds_to_frames(
            time_to_seconds(irreg.time_label), audio.samplerate
        )
        end_frame = start_frame + audio.samplerate // 2

        block_path = f"{path}/AudioBlocks/{irreg.irregularity_ID}.wav"
        source[start_frame:end_frame].save(block_path)
        irreg.audio_block_URI = block_path

    os.remove(TMP_CHANNELS_MAP)

    return irreg_file
Matteo's avatar
update    
Matteo committed
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182


if __name__ == "__main__":
    # Manual check: prints the offset between a sample audio file and the
    # audio track of the matching video, showing a spinner for each step.
    from rich.console import Console

    audio_path = "../data/PreservationAudioFile/BERIO100.wav"
    video_path = "../data/PreservationAudioVisualFile/BERIO100.mov"

    console = Console()

    with console.status("Reading PreservationAudioFile", spinner="dots"):
        reference = AudioWave.from_file(audio_path, bufferize=True)

    with console.status("Extracting audio from PreservationAudioVisualFile", spinner="dots"):
        target = get_audio_from_video(video_path, reference.samplerate, BitDepth.PCM_S24LE)

    with console.status("Calculating offset", spinner="dots"):
        computed_offset = calculate_offset(reference, target)

    print(computed_offset)