segment_finder.py 6.51 KB
Newer Older
Matteo's avatar
update    
Matteo committed
1
from enum import Enum
Matteo's avatar
update    
Matteo committed
2
3
import os
import tempfile
Matteo's avatar
update  
Matteo committed
4
5
from uuid import uuid4

matteospanio's avatar
update    
matteospanio committed
6
import numpy as np
Matteo's avatar
update    
Matteo committed
7
8
import ffmpeg
import scipy
matteospanio's avatar
update    
matteospanio committed
9

Matteo's avatar
update  
Matteo committed
10
from mpai_cae_arp.audio import AudioWave, Noise
Matteo's avatar
update    
Matteo committed
11
from mpai_cae_arp.files import File, FileType
Matteo's avatar
update  
Matteo committed
12
from mpai_cae_arp.types.irregularity import Irregularity, IrregularityFile, Source
Matteo's avatar
update    
Matteo committed
13
from mpai_cae_arp.time import frames_to_seconds, seconds_to_frames, seconds_to_string, time_to_seconds
Matteo's avatar
update  
Matteo committed
14

Matteo's avatar
update    
Matteo committed
15
16
17
# Scratch directory for this package, located under the system temporary directory.
TMP_FOLDER = os.path.join(tempfile.gettempdir(), "mpai")
# NOTE: the directory is created as a side effect of importing this module.
os.makedirs(TMP_FOLDER, exist_ok=True)
# JSON file mapping irregularity IDs to the index of the audio channel they were found in.
# It is written by get_irregularities_from_audio and read, then deleted, by
# extract_audio_irregularities.
TMP_CHANNELS_MAP = os.path.join(TMP_FOLDER, "channels_map.json")
Matteo's avatar
update  
Matteo committed
18

Matteo's avatar
update    
Matteo committed
19
def calculate_offset(audio: AudioWave, video: AudioWave, interval: int = 10) -> int:
    """
    Calculates the offset between two audio files based on their cross-correlation.

    Since the cross-correlation is a computationally expensive operation, both signals are
    decimated to 1/4 of their original sampling rate (one sample out of four is kept).
    In addition to that, only the specified time interval (starting at 15 seconds) is used
    for the cross-correlation, assuming that after that time the audio and video contain
    portions of the same content. Only the first channel of each input is analysed.

    Parameters
    ----------
    audio : AudioWave
        The audio file to be used as reference.
    video : AudioWave
        The audio file to be used as target.
    interval : int, optional
        The interval in seconds to be used for the cross-correlation, by default 10

    Returns
    -------
    int
        The offset in milliseconds. It is the lag at which the cross-correlation of
        ``audio`` against ``video`` peaks: a positive value means the common content
        occurs later in ``audio`` than in ``video``.

    Raises
    ------
    ValueError
        If one of the two signals has no samples in the analysed window
        (e.g. it is shorter than 15 seconds).
    """
    # A bare ``import scipy`` does not load the ``signal`` subpackage on every SciPy
    # version, so it is imported explicitly where it is needed.
    from scipy import signal

    window_start = 15  # seconds skipped at the beginning of both signals
    decimation = 4     # keep one sample out of `decimation`

    # Each window is cut using the sample rate of the signal it belongs to.
    audio_rate = audio.samplerate
    video_rate = video.samplerate
    audio_window = audio.get_channel(0)[
        (audio_rate * window_start):(audio_rate * (window_start + interval))]
    video_window = video.get_channel(0)[
        (video_rate * window_start):(video_rate * (window_start + interval))]

    decimated_audio = audio_window.array[::decimation]
    decimated_video = video_window.array[::decimation]

    if len(decimated_audio) == 0 or len(decimated_video) == 0:
        raise ValueError(
            "Cannot compute the offset: no samples available in the analysed window")

    corr = signal.correlate(decimated_audio, decimated_video, mode="full", method="auto")

    # In "full" mode the zero-lag term sits at index len(second input) - 1.
    lag = int(np.argmax(corr)) - (len(decimated_video) - 1)
    offset_ms = lag / (audio_window.samplerate / decimation) * 1000

    # Convert explicitly so that a plain Python int is returned.
    return int(round(float(offset_ms)))
matteospanio's avatar
update    
matteospanio committed
51
52


Matteo's avatar
update    
Matteo committed
53
54
55
56
57
58
59
class BitDepth(Enum):
    """
    PCM sample formats available when extracting audio from a video.

    The value of each member is the codec name passed to ffmpeg as the ``acodec``
    option by get_audio_from_video.
    """
    # signed 8-bit PCM
    PCM_S8 = "pcm_s8"
    # signed 16-bit little-endian PCM
    PCM_S16LE = "pcm_s16le"
    # signed 24-bit little-endian PCM
    PCM_S24LE = "pcm_s24le"
    # signed 32-bit little-endian PCM
    PCM_S32LE = "pcm_s32le"

def get_audio_from_video(video_src: str, samplerate: int, bit_depth: BitDepth) -> AudioWave:
    """
    Extracts the audio from a video file and returns it as an AudioWave object.

    The audio track is transcoded by ffmpeg to a stereo WAV file which is then read back.
    The WAV file is always written to ``audio.wav`` inside ``TMP_FOLDER`` and is
    overwritten on every call.

    Parameters
    ----------
    video_src : str
        The path to the video file.
    samplerate : int
        The sampling rate of the audio output.
    bit_depth : BitDepth
        The bit depth of the audio output.

    Returns
    -------
    AudioWave
        The extracted audio. The number of channels is always 2. The audio is saved as a temporary file.
    """
    # A bare ``import scipy`` does not load ``scipy.io.wavfile`` on every SciPy version,
    # so it is imported explicitly where it is needed.
    from scipy.io import wavfile

    # Number of bits per sample for each supported codec, so that the returned object
    # describes the format that was actually requested instead of always claiming 24 bits.
    # NOTE(review): assumes the second positional argument of AudioWave is the bit depth.
    bits_per_sample = {
        BitDepth.PCM_S8: 8,
        BitDepth.PCM_S16LE: 16,
        BitDepth.PCM_S24LE: 24,
        BitDepth.PCM_S32LE: 32,
    }[bit_depth]
    channels = 2

    # ffmpeg -i video.mov -acodec pcm_s16le -ac 2 audio.wav
    extracted_audio_path = os.path.join(TMP_FOLDER, 'audio.wav')

    in_file = ffmpeg.input(video_src)
    out_file = ffmpeg.output(
        in_file.audio,
        extracted_audio_path,
        ac=channels,
        ar=samplerate,
        acodec=bit_depth.value,
    )
    ffmpeg.run(out_file, quiet=True, overwrite_output=True)

    rate, data = wavfile.read(extracted_audio_path)

    return AudioWave(data, bits_per_sample, channels, rate)
Matteo's avatar
update  
Matteo committed
88
89


Matteo's avatar
update    
Matteo committed
90
def get_irregularities_from_audio(audio_src: AudioWave) -> list[Irregularity]:
    """
    Builds one Irregularity for every silence slice found in each channel of the audio.

    Every channel is scanned separately with ``get_silence_slices`` using three noise
    bands ("A", "B" and "C"). Each slice found yields an Irregularity with a fresh UUID,
    ``Source.AUDIO`` as source and the start of the slice as time label.

    As a side effect, a mapping from irregularity ID to the index of the channel it was
    found in is written as JSON to ``TMP_CHANNELS_MAP``.

    Parameters
    ----------
    audio_src : AudioWave
        The audio to analyse.

    Returns
    -------
    list[Irregularity]
        The irregularities found, in the order in which they were detected.
    """
    # A multi-channel input is split into its channels; otherwise it is used as it is.
    if audio_src.channels > 1:
        per_channel: list[AudioWave] = [
            audio_src.get_channel(number) for number in range(audio_src.channels)
        ]
    else:
        per_channel = [audio_src]

    channel_of = {}
    found: list[Irregularity] = []

    for channel_index, wave in enumerate(per_channel):
        noise_bands = [
            Noise("A", -50, -63),
            Noise("B", -63, -69),
            Noise("C", -69, -72),
        ]
        slices_by_band = wave.get_silence_slices(noise_bands, length=500)

        for _band, slices in slices_by_band.items():
            for first_frame, _last_frame in slices:
                new_id = uuid4()
                label = seconds_to_string(frames_to_seconds(first_frame, wave.samplerate))
                found.append(
                    Irregularity(
                        irregularity_ID=new_id,
                        source=Source.AUDIO,
                        time_label=label,
                    )
                )
                # Remember the channel so that the audio block can later be cut from it.
                channel_of[str(new_id)] = channel_index

    File(TMP_CHANNELS_MAP, FileType.JSON).write_content(channel_of)

    return found

Matteo's avatar
update    
Matteo committed
123
124
125
126

def create_irreg_file(audio_src: str, video_src: str) -> IrregularityFile:
    """
    Creates the IrregularityFile of an audio file paired with a video file.

    The audio track of the video is extracted at the sampling rate of the audio file and
    compared with it to compute the offset; the irregularities are detected on the audio
    file only and sorted by time label.

    Parameters
    ----------
    audio_src : str
        The path to the audio file.
    video_src : str
        The path to the video file.

    Returns
    -------
    IrregularityFile
        The irregularities found in the audio, in chronological order, together with the
        offset between audio and video.
    """

    def _label_in_seconds(irregularity):
        return time_to_seconds(irregularity.time_label)

    audio_wave = AudioWave.from_file(audio_src, bufferize=True)
    video_wave = get_audio_from_video(video_src, audio_wave.samplerate, BitDepth.PCM_S24LE)

    av_offset = calculate_offset(audio_wave, video_wave)

    ordered = sorted(get_irregularities_from_audio(audio_wave), key=_label_in_seconds)

    return IrregularityFile(irregularities=ordered, offset=av_offset)
Matteo's avatar
update    
Matteo committed
135
136
137
138


def merge_irreg_files(
    file1: IrregularityFile,
    file2: IrregularityFile
) -> IrregularityFile:
    """
    Merge two IrregularityFiles into one.

    The irregularities of both files are concatenated and sorted by time label.
    The offset of the new file is the maximum of the two offsets; when one of the two
    offsets is None the other one is used.
    """

    def _label_in_seconds(irregularity):
        return time_to_seconds(irregularity.time_label)

    first_offset = file1.offset
    second_offset = file2.offset

    if first_offset is None:
        merged_offset = second_offset
    elif second_offset is None:
        merged_offset = first_offset
    else:
        merged_offset = max(first_offset, second_offset)

    merged = file1.irregularities + file2.irregularities
    merged.sort(key=_label_in_seconds)

    return IrregularityFile(irregularities=merged, offset=merged_offset)


def extract_audio_irregularities(
    audio_src: str,
    irreg_file: IrregularityFile,
    path: str
) -> IrregularityFile:
    """
    Saves an audio block for every irregularity and records its location.

    For each irregularity a block starting at its time label and lasting
    ``samplerate // 2`` frames is saved as ``<path>/AudioBlocks/<irregularity_ID>.wav``.
    The block is cut from the channel recorded in the channels map stored at
    ``TMP_CHANNELS_MAP``, or from the whole audio when the irregularity has no entry in
    the map. The channels map file is deleted once all the blocks have been saved.

    Parameters
    ----------
    audio_src : str
        The path to the audio file the blocks are cut from.
    irreg_file : IrregularityFile
        The irregularities to extract. It is modified in place.
    path : str
        The folder in which the ``AudioBlocks`` directory is created.

    Returns
    -------
    IrregularityFile
        The same ``irreg_file``, with ``audio_block_URI`` set on every irregularity.
    """
    channel_of = File(TMP_CHANNELS_MAP, FileType.JSON).get_content()
    os.makedirs(f"{path}/AudioBlocks", exist_ok=True)

    wave = AudioWave.from_file(audio_src, bufferize=True)

    for irreg in irreg_file.irregularities:
        channel = channel_of.get(str(irreg.irregularity_ID))
        # Irregularities missing from the map are cut from the full audio.
        source = wave if channel is None else wave.get_channel(channel)

        first_frame = seconds_to_frames(time_to_seconds(irreg.time_label), wave.samplerate)
        block_path = f"{path}/AudioBlocks/{irreg.irregularity_ID}.wav"

        source[first_frame:first_frame + wave.samplerate // 2].save(block_path)
        irreg.audio_block_URI = block_path

    os.remove(TMP_CHANNELS_MAP)

    return irreg_file