Commit 2c57ee0e authored by Mattia Bergagio's avatar Mattia Bergagio
Browse files

Initial commit

parents
Pipeline #35 canceled with stages
# Speaker-diarization worker image.
# Base: Ubuntu 20.04, CUDA 11.6.2, cuDNN 8 (runtime flavour).
FROM nvcr.io/nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04

# Time zone, application root, log root and unprivileged user name.
ENV TZ='Europe/Rome'
ENV BASE_FOLDER='/DIARIZ'
ENV LOGS_FOLDER='/LOGS'
ENV APP_USER='devuser'

# OS packages.
# update, upgrade and install share ONE layer so a cached, stale package index
# can never be combined with a fresh install step.
# libsndfile1 fixes "OSError: sndfile library not found".
# DEBIAN_FRONTEND is set inline so it does not leak into the runtime env.
# TODO Other packets common to all images go in the install list.
RUN apt-get update --fix-missing && \
    DEBIAN_FRONTEND=noninteractive apt-get upgrade -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
        ffmpeg \
        git \
        libsndfile1 \
        python3-dev \
        python3-pip \
        python3.8 \
        tzdata \
    && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    ln -snf "/usr/share/zoneinfo/$TZ" '/etc/localtime' && \
    echo "$TZ" > '/etc/timezone'

# Unprivileged user plus the folders it must own.
# "$BASE_FOLDER/src" is created here (as root, then chown'ed) so that the
# later clone does not depend on which owner WORKDIR gives a new directory.
RUN useradd -m "$APP_USER" && \
    mkdir -p "$BASE_FOLDER/src" "$LOGS_FOLDER" && \
    chown -R "${APP_USER}:${APP_USER}" "$BASE_FOLDER" "$LOGS_FOLDER"

USER "$APP_USER"
WORKDIR "$BASE_FOLDER"

ENV PATH="${PATH}:/root/.local/bin:/home/${APP_USER}/.local/bin"
ENV PYTHONPATH="${PYTHONPATH}:${BASE_FOLDER}"

# Python dependencies first: this layer is reused until requirements.txt changes.
COPY --chown="${APP_USER}:${APP_USER}" /requirements.txt ./requirements.txt
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# Private helper package, cloned next to the application sources.
WORKDIR "$BASE_FOLDER"/src

# The credentials are declared only here, so earlier layers neither see them
# nor get their cache invalidated when the token changes.
# NOTE(review): build args still show up in `docker history`; prefer
# `RUN --mount=type=secret` once the build scripts can pass --secret.
ARG GIT_NAME
ARG GIT_TOKEN

# The remote URL is reset in the same layer so that the token is not left
# behind in common_module/.git/config inside the image.
RUN git clone "https://${GIT_NAME}:${GIT_TOKEN}@gitlab.eurixgroup.com/mpai/common_module.git" && \
    git -C common_module remote set-url origin 'https://gitlab.eurixgroup.com/mpai/common_module.git'

# Back to the application root (absolute path instead of "..").
WORKDIR "$BASE_FOLDER"

# Application sources; they land beside the cloned common_module.
COPY --chown="${APP_USER}:${APP_USER}" /src ./src

CMD ["python3.8", "src/main.py"]
\ No newline at end of file
```
# Downloads the two local models the diarization pipeline loads:
#   models/mmc_aus/speechbrain/spkrec-ecapa-voxceleb  (speaker embedding)
#   models/mmc_aus/segmentation                       (pyannote segmentation)
# NOTE(review): $PATH_SHARED is presumably the folder the service sees as
# AI_FW_DIR -- confirm against the deployment.
cd $PATH_SHARED
mkdir models
cd models
mkdir mmc_aus
cd mmc_aus
mkdir speechbrain
cd speechbrain
# git-lfs is needed because the model weights are stored with Git LFS
apt install git
apt install git-lfs
git lfs install
# embedding: speechbrain/spkrec-ecapa-voxceleb
git clone https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb
# the clone creates the folder 'spkrec-ecapa-voxceleb'
# rename label_encoder.txt to label_encoder.ckpt
mv spkrec-ecapa-voxceleb/label_encoder.txt spkrec-ecapa-voxceleb/label_encoder.ckpt
# also copy the content of spkrec-ecapa-voxceleb into the folder 'speechbrain'
cp -rf spkrec-ecapa-voxceleb/* .
# segmentation: pyannote/segmentation@2022.07
cd $PATH_SHARED
cd models
cd mmc_aus
git clone https://huggingface.co/pyannote/segmentation
# the clone creates the folder 'segmentation'
```
# Template of the pyannote speaker-diarization pipeline configuration.
# 'voxceleb_path' and 'bin_path' are placeholders, not real paths: mk_pipeline
# replaces them with the local model locations and loads the rendered copy
# (new_diar_conf.yaml) instead of this file.
pipeline:
name: pyannote.audio.pipelines.SpeakerDiarization
params:
clustering: AgglomerativeClustering
# placeholder -> <model_dir>/speechbrain/spkrec-ecapa-voxceleb
embedding: voxceleb_path
embedding_batch_size: 32
embedding_exclude_overlap: true
# placeholder -> <model_dir>/segmentation/pytorch_model.bin
segmentation: bin_path
segmentation_batch_size: 32
# hyper-parameter values for the clustering and segmentation steps
params:
clustering:
method: centroid
min_cluster_size: 15
threshold: 0.7153814381597874
segmentation:
min_duration_off: 0.5817029604921046
threshold: 0.4442333667381752
\ No newline at end of file
aiohttp==3.8.5
aiosignal==1.3.1
alembic==1.12.0
annotated-types==0.5.0
antlr4-python3-runtime==4.9.3
anyio==3.7.1
appdirs==1.4.4
arrow==1.2.3
asteroid-filterbanks==0.4.0
async-timeout==4.0.3
attrs==23.1.0
audioread==3.0.1
backoff==2.2.1
beautifulsoup4==4.12.2
blessed==1.20.0
certifi==2022.12.7
cffi==1.15.1
charset-normalizer==3.1.0
click==8.1.7
cmaes==0.10.0
cmake==3.27.5
colorama==0.4.6
coloredlogs==15.0.1
colorlog==6.7.0
croniter==1.4.1
dateutils==0.6.12
decorator==5.1.1
deepdiff==6.5.0
docopt==0.6.2
einops==0.6.1
exceptiongroup==1.1.3
fastapi==0.103.2
fastjsonschema==2.16.3
ffmpeg-python==0.2.0
filelock==3.11.0
flatbuffers==23.5.26
frozenlist==1.4.0
fsspec==2023.9.2
future==0.18.3
greenlet==2.0.2
h11==0.14.0
huggingface-hub==0.13.0
humanfriendly==10.0
HyperPyYAML==1.2.2
idna==3.4
importlib-metadata==7.0.0
importlib-resources==6.1.1
inquirer==3.1.3
itsdangerous==2.1.2
Jinja2==3.1.2
joblib==1.3.2
jsonpickle==3.0.1
jsonschema==4.3.3
julius==0.2.7
kiwisolver==1.4.5
lazy-loader==0.3
libclang==16.0.0
librosa==0.10.0.post2
lightning-cloud==0.5.39
lightning-utilities==0.9.0
lightning==2.0.9.post0
lit==17.0.1
llvmlite==0.41.0
Mako==1.2.4
markdown-it-py==3.0.0
MarkupSafe==2.1.2
matplotlib==3.7.4
mdurl==0.1.2
mpmath==1.3.0
msgpack==1.0.7
multidict==6.0.4
networkx==3.1
numba==0.58.0
numpy==1.24.2
nvidia-cublas-cu11==11.10.3.66
nvidia-cuda-cupti-cu11==11.7.101
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cudnn-cu11==8.5.0.96
nvidia-cufft-cu11==10.9.0.58
nvidia-curand-cu11==10.2.10.91
nvidia-cusolver-cu11==11.4.0.1
nvidia-cusparse-cu11==11.7.4.91
nvidia-nccl-cu11==2.14.3
nvidia-nvtx-cu11==11.7.91
omegaconf==2.3.0
onnxruntime-gpu==1.16.0
optuna==3.3.0
ordered-set==4.1.0
packaging==23.1
pika==1.3.1
Pillow==10.0.1
platformdirs==3.10.0
pooch==1.6.0
primePy==1.3
protobuf==4.24.3
psutil==5.9.5
pyannote.audio==3.0.1
pyannote.core==5.0.0
pyannote.database==5.0.1
pyannote.metrics==3.2.1
pyannote.pipeline==3.0.1
pycparser==2.21
pydantic-core==2.4.0
pydantic==2.1.1
Pygments==2.16.1
PyJWT==2.8.0
pyparsing==3.1.1
pyrsistent==0.19.3
python-dateutil==2.8.2
python-editor==1.0.4
python-multipart==0.0.6
pytorch-lightning==2.0.9.post0
pytorch-metric-learning==2.3.0
pytz==2023.3.post1
PyYAML==6.0
readchar==4.0.5
regex==2023.3.23
requests==2.28.2
rich==13.5.3
ruamel.yaml.clib==0.2.7
ruamel.yaml==0.17.33
scikit-learn==1.3.1
scipy==1.10.1
semver==3.0.1
sentencepiece==0.1.99
shellingham==1.5.3
six==1.16.0
sniffio==1.3.0
sortedcontainers==2.4.0
soundfile==0.12.1
soupsieve==2.5
soxr==0.3.4
speechbrain==0.5.15
SQLAlchemy==2.0.21
starlette==0.27.0
starsessions==1.3.0
sympy==1.11.1
tabulate==0.9.0
tensorboardX==2.6.2.2
threadpoolctl==3.2.0
tokenizers==0.13.2
torch-audiomentations==0.11.0
torch-pitch-shift==1.2.4
torch==2.0.1
torchaudio==2.0.2
torchmetrics==1.2.0
torchsummary==1.5.1
tqdm==4.65.0
traitlets==5.10.1
transformers==4.26.1
triton==2.0.0
typeguard==4.1.5
typer==0.9.0
typing_extensions==4.8.0
tzdata==2023.3
uri==2.0.1
urllib3==1.26.15
uvicorn==0.23.2
wcwidth==0.2.7
websocket-client==1.6.3
websockets==11.0.3
wincertstore==0.2
yarl==1.9.2
zipp==3.17.0
import os
from pathlib import Path
import subprocess
from typing import Dict, List
import torch
from pyannote.audio import core, Pipeline
from typeguard import typechecked
import util_funs
try:
from common_utils.logger import create_logger
except ModuleNotFoundError:
from common_module.common_utils.logger import create_logger
try:
from common_utils.saves import save
except ModuleNotFoundError:
from common_module.common_utils.saves import save
try:
from common_utils.gpus_torch import pick_best_gpu
except ModuleNotFoundError:
from common_module.common_utils.gpus_torch import pick_best_gpu
LOGGER = create_logger(__name__)
def mk_pipeline(model_dir: str, conf_dir: str) -> Pipeline:
    """
    Builds the mmc_aus diarization pipeline from locally stored models.

    model_dir: dir the models are saved to; it is also installed as the
        pyannote cache dir.
    conf_dir: dir diar_conf.yaml is saved to; the rendered files
        tmp_diar_conf.yaml and new_diar_conf.yaml are written there too.

    Returns the pipeline, moved to the GPU chosen by pick_best_gpu, or to the
    CPU if choosing/using the GPU raises RuntimeError.
    Raises KeyError if HUGGINGFACE_TOKEN is not in the environment.
    """
    hf_token = os.environ["HUGGINGFACE_TOKEN"]
    LOGGER.debug(f"pwd = {os.getcwd()}")

    # SpeechBrain_EncoderClassifier uses CACHE_DIR
    # https://github.com/pyannote/pyannote-audio/blob/a810a5a53ac6e241606fd4ec822ea842f4c0a9b5/pyannote/audio/pipelines/speaker_verification.py#L262
    # CACHE_DIR is set here:
    # https://github.com/pyannote/pyannote-audio/blob/a810a5a53ac6e241606fd4ec822ea842f4c0a9b5/pyannote/audio/core/model.py#L56
    # so both the env var and the already-imported module constant are updated.
    os.environ["PYANNOTE_CACHE"] = model_dir
    LOGGER.debug(f'{os.environ["PYANNOTE_CACHE"]=}')
    LOGGER.debug(f"def: {core.model.CACHE_DIR=}")
    core.model.CACHE_DIR = os.environ["PYANNOTE_CACHE"]
    LOGGER.debug(f"upd: {core.model.CACHE_DIR=}")

    # local model locations that go into the YML
    bin_path = os.path.join(model_dir, "segmentation", "pytorch_model.bin")
    voxceleb_path = os.path.join(model_dir, "speechbrain", "spkrec-ecapa-voxceleb")

    LOGGER.debug("loading model from local dir...")

    # TODO
    # upgrade to pyannote/speaker-diarization-3.0
    # speaker_mmc_aus = Pipeline.from_pretrained(
    #     "pyannote/speaker-diarization@2.1", use_auth_token=hf_token
    # )

    # template -> intermediate file -> final file, one placeholder per step
    template_yml = os.path.join(conf_dir, "diar_conf.yaml")
    interim_yml = os.path.join(conf_dir, "tmp_diar_conf.yaml")
    rendered_yml = os.path.join(conf_dir, "new_diar_conf.yaml")
    util_funs.replace_str_in_fil(template_yml, interim_yml, "bin_path", bin_path)
    util_funs.replace_str_in_fil(
        interim_yml, rendered_yml, "voxceleb_path", voxceleb_path
    )

    # dump the rendered YML to the debug log, line by line
    with open(rendered_yml, "r") as rendered:
        for yml_line in rendered:
            LOGGER.debug(yml_line)

    speaker_mmc_aus = Pipeline.from_pretrained(rendered_yml, use_auth_token=hf_token)

    try:
        # prefer the best GPU
        dev_idx = pick_best_gpu()
        LOGGER.debug(f"using best GPU = {dev_idx}")
        speaker_mmc_aus = speaker_mmc_aus.to(torch.device(f"cuda:{dev_idx}"))
    except RuntimeError as gpu_err:
        # no usable GPU: fall back to CPU
        LOGGER.debug(f"Unexpected {gpu_err=}, {type(gpu_err)=}")
        LOGGER.debug("using CPU")
        speaker_mmc_aus = speaker_mmc_aus.to(torch.device("cpu"))

    return speaker_mmc_aus
@typechecked
def diarize(
    audio: str, model_dir: str, conf_dir: str, out_path: str
) -> Dict[str, List]:
    """
    Diarizes audio and cuts one wav per detected speech turn.

    audio: path of the audio file.
    model_dir: dir the model is saved to.
    conf_dir: dir diar_conf.yaml is saved to.
    out_path: dir the per-turn files split.<index>.wav are written to.

    Returns {"voices": [{"start": ..., "end": ..., "label": ...}, ...]}.
    Raises RuntimeError if an ffmpeg call exits with an error.
    """
    pipeline = mk_pipeline(model_dir, conf_dir)

    # num_speakers, min_speakers, max_speakers
    # can be set if they are known
    annotation = pipeline(
        audio, num_speakers=None, min_speakers=None, max_speakers=None
    )

    voices = []
    turns = annotation.itertracks(yield_label=True)
    for turn_idx, (segment, _, label) in enumerate(turns):
        voices.append({"start": segment.start, "end": segment.end, "label": label})

        # stream-copy the turn out of the source file
        clip_path = os.path.join(out_path, f"split.{turn_idx}.wav")
        duration = segment.end - segment.start
        ffmpeg_cmd = ["ffmpeg", "-ss", str(segment.start), "-i", audio]
        ffmpeg_cmd += ["-t", str(duration), "-c", "copy", clip_path]

        try:
            subprocess.check_output(ffmpeg_cmd)
        except subprocess.CalledProcessError as err:
            raise RuntimeError(f"FFMPEG error {str(err)}")

    return {"voices": voices}
@typechecked
def diarize_save(
    audio: str, out_json: str, out_path: str, model_dir: str, conf_dir: str
) -> None:
    """
    Diarizes audio, logs the annotation and saves it.

    audio: path of the audio file.
    out_json: JSON the annotation is saved to.
    out_path: dir handed to diarize for the per-turn wav files.
    model_dir: dir the model is saved to.
    conf_dir: dir diar_conf.yaml is saved to.
    """
    LOGGER.info(f"diarizing {audio}...")

    annotation = diarize(
        audio=audio, model_dir=model_dir, conf_dir=conf_dir, out_path=out_path
    )
    LOGGER.info(annotation)

    save(annotation, out_json)
@typechecked
def dl_diarize_save(
    message_body: dict,
    out_json: str,
    out_path: str,
    base_dir: str,
    model_dir: str,
) -> bool:
    """
    Locates the programme's wav under base_dir, diarizes it and saves the output.

    message_body: msg body; message_body["programme"] must hold "uid" and
        "external_id", plus "conf_dir" when the wav exists.
    out_json: JSON the output is saved to.
    out_path: dir handed on to diarize_save.
    base_dir: base dir; the wav is expected at
        <base_dir>/<uid>/<external_id>.wav.
    model_dir: dir the model is saved to.

    Returns True if the wav was found and processed, False if it is missing.
    """
    programme = message_body["programme"]
    aud_path = os.path.join(base_dir, programme["uid"], programme["external_id"])
    wav_path = f"{aud_path}.wav"

    if Path(wav_path).is_file():
        diarize_save(wav_path, out_json, out_path, model_dir, programme["conf_dir"])
        # success
        ret_code = 0
    else:
        # wav is not available: failure
        LOGGER.error("Wav is not available")
        ret_code = 2

    LOGGER.debug(f"return code: {ret_code}")

    # success only if ret_code = 0
    return ret_code == 0
from run_funs import run
try:
from common_utils import adapter
except ModuleNotFoundError:
from common_module.common_utils import adapter
try:
from common_utils import rabbitmq
except ModuleNotFoundError:
from common_module.common_utils import rabbitmq
if __name__ == "__main__":
    # Wire the diarization callback to its queue, then listen for messages.
    mq_worker = rabbitmq.Worker()
    mq_worker.register_callback(queue="queue_module_mmc_aus", callback=run)
    adapter.Adapter(mq_worker).start_listening()
import os
from pathlib import Path
from typeguard import typechecked
import diariz_funs
try:
from common_utils import msg_builder, rabbitmq
except ModuleNotFoundError:
from common_module.common_utils import msg_builder, rabbitmq
# Module-level paths, resolved once at import time from AI_FW_DIR.
# Importing this module raises KeyError if AI_FW_DIR is not set.
# TODO
# get vid_dir from input msg
# base dir of the input media: <AI_FW_DIR>/vids
base_dir = os.path.join(os.environ["AI_FW_DIR"], "vids")
# TODO
# get model_dir from input msg
# dir of the mmc_aus models: <AI_FW_DIR>/models/mmc_aus (created if missing)
model_dir = os.path.join(os.environ["AI_FW_DIR"], "models", "mmc_aus")
Path(model_dir).mkdir(parents=True, exist_ok=True)
# TODO
# get conf_dir from input msg
# dir of the mmc_aus configuration files: <AI_FW_DIR>/confs/mmc_aus
conf_dir = os.path.join(os.environ["AI_FW_DIR"], "confs", "mmc_aus")
@typechecked
def run(message_body: dict, worker: rabbitmq.Worker) -> bool:
# Queue callback of the mmc_aus module: prepares the module settings and the
# extra output fields, then returns whatever msg_builder.build_msg returns.
# message_body: msg body; its "programme" entry is read and extended here.
# worker: RabbitMQ worker, handed on to msg_builder.build_msg.
# NOTE(review): what build_msg does with these arguments is not visible
# here -- see common_utils.msg_builder.
defs = {
# module name in msg
"mod_name": "mmc_aus",
# metadata key in output msg
"metadata_key": "segments",
# metadata type in output msg
"metadata_type": "mmc_aus",
# main key in output JSON
"out_json_key": "voices",
# error msg if output JSON is not found
"not_found_msg": "cannot diarize!",
# error msg if input msg is invalid
"invalid_msg": "External ID/UID/Application Required!",
}
# "out_json" is only added when the msg carries programme.external_id
if "programme" in message_body:
if "external_id" in message_body["programme"]:
# name of output JSON
defs["out_json"] = f'{message_body["programme"]["external_id"]}.json'
# extra fields for the output msg: the module name plus every
# handed-over key that is present in the input programme
extras = {"programme": {"module": defs["mod_name"]}}
if "programme" in message_body:
for k in msg_builder.handed_over_keys():
if k in message_body["programme"]:
extras["programme"][k] = message_body["programme"][k]
# conf_dir travels inside the msg; dl_diarize_save reads it from there
# NOTE(review): indentation is lost in this view; presumably this assignment
# sits inside the `if "programme" in message_body` guard -- confirm
message_body["programme"]["conf_dir"] = conf_dir
return msg_builder.build_msg(
message_body,
worker,
"mmc_aus",
diariz_funs.dl_diarize_save,
msg_builder.validate_message,
["external_id", "application", "uid"],
base_dir,
model_dir,
defs,
extras,
)
from typeguard import typechecked
@typechecked
def replace_str_in_fil(old_fil: str, new_fil: str, tgt: str, repl: str) -> None:
    """
    Replaces every occurrence of tgt in old file and saves the upd'ed file.

    old_fil: old file (read only; left untouched unless new_fil is the
        same path).
    new_fil: upd'ed file (created or overwritten).
    tgt: str to be replaced.
    repl: replacement.

    Both files are handled as UTF-8 text, so the result does not depend on
    the locale of the machine or container the code runs in.
    """
    # read old file
    with open(old_fil, "r", encoding="utf-8") as src:
        content = src.read()

    # replace target str and save upd'ed file
    with open(new_fil, "w", encoding="utf-8") as dst:
        dst.write(content.replace(tgt, repl))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment