Commit 9caed9d6 authored by Mattia Bergagio's avatar Mattia Bergagio
Browse files

Initial commit

parents
Pipeline #36 canceled with stages
# Base image: Ubuntu 20.04, CUDA 11.6.2, cuDNN 8 (runtime flavour).
FROM nvcr.io/nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04

# Settings used by the build steps below and kept in the running container.
ENV TZ='Europe/Rome' \
    BASE_FOLDER='/SPKREC' \
    LOGS_FOLDER='/LOGS' \
    APP_USER='devuser'

# TODO Other packets common to all images go in the install list below.
# Index refresh, upgrade and install share ONE layer: a standalone
# `apt-get update` layer is cached and later installs would run against a
# stale index; cleaning up in the same layer keeps the lists out of the image.
RUN apt-get update --fix-missing && \
    DEBIAN_FRONTEND=noninteractive apt-get upgrade -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
        git \
        python3-dev \
        python3-pip \
        python3.8 \
        tzdata \
    && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    ln -snf "/usr/share/zoneinfo/$TZ" '/etc/localtime' && \
    echo "$TZ" > '/etc/timezone'

# Unprivileged user plus the folders it must own. "$BASE_FOLDER/src" is
# created here so the clone below does not depend on how the builder assigns
# ownership to directories created by WORKDIR.
RUN useradd -m "$APP_USER" && \
    mkdir -p "$BASE_FOLDER/src" "$LOGS_FOLDER" && \
    chown -R "${APP_USER}:${APP_USER}" "$BASE_FOLDER" "$LOGS_FOLDER"

USER "$APP_USER"
WORKDIR "$BASE_FOLDER"
ENV PATH="${PATH}:/root/.local/bin:/home/${APP_USER}/.local/bin"
ENV PYTHONPATH="${PYTHONPATH}:${BASE_FOLDER}"

# Python dependencies are installed before the sources are copied, so this
# layer is reused as long as requirements.txt is unchanged.
COPY --chown="${APP_USER}:${APP_USER}" /requirements.txt ./requirements.txt
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# Private dependency, cloned next to the application sources.
# The credentials are declared as late as possible so that changing them only
# invalidates the layers from here on.
# NOTE(security): values passed with --build-arg remain visible in
# `docker history`. Prefer a BuildKit secret mount (RUN --mount=type=secret)
# once the build pipeline can provide one.
WORKDIR "$BASE_FOLDER"/src
ARG GIT_NAME
ARG GIT_TOKEN
# The remote URL is rewritten in the same layer so the token is not left in
# common_module/.git/config inside the image.
RUN git clone "https://${GIT_NAME}:${GIT_TOKEN}@gitlab.eurixgroup.com/mpai/common_module.git" && \
    git -C common_module remote set-url origin 'https://gitlab.eurixgroup.com/mpai/common_module.git'

# Back to the base folder (absolute path instead of `..`).
WORKDIR "$BASE_FOLDER"
COPY --chown="${APP_USER}:${APP_USER}" /src ./src
CMD ["python3.8", "src/main.py"]
```
# Downloads the pretrained speechbrain/spkrec-ecapa-voxceleb model into
# $PATH_SHARED/models/mmc_aus/speechbrain.
# NOTE(review): run_funs.py builds its model dir as <AI_FW_DIR>/models/mmc_sir;
# confirm that "mmc_aus" is the intended folder name here.
cd $PATH_SHARED
mkdir models
cd models
mkdir mmc_aus
cd mmc_aus
mkdir speechbrain
cd speechbrain
# git-lfs is installed before cloning the model repository
apt install git
apt install git-lfs
git lfs install
git clone https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb
# the clone creates the folder 'spkrec-ecapa-voxceleb'
# rename label_encoder.txt to label_encoder.ckpt
mv spkrec-ecapa-voxceleb/label_encoder.txt spkrec-ecapa-voxceleb/label_encoder.ckpt
# copy the contents of spkrec-ecapa-voxceleb into the current folder (speechbrain)
cp -rf spkrec-ecapa-voxceleb/* .
```
\ No newline at end of file
# Python dependencies of the mmc_sir module (installed with pip -r).
# speechbrain is taken from the moving `develop` branch, so two builds may
# install different code.
# NOTE(review): consider pinning a tag or commit; spkrec_funs.py imports
# speechbrain.inference.speaker, so confirm the pinned revision provides it.
git+https://github.com/speechbrain/speechbrain.git@develop
pika==1.3.1
# NOTE(review): unpinned; presumably the legacy PyPI name of `soundfile` -
# confirm and pin a version.
PySoundFile
torch==2.0.1
torchaudio==2.0.2
typeguard==4.1.5
typing_extensions==4.8.0
\ No newline at end of file
from run_funs import run
try:
from common_utils import adapter
except ModuleNotFoundError:
from common_module.common_utils import adapter
try:
from common_utils import rabbitmq
except ModuleNotFoundError:
from common_module.common_utils import rabbitmq
if __name__ == "__main__":
    # Entry point: register `run` as the callback of the module queue on a
    # RabbitMQ worker, then hand the worker to the adapter and start listening.
    worker = rabbitmq.Worker()
    worker.register_callback(queue="queue_module_mmc_sir", callback=run)
    adapter.Adapter(worker).start_listening()
import os
from pathlib import Path
from typeguard import typechecked
import spkrec_funs
try:
from common_utils import msg_builder, rabbitmq
except ModuleNotFoundError:
from common_module.common_utils import msg_builder, rabbitmq
# Both directories live under the folder named by the AI_FW_DIR environment
# variable; a missing variable raises KeyError at import time.
# TODO: get the video dir from the input msg
base_dir = os.path.join(os.environ["AI_FW_DIR"], "vids")
# TODO: get the model dir from the input msg
model_dir = os.path.join(os.environ["AI_FW_DIR"], "models", "mmc_sir")
# create the model dir (and any missing parents) if it is not there yet
os.makedirs(model_dir, exist_ok=True)
@typechecked
def run(message_body: dict, worker: rabbitmq.Worker) -> bool:
    """
    Queue callback of the mmc_sir module.

    message_body: incoming message.
    worker: RabbitMQ worker, forwarded to msg_builder.build_msg.

    Collects the module settings and the handed-over "programme" keys,
    then delegates to msg_builder.build_msg and returns its result.
    """
    module = "mmc_sir"

    defs = dict(
        # module name in msg
        mod_name=module,
        # metadata key in output msg
        metadata_key="mmc_sir",
        # metadata type in output msg
        metadata_type="mmc_sir",
        # main key in output JSON
        out_json_key="mmc_sir",
        # error msg if output JSON is not found
        not_found_msg="cannot recog speakers!",
        # error msg if input msg is invalid
        invalid_msg="External ID/UID/Application/mmc_aus/SpkrecDataset Required!",
    )

    extras = {"programme": {"module": defs["mod_name"]}}

    if "programme" in message_body:
        programme = message_body["programme"]

        if "external_id" in programme:
            # the output JSON is named after the external ID
            defs["out_json"] = f'{programme["external_id"]}.json'

        # forward every handed-over key that the input msg carries
        extras["programme"].update(
            {
                key: programme[key]
                for key in msg_builder.handed_over_keys()
                if key in programme
            }
        )

    required_keys = ["external_id", "application", "uid", "mmc_aus", "spkrec_dataset"]

    return msg_builder.build_msg(
        message_body,
        worker,
        module,
        spkrec_funs.spkrec_save,
        msg_builder.validate_message,
        required_keys,
        base_dir,
        model_dir,
        defs,
        extras,
    )
import os
import operator
from pathlib import Path
from typing import Dict, List
import numpy as np
from speechbrain.inference.speaker import SpeakerRecognition
from typeguard import typechecked
try:
from common_utils.times import timeit
except ModuleNotFoundError:
from common_module.common_utils.times import timeit
try:
from common_utils.logger import create_logger
except ModuleNotFoundError:
from common_module.common_utils.logger import create_logger
try:
from common_utils.saves import save
except ModuleNotFoundError:
from common_module.common_utils.saves import save
try:
from common_utils.gpus_torch import pick_best_gpu
except ModuleNotFoundError:
from common_module.common_utils.gpus_torch import pick_best_gpu
LOGGER = create_logger(__name__)
def _speaker_id_from_path(wav_path: str, dataset_dir: str) -> str:
    """
    Returns the first path component of wav_path below dataset_dir.

    For <dataset_dir>/<speaker>/.../x.wav this is <speaker>; for a WAV lying
    directly in dataset_dir it is the file name itself.
    os.path.relpath normalises both paths, so a trailing slash or a leading
    "./" in dataset_dir cannot cause a mismatch.
    """
    return Path(os.path.relpath(wav_path, dataset_dir)).parts[0]


@typechecked
def verify_speaker(
    dataset: List[str], out_path: str, model_dir: str
) -> Dict[str, list]:
    """
    dataset: dataset path, as a list of dirs joined below the folder named
    by the AI_FW_DIR environment variable. The WAVs found under it are the
    reference recordings; the first subdir below the dataset dir is used as
    speaker ID.
    out_path: output path of this module. The WAVs to be recognized are read
    from the sibling dir "mmc_aus".
    model_dir: dir the model is saved to.

    Verifies the speaker: every WAV in "mmc_aus" is scored against every
    dataset WAV, and the best-scoring dataset WAV gives the speaker ID.

    Returns {"mmc_sir": [{"segment", "speaker_id", "score"}, ...]} sorted by
    segment. The list is empty if the model cannot be loaded or the dataset
    holds no WAV.
    """
    spkrecs = []

    # path of speechbrain/spkrec-ecapa-voxceleb
    voxceleb_path = os.path.join(model_dir, "speechbrain", "spkrec-ecapa-voxceleb")

    try:
        LOGGER.debug("trying to use GPU")
        # pick best GPU
        dev_idx = pick_best_gpu()
        device = f"cuda:{dev_idx}"
        LOGGER.debug(f"using best GPU = {dev_idx}")
        # https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb
        verification = SpeakerRecognition.from_hparams(
            source="speechbrain/spkrec-ecapa-voxceleb",
            savedir=voxceleb_path,
            run_opts={"device": device},
        )
    except RuntimeError:
        # GPU selection or GPU loading failed: fall back to the default device
        try:
            LOGGER.debug("using CPU")
            # https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb
            verification = SpeakerRecognition.from_hparams(
                source="speechbrain/spkrec-ecapa-voxceleb",
                savedir=voxceleb_path,
            )
        except Exception as err:
            LOGGER.error(f"{err=}")
            return {"mmc_sir": spkrecs}

    # WAVs to be recognized are read from mmc_aus
    mmc_aus_dir = os.path.join(Path(out_path).parent, "mmc_aus")

    # reference WAVs are read from dataset
    # dataset is a list of dirs
    # TODO
    # read AI_FW_DIR from input msg?
    dataset_dir = os.path.join(os.environ["AI_FW_DIR"], *dataset)
    LOGGER.debug(f"{dataset_dir=}")

    LOGGER.debug("dataset: mapping WAV to ID...")
    # map: path of WAV in dataset -> speaker ID
    dataset_map = {"wav": [], "id": []}
    for root, _, filenames in os.walk(dataset_dir):
        for filename in filenames:
            if filename.endswith(".wav"):
                full_path = os.path.join(root, filename)
                dataset_map["wav"].append(full_path)
                dataset_map["id"].append(
                    _speaker_id_from_path(full_path, dataset_dir)
                )
    LOGGER.debug(f"{len(dataset_map['wav'])=}")

    if not dataset_map["wav"]:
        # nothing to compare against: np.argmax would raise on an empty list
        LOGGER.error(f"no WAVs found in dataset {dataset_dir}")
        return {"mmc_sir": spkrecs}

    for mmc_aus_fil in os.listdir(mmc_aus_dir):
        if not mmc_aus_fil.endswith(".wav"):
            continue

        mmc_aus_wav = os.path.join(mmc_aus_dir, mmc_aus_fil)
        LOGGER.debug(f"proc'ing {mmc_aus_wav}...")

        # compute scores
        # float() reads a one-element tensor on any device, so CPU and GPU
        # share the same code path
        scores = [
            float(verification.verify_files(mmc_aus_wav, dataset_wav)[0])
            for dataset_wav in dataset_map["wav"]
        ]

        score_argmax = int(np.argmax(scores))
        LOGGER.debug(f"{score_argmax=}")

        spkrecs.append(
            {
                # read segm no. from filename: "...split.<segment>.wav"
                "segment": int(mmc_aus_fil.split("split.")[1].split(".wav")[0]),
                "speaker_id": dataset_map["id"][score_argmax],
                "score": scores[score_argmax],
            }
        )

    return {"mmc_sir": sorted(spkrecs, key=operator.itemgetter("segment"))}
@typechecked
def verify_speaker_save(
    dataset: List[str], json_path: str, out_path: str, model_dir: str
):
    """
    dataset: list of dirs leading to the dataset WAVs.
    json_path: JSON the annotation is saved to.
    out_path: forwarded to verify_speaker.
    model_dir: dir the model is saved to.

    Recognizes speakers with verify_speaker and saves the result to JSON.
    Returns None.
    """
    save(verify_speaker(dataset, out_path, model_dir), json_path)
@typechecked
def spkrec_save(
    message_body: dict,
    out_json: str,
    out_path: str,
    base_dir: str,
    model_dir: str,
) -> bool:
    """
    message_body: input msg; reads programme.mmc_aus.segments and
    programme.spkrec_dataset.
    out_json: JSON the output is saved to.
    out_path: forwarded to verify_speaker_save.
    base_dir: base dir (not used here).
    model_dir: dir the model is saved to.

    Recognizes speakers if the diarization segments are well-formed.
    Returns True on success, False if the segments are rejected.
    """
    diar_out = message_body["programme"]["mmc_aus"]["segments"]

    # segments are accepted if they come as a list
    # whose items all carry these keys
    required_keys = ("start", "end", "label")
    segments_ok = isinstance(diar_out, list) and all(
        [all(key in voice for key in required_keys) for voice in diar_out]
    )

    # -1 = failure, 0 = success
    ret_code = -1
    if segments_ok:
        # recog speakers
        verify_speaker_save(
            message_body["programme"]["spkrec_dataset"],
            out_json,
            out_path,
            model_dir,
        )
        ret_code = 0

    LOGGER.debug(f"return code: {ret_code}")

    # success if ret_code = 0
    return ret_code == 0
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment