Initial commit

9caed9d6 · Mattia Bergagio · 9caed9d6 · 9caed9d6 · 9caed9d6 · 9caed9d6
Commit 9caed9d6 authored Sep 23, 2024 by Mattia Bergagio
--- a/Dockerfile
+++ b/Dockerfile
+
+# Base image: Ubuntu 20.04, CUDA 11.6.2, cuDNN 8 (runtime flavour).
+FROM nvcr.io/nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04
+
+# Time zone, in-container app/log folders, and the unprivileged runtime user.
+ENV TZ='Europe/Rome'
+ENV BASE_FOLDER='/SPKREC'
+ENV LOGS_FOLDER='/LOGS'
+ENV APP_USER='devuser'
+
+# Refresh the package index and upgrade in the same layer, so that the upgrade
+# never runs against a stale, cached index. noninteractive: no prompts at build.
+RUN apt-get update --fix-missing && \
+  DEBIAN_FRONTEND=noninteractive apt-get upgrade -y
+
+# System packages; the apt lists are removed in the same layer to keep it small.
+# The time zone is then set from $TZ.
+RUN \
+  apt-get update && \
+  DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+    python3.8 \
+    python3-dev \
+    python3-pip \
+    tzdata \
+    git \
+    # TODO Other packets common to all images go in here
+  && \
+  apt-get clean && \
+  rm -rf /var/lib/apt/lists/* && \
+  ln -snf "/usr/share/zoneinfo/$TZ" '/etc/localtime' && \
+  echo "$TZ" > '/etc/timezone'
+
+# Create the runtime user and hand the app and log folders over to it.
+RUN useradd -m "$APP_USER" && \
+  mkdir -p "$BASE_FOLDER" && \
+  chown -R "${APP_USER}:${APP_USER}" "$BASE_FOLDER" && \
+  mkdir -p "$LOGS_FOLDER" && \
+  chown -R "${APP_USER}:${APP_USER}" "$LOGS_FOLDER"
+
+# Everything below runs as the unprivileged user.
+USER "$APP_USER"
+WORKDIR "$BASE_FOLDER"
+
+# pip installs user-level scripts into ~/.local/bin.
+ENV PATH="${PATH}:/root/.local/bin:/home/${APP_USER}/.local/bin"
+ENV PYTHONPATH="${PYTHONPATH}:${BASE_FOLDER}"
+
+# Requirements are copied on their own so that the pip layer is rebuilt only
+# when requirements.txt changes.
+COPY --chown="${APP_USER}:${APP_USER}" /requirements.txt ./requirements.txt
+
+RUN python3 -m pip install --no-cache-dir -r requirements.txt
+
+WORKDIR "$BASE_FOLDER"/src
+
+# Credentials for the private common_module repo.
+# Declared right before their only use: every RUN that follows an ARG receives
+# it as an environment variable and misses the cache when its value changes.
+# NOTE(review): build args remain visible in `docker history`; consider a
+# BuildKit secret mount (RUN --mount=type=secret) for the token.
+ARG GIT_NAME
+ARG GIT_TOKEN
+
+# Clone, then drop the credentials from the remote URL so that the token is not
+# kept in common_module/.git/config inside the image.
+RUN git clone "https://${GIT_NAME}:${GIT_TOKEN}@gitlab.eurixgroup.com/mpai/common_module.git" && \
+  git -C common_module remote set-url origin 'https://gitlab.eurixgroup.com/mpai/common_module.git'
+
+# App sources go next to the cloned common_module, in $BASE_FOLDER/src.
+WORKDIR "$BASE_FOLDER"
+COPY --chown="${APP_USER}:${APP_USER}" /src ./src
+
+CMD ["python3.8", "src/main.py"]
+
+
--- a/README.md
+++ b/README.md
+```
+cd $PATH_SHARED
+mkdir models
+cd models
+mkdir mmc_aus
+cd mmc_aus
+mkdir speechbrain
+cd speechbrain
+apt install git
+apt install git-lfs
+git lfs install
+git clone https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb 
+# The clone creates the folder 'spkrec-ecapa-voxceleb'.
+# Rename label_encoder.txt to label_encoder.ckpt:
+mv spkrec-ecapa-voxceleb/label_encoder.txt spkrec-ecapa-voxceleb/label_encoder.ckpt
+# Copy the contents of spkrec-ecapa-voxceleb into the current folder (speechbrain):
+cp -rf spkrec-ecapa-voxceleb/* .
+```
\ No newline at end of file
--- a/requirements.txt
+++ b/requirements.txt
+git+https://github.com/speechbrain/speechbrain.git@develop
+pika==1.3.1
+PySoundFile
+torch==2.0.1
+torchaudio==2.0.2
+typeguard==4.1.5
+typing_extensions==4.8.0
\ No newline at end of file
--- a/src/main.py
+++ b/src/main.py
+from run_funs import run
+
+try:
+    from common_utils import adapter
+except ModuleNotFoundError:
+    from common_module.common_utils import adapter
+
+try:
+    from common_utils import rabbitmq
+except ModuleNotFoundError:
+    from common_module.common_utils import rabbitmq
+
+if __name__ == "__main__":
+    Worker = rabbitmq.Worker()
+    Worker.register_callback(queue="queue_module_mmc_sir", callback=run)
+    this_adapter = adapter.Adapter(Worker)
+    this_adapter.start_listening()
--- a/src/run_funs.py
+++ b/src/run_funs.py
+import os
+from pathlib import Path
+
+from typeguard import typechecked
+
+import spkrec_funs
+
+try:
+    from common_utils import msg_builder, rabbitmq
+except ModuleNotFoundError:
+    from common_module.common_utils import msg_builder, rabbitmq
+
+# TODO
+# get vid_dir from input msg
+base_dir = os.path.join(os.environ["AI_FW_DIR"], "vids")
+
+# TODO
+# get model_dir from input msg
+model_dir = os.path.join(os.environ["AI_FW_DIR"], "models", "mmc_sir")
+Path(model_dir).mkdir(parents=True, exist_ok=True)
+
+
+@typechecked
+def run(message_body: dict, worker: rabbitmq.Worker) -> bool:
+    defs = {
+        # module name in msg
+        "mod_name": "mmc_sir",
+        # metadata key in output msg
+        "metadata_key": "mmc_sir",
+        # metadata type in output msg
+        "metadata_type": "mmc_sir",
+        # main key in output JSON
+        "out_json_key": "mmc_sir",
+        # error msg if output JSON is not found
+        "not_found_msg": "cannot recog speakers!",
+        # error msg if input msg is invalid
+        "invalid_msg": "External ID/UID/Application/mmc_aus/SpkrecDataset Required!",
+    }
+
+    extras = {"programme": {"module": defs["mod_name"]}}
+
+    if "programme" in message_body:
+        if "external_id" in message_body["programme"]:
+            # name of output JSON
+            defs["out_json"] = f'{message_body["programme"]["external_id"]}.json'
+
+        for k in msg_builder.handed_over_keys():
+            if k in message_body["programme"]:
+                extras["programme"][k] = message_body["programme"][k]
+
+    return msg_builder.build_msg(
+        message_body,
+        worker,
+        "mmc_sir",
+        spkrec_funs.spkrec_save,
+        msg_builder.validate_message,
+        ["external_id", "application", "uid", "mmc_aus", "spkrec_dataset"],
+        base_dir,
+        model_dir,
+        defs,
+        extras,
+    )
--- a/src/spkrec_funs.py
+++ b/src/spkrec_funs.py
+import os
+import operator
+from pathlib import Path
+from typing import Dict, List
+
+import numpy as np
+from speechbrain.inference.speaker import SpeakerRecognition
+from typeguard import typechecked
+
+try:
+    from common_utils.times import timeit
+except ModuleNotFoundError:
+    from common_module.common_utils.times import timeit
+
+try:
+    from common_utils.logger import create_logger
+except ModuleNotFoundError:
+    from common_module.common_utils.logger import create_logger
+
+try:
+    from common_utils.saves import save
+except ModuleNotFoundError:
+    from common_module.common_utils.saves import save
+
+try:
+    from common_utils.gpus_torch import pick_best_gpu
+except ModuleNotFoundError:
+    from common_module.common_utils.gpus_torch import pick_best_gpu
+
+LOGGER = create_logger(__name__)
+
+
+@typechecked
+def verify_speaker(
+    dataset: List[str], out_path: str, model_dir: str
+) -> Dict[str, list]:
+    """
+    dataset: dataset path. The dataset is a list of dirs containing WAVs.
+
+    Verifies the speaker.
+    """
+    spkrecs = []
+
+    # path of speechbrain/spkrec-ecapa-voxceleb
+    voxceleb_path = os.path.join(model_dir, "speechbrain", "spkrec-ecapa-voxceleb")
+
+    try:
+        LOGGER.debug("trying to use GPU")
+
+        # pick best GPU
+        dev_idx = pick_best_gpu()
+        device = f"cuda:{dev_idx}"
+        LOGGER.debug(f"using best GPU = {dev_idx}")
+
+        # https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb
+        verification = SpeakerRecognition.from_hparams(
+            source="speechbrain/spkrec-ecapa-voxceleb",
+            savedir=voxceleb_path,
+            run_opts={"device": device},
+        )
+
+    except RuntimeError:
+        try:
+            device = "cpu"
+            LOGGER.debug("using CPU")
+
+            # https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb
+            verification = SpeakerRecognition.from_hparams(
+                source="speechbrain/spkrec-ecapa-voxceleb",
+                savedir=voxceleb_path,
+            )
+
+        except Exception as err:
+            LOGGER.error(f"{err=}")
+            return {"mmc_sir": spkrecs}
+
+    # read WAVs from mmc_aus
+    mmc_asr_out_path = os.path.join(Path(out_path).parent, "mmc_aus")
+
+    # read WAVs from dataset
+    # dataset is a list of dirs
+
+    # TODO
+    # read AI_FW_DIR from input msg?
+    dataset_dir = os.path.join(os.environ["AI_FW_DIR"], *dataset)
+    LOGGER.debug(f"{dataset_dir=}")
+
+    LOGGER.debug("dataset: mapping WAV to ID...")
+    # map: path of WAV in dataset -> speaker ID
+    dataset_map = {"wav": [], "id": []}
+    for root, _, filnames in os.walk(dataset_dir):
+        for filname in filnames:
+            if filname.endswith(".wav"):
+                full_path = os.path.join(root, filname)
+                tmp_path = full_path
+                # get immediate subdir below dataset_dir
+                # this subdir is named after speaker ID
+                while tmp_path != dataset_dir:
+                    old_tmp_path = tmp_path
+                    tmp_path = str(Path(tmp_path).parent)
+
+                # get speaker ID using os.path.basename
+                dataset_map["wav"] += [full_path]
+                dataset_map["id"] += [os.path.basename(old_tmp_path)]
+
+    LOGGER.debug(f"{len(dataset_map['wav'])=}")
+
+    for mmc_asr_out_fil in os.listdir(mmc_asr_out_path):
+        mmc_asr_out = os.path.join(mmc_asr_out_path, mmc_asr_out_fil)
+        if mmc_asr_out_fil.endswith(".wav"):
+            LOGGER.debug(f"proc'ing {mmc_asr_out}...")
+
+            # compute scores
+            if device.startswith("cuda"):
+                # torch.Tensor -> np.array
+                scores = [
+                    verification.verify_files(mmc_asr_out, dataset_wav)[0].cpu().numpy()
+                    for dataset_wav in dataset_map["wav"]
+                ]
+
+            elif device == "cpu":
+                scores = [
+                    verification.verify_files(mmc_asr_out, dataset_wav)[0]
+                    for dataset_wav in dataset_map["wav"]
+                ]
+
+            score_argmax = np.argmax(scores)
+
+            LOGGER.debug(f"{score_argmax=}")
+            spkrecs.append(
+                {
+                    # read segm no. from filename
+                    "segment": int(mmc_asr_out_fil.split("split.")[1].split(".wav")[0]),
+                    "speaker_id": dataset_map["id"][score_argmax],
+                    "score": float(np.max(scores)),
+                }
+            )
+
+    return {"mmc_sir": sorted(spkrecs, key=operator.itemgetter("segment"))}
+
+
+@typechecked
+def verify_speaker_save(
+    dataset: List[str], json_path: str, out_path: str, model_dir: str
+):
+    """
+    dataset: list of dirs. WAVs in dataset are saved here.
+
+    Recogs speakers.
+    Saves output to JSON.
+    """
+    annotation = verify_speaker(dataset, out_path, model_dir)
+    save(annotation, json_path)
+    return
+
+
+@typechecked
+def spkrec_save(
+    message_body: dict,
+    out_json: str,
+    out_path: str,
+    base_dir: str,
+    model_dir: str,
+) -> bool:
+    """
+    out_json: JSON the output is saved to.
+    out_path: unused input.
+    base_dir: base dir.
+    model_dir: dir the model is saved to.
+
+    Recogs speakers.
+    Returns 0 if success.
+    """
+    # copies landmark
+    ret_code = -1
+
+    diar_out = message_body["programme"]["mmc_aus"]["segments"]
+
+    # check diar_out
+    # True if diar_out is a list
+    # True if all dicts have keys ["start", "end", "label"]
+    if isinstance(diar_out, list) and all(
+        [
+            # True if all keys in a dict are ["start", "end", "label"]
+            all(k in voice for k in ["start", "end", "label"])
+            for voice in diar_out
+        ]
+    ):
+
+        # recog speakers
+        verify_speaker_save(
+            message_body["programme"]["spkrec_dataset"],
+            out_json,
+            out_path,
+            model_dir,
+        )
+
+        # success
+        ret_code = 0
+
+    # copies landmark
+    LOGGER.debug(f"return code: {ret_code}")
+
+    if ret_code != 0:
+        return False
+    else:
+        # success if ret_code = 0
+        return True