Initial commit

2c57ee0e · Mattia Bergagio · 2c57ee0e · 2c57ee0e · 2c57ee0e · 2c57ee0e
Commit 2c57ee0e authored Sep 23, 2024 by Mattia Bergagio
--- a/Dockerfile
+++ b/Dockerfile
+# Ubuntu 20.04
+# CUDA 11.6.2
+# cuDNN 8
+FROM nvcr.io/nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04
+
+# TZ is written to /etc/localtime and /etc/timezone below.
+ENV TZ='Europe/Rome'
+# BASE_FOLDER is the application root: WORKDIR and part of PYTHONPATH.
+ENV BASE_FOLDER='/DIARIZ'
+ENV LOGS_FOLDER='/LOGS'
+# Unprivileged user that owns BASE_FOLDER / LOGS_FOLDER and runs the app.
+ENV APP_USER='devuser'
+
+# Credentials used to clone the private common_module repository below.
+# NOTE(review): build args remain visible in `docker history`; consider
+# passing the token as a BuildKit secret (RUN --mount=type=secret) instead.
+ARG GIT_NAME
+ARG GIT_TOKEN
+
+# update, upgrade and install share ONE layer: a separately cached
+# `apt-get update` layer could otherwise feed stale package lists to the
+# steps that follow it.
+# libsndfile1 fixes error
+# OSError: sndfile library not found
+RUN \
+  apt-get update --fix-missing && \
+  DEBIAN_FRONTEND=noninteractive apt-get upgrade -y && \
+  DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+    python3.8 \
+    python3-dev \
+    python3-pip \
+    tzdata \
+    libsndfile1 \
+    ffmpeg \
+    git \
+    # TODO Other packets common to all images go in here
+  && \
+  apt-get clean && \
+  rm -rf /var/lib/apt/lists/* && \
+  ln -snf "/usr/share/zoneinfo/$TZ" '/etc/localtime' && \
+  echo "$TZ" > '/etc/timezone'
+
+# Create the app user and hand it the application and log folders.
+RUN useradd -m "$APP_USER" && \
+  mkdir -p "$BASE_FOLDER" && \
+  chown -R "${APP_USER}:${APP_USER}" "$BASE_FOLDER" && \
+  mkdir -p "$LOGS_FOLDER" && \
+  chown -R "${APP_USER}:${APP_USER}" "$LOGS_FOLDER"
+
+# Everything from here on runs as the unprivileged user.
+USER "$APP_USER"
+WORKDIR "$BASE_FOLDER"
+
+# pip installs console scripts for a non-root user under ~/.local/bin.
+ENV PATH="${PATH}:/root/.local/bin:/home/${APP_USER}/.local/bin"
+ENV PYTHONPATH="${PYTHONPATH}:${BASE_FOLDER}"
+
+# requirements.txt is copied on its own so the dependency layer is reused
+# when only the sources change.
+COPY --chown="${APP_USER}:${APP_USER}" /requirements.txt ./requirements.txt
+
+RUN python3 -m pip install --no-cache-dir -r requirements.txt
+
+# common_module is cloned next to the sources (src/common_module).
+# The remote URL is reset in the same layer so that the token is not left
+# behind in common_module/.git/config.
+WORKDIR "$BASE_FOLDER"/src
+RUN git clone "https://${GIT_NAME}:${GIT_TOKEN}@gitlab.eurixgroup.com/mpai/common_module.git" && \
+  git -C common_module remote set-url origin 'https://gitlab.eurixgroup.com/mpai/common_module.git'
+
+WORKDIR "$BASE_FOLDER"
+COPY --chown="${APP_USER}:${APP_USER}" /src ./src
+
+CMD ["python3.8", "src/main.py"]
\ No newline at end of file
--- a/README.md
+++ b/README.md
+```
+cd $PATH_SHARED
+mkdir models
+cd models
+mkdir mmc_aus
+cd mmc_aus
+mkdir speechbrain
+cd speechbrain
+apt install git
+apt install git-lfs
+git lfs install
+git clone https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb 
+# this creates folder 'spkrec-ecapa-voxceleb'
+# rename label_encoder.txt as label_encoder.ckpt
+mv spkrec-ecapa-voxceleb/label_encoder.txt spkrec-ecapa-voxceleb/label_encoder.ckpt
+# copy the contents of spkrec-ecapa-voxceleb into the current folder (speechbrain)
+cp -rf spkrec-ecapa-voxceleb/* .
+
+# segmentation: pyannote/segmentation@2022.07
+cd $PATH_SHARED
+cd models
+cd mmc_aus
+git clone https://huggingface.co/pyannote/segmentation
+# this creates folder 'segmentation'
+```
--- a/diar_conf.yaml
+++ b/diar_conf.yaml
+# Template of the diarization pipeline configuration.
+# src/diariz_funs.py (mk_pipeline) copies this file to new_diar_conf.yaml,
+# substitutes the two placeholder values marked below and loads the result
+# with Pipeline.from_pretrained.
+# The substitution is a plain text replacement, so the placeholder tokens
+# must not appear anywhere else in this file.
+pipeline:
+  name: pyannote.audio.pipelines.SpeakerDiarization
+  params:
+    clustering: AgglomerativeClustering
+    # placeholder: replaced with <model_dir>/speechbrain/spkrec-ecapa-voxceleb
+    embedding: voxceleb_path
+    embedding_batch_size: 32
+    embedding_exclude_overlap: true
+    # placeholder: replaced with <model_dir>/segmentation/pytorch_model.bin
+    segmentation: bin_path
+    segmentation_batch_size: 32
+
+# NOTE(review): presumably tuned hyper-parameters of the pipeline above;
+# confirm their provenance before changing them.
+params:
+  clustering:
+    method: centroid
+    min_cluster_size: 15
+    threshold: 0.7153814381597874
+  segmentation:
+    min_duration_off: 0.5817029604921046
+    threshold: 0.4442333667381752
\ No newline at end of file
--- a/requirements.txt
+++ b/requirements.txt
+aiohttp==3.8.5
+aiosignal==1.3.1
+alembic==1.12.0
+annotated-types==0.5.0
+antlr4-python3-runtime==4.9.3
+anyio==3.7.1
+appdirs==1.4.4
+arrow==1.2.3
+asteroid-filterbanks==0.4.0
+async-timeout==4.0.3
+attrs==23.1.0
+audioread==3.0.1
+backoff==2.2.1
+beautifulsoup4==4.12.2
+blessed==1.20.0
+certifi==2022.12.7
+cffi==1.15.1
+charset-normalizer==3.1.0
+click==8.1.7
+cmaes==0.10.0
+cmake==3.27.5
+colorama==0.4.6
+coloredlogs==15.0.1
+colorlog==6.7.0
+croniter==1.4.1
+dateutils==0.6.12
+decorator==5.1.1
+deepdiff==6.5.0
+docopt==0.6.2
+einops==0.6.1
+exceptiongroup==1.1.3
+fastapi==0.103.2
+fastjsonschema==2.16.3
+ffmpeg-python==0.2.0
+filelock==3.11.0
+flatbuffers==23.5.26
+frozenlist==1.4.0
+fsspec==2023.9.2
+future==0.18.3
+greenlet==2.0.2
+h11==0.14.0
+huggingface-hub==0.13.0
+humanfriendly==10.0
+HyperPyYAML==1.2.2
+idna==3.4
+importlib-metadata==7.0.0
+importlib-resources==6.1.1
+inquirer==3.1.3
+itsdangerous==2.1.2
+Jinja2==3.1.2
+joblib==1.3.2
+jsonpickle==3.0.1
+jsonschema==4.3.3
+julius==0.2.7
+kiwisolver==1.4.5
+lazy-loader==0.3
+libclang==16.0.0
+librosa==0.10.0.post2
+lightning-cloud==0.5.39
+lightning-utilities==0.9.0
+lightning==2.0.9.post0
+lit==17.0.1
+llvmlite==0.41.0
+Mako==1.2.4
+markdown-it-py==3.0.0
+MarkupSafe==2.1.2
+matplotlib==3.7.4
+mdurl==0.1.2
+mpmath==1.3.0
+msgpack==1.0.7
+multidict==6.0.4
+networkx==3.1
+numba==0.58.0
+numpy==1.24.2
+nvidia-cublas-cu11==11.10.3.66
+nvidia-cuda-cupti-cu11==11.7.101
+nvidia-cuda-nvrtc-cu11==11.7.99
+nvidia-cuda-runtime-cu11==11.7.99
+nvidia-cudnn-cu11==8.5.0.96
+nvidia-cufft-cu11==10.9.0.58
+nvidia-curand-cu11==10.2.10.91
+nvidia-cusolver-cu11==11.4.0.1
+nvidia-cusparse-cu11==11.7.4.91
+nvidia-nccl-cu11==2.14.3
+nvidia-nvtx-cu11==11.7.91
+omegaconf==2.3.0
+onnxruntime-gpu==1.16.0
+optuna==3.3.0
+ordered-set==4.1.0
+packaging==23.1
+pika==1.3.1
+Pillow==10.0.1
+platformdirs==3.10.0
+pooch==1.6.0
+primePy==1.3
+protobuf==4.24.3
+psutil==5.9.5
+pyannote.audio==3.0.1
+pyannote.core==5.0.0
+pyannote.database==5.0.1
+pyannote.metrics==3.2.1
+pyannote.pipeline==3.0.1
+pycparser==2.21
+pydantic-core==2.4.0
+pydantic==2.1.1
+Pygments==2.16.1
+PyJWT==2.8.0
+pyparsing==3.1.1
+pyrsistent==0.19.3
+python-dateutil==2.8.2
+python-editor==1.0.4
+python-multipart==0.0.6
+pytorch-lightning==2.0.9.post0
+pytorch-metric-learning==2.3.0
+pytz==2023.3.post1
+PyYAML==6.0
+readchar==4.0.5
+regex==2023.3.23
+requests==2.28.2
+rich==13.5.3
+ruamel.yaml.clib==0.2.7
+ruamel.yaml==0.17.33
+scikit-learn==1.3.1
+scipy==1.10.1
+semver==3.0.1
+sentencepiece==0.1.99
+shellingham==1.5.3
+six==1.16.0
+sniffio==1.3.0
+sortedcontainers==2.4.0
+soundfile==0.12.1
+soupsieve==2.5
+soxr==0.3.4
+speechbrain==0.5.15
+SQLAlchemy==2.0.21
+starlette==0.27.0
+starsessions==1.3.0
+sympy==1.11.1
+tabulate==0.9.0
+tensorboardX==2.6.2.2
+threadpoolctl==3.2.0
+tokenizers==0.13.2
+torch-audiomentations==0.11.0
+torch-pitch-shift==1.2.4
+torch==2.0.1
+torchaudio==2.0.2
+torchmetrics==1.2.0
+torchsummary==1.5.1
+tqdm==4.65.0
+traitlets==5.10.1
+transformers==4.26.1
+triton==2.0.0
+typeguard==4.1.5
+typer==0.9.0
+typing_extensions==4.8.0
+tzdata==2023.3
+uri==2.0.1
+urllib3==1.26.15
+uvicorn==0.23.2
+wcwidth==0.2.7
+websocket-client==1.6.3
+websockets==11.0.3
+wincertstore==0.2
+yarl==1.9.2
+zipp==3.17.0
--- a/src/diariz_funs.py
+++ b/src/diariz_funs.py
+import os
+from pathlib import Path
+import subprocess
+from typing import Dict, List
+
+import torch
+from pyannote.audio import core, Pipeline
+from typeguard import typechecked
+
+import util_funs
+
+try:
+    from common_utils.logger import create_logger
+except ModuleNotFoundError:
+    from common_module.common_utils.logger import create_logger
+
+try:
+    from common_utils.saves import save
+except ModuleNotFoundError:
+    from common_module.common_utils.saves import save
+
+try:
+    from common_utils.gpus_torch import pick_best_gpu
+except ModuleNotFoundError:
+    from common_module.common_utils.gpus_torch import pick_best_gpu
+
+LOGGER = create_logger(__name__)
+
+
+def mk_pipeline(model_dir: str, conf_dir: str) -> Pipeline:
+    """
+    model_dir: dir the model is saved to.
+    conf_dir: dir diar_conf.yaml is saved to.
+
+    Makes mmc_aus pipeline.
+    """
+    hf_token = os.environ["HUGGINGFACE_TOKEN"]
+
+    LOGGER.debug(f"pwd = {os.getcwd()}")
+
+    # SpeechBrain_EncoderClassifier uses CACHE_DIR
+    # https://github.com/pyannote/pyannote-audio/blob/a810a5a53ac6e241606fd4ec822ea842f4c0a9b5/pyannote/audio/pipelines/speaker_verification.py#L262
+    # CACHE_DIR is set here:
+    # https://github.com/pyannote/pyannote-audio/blob/a810a5a53ac6e241606fd4ec822ea842f4c0a9b5/pyannote/audio/core/model.py#L56
+    os.environ["PYANNOTE_CACHE"] = model_dir
+    LOGGER.debug(f'{os.environ["PYANNOTE_CACHE"]=}')
+    LOGGER.debug(f"def: {core.model.CACHE_DIR=}")
+    core.model.CACHE_DIR = os.environ["PYANNOTE_CACHE"]
+    LOGGER.debug(f"upd: {core.model.CACHE_DIR=}")
+
+    # path of pytorch_model.bin
+    bin_path = os.path.join(model_dir, "segmentation", "pytorch_model.bin")
+
+    # path of speechbrain/spkrec-ecapa-voxceleb
+    voxceleb_path = os.path.join(model_dir, "speechbrain", "spkrec-ecapa-voxceleb")
+
+    # copies ner
+    LOGGER.debug("loading model from local dir...")
+    # TODO
+    # upgrade to pyannote/speaker-diarization-3.0
+    # speaker_mmc_aus = Pipeline.from_pretrained(
+    #     "pyannote/speaker-diarization@2.1", use_auth_token=hf_token
+    # )
+
+    # replace bin_path in YML
+    util_funs.replace_str_in_fil(
+        os.path.join(conf_dir, "diar_conf.yaml"),
+        os.path.join(conf_dir, "tmp_diar_conf.yaml"),
+        "bin_path",
+        bin_path,
+    )
+
+    # replace voxceleb_path in YML
+    util_funs.replace_str_in_fil(
+        os.path.join(conf_dir, "tmp_diar_conf.yaml"),
+        os.path.join(conf_dir, "new_diar_conf.yaml"),
+        "voxceleb_path",
+        voxceleb_path,
+    )
+
+    # print YML
+    with open(os.path.join(conf_dir, "new_diar_conf.yaml"), "r") as ymlr:
+        for ymllin in ymlr:
+            LOGGER.debug(ymllin)
+
+    speaker_mmc_aus = Pipeline.from_pretrained(
+        os.path.join(conf_dir, "new_diar_conf.yaml"), use_auth_token=hf_token
+    )
+
+    # copies ner
+    try:
+        # pick best GPU
+        dev_idx = pick_best_gpu()
+        LOGGER.debug(f"using best GPU = {dev_idx}")
+        device = torch.device(f"cuda:{dev_idx}")
+
+        # push the pipeline to GPU
+        speaker_mmc_aus = speaker_mmc_aus.to(device)
+
+    except RuntimeError as gpu_err:
+        LOGGER.debug(f"Unexpected {gpu_err=}, {type(gpu_err)=}")
+        LOGGER.debug("using CPU")
+        device = torch.device("cpu")
+
+        # push the pipeline to CPU
+        speaker_mmc_aus = speaker_mmc_aus.to(device)
+
+    return speaker_mmc_aus
+
+
+@typechecked
+def diarize(
+    audio: str, model_dir: str, conf_dir: str, out_path: str
+) -> Dict[str, List]:
+    """
+    model_dir: dir the model is saved to.
+    conf_dir: dir diar_conf.yaml is saved to.
+
+    Diarizes audio.
+    """
+    diar_pipeline = mk_pipeline(model_dir, conf_dir)
+
+    # num_speakers, min_speakers, max_speakers
+    # can be set if they are known
+    who_speaks_when = diar_pipeline(
+        audio,
+        num_speakers=None,
+        min_speakers=None,
+        max_speakers=None,
+    )
+
+    speakers = []
+    for segment, _, speaker in who_speaks_when.itertracks(yield_label=True):
+        speakers.append({"start": segment.start, "end": segment.end, "label": speaker})
+
+        span = segment.end - segment.start
+        diar_segm_path = os.path.join(out_path, f"split.{len(speakers) - 1}.wav")
+        ffmpeg_split = [
+            "ffmpeg",
+            "-ss",
+            str(segment.start),
+            "-i",
+            audio,
+            "-t",
+            str(span),
+            "-c",
+            "copy",
+            diar_segm_path,
+        ]
+        try:
+            subprocess.check_output(ffmpeg_split)
+        except subprocess.CalledProcessError as err:
+            raise RuntimeError(f"FFMPEG error {str(err)}")
+
+    return {"voices": speakers}
+
+
+@typechecked
+def diarize_save(
+    audio: str, out_json: str, out_path: str, model_dir: str, conf_dir: str
+) -> None:
+    """
+    out_json: JSON the annotation is saved to.
+    model_dir: dir the model is saved to.
+    conf_dir: dir diar_conf.yaml is saved to.
+
+    Diarizes audio.
+    Saves output.
+    """
+    LOGGER.info(f"diarizing {audio}...")
+    who_speaks_when = diarize(audio, model_dir, conf_dir, out_path)
+    LOGGER.info(who_speaks_when)
+
+    save(who_speaks_when, out_json)
+
+
+@typechecked
+def dl_diarize_save(
+    message_body: dict,
+    out_json: str,
+    out_path: str,
+    base_dir: str,
+    model_dir: str,
+) -> bool:
+    """
+    message_body: msg body.
+    out_json: JSON the output is saved to.
+    out_path: unused input.
+    base_dir: base dir.
+    model_dir: dir the model is saved to.
+
+    Downloads audio.
+    Diarizes audio.
+    Saves output.
+    Returns 0 if success.
+    """
+    # copies landmark
+    ret_code = -1
+
+    # access audio
+    aud_path = os.path.join(
+        base_dir,
+        message_body["programme"]["uid"],
+        message_body["programme"]["external_id"],
+    )
+
+    if Path(f"{aud_path}.wav").is_file():
+        diarize_save(
+            f"{aud_path}.wav",
+            out_json,
+            out_path,
+            model_dir,
+            message_body["programme"]["conf_dir"],
+        )
+
+        # success
+        ret_code = 0
+    else:
+        # wav is not available
+        # out_path = None
+        LOGGER.error("Wav is not available")
+
+        # failure
+        ret_code = 2
+
+    # copies landmark
+    LOGGER.debug(f"return code: {ret_code}")
+
+    if ret_code != 0:
+        return False
+    else:
+        # success if ret_code = 0
+        return True
--- a/src/main.py
+++ b/src/main.py
+from run_funs import run
+
+try:
+    from common_utils import adapter
+except ModuleNotFoundError:
+    from common_module.common_utils import adapter
+
+try:
+    from common_utils import rabbitmq
+except ModuleNotFoundError:
+    from common_module.common_utils import rabbitmq
+
+if __name__ == "__main__":
+    Worker = rabbitmq.Worker()
+    Worker.register_callback(queue="queue_module_mmc_aus", callback=run)
+    this_adapter = adapter.Adapter(Worker)
+    this_adapter.start_listening()
--- a/src/run_funs.py
+++ b/src/run_funs.py
+import os
+from pathlib import Path
+
+from typeguard import typechecked
+
+import diariz_funs
+
+try:
+    from common_utils import msg_builder, rabbitmq
+except ModuleNotFoundError:
+    from common_module.common_utils import msg_builder, rabbitmq
+
+# TODO
+# get vid_dir from input msg
+base_dir = os.path.join(os.environ["AI_FW_DIR"], "vids")
+
+# TODO
+# get model_dir from input msg
+model_dir = os.path.join(os.environ["AI_FW_DIR"], "models", "mmc_aus")
+Path(model_dir).mkdir(parents=True, exist_ok=True)
+
+# TODO
+# get conf_dir from input msg
+conf_dir = os.path.join(os.environ["AI_FW_DIR"], "confs", "mmc_aus")
+
+
+@typechecked
+def run(message_body: dict, worker: rabbitmq.Worker) -> bool:
+    defs = {
+        # module name in msg
+        "mod_name": "mmc_aus",
+        # metadata key in output msg
+        "metadata_key": "segments",
+        # metadata type in output msg
+        "metadata_type": "mmc_aus",
+        # main key in output JSON
+        "out_json_key": "voices",
+        # error msg if output JSON is not found
+        "not_found_msg": "cannot diarize!",
+        # error msg if input msg is invalid
+        "invalid_msg": "External ID/UID/Application Required!",
+    }
+
+    if "programme" in message_body:
+        if "external_id" in message_body["programme"]:
+            # name of output JSON
+            defs["out_json"] = f'{message_body["programme"]["external_id"]}.json'
+
+    extras = {"programme": {"module": defs["mod_name"]}}
+
+    if "programme" in message_body:
+        for k in msg_builder.handed_over_keys():
+            if k in message_body["programme"]:
+                extras["programme"][k] = message_body["programme"][k]
+
+        message_body["programme"]["conf_dir"] = conf_dir
+
+    return msg_builder.build_msg(
+        message_body,
+        worker,
+        "mmc_aus",
+        diariz_funs.dl_diarize_save,
+        msg_builder.validate_message,
+        ["external_id", "application", "uid"],
+        base_dir,
+        model_dir,
+        defs,
+        extras,
+    )
--- a/src/util_funs.py
+++ b/src/util_funs.py
+from typeguard import typechecked
+
+
+@typechecked
+def replace_str_in_fil(old_fil: str, new_fil: str, tgt: str, repl: str):
+    """
+    old_fil: old file.
+    tgt: str to be replaced.
+    repl: replacement.
+    new_fil: upd'ed file.
+
+    Replaces tgt in old file.
+    Saves upd'ed file.
+    """
+    # read old file
+    with open(old_fil, "r") as f:
+        lines = f.read()
+
+    # replace target str
+    new_lines = lines.replace(tgt, repl)
+
+    # save upd'ed file
+    with open(new_fil, "w") as f:
+        f.write(new_lines)