TextToSpeech.py 1.07 KB
Newer Older
1
2
3
4
5
6
import torch
from transformers import pipeline
import soundfile as sf
from datasets import load_dataset
# Run inference on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

Carl De Sousa Trias's avatar
Carl De Sousa Trias committed
7
class TextToSpeech():
    """Convert an answer text into a spoken-audio WAV file.

    Usage: assign the text to ``AnswerText``, call ``run()``, then read the
    path of the generated WAV file from ``AnswerAudio``.
    """

    # Input: the text to synthesise; must be set to a string before run().
    AnswerText = None
    # Output: path of the WAV file written by the last run().
    AnswerAudio = None

    # Lazily created pipeline and speaker embedding. They are cached on the
    # class so the model and the embeddings dataset are loaded only once
    # instead of on every synthesis call.
    _synthesiser = None
    _speaker_embedding = None

    @classmethod
    def _load_resources(cls):
        """Return the cached (pipeline, speaker embedding), creating them on first use."""
        if cls._synthesiser is None:
            cls._synthesiser = pipeline(
                "text-to-speech", "microsoft/speecht5_tts", device=device
            )
        if cls._speaker_embedding is None:
            embeddings_dataset = load_dataset(
                "Matthijs/cmu-arctic-xvectors", split="validation"
            )
            # You can replace this embedding with your own as well.
            # unsqueeze(0) adds a leading dimension to the stored x-vector.
            cls._speaker_embedding = torch.tensor(
                embeddings_dataset[7306]["xvector"]
            ).unsqueeze(0)
        return cls._synthesiser, cls._speaker_embedding

    def funcTextToSpeech(self, input, path_output="AudioAnswer.wav"):
        """Synthesise ``input`` as speech and write it to a WAV file.

        Args:
            input: The text to speak; must be a ``str``.
            path_output: Destination file for the audio. Defaults to
                ``"AudioAnswer.wav"``.

        Returns:
            ``path_output``, the path of the file that was written.

        Raises:
            TypeError: If ``input`` is not a string (for example when
                ``AnswerText`` was never set and is still ``None``).
        """
        # Fail fast with a clear message, before any model or dataset is
        # loaded, rather than erroring somewhere inside the pipeline.
        if not isinstance(input, str):
            raise TypeError(
                "TextToSpeech expects the text as a str, got "
                + type(input).__name__
            )

        synthesiser, speaker_embedding = self._load_resources()
        speech = synthesiser(
            input,
            forward_params={"speaker_embeddings": speaker_embedding},
        )

        sf.write(path_output, speech["audio"], samplerate=speech["sampling_rate"])
        return path_output

    def run(self):
        """Synthesise ``AnswerText`` and store the resulting file path in ``AnswerAudio``."""
        self.AnswerAudio = self.funcTextToSpeech(self.AnswerText)
28
29

if __name__ == "__main__":
    # Demo run: synthesise a fixed sample sentence when executed as a script.
    tts = TextToSpeech()
    tts.AnswerText = "Text as a string"
    tts.run()