Commit d771523b authored by Carl De Sousa Trias
Browse files

Add AIMS implementation as standalone file

parent 6b23cdc1
import torch
from transformers import pipeline
# Inference device handed to the pipeline built in this module: the GPU when
# CUDA is usable, the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
class SpeechRecognition():
    '''
    Speech-recognition module: turns an audio question into text.

    The caller sets QuestionAudio, calls run(), then reads QuestionText.
    '''

    # Input: the audio to transcribe (the demo below passes a file path).
    QuestionAudio = None
    ##
    # Output: the transcription, written by run().
    QuestionText = None

    # Lazily created ASR pipeline, kept so that repeated calls on the same
    # instance do not rebuild it every time.
    _speech_reco = None

    def funcSpeechRecognition(self, input):
        '''
        Transcribe `input` with the "openai/whisper-base" ASR pipeline.

        While no transcription has been stored yet (QuestionText is None) the
        audio is also played back through `playsound`, when that optional
        package is installed.

        Parameters:
            input: the audio handed to `playsound` and to the pipeline.

        Returns:
            The "text" entry of the pipeline result.

        Raises:
            ValueError: if `input` is None.
        '''
        if input is None:
            raise ValueError(
                "funcSpeechRecognition needs an audio input, got None"
            )

        if self.QuestionText is None:
            # `playsound` was called without ever being imported, which made
            # this branch fail with NameError. Import it lazily and treat the
            # playback as best-effort so transcription works without it.
            try:
                from playsound import playsound
            except ImportError:
                playsound = None
            if playsound is not None:
                playsound(input)

        if self._speech_reco is None:
            self._speech_reco = pipeline(
                "automatic-speech-recognition", model="openai/whisper-base", device=device
            )
        res = self._speech_reco(input)
        return res["text"]

    def run(self):
        '''Transcribe QuestionAudio and store the result in QuestionText.'''
        self.QuestionText = self.funcSpeechRecognition(self.QuestionAudio)
# Demo: transcribe one audio file and print the recognised text.
if __name__ == "__main__":
    recogniser = SpeechRecognition()
    # Placeholder path; replace it with a real audio file before running.
    recogniser.QuestionAudio = "path/to/audiofile"
    recogniser.run()
    print(recogniser.QuestionText)
import torch
from transformers import pipeline
import soundfile as sf
from datasets import load_dataset
# Chosen once at import time and handed to the text-to-speech pipeline:
# CPU unless CUDA is available.
device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
class SpeechSynthesis():
    '''
    Speech-synthesis module: turns an answer text into a WAV file.

    The caller sets AnswerText, calls run(), then reads AnswerAudio, which
    holds the path of the file that was written.
    '''

    # Input: the text to synthesise.
    AnswerText = None
    # Output: path of the written audio file, set by run().
    AnswerAudio = None

    # Lazily created pipeline and speaker embedding. They are kept on the
    # instance so that repeated calls do not rebuild the pipeline and reload
    # the embeddings dataset each time.
    _synthesiser = None
    _speaker_embedding = None

    def funcSpeechSynthesis(self, input):
        '''
        Synthesise `input` with "microsoft/speecht5_tts" and save it as WAV.

        Parameters:
            input: the text to speak.

        Returns:
            The path of the written file, always "AudioAnswer.wav" (an
            existing file of that name is overwritten).

        Raises:
            ValueError: if `input` is None.
        '''
        if input is None:
            raise ValueError("funcSpeechSynthesis needs a text input, got None")

        if self._synthesiser is None:
            self._synthesiser = pipeline(
                "text-to-speech", "microsoft/speecht5_tts", device=device
            )
        if self._speaker_embedding is None:
            embeddings_dataset = load_dataset(
                "Matthijs/cmu-arctic-xvectors", split="validation"
            )
            # You can replace this embedding with your own as well.
            self._speaker_embedding = torch.tensor(
                embeddings_dataset[7306]["xvector"]
            ).unsqueeze(0)

        speech = self._synthesiser(
            input,
            forward_params={"speaker_embeddings": self._speaker_embedding},
        )
        path_output = "AudioAnswer.wav"
        sf.write(path_output, speech["audio"], samplerate=speech["sampling_rate"])
        return path_output

    def run(self):
        '''Synthesise AnswerText and store the output path in AnswerAudio.'''
        self.AnswerAudio = self.funcSpeechSynthesis(self.AnswerText)
# Demo: synthesise a sample sentence; the audio is written by run().
if __name__ == "__main__":
    synthesis = SpeechSynthesis()
    synthesis.AnswerText = "is it a boy?"
    synthesis.run()
from transformers import pipeline
from PIL import Image
import torch  # needed by the line below; this module's import list omitted it

# Device handed to the visual-question-answering pipeline: the GPU when CUDA
# is available, the CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class TextandImageQuery():
    '''
    Visual question answering module: answers a text question about an image.

    The caller sets QuestionText and RawImage, calls run(), then reads
    AnswerText.
    '''

    # Input: the question, as a string.
    QuestionText = None
    # Input: the context image (the demo below passes a file path).
    RawImage = None
    ##
    # Output: the answer, written by run().
    AnswerText = None

    # Lazily created VQA pipeline, kept so that repeated calls on the same
    # instance do not rebuild it every time.
    _vqa_pipeline = None

    def funcTextandImageQuery(self, raw_image_path, question):
        '''
        Apply an NN ("Salesforce/blip-vqa-base") to answer the question.

        Parameters:
            raw_image_path: the image to open with PIL.
            question: the question to ask about the image.

        Returns:
            The 'answer' entry of the top-ranked pipeline result.

        Raises:
            ValueError: if `raw_image_path` or `question` is None.
        '''
        if raw_image_path is None:
            raise ValueError("funcTextandImageQuery needs an image, got None")
        if question is None:
            raise ValueError("funcTextandImageQuery needs a question, got None")

        # Open inside a `with` block so the file is closed right away; the
        # RGB conversion is a separate image that stays usable afterwards.
        with Image.open(raw_image_path) as source_image:
            raw_image = source_image.convert("RGB")

        if self._vqa_pipeline is None:
            self._vqa_pipeline = pipeline(
                "visual-question-answering", model="Salesforce/blip-vqa-base", device=device
            )
        output = self._vqa_pipeline(raw_image, question, top_k=1)[0]
        return output['answer']

    def run(self):
        '''Answer QuestionText about RawImage and store it in AnswerText.'''
        self.AnswerText = self.funcTextandImageQuery(self.RawImage, self.QuestionText)
# Demo: ask one question about one image and print the answer.
if __name__ == "__main__":
    query = TextandImageQuery()
    # Placeholder values; replace them with a real question and image path.
    query.QuestionText = "Question as a string"
    query.RawImage = "path/to/the/contextImage"
    query.run()
    print(query.AnswerText)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment