# TextandImageQuery.py

import torch
from transformers import pipeline
from PIL import Image
# Prefer the GPU when CUDA is available; otherwise run on the CPU.
device = (
    torch.device("cuda")
    if torch.cuda.is_available()
    else torch.device("cpu")
)

class TextandImageQuery:
    """Answer a natural-language question about an image (visual QA).

    Usage: assign ``QuestionText`` and ``RawImage``, call ``run()``, then
    read the result from ``AnswerText``.
    """

    # Inputs, read by run().
    QuestionText = None  # the question to ask about the image
    RawImage = None  # location of the image; handed to Image.open

    # Output, written by run().
    AnswerText = None

    # VQA pipeline, created on first use and cached on the instance so the
    # model is not reloaded for every query.
    _pipe = None

    def funcTextandImageQuery(self, raw_image_path, question):
        """Apply a neural network to answer ``question`` about an image.

        Args:
            raw_image_path: Location of the image; passed to ``Image.open``.
            question: The question text to ask about the image.

        Returns:
            The ``'answer'`` entry of the top-ranked pipeline result.
        """
        # convert() returns a new image object, so the result stays usable
        # after the context manager has closed the underlying file.
        with Image.open(raw_image_path) as source_image:
            raw_image = source_image.convert("RGB")

        if self._pipe is None:
            self._pipe = pipeline(
                "visual-question-answering",
                model="Salesforce/blip-vqa-base",
                device=device,
            )

        # top_k=1 keeps only the best candidate; the pipeline still returns
        # a list, hence the [0].
        output = self._pipe(raw_image, question, top_k=1)[0]
        return output['answer']

    def run(self):
        """Answer ``QuestionText`` about ``RawImage`` and store it in ``AnswerText``.

        Raises:
            ValueError: If ``RawImage`` or ``QuestionText`` has not been set.
        """
        # Fail early with a clear message instead of an opaque error from
        # deep inside the image loader or the pipeline.
        if self.RawImage is None or self.QuestionText is None:
            raise ValueError(
                "RawImage and QuestionText must be set before calling run()"
            )
        self.AnswerText = self.funcTextandImageQuery(self.RawImage, self.QuestionText)

if __name__ == "__main__":
    # Demo: fill in the two inputs, run the query, and print the answer.
    demo = TextandImageQuery()
    demo.RawImage = "path/to/the/contextImage"
    demo.QuestionText = "Question as a string"
    demo.run()
    print(demo.AnswerText)