langchain-ynp-test

Runtime error

App Files Files Community

multiple updates

by cececerece - opened Jun 28, 2023

base: refs/heads/main

←

from: refs/pr/5

Discussion Files changed

+464

-275

Files changed (5) hide show

app.py +63 -267
requirements.txt +123 -8
utils/__init__.py +3 -0
utils/bot.py +203 -0
utils/functions.py +72 -0

app.py CHANGED Viewed

@@ -1,287 +1,83 @@
 import gradio as gr
-import os
 import time
-from langchain.document_loaders import OnlinePDFLoader
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.llms import OpenAI
-from langchain.embeddings import OpenAIEmbeddings
-from langchain.vectorstores import Chroma
-from langchain.chains import ConversationalRetrievalChain
-from langchain import PromptTemplate
-from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
-import requests
-from PIL import Image
-import torch
-# _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
-# Chat History:
-# {chat_history}
-# Follow Up Input: {question}
-# Standalone question:"""
-# CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
-# template = """
-# You are given the following extracted parts of a long document and a question. Provide a short structured answer.
-# If you don't know the answer, look on the web. Don't try to make up an answer.
-# Question: {question}
-# =========
-# {context}
-# =========
-# Answer in Markdown:"""
-torch.hub.download_url_to_file('https://raw.githubusercontent.com/vis-nlp/ChartQA/main/ChartQA%20Dataset/val/png/20294671002019.png', 'chart_example.png')
-torch.hub.download_url_to_file('https://raw.githubusercontent.com/vis-nlp/ChartQA/main/ChartQA%20Dataset/test/png/multi_col_1081.png', 'chart_example_2.png')
-torch.hub.download_url_to_file('https://raw.githubusercontent.com/vis-nlp/ChartQA/main/ChartQA%20Dataset/test/png/18143564004789.png', 'chart_example_3.png')
-torch.hub.download_url_to_file('https://sharkcoder.com/files/article/matplotlib-bar-plot.png', 'chart_example_4.png')
-model_name = "google/matcha-chartqa"
-model = Pix2StructForConditionalGeneration.from_pretrained(model_name)
-processor = Pix2StructProcessor.from_pretrained(model_name)
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model.to(device)
-def filter_output(output):
-    return output.replace("<0x0A>", "")
-def chart_qa(image, question):
-    inputs = processor(images=image, text=question, return_tensors="pt").to(device)
-    predictions = model.generate(**inputs, max_new_tokens=512)
-    return filter_output(processor.decode(predictions[0], skip_special_tokens=True))
-def loading_pdf():
-    return "Loading..."
-def pdf_changes(pdf_doc, open_ai_key):
-    if open_ai_key is not None:
-        os.environ['OPENAI_API_KEY'] = open_ai_key
-        loader = OnlinePDFLoader(pdf_doc.name)
-        documents = loader.load()
-        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-        texts = text_splitter.split_documents(documents)
-        embeddings = OpenAIEmbeddings()
-        db = Chroma.from_documents(texts, embeddings)
-        retriever = db.as_retriever()
-        global qa
-        qa = ConversationalRetrievalChain.from_llm(
-            llm=OpenAI(temperature=0.5),
-            retriever=retriever,
-            return_source_documents=True)
-        return "Ready"
-    else:
-        return "You forgot OpenAI API key"
-def add_text(history, text):
-    history = history + [(text, None)]
-    return history, ""
-def bot(history):
-    response = infer(history[-1][0], history)
-    history[-1][1] = ""
-    for character in response:
-        history[-1][1] += character
-        time.sleep(0.05)
-        yield history
-def infer(question, history):
-    res = []
-    for human, ai in history[:-1]:
-        pair = (human, ai)
-        res.append(pair)
-    chat_history = res
-    #print(chat_history)
-    query = question
-    result = qa({"question": query, "chat_history": chat_history})
-    #print(result)
-    return result["answer"]
-css="""
-#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
-"""
-title = """
-<div style="text-align: center;">
-    <h1>YnP LangChain Test </h1>
-    <p style="text-align: center;">Please specify OpenAI Key before use</p>
-</div>
-"""
-# with gr.Blocks(css=css) as demo:
-#     with gr.Column(elem_id="col-container"):
-#         gr.HTML(title)
-#         with gr.Column():
-#             openai_key = gr.Textbox(label="You OpenAI API key", type="password")
-#             pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
-#             with gr.Row():
-#                 langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
-#                 load_pdf = gr.Button("Load pdf to langchain")
-#         chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
-#         question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
-#         submit_btn = gr.Button("Send Message")
-#     load_pdf.click(loading_pdf, None, langchain_status, queue=False)
-#     load_pdf.click(pdf_changes, inputs=[pdf_doc, openai_key], outputs=[langchain_status], queue=False)
-#     question.submit(add_text, [chatbot, question], [chatbot, question]).then(
-#         bot, chatbot, chatbot
-#     )
-#     submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
-#         bot, chatbot, chatbot)
-# demo.launch()
-"""functions"""
-def load_file():
-    return "Loading..."
-def load_xlsx(name):
-    import pandas as pd
-    xls_file = rf'{name}'
-    data = pd.read_excel(xls_file)
-    return data
-def table_loader(table_file, open_ai_key):
-    import os
-    from langchain.llms import OpenAI
-    from langchain.agents import create_pandas_dataframe_agent
-    from pandas import read_csv
-    global agent
-    if open_ai_key is not None:
-        os.environ['OPENAI_API_KEY'] = open_ai_key
-    else:
-        return "Enter API"
-    if table_file.name.endswith('.xlsx') or table_file.name.endswith('.xls'):
-        data = load_xlsx(table_file.name)
-        agent = create_pandas_dataframe_agent(OpenAI(temperature=0), data)
-        return "Ready!"
-    elif table_file.name.endswith('.csv'):
-        data = read_csv(table_file.name)
-        agent = create_pandas_dataframe_agent(OpenAI(temperature=0), data)
-        return "Ready!"
-    else:
-        return "Wrong file format! Upload excel file or csv!"
-def run(query):
-    from langchain.callbacks import get_openai_callback
-    with get_openai_callback() as cb:
-        response = (agent.run(query))
-        costs = (f"Total Cost (USD): ${cb.total_cost}")
-        output = f'{response} \n {costs}'
-        return output
-def respond(message, chat_history):
-    import time
-    bot_message = run(message)
-    chat_history.append((message, bot_message))
-    time.sleep(0.5)
-    return "", chat_history
 with gr.Blocks() as demo:
-    with gr.Column(elem_id="col-container"):
-        gr.HTML(title)
-        key = gr.Textbox(
-                show_label=False,
-                placeholder="Your OpenAI key",
-                type = 'password',
-                ).style(container=False)
-    # PDF processing tab
-    with gr.Tab("PDFs"):
-        with gr.Row():
-            with gr.Column(scale=0.5):
-                langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
-                load_pdf = gr.Button("Load pdf to langchain")
-            with gr.Column(scale=0.5):
-                pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
-        with gr.Row():
-            with gr.Column(scale=1):
-                chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
-        with gr.Row():
-            with gr.Column(scale=0.85):
-                question = gr.Textbox(
-                show_label=False,
-                placeholder="Enter text and press enter, or upload an image",
-                ).style(container=False)
-            with gr.Column(scale=0.15, min_width=0):
-                clr_btn = gr.Button("Clear!")
-    load_pdf.click(loading_pdf, None, langchain_status, queue=False)
-    load_pdf.click(pdf_changes, inputs=[pdf_doc, key], outputs=[langchain_status], queue=True)
-    question.submit(add_text, [chatbot, question], [chatbot, question]).then(
-        bot, chatbot, chatbot
-    )
-    # XLSX and CSV processing tab
-    with gr.Tab("Spreadsheets"):
-        with gr.Row():
-            with gr.Column(scale=0.5):
-                status_sh = gr.Textbox(label="Status", placeholder="", interactive=False)
-                load_table = gr.Button("Load csv|xlsx to langchain")
-            with gr.Column(scale=0.5):
-                raw_table = gr.File(label="Load a table file (xls or csv)", file_types=['.csv, xlsx, xls'], type="file")
-        with gr.Row():
-            with gr.Column(scale=1):
-                chatbot_sh = gr.Chatbot([], elem_id="chatbot").style(height=350)
-        with gr.Row():
-            with gr.Column(scale=0.85):
-                question_sh = gr.Textbox(
-                show_label=False,
-                placeholder="Enter text and press enter, or upload an image",
-                ).style(container=False)
-            with gr.Column(scale=0.15, min_width=0):
-                clr_btn = gr.Button("Clear!")
-    load_table.click(load_file, None, status_sh, queue=False)
-    load_table.click(table_loader, inputs=[raw_table, key], outputs=[status_sh], queue=False)
-    question_sh.submit(respond, [question_sh, chatbot_sh], [question_sh, chatbot_sh])
-    clr_btn.click(lambda: None, None, chatbot_sh, queue=False)
-    with gr.Tab("Charts"):
-            image = gr.Image(type="pil", label="Chart")
-            question = gr.Textbox(label="Question")
-            load_chart = gr.Button("Load chart and question!")
-            answer = gr.Textbox(label="Model Output")
-    load_chart.click(chart_qa, [image, question], answer)
-demo.queue(concurrency_count=3)
-demo.launch()

 import gradio as gr
 import time
+from utils import Bot
+from utils.functions import make_documents, make_descriptions
+def init_bot(file=None,title=None,pdf=None,key=None):
+    """Validate the submitted inputs and (re)build the module-level chat bot.
+    Args:
+        file: uploaded CSV or image; only its ``name`` attribute (a path) is read.
+        title: short free-text description of the uploaded media file.
+        pdf: uploaded PDF; only its ``name`` attribute (a path) is read.
+        key: OpenAI API key.
+    Returns:
+        A status string: a message naming the first missing input, or a
+        success message once the global ``bot`` has been created.
+    """
+    global bot
+    # An empty Gradio Textbox arrives as "" rather than None, so blank
+    # strings must be rejected too or the checks below never trigger.
+    if key is None or not key.strip():
+        return 'You must submit OpenAI key'
+    if pdf is None:
+        return 'You must submit pdf file'
+    if file is None:
+        return 'You must submit media file'
+    if title is None or not title.strip():
+        return 'You must submit the description of the media'
+    media_path = file.name
+    pdf_path = pdf.name
+    file_description = make_descriptions(media_path, title)
+    documents = make_documents(pdf_path)
+    bot = Bot(
+        # Pasted keys often carry stray whitespace or a trailing newline.
+        openai_api_key=key.strip(),
+        file_descriptions=file_description,
+        text_documents=documents,
+        verbose=False
+    )
+    return 'Chat bot successfully initialized'
+def msg_bot(history):
+    """Stream the bot's answer to the latest user message into the chat history.
+    Args:
+        history: list of ``[user_message, bot_message]`` pairs; the last pair
+            holds the pending question and is filled in place.
+    Yields:
+        The same ``history`` list after each appended character, so the UI
+        shows the answer being typed out.
+    """
+    message = history[-1][0]
+    # ``bot`` only exists after init_bot has succeeded; look it up defensively
+    # instead of raising NameError when the user chats before initializing.
+    active_bot = globals().get('bot')
+    if active_bot is None:
+        history[-1][1] = 'Chat bot is not initialized. Submit the key and files first.'
+        yield history
+        return
+    bot_message = active_bot(message)['output']
+    history[-1][1] = ""
+    for character in bot_message:
+        history[-1][1] += character
+        # Short pause per character produces the typing effect.
+        time.sleep(0.05)
+        yield history
+def user(user_message, history):
+    """Append the user's message to the chat as a pending turn and clear the input box.
+    Returns:
+        A pair ``("", new_history)`` where ``new_history`` is a new list ending
+        with ``[user_message, None]``; the ``None`` slot is filled by msg_bot.
+    """
+    # Tolerate a missing history (None) by starting from an empty chat.
+    return "", (history or []) + [[user_message, None]]
 with gr.Blocks() as demo:
+    # The API key is a secret: mask it in the browser, as the previous version of this app did.
+    key = gr.Textbox(label='OpenAI key', type='password')
+    with gr.Tab("Chat bot initialization"):
+        # Upload panel: media description + media file on the left, PDF on the right.
+        with gr.Row(variant='panel'):
+            with gr.Column():
+                with gr.Row():
+                    title = gr.Textbox(label='File short description')
+                with gr.Row():
+                    file = gr.File(label='CSV or image', file_types=['.csv', 'image'])
+            pdf = gr.File(label='pdf')
+        # Submit panel: init_bot's status string is shown in init_output.
+        with gr.Row(variant='panel'):
+            init_button = gr.Button('submit')
+            init_output = gr.Textbox(label="Initialization status")
+            init_button.click(fn=init_bot,inputs=[file,title,pdf,key],outputs=init_output,api_name='init')
+        chatbot = gr.Chatbot()
+        msg = gr.Textbox(label='Ask the bot')
+        clear = gr.Button('Clear')
+        # First record the user's turn and empty the input box (unqueued),
+        # then let msg_bot stream the answer into the chat.
+        msg.submit(user,[msg,chatbot],[msg,chatbot],queue=False).then(
+            msg_bot, chatbot, chatbot
+        )
+        # Returning None resets the chat window.
+        clear.click(lambda: None, None, chatbot, queue=False)
+# msg_bot is a generator, so queuing is enabled before launching.
+demo.queue()
+demo.launch()

requirements.txt CHANGED Viewed

@@ -1,8 +1,123 @@
-openai
-tiktoken
-chromadb
-langchain
-unstructured
-unstructured[local-inference]
-pandas
-tabulate

+aiofiles==23.1.0
+aiohttp==3.8.4
+aiosignal==1.3.1
+altair==5.0.0
+anyio==3.6.2
+async-timeout==4.0.2
+attrs==23.1.0
+backoff==2.2.1
+certifi==2023.5.7
+charset-normalizer==3.1.0
+chromadb==0.3.22
+click==8.1.3
+clickhouse-connect==0.5.24
+cmake==3.26.3
+contourpy==1.0.7
+cycler==0.11.0
+dataclasses-json==0.5.7
+duckdb==0.7.1
+fastapi==0.95.1
+ffmpy==0.3.0
+filelock==3.12.0
+fonttools==4.39.4
+frozenlist==1.3.3
+fsspec==2023.5.0
+gradio==3.29.0
+gradio_client==0.2.2
+greenlet==2.0.2
+h11==0.14.0
+hnswlib==0.7.0
+httpcore==0.17.0
+httptools==0.5.0
+httpx==0.24.0
+huggingface-hub==0.14.1
+idna==3.4
+importlib-resources==5.12.0
+Jinja2==3.1.2
+joblib==1.2.0
+jsonschema==4.17.3
+kiwisolver==1.4.4
+langchain==0.0.164
+linkify-it-py==2.0.2
+lit==16.0.3
+lz4==4.3.2
+markdown-it-py==2.2.0
+MarkupSafe==2.1.2
+marshmallow==3.19.0
+marshmallow-enum==1.5.1
+matplotlib==3.7.1
+mdit-py-plugins==0.3.3
+mdurl==0.1.2
+monotonic==1.6
+mpmath==1.3.0
+multidict==6.0.4
+mypy-extensions==1.0.0
+networkx==3.1
+nltk==3.8.1
+numexpr==2.8.4
+numpy==1.24.3
+nvidia-cublas-cu11==11.10.3.66
+nvidia-cuda-cupti-cu11==11.7.101
+nvidia-cuda-nvrtc-cu11==11.7.99
+nvidia-cuda-runtime-cu11==11.7.99
+nvidia-cudnn-cu11==8.5.0.96
+nvidia-cufft-cu11==10.9.0.58
+nvidia-curand-cu11==10.2.10.91
+nvidia-cusolver-cu11==11.4.0.1
+nvidia-cusparse-cu11==11.7.4.91
+nvidia-nccl-cu11==2.14.3
+nvidia-nvtx-cu11==11.7.91
+openai==0.27.6
+openapi-schema-pydantic==1.2.4
+orjson==3.8.12
+packaging==23.1
+pandas==2.0.1
+Pillow==9.5.0
+pkgutil_resolve_name==1.3.10
+posthog==3.0.1
+pydantic==1.10.7
+pydub==0.25.1
+Pygments==2.15.1
+pyparsing==3.0.9
+pypdf==3.8.1
+pyrsistent==0.19.3
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+python-multipart==0.0.6
+pytz==2023.3
+PyYAML==6.0
+regex==2023.5.5
+requests==2.30.0
+scikit-learn==1.2.2
+scipy==1.10.1
+semantic-version==2.10.0
+sentence-transformers==2.2.2
+sentencepiece==0.1.99
+six==1.16.0
+sniffio==1.3.0
+SQLAlchemy==2.0.12
+starlette==0.26.1
+sympy==1.12
+tabulate==0.9.0
+tenacity==8.2.2
+threadpoolctl==3.1.0
+tiktoken==0.4.0
+tokenizers==0.13.3
+toolz==0.12.0
+torch==2.0.1
+torchvision==0.15.2
+tqdm==4.65.0
+transformers==4.29.0
+triton==2.0.0
+typing-inspect==0.8.0
+typing_extensions==4.5.0
+tzdata==2023.3
+uc-micro-py==1.0.2
+urllib3==2.0.2
+uvicorn==0.22.0
+uvloop==0.17.0
+watchfiles==0.19.0
+websockets==11.0.3
+yarl==1.9.2
+zipp==3.15.0
+zstandard==0.21.0

utils/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .bot import Bot
2	+ from .functions import make_documents, make_descriptions
3	+

utils/bot.py ADDED Viewed

	@@ -0,0 +1,203 @@

+import langchain
+from langchain.agents import create_csv_agent
+from langchain.schema import HumanMessage
+from langchain.chat_models import ChatOpenAI
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import Chroma
+from typing import List, Dict
+from langchain.agents import AgentType
+from langchain.chains.conversation.memory import ConversationBufferWindowMemory
+from utils.functions import Matcha_model
+from PIL import Image
+from pathlib import Path
+from langchain.tools import StructuredTool
+from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
+class Bot:
+    """Question-answering agent over PDF text plus one optional CSV table or chart image.
+    For every question the bot first asks the LLM which described file (if any)
+    is relevant, builds the matching tool for that file, and then runs a
+    structured-chat agent equipped with the text-search tool and that file tool.
+    """
+    def __init__(
+            self,
+            openai_api_key: str,
+            file_descriptions: List[Dict[str, any]],
+            text_documents: List[langchain.schema.Document],
+            verbose: bool = False
+    ):
+        """Create the LLM, index the text documents and load the chart model.
+        Args:
+            openai_api_key: key handed to ``ChatOpenAI``.
+            file_descriptions: one dict per file with at least the keys
+                ``number``, ``path`` and ``title``; CSV entries also need ``columns``.
+            text_documents: documents indexed into the Chroma vector store.
+            verbose: forwarded to the agents; also enables the debug prints.
+        """
+        self.verbose = verbose
+        self.file_descriptions = file_descriptions
+        self.llm = ChatOpenAI(
+            openai_api_key=openai_api_key,
+            temperature=0,
+            model_name="gpt-3.5-turbo"
+        )
+        embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+        vector_store = Chroma.from_documents(text_documents, embedding_function)
+        self.text_retriever = langchain.chains.RetrievalQAWithSourcesChain.from_chain_type(
+            llm=self.llm,
+            chain_type='stuff',
+            retriever=vector_store.as_retriever()
+        )
+        self.text_search_tool = langchain.agents.Tool(
+            func=self._text_search,
+            description="Use this tool when searching for text information",
+            name="search text information"
+        )
+        self.chart_model = Matcha_model()
+    def __call__(
+            self,
+            question: str
+    ):
+        """Answer ``question`` with an agent holding the tools relevant to it.
+        Returns:
+            Whatever the agent executor returns; app.py reads its ``'output'`` key.
+        """
+        import re  # local import: only needed to parse the selected file number
+        self.tools = [self.text_search_tool]
+        file = self._define_appropriate_file(question)
+        # The answer should look like "File №2". An answer without a number
+        # (e.g. "None of the files") means no file tool is added.
+        match = re.search(r'№\s*(\d+)', file)
+        file_description = None
+        if match is not None:
+            number = int(match.group(1))
+            file_description = next(
+                (x for x in self.file_descriptions if x['number'] == number),
+                None
+            )
+        if file_description is not None:
+            file_path = file_description['path']
+            # Decide by the real path: the LLM answer itself has no file suffix.
+            if Path(file_path).suffix == '.csv':
+                self.csv_agent = create_csv_agent(
+                    llm=self.llm,
+                    path=file_path,
+                    verbose=self.verbose
+                )
+                self._init_tabular_search_tool(file_description)
+                self.tools.append(self.tabular_search_tool)
+            else:
+                self._init_chart_search_tool(file_description)
+                self.tools.append(self.chart_search_tool)
+        self._init_chatbot()
+        response = self.agent(question)
+        return response
+    def _init_chatbot(self):
+        """Build ``self.agent`` from ``self.tools`` and install the system prompt."""
+        # NOTE(review): this runs on every question, so a fresh memory object is
+        # created each time and earlier turns are not carried over - confirm intended.
+        conversational_memory = ConversationBufferWindowMemory(
+            memory_key='chat_history',
+            k=5,
+            return_messages=True
+        )
+        self.agent = langchain.agents.initialize_agent(
+            agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
+            tools=self.tools,
+            llm=self.llm,
+            verbose=self.verbose,
+            max_iterations=5,
+            early_stopping_method='generate',
+            memory=conversational_memory
+        )
+        # Adjacent literals are concatenated verbatim, so every sentence needs
+        # its own trailing space to avoid fused sentences in the prompt.
+        sys_msg = (
+            "You are an expert summarizer and deliverer of information. "
+            "Yet, the reason you are so intelligent is that you make complex "
+            "information incredibly simple to understand. It's actually rather incredible. "
+            "When users ask information you refer to the relevant tools. "
+            "If one of the tools helped you with only a part of the necessary information, you must "
+            "try to find the missing information using another tool. "
+            "If you can't find the information using the provided tools, you MUST "
+            "say 'I don't know'. Don't try to make up an answer."
+        )
+        prompt = self.agent.agent.create_prompt(
+            tools=self.tools,
+            prefix = sys_msg
+        )
+        self.agent.agent.llm_chain.prompt = prompt
+    def _text_search(
+            self,
+            query: str
+    ) -> str:
+        """Run ``query`` through the retrieval chain and return its ``'answer'``."""
+        query = self.text_retriever.prep_inputs(query)
+        res = self.text_retriever(query)['answer']
+        return res
+    def _tabular_search(
+            self,
+            query: str
+    ) -> str:
+        """Run ``query`` through the CSV agent created in ``__call__``."""
+        res = self.csv_agent.run(query)
+        return res
+    def _chart_search(
+        self,
+        image,
+        query: str
+    ) -> str:
+        """Open the chart at path ``image`` and ask the chart model ``query`` about it."""
+        # The context manager closes the image file once the model has answered.
+        with Image.open(image) as chart:
+            return self.chart_model.chart_qa(chart, query)
+    def _init_chart_search_tool(
+        self,
+        title
+    ) -> None:
+        """Create ``self.chart_search_tool`` for one chart.
+        Args:
+            title: either the chart title, or the whole file description dict
+                (as passed by ``__call__``) from which ``title`` and ``path`` are read.
+        """
+        chart_path = None
+        if isinstance(title, dict):
+            chart_path = title.get('path')
+            title = title.get('title')
+        description = f"""
+            Use this tool when searching for information on charts.
+            With this tool you can answer the question about related chart.
+            You should ask simple question about a chart, then the tool will give you number.
+            This chart is called {title}.
+        """
+        if chart_path is None:
+            # No path known: the agent has to supply both ``image`` and ``query``.
+            chart_func = self._chart_search
+        else:
+            # Bind the image path so the agent only has to provide the question.
+            def chart_func(query: str) -> str:
+                return self._chart_search(chart_path, query)
+        # from_function infers the argument schema from the function signature.
+        self.chart_search_tool = StructuredTool.from_function(
+            func=chart_func,
+            name="Ask over charts",
+            description=description
+        )
+    def _init_tabular_search_tool(
+            self,
+            file_: Dict[str, any]
+    ) -> None:
+        """Create ``self.tabular_search_tool`` described by ``file_['title']`` and ``file_['columns']``."""
+        title = file_['title']
+        columns = file_['columns']
+        description = f"""
+            Use this tool when searching for tabular information.
+            With this tool you could get access to table.
+            This table title is "{title}" and the names of the columns in this table: {columns}
+        """
+        self.tabular_search_tool = langchain.agents.Tool(
+            func=self._tabular_search,
+            description=description,
+            name="search tabular information"
+        )
+    def _define_appropriate_file(
+            self,
+            question: str
+    ) -> str:
+        """Ask the LLM which described file may contain the answer to ``question``.
+        Returns:
+            The LLM answer with surrounding whitespace and trailing "!" or "."
+            removed; the prompt requests "File №<number>" or "None of the files".
+        """
+        parts = ['I have list of descriptions: \n']
+        for k, description in enumerate(self.file_descriptions, start=1):
+            fields = ''.join(f"{key} : {value}\n" for key, value in description.items())
+            parts.append(f"""  {k}) description for File №{description['number']}: """ + fields)
+        question = f""" How do you think, which file can help answer the question: "{question}" .
+        Your answer MUST be specific,
+        for example if you think that File №2 can help answer the question, you MUST just write  "File №2!".
+        If you think that none of the files can help answer the question just write "None of the files!"
+        Don't include to answer information about your thinking.
+        """
+        message = ''.join(parts) + question
+        res = self.llm([HumanMessage(content=message)])
+        # Strip the requested "!" only when present, so an answer without it
+        # does not lose its last character (the file number).
+        answer = res.content.strip().rstrip('!.').strip()
+        if self.verbose:
+            print(message)
+            print(answer)
+        return answer

utils/functions.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import pandas as pd
+from langchain.document_loaders import PyPDFLoader
+from langchain.text_splitter import CharacterTextSplitter
+import torch
+from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
+from pathlib import Path
+def make_descriptions(file, title):
+    """Build the list of file description dicts for one uploaded media file.
+    Args:
+        file: path of the uploaded file; a ``.csv`` suffix selects the table branch.
+        title: short user-provided description of the file.
+    Returns:
+        For a CSV: two dicts, a hard-coded placeholder (number 1) followed by
+        the real table (number 2) with its column names. For anything else:
+        a single dict (number 1) with ``path`` and ``title``.
+    """
+    if Path(file).suffix == '.csv':
+        # Only the header is needed, so do not load the table rows.
+        columns = list(pd.read_csv(file, nrows=0).columns)
+        # NOTE(review): this placeholder entry points at the non-existent path
+        # 'random'; it looks like a decoy for the file-selection prompt - confirm
+        # it is still wanted.
+        table_description0 = {
+            'path': 'random',
+            'number': 1,
+            'columns': ["clothes", "animals", "students"],
+            'title': "fashionable student clothes"
+        }
+        table_description1 = {
+            'path': file,
+            'number': 2,
+            'columns': columns,
+            'title': title
+        }
+        return [table_description0, table_description1]
+    file_description = {
+        'path': file,
+        'number': 1,
+        'title': title
+    }
+    return [file_description]
+def make_documents(pdf):
+    """Load the PDF at path ``pdf`` and return it split into chunks.
+    Splitting is done on newlines with ``chunk_size=500`` and no overlap.
+    """
+    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0, separator='\n')
+    pages = PyPDFLoader(pdf).load()
+    return splitter.split_documents(pages)
+class Matcha_model:
+    """Chart question answering with the "google/matcha-chartqa" Pix2Struct checkpoint."""
+    def __init__(self) -> None:
+        """Load the processor and model, and move the model to CUDA when available, else CPU."""
+        self.model_name = "google/matcha-chartqa"
+        self.model = Pix2StructForConditionalGeneration.from_pretrained(self.model_name)
+        self.processor = Pix2StructProcessor.from_pretrained(self.model_name)
+        use_cuda = torch.cuda.is_available()
+        self.device = torch.device("cuda" if use_cuda else "cpu")
+        self.model.to(self.device)
+    def _filter_output(self, output):
+        """Return ``output`` with every "<0x0A>" marker removed."""
+        marker = "<0x0A>"
+        return "".join(output.split(marker))
+    def chart_qa(self, image, question: str) -> str:
+        """Answer ``question`` about ``image`` and return the cleaned decoded text."""
+        batch = self.processor(images=image, text=question, return_tensors="pt")
+        batch = batch.to(self.device)
+        generated = self.model.generate(**batch, max_new_tokens=512)
+        decoded = self.processor.decode(generated[0], skip_special_tokens=True)
+        return self._filter_output(decoded)