Spaces:

ngebodh
/

SimpleChatbot

Running

App Files Community

ngebodh committed on Apr 21

Commit

807a9b1

verified ·

1 Parent(s): a1ab263

Updated the inference API

Browse files

Updated the app to provide some limited API calls.

Files changed (1) hide show

app.py +132 -39

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """ Simple Chatbot
 @author: Nigel Gebodh
 @email: [email protected]
 """
 import numpy as np
 import streamlit as st
@@ -11,14 +10,36 @@ import sys
 from dotenv import load_dotenv, dotenv_values
 load_dotenv()
-#Comment_test_11_09_2024
-model_links ={
       "Gemma-3-27B-it":{
                       "inf_point":"https://router.huggingface.co/nebius/v1",
                       "link":"google/gemma-3-27b-it-fast",
@@ -45,6 +66,18 @@ model_links ={
                       },
   }
 #Pull info about the model to display
 model_info ={
     "Mistral-7B":
@@ -63,6 +96,10 @@ model_info ={
         {'description':"""The Gemma model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
             \nIt was created by the [**Google's AI Team**](https://blog.google/technology/developers/gemma-open-models/) team as has over  **2 billion parameters.** \n""",
         'logo':'https://pbs.twimg.com/media/GG3sJg7X0AEaNIq.jpg'},
     "Zephyr-7B":
         {'description':"""The Zephyr model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
             \nFrom Huggingface: \n\
@@ -118,6 +155,44 @@ def reset_conversation():
 # Define the available models
 models =[key for key in model_links.keys()]
@@ -129,11 +204,14 @@ selected_model = st.sidebar.selectbox("Select Model", models)
 temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, (0.5))
 #Add reset button to clear conversation
-st.sidebar.button('Reset Chat', on_click=reset_conversation) #Reset button
 # Create model description
 st.sidebar.write(f"You're now chatting with **{selected_model}**")
 st.sidebar.markdown(model_info[selected_model]['description'])
 st.sidebar.image(model_info[selected_model]['logo'])
@@ -149,7 +227,6 @@ if "prev_option" not in st.session_state:
 if st.session_state.prev_option != selected_model:
     st.session_state.messages = []
-    # st.write(f"Changed to {selected_model}")
     st.session_state.prev_option = selected_model
     reset_conversation()
@@ -161,12 +238,11 @@ repo_id = model_links[selected_model]
 # initialize the client
 client = OpenAI(
   base_url=model_links[selected_model]["inf_point"],#"https://api-inference.huggingface.co/v1",
-  api_key=os.environ.get('HUGGINGFACEHUB_API_TOKEN')#"hf_xxx" # Replace with your token
 )
 st.subheader(f'AI - {selected_model}')
-# st.title(f'ChatBot Using {selected_model}')
 # Set a default model
 if selected_model not in st.session_state:
@@ -184,8 +260,9 @@ for message in st.session_state.messages:
-# Accept user input
-if prompt := st.chat_input(f"Hi I'm {selected_model}, ask me a question"):
     # Display user message in chat message container
     with st.chat_message("user"):
@@ -194,38 +271,54 @@ if prompt := st.chat_input(f"Hi I'm {selected_model}, ask me a question"):
     st.session_state.messages.append({"role": "user", "content": prompt})
-    # Display assistant response in chat message container
-    with st.chat_message("assistant"):
-        try:
-            stream = client.chat.completions.create(
-                model=model_links[selected_model]["link"],
-                messages=[
-                    {"role": m["role"], "content": m["content"]}
-                    for m in st.session_state.messages
-                ],
-                temperature=temp_values,#0.5,
-                stream=True,
-                max_tokens=3000,
-            )
-            response = st.write_stream(stream)
-        except Exception as e:
-            # st.empty()
-            response = "😵‍💫 Looks like someone unplugged something!\
-                    \n Either the model space is being updated or something is down.\
-                    \n\
-                    \n Try again later. \
-                    \n\
-                    \n Here's a random pic of a 🐶:"
-            st.write(response)
-            random_dog_pick = 'https://random.dog/'+ random_dog[np.random.randint(len(random_dog))]
-            st.image(random_dog_pick)
-            st.write("This was the error message:")
-            st.write(e)
-    st.session_state.messages.append({"role": "assistant", "content": response})

 """ Simple Chatbot
 @author: Nigel Gebodh
 @email: [email protected]
 """
 import numpy as np
 import streamlit as st
 from dotenv import load_dotenv, dotenv_values
 load_dotenv()
+#===========================================
+updates = '''
+Updates
++ 04/20/2025
+- Changed the inference from HF b/c
+    API calls are not very limted.
+- Added API call limiting to allow for demoing
+- Added support for adding your own API token.
++ 04/16/2025
+- Changed the inference points on HF b/c
+    older points no longer supported.
+'''
+#-------------------------------------------
+API_CALL_LIMIT = 5 # Define the limit
+if 'api_call_count' not in st.session_state:
+    st.session_state.api_call_count = 0
+    st.session_state.remaining_calls = API_CALL_LIMIT
+model_links_hf ={
       "Gemma-3-27B-it":{
                       "inf_point":"https://router.huggingface.co/nebius/v1",
                       "link":"google/gemma-3-27b-it-fast",
                       },
   }
+model_links_groq ={
+      "Gemma-2-9B-it":{
+                      "inf_point":"https://api.groq.com/openai/v1",
+                      "link":"gemma2-9b-it",
+                      },
+      "Meta-Llama-3.1-8B":{
+                      "inf_point":"https://api.groq.com/openai/v1",
+                      "link":"llama-3.1-8b-instant",
+                      },
+  }
 #Pull info about the model to display
 model_info ={
     "Mistral-7B":
         {'description':"""The Gemma model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
             \nIt was created by the [**Google's AI Team**](https://blog.google/technology/developers/gemma-open-models/) team as has over  **2 billion parameters.** \n""",
         'logo':'https://pbs.twimg.com/media/GG3sJg7X0AEaNIq.jpg'},
+    "Gemma-2-9B-it":
+        {'description':"""The Gemma model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
+            \nIt was created by the [**Google's AI Team**](https://blog.google/technology/developers/gemma-open-models/) team as has over  **9 billion parameters.** \n""",
+        'logo':'https://pbs.twimg.com/media/GG3sJg7X0AEaNIq.jpg'},
     "Zephyr-7B":
         {'description':"""The Zephyr model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
             \nFrom Huggingface: \n\
+# --- Sidebar Setup ---
+st.sidebar.title("Chatbot Settings")
+#Define model clients
+client_names = ["Provided API Call", "HF-Token"]
+client_select = st.sidebar.selectbox("Select Model Client", client_names)
+if "HF-Token" in client_select:
+    try:
+        if "API_token" not in st.session_state:
+            st.session_state.API_token = None
+        st.session_state.API_token = st.sidebar.text_input("Enter you Hugging Face Access Token", type="password")
+        model_links = model_links_hf
+    except Exception as e:
+        st.sidebar.error(f"Credentials Error:\n\n {e}")
+elif "Provided API Call"  in client_select:
+    try:
+        if "API_token" not in st.session_state:
+            st.session_state.API_token = None
+        st.session_state.API_token = os.environ.get('GROQ_API_TOKEN')#Should be like os.environ.get('HUGGINGFACE_API_TOKEN')
+        model_links = model_links_groq
+    except Exception as e:
+        st.sidebar.error(f"Credentials Error:\n\n {e}")
 # Define the available models
 models =[key for key in model_links.keys()]
 temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, (0.5))
 #Add reset button to clear conversation
+st.sidebar.button('Reset Chat', on_click=reset_conversation, type="primary") #Reset button
+st.sidebar.divider() # Add a visual separator
 # Create model description
+st.sidebar.subheader(f"About {selected_model}")
 st.sidebar.write(f"You're now chatting with **{selected_model}**")
 st.sidebar.markdown(model_info[selected_model]['description'])
 st.sidebar.image(model_info[selected_model]['logo'])
 if st.session_state.prev_option != selected_model:
     st.session_state.messages = []
     st.session_state.prev_option = selected_model
     reset_conversation()
 # initialize the client
 client = OpenAI(
   base_url=model_links[selected_model]["inf_point"],#"https://api-inference.huggingface.co/v1",
+  api_key=st.session_state.API_token#os.environ.get('HUGGINGFACE_API_TOKEN')#"hf_xxx" # Replace with your token
 )
 st.subheader(f'AI - {selected_model}')
 # Set a default model
 if selected_model not in st.session_state:
+if prompt := st.chat_input(f"Hi I'm {selected_model}, ask me a question "):
     # Display user message in chat message container
     with st.chat_message("user"):
     st.session_state.messages.append({"role": "user", "content": prompt})
+    if st.session_state.api_call_count >= API_CALL_LIMIT:
+        # Add the warning to the displayed messages, but not to the history sent to the model
+        response = f"LIMIT REACHED: Sorry, you have reached the API call limit for this session."
+        # st.write(response)
+        st.warning(f"Sorry, you have reached the API call limit for this session.")
+        st.session_state.messages.append({"role": "assistant", "content": response })
+    else:
+        # Display assistant response in chat message container
+        with st.chat_message("assistant"):
+            try:
+                st.session_state.api_call_count += 1
+                # Add a spinner for better UX while waiting
+                with st.spinner(f"Asking {selected_model}..."):
+                    stream = client.chat.completions.create(
+                        model=model_links[selected_model]["link"],
+                        messages=[
+                            {"role": m["role"], "content": m["content"]}
+                            for m in st.session_state.messages
+                        ],
+                        temperature=temp_values,#0.5,
+                        stream=True,
+                        max_tokens=3000,
+                    )
+                    response = st.write_stream(stream)
+                    remaining_calls = (API_CALL_LIMIT) - st.session_state.api_call_count
+                    st.markdown(f"\n\n <span style='float: right; font-size: 0.8em; color: gray;'>API calls:({remaining_calls}/{API_CALL_LIMIT})</span>", unsafe_allow_html=True)
+            except Exception as e:
+                response = "😵‍💫 Looks like someone unplugged something!\
+                        \n Either the model space is being updated or something is down.\
+                        \n\
+                        \n Try again later. \
+                        \n\
+                        \n Here's a random pic of a 🐶:"
+                st.write(response)
+                random_dog_pick = 'https://random.dog/'+ random_dog[np.random.randint(len(random_dog))]
+                st.image(random_dog_pick)
+                st.write("This was the error message:")
+                st.write(e)
+        st.session_state.messages.append({"role": "assistant", "content": response})