"""
AI chat functionality implementation.

Provides chat functionality by calling the LLM endpoint on Modal or by
running generation locally via ZeroGPU.
"""

import logging
import os

import openai_harmony as oh
from dotenv import load_dotenv

load_dotenv()

logger = logging.getLogger(__name__)

# Select the generation backend via the USE_MODAL environment variable.
USE_MODAL = os.getenv("USE_MODAL", "false").lower() == "true"
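# Example .env entry (value illustrative): USE_MODAL=true routes generation
# through the deployed Modal app; any other value, or unset, uses ZeroGPU.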

if USE_MODAL:
    # Remote backend: call the generator function deployed on Modal.
    import modal

    APP_NAME = "unpredictable-lord"
    _generate_stream = modal.Function.from_name(APP_NAME, "generate_stream")

    def generate_stream(input_tokens):
        """Yield completion tokens from the remote Modal generator."""
        return _generate_stream.remote_gen(input_tokens)

else:
    # Local backend: run generation via ZeroGPU.
    from unpredictable_lord.llm_zerogpu import generate_stream as _generate_stream

    def generate_stream(input_tokens):
        """Yield completion tokens from the local ZeroGPU generator."""
        return _generate_stream(input_tokens)


def chat_with_llm_stream(
    user_message: str,
    chat_history: list[dict[str, str]],
):
    """
    Chat with the LLM (streaming version).

    Args:
        user_message: The user's message.
        chat_history: Past chat history as a list of dictionaries in Gradio
            "messages" format:
            [{"role": "user", "content": "..."},
             {"role": "assistant", "content": "..."}, ...]

    Yields:
        updated_chat_history: The updated chat history (Gradio "messages"
            format), yielded again after each new content delta.
    """
    try:
        messages = []

        # System message: fixed persona plus low reasoning effort.
        system_content = (
            oh.SystemContent.new()
            .with_model_identity(
                "You are a lord of a medieval fantasy kingdom. The user is your advisor. "
                "Listen to your advisor's advice and act for the development of your "
                "territory and the maintenance of your authority. "
                "Speak in an arrogant tone."
            )
            .with_reasoning_effort(oh.ReasoningEffort.LOW)
        )
        messages.append(
            oh.Message.from_role_and_content(oh.Role.SYSTEM, system_content)
        )

        # Replay prior turns so the model sees the full conversation.
        for msg in chat_history:
            if msg["role"] == "user":
                messages.append(
                    oh.Message.from_role_and_content(oh.Role.USER, msg["content"])
                )
            elif msg["role"] == "assistant":
                messages.append(
                    oh.Message.from_role_and_content(oh.Role.ASSISTANT, msg["content"])
                )

        # Append the new user message last.
        messages.append(oh.Message.from_role_and_content(oh.Role.USER, user_message))

        # Render the conversation into Harmony-format input tokens for completion.
        convo = oh.Conversation.from_messages(messages)
        encoding = oh.load_harmony_encoding(oh.HarmonyEncodingName.HARMONY_GPT_OSS)
        input_tokens = encoding.render_conversation_for_completion(
            convo, oh.Role.ASSISTANT
        )
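        # For debugging, the rendered prompt can be inspected by decoding the
        # token ids back to text, e.g. logger.debug(encoding.decode(input_tokens))
        # (a sketch; the exact decode helper may vary by openai_harmony version).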

        # Incrementally parse streamed tokens into Harmony channels.
        parser = oh.StreamableParser(encoding, role=oh.Role.ASSISTANT)

        # Show the user's message immediately, with an empty assistant slot
        # that fills in as tokens stream back.
        partial_history = chat_history + [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": ""},
        ]

        generator = generate_stream(input_tokens)

        response_text = ""
        for token in generator:
            if token is None:
                continue
            parser.process(token)

            # Surface only the "final" channel; reasoning channels stay hidden.
            if parser.current_channel == "final":
                delta = parser.last_content_delta
                if delta:
                    response_text += delta
                    partial_history[-1]["content"] = response_text
                    yield partial_history
    except Exception:
        logger.exception("Error during chat_with_llm_stream")
        updated_history = chat_history + [
            {"role": "user", "content": user_message},
            {
                "role": "assistant",
                "content": "[Error occurred while generating response.]",
            },
        ]
        yield updated_history
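

# Minimal manual smoke test (a sketch, not part of the module proper): wires
# the streaming generator into a Gradio messages-format chat UI. Assumes
# `gradio` is installed and the selected backend is reachable; component
# names and wiring here are illustrative.
if __name__ == "__main__":
    import gradio as gr

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(type="messages")
        textbox = gr.Textbox(placeholder="Advise your lord...")
        textbox.submit(
            chat_with_llm_stream, inputs=[textbox, chatbot], outputs=chatbot
        )

    demo.launch()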