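# Gradio chat demo for the Sakalti/ultiima-78B-Q2-mlx model, served with mlx_lm
# on a Hugging Face Space (ZeroGPU is requested through the `spaces` package).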
import gradio as gr
import spaces
from mlx_lm import load, generate
# make_sampler carries the sampling options (top_p/top_k) in recent mlx_lm releases
from mlx_lm.sample_utils import make_sampler
# Load the model and tokenizer (mlx_lm.load returns both as a tuple)
model_name = "Sakalti/ultiima-78B-Q2-mlx"
model, tokenizer = load(model_name)
@spaces.GPU(duration=100)
def chat(prompt, history, system_prompt, top_p, top_k, max_new_tokens):
    # ChatInterface passes (message, history, *additional_inputs); history is not
    # folded into the prompt here, so each turn is answered from the system prompt alone.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # Generate from the rendered prompt; mlx_lm.generate takes the prompt text directly.
    # top_p/top_k are applied through a sampler (temp=0.7 is an assumed default here,
    # since make_sampler is greedy at temp=0 and would ignore top_p/top_k).
    output = generate(
        model,
        tokenizer,
        prompt=text,
        max_tokens=max_new_tokens,
        sampler=make_sampler(temp=0.7, top_p=top_p, top_k=top_k),
    )
    return output
# Gradio UI: additional inputs are passed to fn after (message, history), in this order
chat_interface = gr.ChatInterface(
    fn=chat,
    additional_inputs=[
        gr.Textbox(value="あなたはフレンドリーなチャットボットです。", label="System Prompt"),
        gr.Slider(0.0, 1.0, value=0.9, label="Top-p"),
        gr.Slider(1, 100, value=50, step=1, label="Top-k"),
        gr.Slider(1, 1024, value=512, step=1, label="Max New Tokens"),
    ],
)

chat_interface.launch()