kawre's picture
Update app.py
e66ba96 verified
raw
history blame
1.45 kB
import gradio as gr
from huggingface_hub import InferenceClient
def _history_to_messages(history):
    """Return chat history as a list of ``{"role", "content"}`` dicts.

    Entries that are already dicts are passed through unchanged. Two-item
    ``(user, assistant)`` pairs are expanded into one message per non-empty
    side, so the function works whichever history format the UI hands over.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            messages.append(turn)
            continue
        user_text, assistant_text = turn
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    return messages


def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token=None,
):
    """Answer *message* by chaining three hosted models.

    The conversation (system message, prior history, new user message) is
    sent to the main instruct model; its reply is then passed through two
    auxiliary models in sequence and the final text is returned.

    Args:
        message: The new user message.
        history: Prior turns, either role/content dicts or
            (user, assistant) pairs.
        system_message: System prompt placed at the start of the chat.
        max_tokens: Upper bound on generated tokens for every step.
        temperature: Sampling temperature for the main model.
        top_p: Nucleus-sampling threshold for the main model.
        hf_token: Access token as a string, an object exposing ``.token``,
            or ``None`` to let ``InferenceClient`` resolve credentials
            itself.

    Returns:
        The text produced by the last model in the chain.
    """
    # Accept a raw token string, a wrapper object with `.token`, or None.
    token = getattr(hf_token, "token", hf_token)
    # UI sliders may deliver floats; token limits must be integers.
    token_limit = int(max_tokens)

    # Set up the three clients.
    client_main = InferenceClient(
        token=token, model="meta-llama/Llama-3.1-8B-Instruct"
    )
    client_aux1 = InferenceClient(token=token, model="google/flan-t5-large")
    client_aux2 = InferenceClient(token=token, model="facebook/bart-large-cnn")

    # System message, then history, then the new user turn.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    # Step 1: Llama 3.1 sees the whole conversation via the chat API.
    completion = client_main.chat_completion(
        messages=messages,
        max_tokens=token_limit,
        temperature=temperature,
        top_p=top_p,
    )
    response_main = completion.choices[0].message.content or ""

    # Step 2: first auxiliary model. `text_generation` takes the prompt
    # positionally and limits length with `max_new_tokens`.
    # NOTE(review): flan-t5 and bart-large-cnn may be served under other
    # tasks (text2text / summarization) -- confirm `text_generation` is
    # accepted for them by the configured provider.
    response_aux1 = client_aux1.text_generation(
        response_main, max_new_tokens=token_limit
    )

    # Step 3: second auxiliary model post-processes the previous output.
    response_aux2 = client_aux2.text_generation(
        response_aux1, max_new_tokens=token_limit
    )
    return response_aux2
# Gradio interface.
# Extra controls shown alongside the chat box; their values are handed to
# `respond` in this order.
_extra_controls = [
    gr.Textbox(label="System message", value="You are a friendly Chatbot."),
    gr.Slider(minimum=1, maximum=2048, value=512, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, label="Temperature"),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        label="Top-p (nucleus sampling)",
    ),
]

chatbot = gr.ChatInterface(fn=respond, additional_inputs=_extra_controls)

# Host the chat interface inside a Blocks app so it can be launched.
with gr.Blocks() as demo:
    chatbot.render()

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()