# app.py — cascading chatbot Space (Hugging Face, user kawre, commit 2674c93, 2.83 kB)
import os
import gradio as gr
from huggingface_hub import InferenceClient
# Read the Hugging Face API token from the environment (Space Secrets); None if unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# One Inference API client per model in the cascade:
# client_main drafts the reply, client_aux1 rewrites it, client_aux2 summarizes it
# (see the prompts built in respond() below).
client_main = InferenceClient(token=HF_TOKEN, model="meta-llama/Llama-3.1-8B-Instruct")
client_aux1 = InferenceClient(token=HF_TOKEN, model="google/flan-t5-large")
client_aux2 = InferenceClient(token=HF_TOKEN, model="facebook/bart-large-cnn")
# Main response function: three-model cascade.
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate a cascaded reply: Llama 3.1 drafts, FLAN-T5 rewrites, BART summarizes.

    Parameters mirror the Gradio inputs. Appends the user message and the
    final assistant reply to ``history`` (OpenAI-style role/content dicts)
    and returns ``(history, history)`` so one list feeds both the Chatbot
    display and the gr.State.
    """
    try:
        # --- Step 1: Llama 3.1 (chat) ---
        # InferenceClient no longer exposes `.conversational()`; the supported
        # API is chat_completion() with an OpenAI-style message list, which also
        # carries the system message properly instead of inlining it in a prompt.
        chat = client_main.chat_completion(
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": message},
            ],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        draft = chat.choices[0].message.content

        # --- Step 2: FLAN-T5 rewrite ---
        # text_generation() returns the generated string directly; the old
        # `.generated_text` attribute only exists when details=True is passed.
        rewritten = client_aux1.text_generation(
            prompt=f"Reformule este texto de forma clara:\n{draft}",
            max_new_tokens=max_tokens,
        )

        # --- Step 3: BART summary ---
        final_reply = client_aux2.text_generation(
            prompt=f"Resuma este texto em 3 frases:\n{rewritten}",
            max_new_tokens=150,
        )
    except Exception as e:
        # Surface any API/network failure in the chat instead of crashing the UI.
        final_reply = f"Erro ao gerar resposta: {e}"

    # Update the history in the "messages" format expected by gr.Chatbot.
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": final_reply})
    return history, history
# Gradio interface: wires the input widgets straight into respond().
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Chatbot em Cascata (Llama 3.1 + FLAN-T5 + BART)")
    system_message = gr.Textbox(
        value="Você é um chatbot amigável e prestativo.",
        label="System Message",
    )
    # type="messages" matches the {"role": ..., "content": ...} dicts that
    # respond() appends; the default tuple format would reject them.
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(label="Digite sua mensagem")
    max_tokens = gr.Slider(50, 2048, 512, step=50, label="Max Tokens")
    temperature = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)")
    # Shared conversation state; the same list backs both outputs below.
    history = gr.State([])

    # Submit goes directly to respond(); the former handle_submit wrapper was a
    # pure pass-through and added nothing.
    msg.submit(
        respond,
        inputs=[msg, history, system_message, max_tokens, temperature, top_p],
        outputs=[chatbot, history],
    )

if __name__ == "__main__":
    demo.launch()