import os
import gradio as gr
from huggingface_hub import InferenceClient
# Get the Hugging Face token from the Space's Secrets
HF_TOKEN = os.environ.get("HF_TOKEN")
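# Note: meta-llama/Llama-3.1-8B-Instruct is a gated model, so the token must
# belong to an account that has been granted access to it on the Hub.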
# Initialize one inference client per model
client_main = InferenceClient(token=HF_TOKEN, model="meta-llama/Llama-3.1-8B-Instruct")
client_aux1 = InferenceClient(token=HF_TOKEN, model="google/flan-t5-large")
client_aux2 = InferenceClient(token=HF_TOKEN, model="facebook/bart-large-cnn")
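
# The three clients form a cascade: Llama 3.1 drafts an answer, FLAN-T5
# rewrites it for clarity, and BART condenses the rewrite into a summary.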
# Main response function
def respond(message, history, system_message, max_tokens, temperature, top_p):
    try:
        max_tokens = int(max_tokens)  # slider values may arrive as floats

        # --- Step 1: Llama 3.1 (chat) ---
        # The old `conversational` method no longer exists on InferenceClient;
        # chat_completion is the current chat task, and the system message
        # goes in as its own chat turn instead of being prepended to the prompt.
        result_main = client_main.chat_completion(
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": message},
            ],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        response_main = result_main.choices[0].message.content

        # --- Step 2: FLAN-T5 ---
        # text_generation returns the generated string directly
        # (a .generated_text attribute only exists when details=True)
        response_aux1 = client_aux1.text_generation(
            prompt=f"Rephrase this text clearly:\n{response_main}",
            max_new_tokens=max_tokens,
        )

        # --- Step 3: BART ---
        # bart-large-cnn is served as a summarization model, which condenses
        # its input directly; it cannot follow a "summarize in 3 sentences"
        # instruction, so the text is passed in as-is
        result_aux2 = client_aux2.summarization(response_aux1)
        response_aux2 = result_aux2.summary_text
    except Exception as e:
        response_aux2 = f"Error generating the response: {e}"

    # Update the history in the messages format expected by gr.Chatbot
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response_aux2})
    return history, history

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Cascade Chatbot (Llama 3.1 + FLAN-T5 + BART)")

    system_message = gr.Textbox(
        value="You are a friendly and helpful chatbot.",
        label="System Message"
    )
    # type="messages" matches the role/content dicts built in respond()
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(label="Type your message")
    max_tokens = gr.Slider(50, 2048, 512, step=50, label="Max Tokens")
    temperature = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)")
    history = gr.State([])

    # respond already has the right signature, so no wrapper function is needed
    msg.submit(
        respond,
        inputs=[msg, history, system_message, max_tokens, temperature, top_p],
        outputs=[chatbot, history]
    )

if __name__ == "__main__":
    demo.launch()
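
# Note (assumption): on a Hugging Face Space this file is expected to be saved
# as app.py, with gradio and huggingface_hub listed in requirements.txt and
# HF_TOKEN defined under the Space's Settings > Secrets.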