import os

import gradio as gr
from huggingface_hub import InferenceClient

# Read the Hugging Face token from the environment (e.g. Space Secrets)
HF_TOKEN = os.environ.get("HF_TOKEN")

# Initialize one client per model in the cascade
client_main = InferenceClient(token=HF_TOKEN, model="meta-llama/Llama-3.1-8B-Instruct")
client_aux1 = InferenceClient(token=HF_TOKEN, model="google/flan-t5-large")
client_aux2 = InferenceClient(token=HF_TOKEN, model="facebook/bart-large-cnn")


# Main response function: runs the three models in sequence
def respond(message, history, system_message, max_tokens, temperature, top_p):
    try:
        # --- Step 1: Llama 3.1 chat completion ---
        # Build the message list from the system prompt, prior turns, and the new message
        messages = [{"role": "system", "content": system_message}]
        messages.extend(history)
        messages.append({"role": "user", "content": message})

        completion = client_main.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        response_main = completion.choices[0].message.content

        # --- Step 2: FLAN-T5 (rewrite) ---
        # text_generation returns the generated string directly when details=False
        response_aux1 = client_aux1.text_generation(
            f"Rewrite this text clearly and concisely:\n{response_main}",
            max_new_tokens=max_tokens,
        )

        # --- Step 3: BART (summary) ---
        # bart-large-cnn is served under the summarization task, not text-generation
        response_aux2 = client_aux2.summarization(response_aux1).summary_text
    except Exception as e:
        response_aux2 = f"Error generating response: {e}"

    # Update the history in the Gradio "messages" format
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response_aux2})
    return history, history


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Cascading Chatbot (Llama 3.1 + FLAN-T5 + BART)")

    system_message = gr.Textbox(
        value="You are a friendly and helpful chatbot.",
        label="System Message",
    )
    # type="messages" matches the {"role": ..., "content": ...} dicts built in respond()
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(label="Type your message")
    max_tokens = gr.Slider(50, 2048, 512, step=50, label="Max Tokens")
    temperature = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)")
    history = gr.State([])

    msg.submit(
        respond,
        inputs=[msg, history, system_message, max_tokens, temperature, top_p],
        outputs=[chatbot, history],
    )

if __name__ == "__main__":
    demo.launch()
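
# Usage sketch (assuming this file is saved as app.py; the token value below is a placeholder):
#   export HF_TOKEN=hf_...   # needs access to the gated meta-llama/Llama-3.1-8B-Instruct repo
#   python app.py            # then open the local Gradio URL printed in the terminal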