import os

import gradio as gr
from huggingface_hub import InferenceClient

# Hugging Face token read from the environment (e.g. Space Secrets).
HF_TOKEN = os.environ.get("HF_TOKEN")

# One client per model in the cascade: main generator, rewriter, summarizer.
client_main = InferenceClient(token=HF_TOKEN, model="meta-llama/Llama-3.1-8B-Instruct")
client_aux1 = InferenceClient(token=HF_TOKEN, model="google/flan-t5-large")
client_aux2 = InferenceClient(token=HF_TOKEN, model="facebook/bart-large-cnn")


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Run the three-model cascade for one user turn.

    Builds a flat prompt from the system message plus the chat history,
    generates with Llama 3.1, reformulates with FLAN-T5, summarizes with
    BART, then appends the user/assistant turn to ``history``.

    Args:
        message: The new user message.
        history: Mutable list of ``{"role", "content"}`` dicts (gr.State).
        system_message: System prompt prepended to the conversation.
        max_tokens: Generation budget for steps 1 and 2.
        temperature: Sampling temperature for step 1.
        top_p: Nucleus-sampling cutoff for step 1.

    Returns:
        ``(history, history)`` — the updated message list for both the
        Chatbot display and the State component.
    """
    try:
        # Full conversation context: system message, prior turns, new message.
        messages = [{"role": "system", "content": system_message}]
        messages.extend(history)
        messages.append({"role": "user", "content": message})
        full_prompt = "\n".join(m["content"] for m in messages)

        # --- Step 1: Llama 3.1 ---
        # text_generation takes the prompt as the first positional argument,
        # the budget is named max_new_tokens, and the call returns a plain
        # string (details=False is the default) — not a list of dicts.
        response_main = client_main.text_generation(
            full_prompt,
            max_new_tokens=int(max_tokens),
            temperature=temperature,
            top_p=top_p,
        )

        # --- Step 2: FLAN-T5 (reformulation) ---
        response_aux1 = client_aux1.text_generation(
            f"Reformule este texto de forma clara e objetiva:\n{response_main}",
            max_new_tokens=int(max_tokens),
        )

        # --- Step 3: BART (summary) ---
        response_aux2 = client_aux2.text_generation(
            f"Resuma este texto em 3 frases:\n{response_aux1}",
            max_new_tokens=150,
        )
    except Exception as e:
        # Best-effort: surface the failure in the chat instead of crashing
        # the whole event handler.
        response_aux2 = f"Erro ao gerar resposta: {e}"

    # Record the turn; the Chatbot component needs the WHOLE message list,
    # not just the latest reply string.
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response_aux2})
    return history, history


# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Chatbot em Cascata (Llama 3.1 + FLAN-T5 + BART)")
    system_message = gr.Textbox(
        value="Você é um chatbot amigável e prestativo.",
        label="System Message",
    )
    # type="messages" matches the {"role", "content"} dicts kept in `history`;
    # the default tuple format would not render them.
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(label="Digite sua mensagem")
    max_tokens = gr.Slider(50, 2048, 512, step=50, label="Max Tokens")
    temperature = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)")
    history = gr.State([])

    # Wire the textbox directly to respond (the old handle_submit wrapper was
    # a pure pass-through); clear the input box after each submission.
    msg.submit(
        respond,
        inputs=[msg, history, system_message, max_tokens, temperature, top_p],
        outputs=[chatbot, history],
    ).then(lambda: "", None, msg)

if __name__ == "__main__":
    demo.launch()