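# app.py: cascade chatbot for a Hugging Face Space.
# Llama 3.1 drafts a reply, FLAN-T5 rewrites it for clarity, and BART condenses it.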
import os

import gradio as gr
from huggingface_hub import InferenceClient

# Read the Hugging Face token from the Space's Secrets
HF_TOKEN = os.environ.get("HF_TOKEN")

# Initialize one client per model in the cascade
client_main = InferenceClient(token=HF_TOKEN, model="meta-llama/Llama-3.1-8B-Instruct")
client_aux1 = InferenceClient(token=HF_TOKEN, model="google/flan-t5-large")
client_aux2 = InferenceClient(token=HF_TOKEN, model="facebook/bart-large-cnn")
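# NOTE: meta-llama/Llama-3.1-8B-Instruct is a gated model, so HF_TOKEN must belong
# to an account that has accepted Meta's license. The set of models deployed on the
# serverless Inference API also changes over time; swap in available alternatives
# if any of these three model IDs stops resolving.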
# Main response function
def respond(message, history, system_message, max_tokens, temperature, top_p):
    try:
        # --- Step 1: Llama 3.1 drafts the answer ---
        # The old `conversational` task was removed from InferenceClient; chat
        # models are called via `chat_completion` with role/content messages,
        # which also carries the system message without manual prompt building.
        result_main = client_main.chat_completion(
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": message},
            ],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        response_main = result_main.choices[0].message.content

        # --- Step 2: FLAN-T5 rewrites the draft ---
        # With the default `details=False`, `text_generation` returns the
        # generated string directly; there is no `.generated_text` attribute.
        response_aux1 = client_aux1.text_generation(
            prompt=f"Rewrite this text clearly:\n{response_main}",
            max_new_tokens=max_tokens,
        )

        # --- Step 3: BART condenses the result ---
        # facebook/bart-large-cnn is served under the summarization task, not
        # text-generation, so call `summarization` and read `.summary_text`.
        response_aux2 = client_aux2.summarization(response_aux1).summary_text
    except Exception as e:
        response_aux2 = f"Error while generating a response: {e}"

    # Update the history in the messages format the Gradio Chatbot expects
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response_aux2})
    return history, history
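# Hypothetical quick check, run outside the UI (the values below are illustrative):
#     chat, _ = respond("Hello!", [], "You are a helpful chatbot.", 256, 0.7, 0.95)
#     print(chat[-1]["content"])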
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Cascade Chatbot (Llama 3.1 + FLAN-T5 + BART)")
    system_message = gr.Textbox(
        value="You are a friendly and helpful chatbot.",
        label="System Message",
    )
    # type="messages" makes the Chatbot accept the {"role", "content"} dicts
    # that respond() appends to the history
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(label="Type your message")
    max_tokens = gr.Slider(50, 2048, 512, step=50, label="Max Tokens")
    temperature = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)")
    history = gr.State([])

    # respond() already matches the submit signature, so no wrapper is needed
    msg.submit(
        respond,
        inputs=[msg, history, system_message, max_tokens, temperature, top_p],
        outputs=[chatbot, history],
    )

if __name__ == "__main__":
    demo.launch()
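# Assumed Space dependencies (requirements.txt); exact pins are an assumption:
#     gradio            # a version recent enough to support gr.Chatbot(type="messages")
#     huggingface_hub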