import os import gradio as gr from huggingface_hub import InferenceClient import traceback # Pega o token do Hugging Face dos Secrets HF_TOKEN = os.environ.get("HF_TOKEN") # Inicializa os clientes dos modelos client_main = InferenceClient(token=HF_TOKEN, model="meta-llama/Llama-3.1-8B-Instruct") client_aux1 = InferenceClient(token=HF_TOKEN, model="google/flan-t5-large") client_aux2 = InferenceClient(token=HF_TOKEN, model="facebook/bart-large-cnn") # Extrai texto de objetos de resposta variados def _extract_text_from_response(obj): if obj is None: return "" # atributos comuns if hasattr(obj, "content"): try: return obj.content except Exception: pass if hasattr(obj, "text"): try: return obj.text except Exception: pass if hasattr(obj, "generated_text"): try: return obj.generated_text except Exception: pass # choices style try: if hasattr(obj, "choices") and len(obj.choices) > 0: choice = obj.choices[0] # dict-like if isinstance(choice, dict) and "message" in choice and isinstance(choice["message"], dict) and "content" in choice["message"]: return choice["message"]["content"] # object-like if hasattr(choice, "message"): msg = choice.message if isinstance(msg, dict) and "content" in msg: return msg["content"] if hasattr(msg, "get") and msg.get("content"): return msg.get("content") except Exception: pass # last resort try: return str(obj) except Exception: return "" # Função que tenta várias formas de usar o chat do Llama def _call_llama_chat_stateless(client_chat_proxy, messages, max_new_tokens, temperature, top_p): """ Tenta (em ordem): 1) client_chat_proxy(messages=..., max_new_tokens=..., ...) 2) client_chat_proxy.chat_completion(messages=..., ...) or client_chat_proxy.create(...) 3) adicionar messages via add_message() + client_chat_proxy.generate()/send() (se suportado) 4) procura por métodos alternativos (chat_completion, create, generate, run) e tenta-os Retorna o objeto de resposta ou levanta Exception com debug info. """ # 1) chamada direta se proxy for chamável if callable(client_chat_proxy): try: return client_chat_proxy(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p) except TypeError: # talvez aceita positional args try: return client_chat_proxy(messages, max_new_tokens, temperature, top_p) except Exception: pass except Exception: pass # 2) tenta métodos conhecidos for method_name in ("send_message", "send", "create", "generate", "run", "complete", "chat_completion", "chat_complete"): if hasattr(client_chat_proxy, method_name): method = getattr(client_chat_proxy, method_name) try: # muitas variantes: preferimos passar messages=... return method(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p) except TypeError: # talvez aceite apenas positional try: return method(messages, max_new_tokens, temperature, top_p) except Exception: # tentar sem argumentos (algumas impls usam add_message + send) pass except Exception: # se falhar, tenta próxima opção pass # 3) tenta add_message() sequencial e depois gerar if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "send_message") : try: # adiciona sequencialmente for m in messages: client_chat_proxy.add_message(m["role"], m["content"]) return client_chat_proxy.send_message(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p) except Exception: pass if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "generate"): try: for m in messages: client_chat_proxy.add_message(m["role"], m["content"]) return client_chat_proxy.generate(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p) except Exception: pass # 4) última tentativa: procurar métodos parecidos e tentar chamá-los com only messages candidate_methods = [m for m in dir(client_chat_proxy) if any(k in m for k in ("send", "create", "generate", "run", "complete"))] for name in candidate_methods: try: method = getattr(client_chat_proxy, name) if callable(method): try: return method(messages=messages) except TypeError: try: return method(messages) except Exception: pass except Exception: pass # Se chegou aqui, falhou — lançar erro com debug debug = { "available_attrs": dir(client_chat_proxy), "messages_sample": messages[:3] } raise RuntimeError(f"Não foi possível chamar o chat proxy do Llama com as assinaturas testadas. Debug: {debug}") # Função principal de resposta def respond(message, history, system_message, max_tokens, temperature, top_p): try: # Monta lista de messages (stateless) messages = [] messages.append({"role": "system", "content": system_message or ""}) for h in history: messages.append({"role": h.get("role", "user"), "content": h.get("content", "")}) messages.append({"role": "user", "content": message}) # Chama o chat do Llama tentando várias assinaturas chat_proxy = client_main.chat response_main_obj = _call_llama_chat_stateless(chat_proxy, messages, max_tokens, temperature, top_p) response_main = _extract_text_from_response(response_main_obj) # Passo 2: FLAN-T5 (reformulação) result_aux1 = client_aux1.text_generation( prompt=f"Reformule este texto de forma clara e concisa:\n{response_main}", max_new_tokens=max_tokens ) response_aux1 = _extract_text_from_response(result_aux1) # Passo 3: BART (resumo em 3 frases) result_aux2 = client_aux2.text_generation( prompt=f"Resuma este texto em 3 frases:\n{response_aux1}", max_new_tokens=150 ) response_aux2 = _extract_text_from_response(result_aux2) except Exception as e: # enviar traceback curto para depuração — útil para o próximo ajuste tb = traceback.format_exc(limit=5) response_aux2 = f"Erro ao gerar resposta: {e}\n\nTraceback (curto):\n{tb}" # Atualiza histórico no formato Gradio Chatbot history.append({"role": "user", "content": message}) history.append({"role": "assistant", "content": response_aux2}) return history, history # Interface Gradio with gr.Blocks() as demo: gr.Markdown("## 🤖 Chatbot em Cascata (Llama 3.1 + FLAN-T5 + BART)") system_message = gr.Textbox( value="Você é um chatbot amigável e prestativo.", label="System Message" ) chatbot = gr.Chatbot() msg = gr.Textbox(label="Digite sua mensagem") max_tokens = gr.Slider(50, 2048, 512, step=50, label="Max Tokens") temperature = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature") top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)") history = gr.State([]) def handle_submit(message, history, system_message, max_tokens, temperature, top_p): return respond(message, history, system_message, max_tokens, temperature, top_p) msg.submit( handle_submit, inputs=[msg, history, system_message, max_tokens, temperature, top_p], outputs=[chatbot, history] ) if __name__ == "__main__": demo.launch()