import os
import gradio as gr
from huggingface_hub import InferenceClient
import traceback

# Read the Hugging Face token from the Space's Secrets
HF_TOKEN = os.environ.get("HF_TOKEN")

# Initialize one InferenceClient per model
client_main = InferenceClient(token=HF_TOKEN, model="meta-llama/Llama-3.1-8B-Instruct")
client_aux1 = InferenceClient(token=HF_TOKEN, model="google/flan-t5-large")
client_aux2 = InferenceClient(token=HF_TOKEN, model="facebook/bart-large-cnn")
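
# A minimal sketch, for reference only (never called below): recent
# huggingface_hub versions expose chat_completion() directly on
# InferenceClient, which is the documented way to chat with an instruct
# model. The defensive wrapper further down exists because other client
# shapes and versions differ. The helper name and the literal parameter
# values here are illustrative assumptions, not part of the app's flow.
def _example_direct_chat(prompt):
    completion = client_main.chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=256,
        temperature=0.7,
        top_p=0.95,
    )
    return completion.choices[0].message.content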

# Extract text from the various response object shapes the APIs may return
def _extract_text_from_response(obj):
    if obj is None:
        return ""
    # common attributes
    if hasattr(obj, "content"):
        try:
            return obj.content
        except Exception:
            pass
    if hasattr(obj, "text"):
        try:
            return obj.text
        except Exception:
            pass
    if hasattr(obj, "generated_text"):
        try:
            return obj.generated_text
        except Exception:
            pass
    # choices-style (chat completion) objects
    try:
        if hasattr(obj, "choices") and len(obj.choices) > 0:
            choice = obj.choices[0]
            # dict-like choice
            if isinstance(choice, dict) and "message" in choice and isinstance(choice["message"], dict) and "content" in choice["message"]:
                return choice["message"]["content"]
            # object-like choice
            if hasattr(choice, "message"):
                msg = choice.message
                if isinstance(msg, dict) and "content" in msg:
                    return msg["content"]
                if hasattr(msg, "get") and msg.get("content"):
                    return msg.get("content")
                # plain attribute access (covers huggingface_hub message objects)
                if hasattr(msg, "content"):
                    return msg.content
    except Exception:
        pass
    # last resort
    try:
        return str(obj)
    except Exception:
        return ""

# Helper that tries several call signatures for the Llama chat proxy
def _call_llama_chat_stateless(client_chat_proxy, messages, max_new_tokens, temperature, top_p):
    """
    Tries, in order:
      1) client_chat_proxy(messages=..., max_new_tokens=..., ...)
      2) client_chat_proxy.chat_completion(messages=..., ...) or client_chat_proxy.create(...)
      3) adding messages via add_message() + client_chat_proxy.send_message()/generate() (if supported)
      4) scanning for alternative methods (send, create, generate, run, complete) and trying them
    Returns the response object, or raises a RuntimeError with debug info.
    """
    # 1) direct call, if the proxy itself is callable
    if callable(client_chat_proxy):
        try:
            return client_chat_proxy(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except TypeError:
            # it may only accept positional arguments
            try:
                return client_chat_proxy(messages, max_new_tokens, temperature, top_p)
            except Exception:
                pass
        except Exception:
            pass
    # 2) try known method names
    for method_name in ("send_message", "send", "create", "generate", "run", "complete", "chat_completion", "chat_complete"):
        if hasattr(client_chat_proxy, method_name):
            method = getattr(client_chat_proxy, method_name)
            try:
                # many variants exist: prefer passing messages=...
                return method(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
            except TypeError:
                # it may only accept positional arguments
                try:
                    return method(messages, max_new_tokens, temperature, top_p)
                except Exception:
                    # fall through (some implementations use add_message + send)
                    pass
            except Exception:
                # on failure, try the next option
                pass
    # 3) add messages one by one, then generate
    if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "send_message"):
        try:
            for m in messages:
                client_chat_proxy.add_message(m["role"], m["content"])
            return client_chat_proxy.send_message(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except Exception:
            pass
    if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "generate"):
        try:
            for m in messages:
                client_chat_proxy.add_message(m["role"], m["content"])
            return client_chat_proxy.generate(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except Exception:
            pass
    # 4) last attempt: look for similarly named methods and call them with messages only
    candidate_methods = [m for m in dir(client_chat_proxy) if any(k in m for k in ("send", "create", "generate", "run", "complete"))]
    for name in candidate_methods:
        try:
            method = getattr(client_chat_proxy, name)
            if callable(method):
                try:
                    return method(messages=messages)
                except TypeError:
                    try:
                        return method(messages)
                    except Exception:
                        pass
        except Exception:
            pass
    # If we got here, everything failed -- raise with debug info
    debug = {
        "available_attrs": dir(client_chat_proxy),
        "messages_sample": messages[:3],
    }
    raise RuntimeError(f"Could not call the Llama chat proxy with any of the tested signatures. Debug: {debug}")
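
# The `messages` lists built below follow the OpenAI-style chat format, e.g.:
#   [{"role": "system", "content": "You are a helpful assistant."},
#    {"role": "user", "content": "Hi!"}]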

# Main response function
def respond(message, history, system_message, max_tokens, temperature, top_p):
    try:
        # Build the (stateless) messages list
        messages = [{"role": "system", "content": system_message or ""}]
        for h in history:
            messages.append({"role": h.get("role", "user"), "content": h.get("content", "")})
        messages.append({"role": "user", "content": message})
        # Step 1: call the Llama chat proxy, trying several signatures
        # (int() because Gradio sliders return floats)
        chat_proxy = client_main.chat
        response_main_obj = _call_llama_chat_stateless(chat_proxy, messages, int(max_tokens), temperature, top_p)
        response_main = _extract_text_from_response(response_main_obj)
        # Step 2: FLAN-T5 (rewrite)
        result_aux1 = client_aux1.text_generation(
            prompt=f"Rewrite this text clearly and concisely:\n{response_main}",
            max_new_tokens=int(max_tokens)
        )
        response_aux1 = _extract_text_from_response(result_aux1)
        # Step 3: BART (3-sentence summary)
        result_aux2 = client_aux2.text_generation(
            prompt=f"Summarize this text in 3 sentences:\n{response_aux1}",
            max_new_tokens=150
        )
        response_aux2 = _extract_text_from_response(result_aux2)
    except Exception as e:
        # return a short traceback for debugging -- useful for the next fix
        tb = traceback.format_exc(limit=5)
        response_aux2 = f"Error generating the response: {e}\n\nTraceback (short):\n{tb}"
    # Update the history in the Gradio Chatbot "messages" format
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response_aux2})
    return history, history
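
# Pipeline recap: Llama 3.1 drafts the answer, FLAN-T5 rewrites it for
# clarity, and BART condenses the rewrite into a 3-sentence summary; only
# that final summary is appended to the chat history shown to the user.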

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Cascading Chatbot (Llama 3.1 + FLAN-T5 + BART)")
    system_message = gr.Textbox(
        value="You are a friendly and helpful chatbot.",
        label="System Message"
    )
    # type="messages" matches the role/content dicts built in respond()
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(label="Type your message")
    max_tokens = gr.Slider(50, 2048, 512, step=50, label="Max Tokens")
    temperature = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)")
    history = gr.State([])

    def handle_submit(message, history, system_message, max_tokens, temperature, top_p):
        return respond(message, history, system_message, max_tokens, temperature, top_p)

    msg.submit(
        handle_submit,
        inputs=[msg, history, system_message, max_tokens, temperature, top_p],
        outputs=[chatbot, history]
    )
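
# Note: respond() returns (history, history), so one submit updates both the
# visible Chatbot component and the gr.State copy, keeping them in sync.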

if __name__ == "__main__":
    demo.launch()