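"""Cascaded chatbot demo.

A Gradio UI that sends each user message through three Hugging Face Inference
API models in sequence: meta-llama/Llama-3.1-8B-Instruct answers, google/flan-t5-large
rewrites the answer, and facebook/bart-large-cnn condenses it into a short summary.
"""
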
import os
import traceback

import gradio as gr
from huggingface_hub import InferenceClient

# Hugging Face API token, read from the environment (set HF_TOKEN before launching).
HF_TOKEN = os.environ.get("HF_TOKEN")

# One InferenceClient per stage of the cascade:
# Llama 3.1 produces the main answer, FLAN-T5 rewrites it, BART condenses it.
client_main = InferenceClient(token=HF_TOKEN, model="meta-llama/Llama-3.1-8B-Instruct")
client_aux1 = InferenceClient(token=HF_TOKEN, model="google/flan-t5-large")
client_aux2 = InferenceClient(token=HF_TOKEN, model="facebook/bart-large-cnn")


def _extract_text_from_response(obj):
    """Best-effort extraction of plain text from whatever the inference client returned."""
    if obj is None:
        return ""

    # Simple attribute-style responses (message objects, text-generation outputs).
    for attr in ("content", "text", "generated_text"):
        if hasattr(obj, attr):
            try:
                return getattr(obj, attr)
            except Exception:
                pass

    # Chat-completion style responses: obj.choices[0].message.content (dict or object).
    try:
        if hasattr(obj, "choices") and len(obj.choices) > 0:
            choice = obj.choices[0]
            if isinstance(choice, dict) and isinstance(choice.get("message"), dict) and "content" in choice["message"]:
                return choice["message"]["content"]
            if hasattr(choice, "message"):
                msg = choice.message
                if isinstance(msg, dict) and "content" in msg:
                    return msg["content"]
                if hasattr(msg, "get") and msg.get("content"):
                    return msg.get("content")
                if hasattr(msg, "content"):
                    return msg.content
    except Exception:
        pass

    # Last resort: stringify the whole object.
    try:
        return str(obj)
    except Exception:
        return ""

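# Minimal usage sketch for the helper above. text_generation() returns a plain
# string by default, which the helper passes through via str(); richer
# chat-completion objects are unwrapped down to their message content.
#
#   out = client_aux1.text_generation("Say hi", max_new_tokens=16)
#   print(_extract_text_from_response(out))
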
def _call_llama_chat_stateless(client_chat_proxy, messages, max_new_tokens, temperature, top_p):
    """
    Tries, in order:
      1) calling client_chat_proxy(messages=..., max_new_tokens=..., ...) directly
      2) well-known methods such as chat_completion(), create(), generate(), send(), ...
      3) stateful interfaces: add_message() for each message, then send_message()/generate()
      4) any other attribute whose name suggests generation (send, create, generate, run, complete)
    Returns the raw response object, or raises RuntimeError with debug info if nothing works.
    """
    # 1) The proxy itself may be callable.
    if callable(client_chat_proxy):
        try:
            return client_chat_proxy(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except TypeError:
            # Keyword names rejected: retry with positional arguments.
            try:
                return client_chat_proxy(messages, max_new_tokens, temperature, top_p)
            except Exception:
                pass
        except Exception:
            pass

    # 2) Well-known method names, tried with keyword and then positional arguments.
    for method_name in ("send_message", "send", "create", "generate", "run", "complete", "chat_completion", "chat_complete"):
        if hasattr(client_chat_proxy, method_name):
            method = getattr(client_chat_proxy, method_name)
            try:
                return method(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
            except TypeError:
                try:
                    return method(messages, max_new_tokens, temperature, top_p)
                except Exception:
                    pass
            except Exception:
                pass

    # 3) Stateful interfaces: push the messages first, then request a completion.
    if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "send_message"):
        try:
            for m in messages:
                client_chat_proxy.add_message(m["role"], m["content"])
            return client_chat_proxy.send_message(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except Exception:
            pass

    if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "generate"):
        try:
            for m in messages:
                client_chat_proxy.add_message(m["role"], m["content"])
            return client_chat_proxy.generate(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except Exception:
            pass

    # 4) Last resort: probe any attribute whose name hints at generation.
    candidate_methods = [m for m in dir(client_chat_proxy) if any(k in m for k in ("send", "create", "generate", "run", "complete"))]
    for name in candidate_methods:
        try:
            method = getattr(client_chat_proxy, name)
            if callable(method):
                try:
                    return method(messages=messages)
                except TypeError:
                    try:
                        return method(messages)
                    except Exception:
                        pass
        except Exception:
            pass

    # Nothing worked: report what the proxy exposes so the failure is diagnosable.
    debug = {
        "available_attrs": dir(client_chat_proxy),
        "messages_sample": messages[:3],
    }
    raise RuntimeError(f"Could not call the Llama chat proxy with any of the tested signatures. Debug: {debug}")

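# Usage sketch for the fallback helper above ("some_chat_proxy" is a placeholder
# for any object exposing one of the probed call signatures):
#
#   msgs = [{"role": "user", "content": "Hello"}]
#   raw = _call_llama_chat_stateless(some_chat_proxy, msgs, 128, 0.7, 0.95)
#   print(_extract_text_from_response(raw))
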
def respond(message, history, system_message, max_tokens, temperature, top_p):
    try:
        # Build an OpenAI-style message list: system prompt, prior turns, new user message.
        messages = [{"role": "system", "content": system_message or ""}]
        for h in history:
            messages.append({"role": h.get("role", "user"), "content": h.get("content", "")})
        messages.append({"role": "user", "content": message})

        # Stage 1: main answer from Llama 3.1. Prefer the documented chat_completion()
        # API of InferenceClient; fall back to the generic probing helper otherwise.
        try:
            response_main_obj = client_main.chat_completion(
                messages=messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p
            )
        except (AttributeError, TypeError):
            response_main_obj = _call_llama_chat_stateless(client_main.chat, messages, max_tokens, temperature, top_p)
        response_main = _extract_text_from_response(response_main_obj)

        # Stage 2: ask FLAN-T5 to rewrite the answer.
        result_aux1 = client_aux1.text_generation(
            prompt=f"Rewrite this text clearly and concisely:\n{response_main}",
            max_new_tokens=max_tokens,
        )
        response_aux1 = _extract_text_from_response(result_aux1)

        # Stage 3: ask BART to condense the rewritten answer.
        result_aux2 = client_aux2.text_generation(
            prompt=f"Summarize this text in 3 sentences:\n{response_aux1}",
            max_new_tokens=150,
        )
        response_aux2 = _extract_text_from_response(result_aux2)

    except Exception as e:
        # Surface the error in the chat instead of crashing the UI.
        tb = traceback.format_exc(limit=5)
        response_aux2 = f"Error while generating the response: {e}\n\nTraceback (short):\n{tb}"

    # Append the new turn and return the updated history for both the Chatbot and the State.
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response_aux2})

    return history, history

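# Quick manual check outside the UI (requires a valid HF_TOKEN and access to the
# three models on the Inference API; the argument values below are illustrative):
#
#   chat, _ = respond("Hello!", [], "You are a helpful assistant.", 256, 0.7, 0.95)
#   print(chat[-1]["content"])
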
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Cascaded Chatbot (Llama 3.1 + FLAN-T5 + BART)")

    system_message = gr.Textbox(
        value="You are a friendly and helpful chatbot.",
        label="System Message",
    )

    # type="messages" so the component accepts the role/content dicts built in respond().
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(label="Type your message")
    max_tokens = gr.Slider(50, 2048, 512, step=50, label="Max Tokens")
    temperature = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)")

    # Conversation history, kept per session as a list of {"role", "content"} dicts.
    history = gr.State([])

    def handle_submit(message, history, system_message, max_tokens, temperature, top_p):
        return respond(message, history, system_message, max_tokens, temperature, top_p)

    msg.submit(
        handle_submit,
        inputs=[msg, history, system_message, max_tokens, temperature, top_p],
        outputs=[chatbot, history],
    )


if __name__ == "__main__":
    demo.launch()