import os
import gradio as gr
from huggingface_hub import InferenceClient
import traceback

# Read the Hugging Face token from the Space's secrets
HF_TOKEN = os.environ.get("HF_TOKEN")

# Initialize one Inference API client per model in the cascade
client_main = InferenceClient(token=HF_TOKEN, model="meta-llama/Llama-3.1-8B-Instruct")
client_aux1 = InferenceClient(token=HF_TOKEN, model="google/flan-t5-large")
client_aux2 = InferenceClient(token=HF_TOKEN, model="facebook/bart-large-cnn")
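
# Note: the clients above are only authenticated if HF_TOKEN is set. On the
# Space this comes from the Secrets panel; for a local run, a sketch with a
# hypothetical placeholder token value:
#
#   export HF_TOKEN=hf_xxxxxxxxxxxx
#   python app.py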

# Extract plain text from the various response shapes the clients may return
def _extract_text_from_response(obj):
    if obj is None:
        return ""
    # common attributes
    if hasattr(obj, "content"):
        try:
            return obj.content
        except Exception:
            pass
    if hasattr(obj, "text"):
        try:
            return obj.text
        except Exception:
            pass
    if hasattr(obj, "generated_text"):
        try:
            return obj.generated_text
        except Exception:
            pass
    # OpenAI-style "choices" payloads
    try:
        if hasattr(obj, "choices") and len(obj.choices) > 0:
            choice = obj.choices[0]
            # dict-like
            if isinstance(choice, dict) and "message" in choice and isinstance(choice["message"], dict) and "content" in choice["message"]:
                return choice["message"]["content"]
            # object-like
            if hasattr(choice, "message"):
                msg = choice.message
                if isinstance(msg, dict) and "content" in msg:
                    return msg["content"]
                if hasattr(msg, "get") and msg.get("content"):
                    return msg.get("content")
    except Exception:
        pass
    # last resort
    try:
        return str(obj)
    except Exception:
        return ""

# Try several calling conventions for the Llama chat interface
def _call_llama_chat_stateless(client_chat_proxy, messages, max_new_tokens, temperature, top_p):
    """
    Tries, in order:
      1) client_chat_proxy(messages=..., max_new_tokens=..., ...)
      2) known methods such as chat_completion(), create(), generate(), run()
      3) add_message() for each message, then send_message()/generate() (if supported)
      4) any other method whose name looks like send/create/generate/run/complete
    Returns the response object, or raises an exception with debug info.
    """
    # 1) direct call, if the proxy itself is callable
    if callable(client_chat_proxy):
        try:
            return client_chat_proxy(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except TypeError:
            # it may accept positional arguments instead
            try:
                return client_chat_proxy(messages, max_new_tokens, temperature, top_p)
            except Exception:
                pass
        except Exception:
            pass
    # 2) try known method names
    for method_name in ("send_message", "send", "create", "generate", "run", "complete", "chat_completion", "chat_complete"):
        if hasattr(client_chat_proxy, method_name):
            method = getattr(client_chat_proxy, method_name)
            try:
                # many variants exist: prefer passing messages=...
                return method(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
            except TypeError:
                # it may only accept positional arguments
                try:
                    return method(messages, max_new_tokens, temperature, top_p)
                except Exception:
                    # fall through (some implementations use add_message + send)
                    pass
            except Exception:
                # on failure, move on to the next option
                pass
    # 3) add messages one by one via add_message(), then generate
    if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "send_message"):
        try:
            for m in messages:
                client_chat_proxy.add_message(m["role"], m["content"])
            return client_chat_proxy.send_message(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except Exception:
            pass
    if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "generate"):
        try:
            for m in messages:
                client_chat_proxy.add_message(m["role"], m["content"])
            return client_chat_proxy.generate(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except Exception:
            pass
    # 4) last resort: look for similarly named methods and call them with messages only
    candidate_methods = [m for m in dir(client_chat_proxy) if any(k in m for k in ("send", "create", "generate", "run", "complete"))]
    for name in candidate_methods:
        try:
            method = getattr(client_chat_proxy, name)
            if callable(method):
                try:
                    return method(messages=messages)
                except TypeError:
                    try:
                        return method(messages)
                    except Exception:
                        pass
        except Exception:
            pass
    # Nothing worked; raise with debug info
    debug = {
        "available_attrs": dir(client_chat_proxy),
        "messages_sample": messages[:3]
    }
    raise RuntimeError(f"Could not call the Llama chat proxy with any of the tested signatures. Debug: {debug}")

# Main response function
def respond(message, history, system_message, max_tokens, temperature, top_p):
    try:
        # Build the (stateless) messages list
        messages = [{"role": "system", "content": system_message or ""}]
        for h in history:
            messages.append({"role": h.get("role", "user"), "content": h.get("content", "")})
        messages.append({"role": "user", "content": message})
        # Step 1: Llama. Prefer the documented chat_completion() API and fall
        # back to probing the chat proxy's signatures if that fails.
        try:
            response_main_obj = client_main.chat_completion(
                messages=messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p
            )
        except Exception:
            response_main_obj = _call_llama_chat_stateless(client_main.chat, messages, max_tokens, temperature, top_p)
        response_main = _extract_text_from_response(response_main_obj)
        # Step 2: FLAN-T5 (rewrite)
        result_aux1 = client_aux1.text_generation(
            prompt=f"Rewrite this text clearly and concisely:\n{response_main}",
            max_new_tokens=max_tokens
        )
        response_aux1 = _extract_text_from_response(result_aux1)
        # Step 3: BART (3-sentence summary)
        result_aux2 = client_aux2.text_generation(
            prompt=f"Summarize this text in 3 sentences:\n{response_aux1}",
            max_new_tokens=150
        )
        response_aux2 = _extract_text_from_response(result_aux2)
    except Exception as e:
        # include a short traceback; useful when debugging the next fix
        tb = traceback.format_exc(limit=5)
        response_aux2 = f"Error while generating the response: {e}\n\nTraceback (short):\n{tb}"
    # Update the history in the Gradio Chatbot "messages" format
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response_aux2})
    return history, history
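
# For reference, the history this function maintains uses the Gradio
# "messages" format, e.g.:
#
#   [{"role": "user", "content": "Hi"},
#    {"role": "assistant", "content": "Hello! How can I help?"}]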

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Cascading Chatbot (Llama 3.1 + FLAN-T5 + BART)")
    system_message = gr.Textbox(
        value="You are a friendly and helpful chatbot.",
        label="System Message"
    )
    chatbot = gr.Chatbot(type="messages")  # matches the {"role", "content"} dicts built in respond()
    msg = gr.Textbox(label="Type your message")
    max_tokens = gr.Slider(50, 2048, 512, step=50, label="Max Tokens")
    temperature = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)")
    history = gr.State([])

    def handle_submit(message, history, system_message, max_tokens, temperature, top_p):
        return respond(message, history, system_message, max_tokens, temperature, top_p)

    msg.submit(
        handle_submit,
        inputs=[msg, history, system_message, max_tokens, temperature, top_p],
        outputs=[chatbot, history]
    )

if __name__ == "__main__":
    demo.launch()