# app.py
# Cascade chatbot for a Hugging Face Space / local execution
# - Llama 3.1 (input)
# - FLAN-T5 (rephrasing)
# - BART (3-sentence summary)
#
# Requirements (on the Space): set HF_TOKEN in the Secrets.
# Optional variables to swap models:
#   - LLAMA_MODEL (default: meta-llama/Llama-3.1-8B-Instruct)
#   - AUX1_MODEL  (default: google/flan-t5-large)
#   - AUX2_MODEL  (default: facebook/bart-large-cnn)
#
# Usage: python app.py
# Recommended: requirements.txt with gradio, huggingface-hub, transformers, accelerate, etc.
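#
# Example local run (the token value below is a placeholder, assuming a POSIX shell):
#   export HF_TOKEN=hf_xxxxxxxxxxxx
#   python app.py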
import os
import traceback
import logging
from typing import List, Dict, Any, Tuple
import gradio as gr
from huggingface_hub import InferenceClient
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("cascade_chatbot")
HF_TOKEN = os.environ.get("HF_TOKEN")
DEFAULT_LLAMA_MODEL = os.environ.get("LLAMA_MODEL", "meta-llama/Llama-3.1-8B-Instruct")
DEFAULT_AUX1 = os.environ.get("AUX1_MODEL", "google/flan-t5-large")
DEFAULT_AUX2 = os.environ.get("AUX2_MODEL", "facebook/bart-large-cnn")
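# Example of swapping a model via the environment (illustrative choice, not from the original):
#   AUX1_MODEL="google/flan-t5-xl" python app.py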
if not HF_TOKEN:
logger.warning("HF_TOKEN não encontrado nas variáveis de ambiente. Configure nos Secrets do Space ou no ambiente local.")
# -------------------------
# Initialize HF clients
# -------------------------
try:
    client_main = InferenceClient(token=HF_TOKEN, model=DEFAULT_LLAMA_MODEL)
    client_aux1 = InferenceClient(token=HF_TOKEN, model=DEFAULT_AUX1)
    client_aux2 = InferenceClient(token=HF_TOKEN, model=DEFAULT_AUX2)
except Exception:
    logger.exception("Failed to initialize InferenceClient(s). Check HF_TOKEN and the model names.")
    # Set the clients to None to avoid an immediate crash; errors will surface on first use
    client_main = None
    client_aux1 = None
    client_aux2 = None
# -------------------------
# Helpers
# -------------------------
def _messages_to_prompt(messages: List[Dict[str, str]]) -> str:
lines = []
for m in messages:
role = m.get("role", "user")
content = m.get("content", "")
lines.append(f"{role.upper()}: {content}")
lines.append("ASSISTANT:")
return "\n".join(lines)
def _extract_text_from_response(obj: Any) -> str:
if obj is None:
return ""
    # Common attributes ("summary_text" covers summarization outputs, e.g. from BART)
    for attr in ("content", "text", "generated_text", "generation_text", "summary_text"):
if hasattr(obj, attr):
try:
v = getattr(obj, attr)
if isinstance(v, str):
return v
return str(v)
except Exception:
pass
try:
choices = None
if hasattr(obj, "choices"):
choices = obj.choices
elif isinstance(obj, dict) and "choices" in obj:
choices = obj["choices"]
if choices:
first = choices[0]
if isinstance(first, dict):
if "message" in first and isinstance(first["message"], dict) and "content" in first["message"]:
return first["message"]["content"]
if "text" in first:
return first["text"]
if "content" in first:
return first["content"]
if hasattr(first, "message"):
msg = first.message
if isinstance(msg, dict) and "content" in msg:
return msg["content"]
if hasattr(first, "text"):
return first.text
except Exception:
pass
try:
if hasattr(obj, "generations") and len(obj.generations) > 0:
g = obj.generations[0]
if isinstance(g, dict) and "text" in g:
return g["text"]
if hasattr(g, "text"):
return g.text
except Exception:
pass
try:
if isinstance(obj, dict):
for k in ("text", "content", "generated_text"):
if k in obj and isinstance(obj[k], str):
return obj[k]
except Exception:
pass
try:
return str(obj)
except Exception:
return ""
# -------------------------
# Robust calls to the InferenceClient
# -------------------------
def call_model_with_messages(client: InferenceClient, messages: List[Dict[str, str]],
max_new_tokens: int = 512, temperature: float = 0.7, top_p: float = 0.95) -> Any:
"""
Tenta múltiplas assinaturas (chat_completion, client.chat, text_generation, etc).
Registra exceções completas para diagnóstico.
"""
    def try_call(method, /, *pos_args, **kw_args):
        try:
            # Don't dump the full messages into the log; summarize them
            safe_kw = {k: ("[MESSAGES]" if k == "messages" else v) for k, v in kw_args.items()}
            logger.info("Trying %s pos=%s kwargs=%s", getattr(method, "__name__", str(method)), pos_args, safe_kw)
            return method(*pos_args, **kw_args)
        except Exception:
            logger.exception("Call to %s failed", getattr(method, "__name__", str(method)))
            return None
    # Determine the model name
    model_name = getattr(client, "model", None) or DEFAULT_LLAMA_MODEL
    # 1) chat_completion (the huggingface-hub API expects max_tokens, not max_new_tokens)
    try:
        cc = getattr(client, "chat_completion", None)
        if cc:
            # a) keyword form
            res = try_call(cc, model=model_name, messages=messages, max_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
            if res is not None:
                return res
            # b) cc.create(...)
            if hasattr(cc, "create"):
                res = try_call(cc.create, model=model_name, messages=messages, max_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
                if res is not None:
                    return res
            # c) positional
            res = try_call(cc, messages)
            if res is not None:
                return res
    except Exception:
        logger.exception("Error in the chat_completion block")
    # 2) client.chat namespace (OpenAI-style: client.chat.completions.create)
    try:
        chat_ns = getattr(client, "chat", None)
        if chat_ns:
            if hasattr(chat_ns, "create"):
                res = try_call(chat_ns.create, model=model_name, messages=messages, max_tokens=max_new_tokens, temperature=temperature)
                if res is not None:
                    return res
            if hasattr(chat_ns, "completions") and hasattr(chat_ns.completions, "create"):
                res = try_call(chat_ns.completions.create, model=model_name, messages=messages, max_tokens=max_new_tokens, temperature=temperature)
                if res is not None:
                    return res
            res = try_call(chat_ns, model_name, messages)
            if res is not None:
                return res
    except Exception:
        logger.exception("Error in the chat namespace block")
    # 3) text_generation fallback
    prompt = _messages_to_prompt(messages)
    try:
        if hasattr(client, "text_generation"):
            res = try_call(client.text_generation, prompt=prompt, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
            if res is not None:
                return res
        if hasattr(client, "generate") and callable(client.generate):
            res = try_call(client.generate, prompt=prompt, max_new_tokens=max_new_tokens)
            if res is not None:
                return res
    except Exception:
        logger.exception("Error in the text_generation/generate block")
    # 4) last resort: probe candidate methods by name
    candidate_methods = [m for m in dir(client) if any(k in m for k in ("create", "generate", "complete", "run"))]
    for name in candidate_methods:
        try:
            method = getattr(client, name)
            if callable(method):
                res = try_call(method, messages=messages)
                if res is not None:
                    return res
                res = try_call(method, prompt)
                if res is not None:
                    return res
                res = try_call(method, messages)
                if res is not None:
                    return res
        except Exception:
            logger.exception("Error testing candidate %s", name)
    # all attempts failed
    debug = {"available_attrs": dir(client), "messages_sample": messages[:3]}
    logger.error("All attempts failed. Debug: %s", debug)
    raise RuntimeError(f"Could not call the HF client with any of the tested signatures. Debug: {debug}")
# -------------------------
# Pipeline: Llama -> FLAN -> BART
# -------------------------
def pipeline_cascade(user_message: str, system_message: str,
max_tokens: int, temperature: float, top_p: float) -> Tuple[str, List[str]]:
"""
Executa a cascata: Llama (client_main) -> FLAN (client_aux1) -> BART (client_aux2).
Retorna o texto final e um log de passos.
"""
logs = []
# Monta mensagens
messages = [{"role": "system", "content": system_message or ""}, {"role": "user", "content": user_message}]
    try:
        logs.append("1) Calling Llama (input)")
        response_main_obj = call_model_with_messages(client_main, messages, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p)
        response_main = _extract_text_from_response(response_main_obj)
        logs.append(f"-> Llama replied (excerpt): {response_main[:300]}")
        # Aux1: FLAN-T5 - rephrase
        logs.append("2) Calling FLAN-T5 (rephrase)")
        prompt_aux1 = f"Rewrite this text clearly and concisely:\n{response_main}"
        try:
            if client_aux1 and hasattr(client_aux1, "text_generation"):
                res_a1 = client_aux1.text_generation(prompt=prompt_aux1, max_new_tokens=max(128, max_tokens // 4))
            elif client_aux1 and hasattr(client_aux1, "completions") and hasattr(client_aux1.completions, "create"):
                res_a1 = client_aux1.completions.create(prompt=prompt_aux1, max_new_tokens=max(128, max_tokens // 4))
            else:
                res_a1 = None
            response_aux1 = _extract_text_from_response(res_a1) if res_a1 is not None else response_main
            logs.append(f"-> FLAN-T5 replied (excerpt): {response_aux1[:300]}")
        except Exception:
            logs.append("FLAN-T5 failed; falling back to the Llama response")
            response_aux1 = response_main
        # Aux2: BART - 3-sentence summary
        logs.append("3) Calling BART (3-sentence summary)")
        prompt_aux2 = f"Summarize this text in 3 sentences:\n{response_aux1}"
        try:
            # facebook/bart-large-cnn is served as a summarization task, so prefer
            # InferenceClient.summarization when it is available
            if client_aux2 and hasattr(client_aux2, "summarization"):
                res_a2 = client_aux2.summarization(response_aux1)
            elif client_aux2 and hasattr(client_aux2, "text_generation"):
                res_a2 = client_aux2.text_generation(prompt=prompt_aux2, max_new_tokens=150)
            elif client_aux2 and hasattr(client_aux2, "completions") and hasattr(client_aux2.completions, "create"):
                res_a2 = client_aux2.completions.create(prompt=prompt_aux2, max_new_tokens=150)
            else:
                res_a2 = None
            response_aux2 = _extract_text_from_response(res_a2) if res_a2 is not None else response_aux1
            logs.append(f"-> BART replied (excerpt): {response_aux2[:300]}")
        except Exception:
            logs.append("BART failed; falling back to the previous step's response")
            response_aux2 = response_aux1
    except Exception as e:
        tb = traceback.format_exc(limit=5)
        logger.exception("Main pipeline error: %s", e)
        response_aux2 = f"Error while generating the response: {e}\n\nTraceback (short):\n{tb}"
        logs.append("Pipeline error: " + str(e))
    return response_aux2, logs
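# Example (illustrative arguments):
#   final_text, step_log = pipeline_cascade("What is overfitting?",
#                                           "You are a concise assistant.", 256, 0.7, 0.95)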
# -------------------------
# Gradio App
# -------------------------
with gr.Blocks(title="Cascade Chatbot - Llama + FLAN + BART") as demo:
    gr.Markdown("## FMU Academic Project - Cascade Chatbot\n"
                "Flow: **Llama (input)** → **FLAN-T5 (rephrasing)** → **BART (summary)**\n\n"
                "Course: ARTIFICIAL INTELLIGENCE AND MACHINE LEARNING")
    with gr.Row():
        with gr.Column(scale=2):
            system_message = gr.Textbox(value="You are a rational and cheerful chatbot.",
                                        label="System Message", lines=2)
            # type="messages" lets the Chatbot render the role/content dicts stored in history
            chatbot = gr.Chatbot(label="Chat", type="messages")
            user_input = gr.Textbox(label="Type your message", placeholder="Type here...")
            max_tokens = gr.Slider(50, 2048, value=512, step=50, label="Max Tokens")
            temperature = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
            history = gr.State([])
            def submit_handler(msg, history, system_message, max_tokens, temperature, top_p):
                # run the pipeline and update the history
                out_text, logs = pipeline_cascade(msg, system_message, int(max_tokens), float(temperature), float(top_p))
                history.append({"role": "user", "content": msg})
                history.append({"role": "assistant", "content": out_text})
                # also emit the step logs to the console (useful for debugging)
                logger.info("Pipeline logs:\n%s", "\n".join(logs))
                return history, history
user_input.submit(submit_handler,
inputs=[user_input, history, system_message, max_tokens, temperature, top_p],
outputs=[chatbot, history])
            btn_send = gr.Button("Send")
btn_send.click(submit_handler,
inputs=[user_input, history, system_message, max_tokens, temperature, top_p],
outputs=[chatbot, history])
        with gr.Column(scale=1):
            gr.Markdown("### Project Information\n"
                        "Panel describing the **configuration**, a **generation self-test**, and the **team**:")
            model_info_md = f"""
**Models used:**
- Llama (input): `{DEFAULT_LLAMA_MODEL}`
- Aux 1 (rephrasing): `{DEFAULT_AUX1}`
- Aux 2 (summary): `{DEFAULT_AUX2}`

**How they are configured:**
- Each model is instantiated via `InferenceClient(token=HF_TOKEN, model=<model_name>)`.
- Preferred calls:
  - For chat: `client.chat_completion(messages=..., model=...)` (when available)
  - Fallback: `client.text_generation(prompt=...)`
- Inference settings controlled by the user: `max_tokens`, `temperature`, `top_p`.
- Diagnostic logs are recorded (useful for signature/permission errors).
"""
            gr.Markdown(model_info_md)
            # Self-test: runs the pipeline on predefined messages and shows the result
            test_output = gr.Textbox(label="Self-Test Result", lines=12, interactive=False)
            def run_self_test(system_message, max_tokens, temperature, top_p):
                msgs = [
                    "Briefly explain what the linear regression technique is.",
                    "Summarize the advantages of cross-validation in 1 sentence.",
                    "How can I authenticate users in a web application?"
                ]
                accumulated = []
                for m in msgs:
                    out, logs = pipeline_cascade(m, system_message, int(max_tokens), float(temperature), float(top_p))
                    accumulated.append("INPUT: " + m)
                    accumulated.append("OUTPUT: " + out)
                    accumulated.append("LOGS: " + " | ".join(logs))
                    accumulated.append("-" * 40)
                return "\n".join(accumulated)
btn_test = gr.Button("Run self-test")
btn_test.click(run_self_test, inputs=[system_message, max_tokens, temperature, top_p], outputs=[test_output])
            gr.Markdown(
                "### Course: ARTIFICIAL INTELLIGENCE AND MACHINE LEARNING\n"
                "- N2 assignment\n"
                "- Evening class, Bachelor of Computer Science, 2025.\n"
                "- Team members:\n"
                "  - Lucas Antonini - 1722631\n"
                "  - Carlos Eduardo da Silva - 1961011\n"
                "  - Felipe Rios Amaral - 1847080\n"
                "  - Kawrê Britto de Oliveira - 2260931\n"
                "  - Miguel Putini Alfano - 2879347")
if __name__ == "__main__":
demo.launch()