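"""Cascaded chatbot demo.

A Gradio UI that sends each user message through three Hugging Face Inference
API models in sequence: meta-llama/Llama-3.1-8B-Instruct answers, google/flan-t5-large
rewrites the answer, and facebook/bart-large-cnn condenses it into a short summary.
"""
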
import os
import traceback

import gradio as gr
from huggingface_hub import InferenceClient

# Hugging Face API token, read from the environment (set HF_TOKEN before launching).
HF_TOKEN = os.environ.get("HF_TOKEN")

# One InferenceClient per stage of the cascade:
# Llama 3.1 produces the main answer, FLAN-T5 rewrites it, BART condenses it.
client_main = InferenceClient(token=HF_TOKEN, model="meta-llama/Llama-3.1-8B-Instruct")
client_aux1 = InferenceClient(token=HF_TOKEN, model="google/flan-t5-large")
client_aux2 = InferenceClient(token=HF_TOKEN, model="facebook/bart-large-cnn")


def _extract_text_from_response(obj):
    """Best-effort extraction of plain text from whatever the inference client returned."""
    if obj is None:
        return ""

    # Simple attribute-style responses (message objects, text-generation outputs).
    for attr in ("content", "text", "generated_text"):
        if hasattr(obj, attr):
            try:
                return getattr(obj, attr)
            except Exception:
                pass

    # Chat-completion style responses: obj.choices[0].message.content (dict or object).
    try:
        if hasattr(obj, "choices") and len(obj.choices) > 0:
            choice = obj.choices[0]
            if isinstance(choice, dict) and isinstance(choice.get("message"), dict) and "content" in choice["message"]:
                return choice["message"]["content"]
            if hasattr(choice, "message"):
                msg = choice.message
                if isinstance(msg, dict) and "content" in msg:
                    return msg["content"]
                if hasattr(msg, "get") and msg.get("content"):
                    return msg.get("content")
                if hasattr(msg, "content"):
                    return msg.content
    except Exception:
        pass

    # Last resort: stringify the whole object.
    try:
        return str(obj)
    except Exception:
        return ""

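# Minimal usage sketch for the helper above. text_generation() returns a plain
# string by default, which the helper passes through via str(); richer
# chat-completion objects are unwrapped down to their message content.
#
#   out = client_aux1.text_generation("Say hi", max_new_tokens=16)
#   print(_extract_text_from_response(out))
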
def _call_llama_chat_stateless(client_chat_proxy, messages, max_new_tokens, temperature, top_p):
    """
    Tries, in order:
      1) calling client_chat_proxy(messages=..., max_new_tokens=..., ...) directly
      2) well-known methods such as chat_completion(), create(), generate(), send(), ...
      3) stateful interfaces: add_message() for each message, then send_message()/generate()
      4) any other attribute whose name suggests generation (send, create, generate, run, complete)
    Returns the raw response object, or raises RuntimeError with debug info if nothing works.
    """
    # 1) The proxy itself may be callable.
    if callable(client_chat_proxy):
        try:
            return client_chat_proxy(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except TypeError:
            # Keyword names rejected: retry with positional arguments.
            try:
                return client_chat_proxy(messages, max_new_tokens, temperature, top_p)
            except Exception:
                pass
        except Exception:
            pass

    # 2) Well-known method names, tried with keyword and then positional arguments.
    for method_name in ("send_message", "send", "create", "generate", "run", "complete", "chat_completion", "chat_complete"):
        if hasattr(client_chat_proxy, method_name):
            method = getattr(client_chat_proxy, method_name)
            try:
                return method(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
            except TypeError:
                try:
                    return method(messages, max_new_tokens, temperature, top_p)
                except Exception:
                    pass
            except Exception:
                pass

    # 3) Stateful interfaces: push the messages first, then request a completion.
    if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "send_message"):
        try:
            for m in messages:
                client_chat_proxy.add_message(m["role"], m["content"])
            return client_chat_proxy.send_message(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except Exception:
            pass

    if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "generate"):
        try:
            for m in messages:
                client_chat_proxy.add_message(m["role"], m["content"])
            return client_chat_proxy.generate(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except Exception:
            pass

    # 4) Last resort: probe any attribute whose name hints at generation.
    candidate_methods = [m for m in dir(client_chat_proxy) if any(k in m for k in ("send", "create", "generate", "run", "complete"))]
    for name in candidate_methods:
        try:
            method = getattr(client_chat_proxy, name)
            if callable(method):
                try:
                    return method(messages=messages)
                except TypeError:
                    try:
                        return method(messages)
                    except Exception:
                        pass
        except Exception:
            pass

    # Nothing worked: report what the proxy exposes so the failure is diagnosable.
    debug = {
        "available_attrs": dir(client_chat_proxy),
        "messages_sample": messages[:3],
    }
    raise RuntimeError(f"Could not call the Llama chat proxy with any of the tested signatures. Debug: {debug}")

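# Usage sketch for the fallback helper above ("some_chat_proxy" is a placeholder
# for any object exposing one of the probed call signatures):
#
#   msgs = [{"role": "user", "content": "Hello"}]
#   raw = _call_llama_chat_stateless(some_chat_proxy, msgs, 128, 0.7, 0.95)
#   print(_extract_text_from_response(raw))
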
def respond(message, history, system_message, max_tokens, temperature, top_p):
    try:
        # Build an OpenAI-style message list: system prompt, prior turns, new user message.
        messages = [{"role": "system", "content": system_message or ""}]
        for h in history:
            messages.append({"role": h.get("role", "user"), "content": h.get("content", "")})
        messages.append({"role": "user", "content": message})

        # Stage 1: main answer from Llama 3.1. Prefer the documented chat_completion()
        # API of InferenceClient; fall back to the generic probing helper otherwise.
        try:
            response_main_obj = client_main.chat_completion(
                messages=messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p
            )
        except (AttributeError, TypeError):
            response_main_obj = _call_llama_chat_stateless(client_main.chat, messages, max_tokens, temperature, top_p)
        response_main = _extract_text_from_response(response_main_obj)

        # Stage 2: ask FLAN-T5 to rewrite the answer.
        result_aux1 = client_aux1.text_generation(
            prompt=f"Rewrite this text clearly and concisely:\n{response_main}",
            max_new_tokens=max_tokens,
        )
        response_aux1 = _extract_text_from_response(result_aux1)

        # Stage 3: ask BART to condense the rewritten answer.
        result_aux2 = client_aux2.text_generation(
            prompt=f"Summarize this text in 3 sentences:\n{response_aux1}",
            max_new_tokens=150,
        )
        response_aux2 = _extract_text_from_response(result_aux2)

    except Exception as e:
        # Surface the error in the chat instead of crashing the UI.
        tb = traceback.format_exc(limit=5)
        response_aux2 = f"Error while generating the response: {e}\n\nTraceback (short):\n{tb}"

    # Append the new turn and return the updated history for both the Chatbot and the State.
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response_aux2})

    return history, history

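# Quick manual check outside the UI (requires a valid HF_TOKEN and access to the
# three models on the Inference API; the argument values below are illustrative):
#
#   chat, _ = respond("Hello!", [], "You are a helpful assistant.", 256, 0.7, 0.95)
#   print(chat[-1]["content"])
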
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Cascaded Chatbot (Llama 3.1 + FLAN-T5 + BART)")

    system_message = gr.Textbox(
        value="You are a friendly and helpful chatbot.",
        label="System Message",
    )

    # type="messages" so the component accepts the role/content dicts built in respond().
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(label="Type your message")
    max_tokens = gr.Slider(50, 2048, 512, step=50, label="Max Tokens")
    temperature = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)")

    # Conversation history, kept per session as a list of {"role", "content"} dicts.
    history = gr.State([])

    def handle_submit(message, history, system_message, max_tokens, temperature, top_p):
        return respond(message, history, system_message, max_tokens, temperature, top_p)

    msg.submit(
        handle_submit,
        inputs=[msg, history, system_message, max_tokens, temperature, top_p],
        outputs=[chatbot, history],
    )


if __name__ == "__main__":
    demo.launch()