import os
import gradio as gr
from huggingface_hub import InferenceClient
import traceback

# Pega o token do Hugging Face dos Secrets
HF_TOKEN = os.environ.get("HF_TOKEN")

# Inicializa os clientes dos modelos
client_main = InferenceClient(token=HF_TOKEN, model="meta-llama/Llama-3.1-8B-Instruct")
client_aux1 = InferenceClient(token=HF_TOKEN, model="google/flan-t5-large")
client_aux2 = InferenceClient(token=HF_TOKEN, model="facebook/bart-large-cnn")

# Extrai texto de objetos de resposta variados
def _extract_text_from_response(obj):
    if obj is None:
        return ""
    # atributos comuns
    if hasattr(obj, "content"):
        try:
            return obj.content
        except Exception:
            pass
    if hasattr(obj, "text"):
        try:
            return obj.text
        except Exception:
            pass
    if hasattr(obj, "generated_text"):
        try:
            return obj.generated_text
        except Exception:
            pass
    # choices style
    try:
        if hasattr(obj, "choices") and len(obj.choices) > 0:
            choice = obj.choices[0]
            # dict-like
            if isinstance(choice, dict) and "message" in choice and isinstance(choice["message"], dict) and "content" in choice["message"]:
                return choice["message"]["content"]
            # object-like
            if hasattr(choice, "message"):
                msg = choice.message
                if isinstance(msg, dict) and "content" in msg:
                    return msg["content"]
                if hasattr(msg, "get") and msg.get("content"):
                    return msg.get("content")
    except Exception:
        pass
    # last resort
    try:
        return str(obj)
    except Exception:
        return ""

# Função que tenta várias formas de usar o chat do Llama
def _call_llama_chat_stateless(client_chat_proxy, messages, max_new_tokens, temperature, top_p):
    """
    Tenta (em ordem):
     1) client_chat_proxy(messages=..., max_new_tokens=..., ...)
     2) client_chat_proxy.chat_completion(messages=..., ...) or client_chat_proxy.create(...)
     3) adicionar messages via add_message() + client_chat_proxy.generate()/send() (se suportado)
     4) procura por métodos alternativos (chat_completion, create, generate, run) e tenta-os
    Retorna o objeto de resposta ou levanta Exception com debug info.
    """
    # 1) chamada direta se proxy for chamável
    if callable(client_chat_proxy):
        try:
            return client_chat_proxy(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except TypeError:
            # talvez aceita positional args
            try:
                return client_chat_proxy(messages, max_new_tokens, temperature, top_p)
            except Exception:
                pass
        except Exception:
            pass

    # 2) tenta métodos conhecidos
    for method_name in ("send_message", "send", "create", "generate", "run", "complete", "chat_completion", "chat_complete"):
        if hasattr(client_chat_proxy, method_name):
            method = getattr(client_chat_proxy, method_name)
            try:
                # muitas variantes: preferimos passar messages=...
                return method(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
            except TypeError:
                # talvez aceite apenas positional
                try:
                    return method(messages, max_new_tokens, temperature, top_p)
                except Exception:
                    # tentar sem argumentos (algumas impls usam add_message + send)
                    pass
            except Exception:
                # se falhar, tenta próxima opção
                pass

    # 3) tenta add_message() sequencial e depois gerar
    if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "send_message") :
        try:
            # adiciona sequencialmente
            for m in messages:
                client_chat_proxy.add_message(m["role"], m["content"])
            return client_chat_proxy.send_message(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except Exception:
            pass

    if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "generate"):
        try:
            for m in messages:
                client_chat_proxy.add_message(m["role"], m["content"])
            return client_chat_proxy.generate(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except Exception:
            pass

    # 4) última tentativa: procurar métodos parecidos e tentar chamá-los com only messages
    candidate_methods = [m for m in dir(client_chat_proxy) if any(k in m for k in ("send", "create", "generate", "run", "complete"))]
    for name in candidate_methods:
        try:
            method = getattr(client_chat_proxy, name)
            if callable(method):
                try:
                    return method(messages=messages)
                except TypeError:
                    try:
                        return method(messages)
                    except Exception:
                        pass
        except Exception:
            pass

    # Se chegou aqui, falhou — lançar erro com debug
    debug = {
        "available_attrs": dir(client_chat_proxy),
        "messages_sample": messages[:3]
    }
    raise RuntimeError(f"Não foi possível chamar o chat proxy do Llama com as assinaturas testadas. Debug: {debug}")

# Função principal de resposta
def respond(message, history, system_message, max_tokens, temperature, top_p):
    try:
        # Monta lista de messages (stateless)
        messages = []
        messages.append({"role": "system", "content": system_message or ""})
        for h in history:
            messages.append({"role": h.get("role", "user"), "content": h.get("content", "")})
        messages.append({"role": "user", "content": message})

        # Chama o chat do Llama tentando várias assinaturas
        chat_proxy = client_main.chat
        response_main_obj = _call_llama_chat_stateless(chat_proxy, messages, max_tokens, temperature, top_p)
        response_main = _extract_text_from_response(response_main_obj)

        # Passo 2: FLAN-T5 (reformulação)
        result_aux1 = client_aux1.text_generation(
            prompt=f"Reformule este texto de forma clara e concisa:\n{response_main}",
            max_new_tokens=max_tokens
        )
        response_aux1 = _extract_text_from_response(result_aux1)

        # Passo 3: BART (resumo em 3 frases)
        result_aux2 = client_aux2.text_generation(
            prompt=f"Resuma este texto em 3 frases:\n{response_aux1}",
            max_new_tokens=150
        )
        response_aux2 = _extract_text_from_response(result_aux2)

    except Exception as e:
        # enviar traceback curto para depuração — útil para o próximo ajuste
        tb = traceback.format_exc(limit=5)
        response_aux2 = f"Erro ao gerar resposta: {e}\n\nTraceback (curto):\n{tb}"

    # Atualiza histórico no formato Gradio Chatbot
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response_aux2})

    return history, history

# Interface Gradio
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Chatbot em Cascata (Llama 3.1 + FLAN-T5 + BART)")

    system_message = gr.Textbox(
        value="Você é um chatbot amigável e prestativo.",
        label="System Message"
    )

    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Digite sua mensagem")
    max_tokens = gr.Slider(50, 2048, 512, step=50, label="Max Tokens")
    temperature = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)")

    history = gr.State([])

    def handle_submit(message, history, system_message, max_tokens, temperature, top_p):
        return respond(message, history, system_message, max_tokens, temperature, top_p)

    msg.submit(
        handle_submit,
        inputs=[msg, history, system_message, max_tokens, temperature, top_p],
        outputs=[chatbot, history]
    )

if __name__ == "__main__":
    demo.launch()