# app.py
# Chatbot em cascata para Hugging Face Space / execução local
# - Llama 3.1 (entrada)
# - FLAN-T5 (reformulação)
# - BART (resumo em 3 frases)
#
# Requisitos (no Space): defina HF_TOKEN nos Secrets.
# Variáveis opcionais para troca de modelos:
#  - LLAMA_MODEL (padrao: meta-llama/Llama-3.1-8B-Instruct)
#  - AUX1_MODEL  (padrao: google/flan-t5-large)
#  - AUX2_MODEL  (padrao: facebook/bart-large-cnn)
#
# Use: python app.py
# Recomendações: requirements.txt com gradio, huggingface-hub, transformers, accelerate, etc.

import os
import traceback
import logging
from typing import List, Dict, Any, Tuple

import gradio as gr
from huggingface_hub import InferenceClient


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("cascade_chatbot")

HF_TOKEN = os.environ.get("HF_TOKEN")
DEFAULT_LLAMA_MODEL = os.environ.get("LLAMA_MODEL", "meta-llama/Llama-3.1-8B-Instruct")
DEFAULT_AUX1 = os.environ.get("AUX1_MODEL", "google/flan-t5-large")
DEFAULT_AUX2 = os.environ.get("AUX2_MODEL", "facebook/bart-large-cnn")

if not HF_TOKEN:
    logger.warning("HF_TOKEN não encontrado nas variáveis de ambiente. Configure nos Secrets do Space ou no ambiente local.")

# -------------------------
# Inicializa clientes HF
# -------------------------

try:
    client_main = InferenceClient(token=HF_TOKEN, model=DEFAULT_LLAMA_MODEL)
    client_aux1 = InferenceClient(token=HF_TOKEN, model=DEFAULT_AUX1)
    client_aux2 = InferenceClient(token=HF_TOKEN, model=DEFAULT_AUX2)
except Exception:
    
    logger.exception("Falha ao inicializar InferenceClient(s). Verifique HF_TOKEN e nomes dos modelos.")
    # Criar objetos None para evitar crash imediato; erros aparecerão ao tentar usar
    client_main = None
    client_aux1 = None
    client_aux2 = None

# -------------------------
# Helpers
# -------------------------
def _messages_to_prompt(messages: List[Dict[str, str]]) -> str:
    lines = []
    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "")
        lines.append(f"{role.upper()}: {content}")
    lines.append("ASSISTANT:")
    return "\n".join(lines)

def _extract_text_from_response(obj: Any) -> str:
    if obj is None:
        return ""
    # Common atributos
    for attr in ("content", "text", "generated_text", "generation_text"):
        if hasattr(obj, attr):
            try:
                v = getattr(obj, attr)
                if isinstance(v, str):
                    return v
                return str(v)
            except Exception:
                pass

    try:
        choices = None
        if hasattr(obj, "choices"):
            choices = obj.choices
        elif isinstance(obj, dict) and "choices" in obj:
            choices = obj["choices"]
        if choices:
            first = choices[0]
            if isinstance(first, dict):
                if "message" in first and isinstance(first["message"], dict) and "content" in first["message"]:
                    return first["message"]["content"]
                if "text" in first:
                    return first["text"]
                if "content" in first:
                    return first["content"]
            if hasattr(first, "message"):
                msg = first.message
                if isinstance(msg, dict) and "content" in msg:
                    return msg["content"]
            if hasattr(first, "text"):
                return first.text
    except Exception:
        pass

    try:
        if hasattr(obj, "generations") and len(obj.generations) > 0:
            g = obj.generations[0]
            if isinstance(g, dict) and "text" in g:
                return g["text"]
            if hasattr(g, "text"):
                return g.text
    except Exception:
        pass

    try:
        if isinstance(obj, dict):
            for k in ("text", "content", "generated_text"):
                if k in obj and isinstance(obj[k], str):
                    return obj[k]
    except Exception:
        pass

    try:
        return str(obj)
    except Exception:
        return ""

# -------------------------
# Chamadas robustas ao InferenceClient
# -------------------------
def call_model_with_messages(client: InferenceClient, messages: List[Dict[str, str]],
                             max_new_tokens: int = 512, temperature: float = 0.7, top_p: float = 0.95) -> Any:
    """
    Tenta múltiplas assinaturas (chat_completion, client.chat, text_generation, etc).
    Registra exceções completas para diagnóstico.
    """

    def try_call(method, /, *pos_args, **kw_args):
        try:
            # Não imprimir todo messages no log  — resumir
            safe_kw = {k: ("[MESSAGES]" if k == "messages" else v) for k, v in kw_args.items()}
            logger.info("Tentando %s pos=%s kwargs=%s", getattr(method, "__name__", str(method)), pos_args, safe_kw)
            return method(*pos_args, **kw_args)
        except Exception:
            logger.exception("Falha ao chamar %s", getattr(method, "__name__", str(method)))
            return None

    # Tentar obter nome do modelo
    model_name = getattr(client, "model", None) or DEFAULT_LLAMA_MODEL

    # 1) chat_completion 
    try:
        cc = getattr(client, "chat_completion", None)
        if cc:
            # a) cc(model=..., messages=...)
            res = try_call(cc, model=model_name, messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
            if res is not None:
                return res
            # b) cc(messages=..., model=...)
            res = try_call(cc, messages=messages, model=model_name, max_new_tokens=max_new_tokens, temperature=temperature)
            if res is not None:
                return res
            # c) cc.create(...)
            if hasattr(cc, "create"):
                res = try_call(cc.create, model=model_name, messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
                if res is not None:
                    return res
            # d) positional
            res = try_call(cc, messages)
            if res is not None:
                return res
    except Exception:
        logger.exception("Erro no bloco chat_completion")

    # 2) client.chat namespace
    try:
        chat_ns = getattr(client, "chat", None)
        if chat_ns:
            if hasattr(chat_ns, "create"):
                res = try_call(chat_ns.create, model=model_name, messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
                if res is not None:
                    return res
            if hasattr(chat_ns, "chat_completion") and hasattr(chat_ns.chat_completion, "create"):
                res = try_call(chat_ns.chat_completion.create, model=model_name, messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
                if res is not None:
                    return res
            res = try_call(chat_ns, model_name, messages)
            if res is not None:
                return res
    except Exception:
        logger.exception("Erro no bloco chat namespace")

    # 3) text_generation 
    prompt = _messages_to_prompt(messages)
    try:
        if hasattr(client, "text_generation"):
            res = try_call(client.text_generation, prompt=prompt, max_new_tokens=max_new_tokens, temperature=temperature)
            if res is not None:
                return res
        if hasattr(client, "generate") and callable(client.generate):
            res = try_call(client.generate, prompt=prompt, max_new_tokens=max_new_tokens)
            if res is not None:
                return res
    except Exception:
        logger.exception("Erro no bloco text_generation/generate")

    # 4) última tentativa: explorar métodos candidatos
    candidate_methods = [m for m in dir(client) if any(k in m for k in ("create", "generate", "complete", "run"))]
    for name in candidate_methods:
        try:
            method = getattr(client, name)
            if callable(method):
                res = try_call(method, messages=messages)
                if res is not None:
                    return res
                res = try_call(method, prompt)
                if res is not None:
                    return res
                res = try_call(method, messages)
                if res is not None:
                    return res
        except Exception:
            logger.exception("Erro testando candidato %s", name)

    # falhou todas as tentativas
    debug = {"available_attrs": dir(client), "messages_sample": messages[:3]}
    logger.error("Todas as tentativas falharam. Debug: %s", debug)
    raise RuntimeError(f"Não foi possível chamar o cliente HF com as assinaturas testadas. Debug: {debug}")

# -------------------------
# Pipeline: Llama -> FLAN -> BART
# -------------------------
def pipeline_cascade(user_message: str, system_message: str,
                     max_tokens: int, temperature: float, top_p: float) -> Tuple[str, List[str]]:
    """
    Executa a cascata: Llama (client_main) -> FLAN (client_aux1) -> BART (client_aux2).
    Retorna o texto final e um log de passos.
    """
    logs = []
    # Monta mensagens
    messages = [{"role": "system", "content": system_message or ""}, {"role": "user", "content": user_message}]
    try:
        logs.append("1) Chamando Llama (entrada)")
        response_main_obj = call_model_with_messages(client_main, messages, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p)
        response_main = _extract_text_from_response(response_main_obj)
        logs.append(f"-> Llama respondeu (resumo): {response_main[:300]}")

        # Aux1: FLAN-T5 - reformular
        logs.append("2) Chamando FLAN-T5 (reformular)")
        prompt_aux1 = f"Reformule este texto de forma clara e concisa:\n{response_main}"
        try:
            if client_aux1 and hasattr(client_aux1, "text_generation"):
                res_a1 = client_aux1.text_generation(prompt=prompt_aux1, max_new_tokens=max(128, max_tokens // 4))
            elif client_aux1 and hasattr(client_aux1, "completions") and hasattr(client_aux1.completions, "create"):
                res_a1 = client_aux1.completions.create(prompt=prompt_aux1, max_new_tokens=max(128, max_tokens // 4))
            else:
                res_a1 = None
            response_aux1 = _extract_text_from_response(res_a1) if res_a1 is not None else response_main
            logs.append(f"-> FLAN-T5 respondeu (resumo): {response_aux1[:300]}")
        except Exception:
            logs.append("FLAN-T5 falhou; usando resposta do Llama")
            response_aux1 = response_main

        # Aux2: BART - resumo em 3 frases
        logs.append("3) Chamando BART (resumo em 3 frases)")
        prompt_aux2 = f"Resuma este texto em 3 frases:\n{response_aux1}"
        try:
            if client_aux2 and hasattr(client_aux2, "text_generation"):
                res_a2 = client_aux2.text_generation(prompt=prompt_aux2, max_new_tokens=150)
            elif client_aux2 and hasattr(client_aux2, "completions") and hasattr(client_aux2.completions, "create"):
                res_a2 = client_aux2.completions.create(prompt=prompt_aux2, max_new_tokens=150)
            else:
                res_a2 = None
            response_aux2 = _extract_text_from_response(res_a2) if res_a2 is not None else response_aux1
            logs.append(f"-> BART respondeu (resumo): {response_aux2[:300]}")
        except Exception:
            logs.append("BART falhou; usando resposta do passo anterior")
            response_aux2 = response_aux1

    except Exception as e:
        tb = traceback.format_exc(limit=5)
        logger.exception("Erro pipeline principal: %s", e)
        response_aux2 = f"Erro ao gerar resposta: {e}\n\nTraceback (curto):\n{tb}"
        logs.append("Erro no pipeline: " + str(e))

    return response_aux2, logs

# -------------------------
# Gradio App
# -------------------------
with gr.Blocks(title="Chatbot em Cascata - Llama + FLAN + BART") as demo:
    gr.Markdown("##  Trabalho Acadêmico FMU - Chatbot em Cascata\n"
                "Fluxo: **Llama (entrada)** → **FLAN-T5 (reformulação)** → **BART(resumo)**\n\n"
                "Disciplina: INTELIGÊNCIA ARTIFICIAL E APRENDIZADO DE MÁQUINA")

    with gr.Row():
        with gr.Column(scale=2):
            system_message = gr.Textbox(value="Você é um chatbot racional e alegre.",
                                       label="System Message", lines=2)
            chatbot = gr.Chatbot(label="Chat")
            user_input = gr.Textbox(label="Digite sua mensagem", placeholder="Digite aqui...")
            max_tokens = gr.Slider(50, 2048, value=512, step=50, label="Max Tokens")
            temperature = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

            history = gr.State([])

            def submit_handler(msg, history, system_message, max_tokens, temperature, top_p):
                # roda pipeline e atualiza histórico
                out_text, logs = pipeline_cascade(msg, system_message, int(max_tokens), float(temperature), float(top_p))
                history.append({"role": "user", "content": msg})
                history.append({"role": "assistant", "content": out_text})
                # exibimos também logs no console (útil)
                logger.info("Pipeline logs:\n%s", "\n".join(logs))
                return history, history

            user_input.submit(submit_handler,
                              inputs=[user_input, history, system_message, max_tokens, temperature, top_p],
                              outputs=[chatbot, history])

            btn_send = gr.Button("Enviar")
            btn_send.click(submit_handler,
                           inputs=[user_input, history, system_message, max_tokens, temperature, top_p],
                           outputs=[chatbot, history])

        with gr.Column(scale=1):
            gr.Markdown("### Informações sobre o Projeto\n"
                        "Painel feito para descrever as **configurações**, **testar a geração** e sobre os **envolvidos**:")

            model_info_md = f"""
**Modelos usados:**

- Llama (input): `{DEFAULT_LLAMA_MODEL}`
- Aux 1 (reformulação): `{DEFAULT_AUX1}`
- Aux 2 (resumo): `{DEFAULT_AUX2}`

**Como foram configurados:**

- Cada modelo é instanciado via `InferenceClient(token=HF_TOKEN, model=<model_name>)`.
- Chamadas preferenciais:
  - Para chat: `client.chat_completion(messages=..., model=...)` (quando disponível)
  - Fallback: `client.text_generation(prompt=...)`
- Ajustes de inferência controlados pelo usuário: `max_tokens`, `temperature`, `top_p`.
- Logs de diagnóstico são gravados (úteis se houver erros de assinatura/permissão).
"""
            gr.Markdown(model_info_md)

            # Self-test: roda testes com mensagens predefinidas e mostra o resultado
            test_output = gr.Textbox(label="Resultado do Self-Test", lines=12, interactive=False)

            def run_self_test(system_message, max_tokens, temperature, top_p):
                msgs = [
                    "Explique resumidamente o que é a técnica de regressão linear.",
                    "Resuma em 1 frase as vantagens de usar validação cruzada.",
                    "Como posso autenticar usuários em uma aplicação web?"
                ]
                accumulated = []
                for m in msgs:
                    out, logs = pipeline_cascade(m, system_message, int(max_tokens), float(temperature), float(top_p))
                    accumulated.append("INPUT: " + m)
                    accumulated.append("OUTPUT: " + out)
                    accumulated.append("LOGS: " + " | ".join(logs))
                    accumulated.append("-" * 40)
                return "\n".join(accumulated)

            btn_test = gr.Button("Run self-test")
            btn_test.click(run_self_test, inputs=[system_message, max_tokens, temperature, top_p], outputs=[test_output])

            gr.Markdown(
                "### Disciplina: INTELIGÊNCIA ARTIFICIAL E APRENDIZADO DE MÁQUINA\n"
                        "- Trabalho N2\n"
                        "- Turma Noturna de Bacharelado em Ciências da Computação 2025.\n"
                        "- Integrantes:\n "
                        "- Lucas Antonini - 1722631\n "
                        "- Carlos Eduardo da Silva - 1961011\n "
                        "- Felipe Rios Amaral - 1847080 \n"
                        "- Kawrê Britto de Oliveira - 2260931\n" 
                        "- Miguel Putini Alfano - 2879347 ")

if __name__ == "__main__":
    demo.launch()