import os
import gradio as gr
from huggingface_hub import InferenceClient
import traceback

# Read the Hugging Face token from the Space's secrets
HF_TOKEN = os.environ.get("HF_TOKEN")
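# (In a Hugging Face Space, HF_TOKEN is typically set as a repository secret in the Space settings.)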

# Initialize one inference client per model
client_main = InferenceClient(token=HF_TOKEN, model="meta-llama/Llama-3.1-8B-Instruct")
client_aux1 = InferenceClient(token=HF_TOKEN, model="google/flan-t5-large")
client_aux2 = InferenceClient(token=HF_TOKEN, model="facebook/bart-large-cnn")
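# The three clients form a cascade: Llama 3.1 drafts the answer, FLAN-T5
# rewrites it more clearly, and BART condenses the rewrite into a short summary.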

# Extract plain text from the assorted response object shapes the clients may return
def _extract_text_from_response(obj):
    if obj is None:
        return ""
    # common attributes
    if hasattr(obj, "content"):
        try:
            return obj.content
        except Exception:
            pass
    if hasattr(obj, "text"):
        try:
            return obj.text
        except Exception:
            pass
    if hasattr(obj, "generated_text"):
        try:
            return obj.generated_text
        except Exception:
            pass
    # choices style
    try:
        if hasattr(obj, "choices") and len(obj.choices) > 0:
            choice = obj.choices[0]
            # dict-like
            if isinstance(choice, dict) and "message" in choice and isinstance(choice["message"], dict) and "content" in choice["message"]:
                return choice["message"]["content"]
            # object-like
            if hasattr(choice, "message"):
                msg = choice.message
                if isinstance(msg, dict) and "content" in msg:
                    return msg["content"]
                if hasattr(msg, "get") and msg.get("content"):
                    return msg.get("content")
                # messages exposed as objects with a .content attribute
                # (e.g. huggingface_hub's ChatCompletionOutputMessage)
                if hasattr(msg, "content"):
                    return msg.content
    except Exception:
        pass
    # last resort
    try:
        return str(obj)
    except Exception:
        return ""

# Try several calling conventions for the Llama chat proxy
def _call_llama_chat_stateless(client_chat_proxy, messages, max_new_tokens, temperature, top_p):
    """
    Tries, in order:
     1) client_chat_proxy(messages=..., max_new_tokens=..., ...)
     2) client_chat_proxy.chat_completion(messages=..., ...) or client_chat_proxy.create(...)
     3) adding messages via add_message() followed by client_chat_proxy.generate()/send() (if supported)
     4) scanning for alternative methods (chat_completion, create, generate, run) and trying them
    Returns the response object, or raises an Exception with debug info.
    """
    # 1) call the proxy directly if it is callable
    if callable(client_chat_proxy):
        try:
            return client_chat_proxy(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except TypeError:
            # it may accept positional args instead
            try:
                return client_chat_proxy(messages, max_new_tokens, temperature, top_p)
            except Exception:
                pass
        except Exception:
            pass

    # 2) try known method names
    for method_name in ("send_message", "send", "create", "generate", "run", "complete", "chat_completion", "chat_complete"):
        if hasattr(client_chat_proxy, method_name):
            method = getattr(client_chat_proxy, method_name)
            try:
                # many variants exist: prefer passing messages=... as a keyword
                return method(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
            except TypeError:
                # it may only accept positional arguments
                try:
                    return method(messages, max_new_tokens, temperature, top_p)
                except Exception:
                    # fall through; some implementations use add_message + send instead
                    pass
            except Exception:
                # on failure, move on to the next candidate
                pass

    # 3) try queuing messages via add_message() and then generating
    if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "send_message"):
        try:
            # queue the messages in order
            for m in messages:
                client_chat_proxy.add_message(m["role"], m["content"])
            return client_chat_proxy.send_message(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except Exception:
            pass

    if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "generate"):
        try:
            for m in messages:
                client_chat_proxy.add_message(m["role"], m["content"])
            return client_chat_proxy.generate(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
        except Exception:
            pass

    # 4) last resort: look for similarly named methods and try them with only the messages
    candidate_methods = [m for m in dir(client_chat_proxy) if any(k in m for k in ("send", "create", "generate", "run", "complete"))]
    for name in candidate_methods:
        try:
            method = getattr(client_chat_proxy, name)
            if callable(method):
                try:
                    return method(messages=messages)
                except TypeError:
                    try:
                        return method(messages)
                    except Exception:
                        pass
        except Exception:
            pass

    # If we get here, every attempt failed: raise with debug info
    debug = {
        "available_attrs": dir(client_chat_proxy),
        "messages_sample": messages[:3]
    }
    raise RuntimeError(f"Could not call the Llama chat proxy with any of the tested signatures. Debug: {debug}")
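
# Note: recent huggingface_hub releases (assumption: >= 0.22) expose the chat API
# directly as client_main.chat_completion(...), which would replace the probing
# above, e.g.:
#   out = client_main.chat_completion(messages=messages, max_tokens=512)
#   text = out.choices[0].message.content
# The fallbacks are kept so the app survives client-version differences.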

# Main response function (runs the full cascade for one user message)
def respond(message, history, system_message, max_tokens, temperature, top_p):
    try:
        # Build the messages list (stateless)
        messages = []
        messages.append({"role": "system", "content": system_message or ""})
        for h in history:
            messages.append({"role": h.get("role", "user"), "content": h.get("content", "")})
        messages.append({"role": "user", "content": message})

        # Call the Llama chat, trying several call signatures
        chat_proxy = client_main.chat
        response_main_obj = _call_llama_chat_stateless(chat_proxy, messages, max_tokens, temperature, top_p)
        response_main = _extract_text_from_response(response_main_obj)

        # Step 2: FLAN-T5 (rewrite the answer)
        result_aux1 = client_aux1.text_generation(
            prompt=f"Rewrite this text clearly and concisely:\n{response_main}",
            max_new_tokens=max_tokens
        )
        response_aux1 = _extract_text_from_response(result_aux1)

        # Step 3: BART (short summary). bart-large-cnn is a summarization model,
        # so call the summarization task instead of prompting it like a
        # text-generation model.
        result_aux2 = client_aux2.summarization(response_aux1)
        response_aux2 = getattr(result_aux2, "summary_text", "") or _extract_text_from_response(result_aux2)

    except Exception as e:
        # include a short traceback for debugging; useful for the next fix
        tb = traceback.format_exc(limit=5)
        response_aux2 = f"Error while generating the response: {e}\n\nTraceback (short):\n{tb}"

    # Update the history in the Gradio Chatbot "messages" format
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response_aux2})

    return history, history
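
# respond() returns the updated history twice: once for the visible Chatbot
# component and once for the gr.State value that persists it between turns.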

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Cascading Chatbot (Llama 3.1 + FLAN-T5 + BART)")

    system_message = gr.Textbox(
        value="You are a friendly and helpful chatbot.",
        label="System Message"
    )

    # type="messages" so the Chatbot renders the {"role": ..., "content": ...}
    # dicts that respond() appends to the history
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(label="Type your message")
    max_tokens = gr.Slider(50, 2048, 512, step=50, label="Max Tokens")
    temperature = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)")

    history = gr.State([])

    msg.submit(
        respond,
        inputs=[msg, history, system_message, max_tokens, temperature, top_p],
        outputs=[chatbot, history]
    )

if __name__ == "__main__":
    demo.launch()
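
# To try it locally (assuming gradio and huggingface_hub are installed), e.g.:
#   HF_TOKEN=<your token> python app.py   # "app.py" is the usual Space entrypoint name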