kawre committed on
Commit 55f556b · verified · Parent: 0008a36

Update app.py

Files changed (1): app.py +260 -214
app.py CHANGED
@@ -1,84 +1,97 @@
 import os
 import traceback
 import logging
-from typing import List, Dict, Any
 
 import gradio as gr
 from huggingface_hub import InferenceClient
 
-# --------------------------
-# CONFIG / ENV
-# --------------------------
 HF_TOKEN = os.environ.get("HF_TOKEN")
 DEFAULT_LLAMA_MODEL = os.environ.get("LLAMA_MODEL", "meta-llama/Llama-3.1-8B-Instruct")
 DEFAULT_AUX1 = os.environ.get("AUX1_MODEL", "google/flan-t5-large")
 DEFAULT_AUX2 = os.environ.get("AUX2_MODEL", "facebook/bart-large-cnn")
 
-# Basic logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-# Simple requirement check message for the user
 if not HF_TOKEN:
-    logger.warning("HF_TOKEN não encontrado nas variáveis de ambiente. Configure-o nos Secrets do Hugging Face ou no ambiente local.")
 
-# --------------------------
 # Initialize HF clients
-# --------------------------
-# InferenceClient usually accepts a token and, optionally, a model in the constructor.
-client_main = InferenceClient(token=HF_TOKEN, model=DEFAULT_LLAMA_MODEL)
-client_aux1 = InferenceClient(token=HF_TOKEN, model=DEFAULT_AUX1)
-client_aux2 = InferenceClient(token=HF_TOKEN, model=DEFAULT_AUX2)
-
-
-# --------------------------
 # Helpers
-# --------------------------
 
 def _extract_text_from_response(obj: Any) -> str:
-    """Tries to extract text from the various HF/inference response structures.
-
-    Returns an empty string if nothing can be extracted.
-    """
     if obj is None:
         return ""
-
-    # 1) simple objects with common attributes
     for attr in ("content", "text", "generated_text", "generation_text"):
         if hasattr(obj, attr):
             try:
-                val = getattr(obj, attr)
-                if isinstance(val, str):
-                    return val
-                # sometimes it is an array/object holding the text
-                try:
-                    return str(val)
-                except Exception:
-                    pass
             except Exception:
                 pass
-
-    # 2) choices style (OpenAI/HF)
     try:
         choices = None
         if hasattr(obj, "choices"):
             choices = obj.choices
         elif isinstance(obj, dict) and "choices" in obj:
             choices = obj["choices"]
-
         if choices:
             first = choices[0]
-            # dict-like
             if isinstance(first, dict):
-                # message.content
                 if "message" in first and isinstance(first["message"], dict) and "content" in first["message"]:
                     return first["message"]["content"]
-                # text
                 if "text" in first:
                     return first["text"]
                 if "content" in first:
                     return first["content"]
-            # object-like
             if hasattr(first, "message"):
                 msg = first.message
                 if isinstance(msg, dict) and "content" in msg:
@@ -87,8 +100,7 @@ def _extract_text_from_response(obj: Any) -> str:
                 return first.text
     except Exception:
         pass
-
-    # 3) Hugging Face "generations" common structure
     try:
         if hasattr(obj, "generations") and len(obj.generations) > 0:
             g = obj.generations[0]
@@ -98,231 +110,265 @@
             return g.text
     except Exception:
         pass
-
-    # 4) dict-like fallback
     try:
         if isinstance(obj, dict):
-            # look for the first string values
             for k in ("text", "content", "generated_text"):
                 if k in obj and isinstance(obj[k], str):
                     return obj[k]
     except Exception:
         pass
-
-    # 5) last resort
     try:
         return str(obj)
     except Exception:
         return ""
 
-
-def _messages_to_prompt(messages: List[Dict[str, str]]) -> str:
-    """Converts a list of [{role, content}] messages into a simple textual prompt.
-
-    E.g. "SYSTEM: ...\nUSER: ...\nASSISTANT:", ready for text_generation.
-    """
-    lines = []
-    for m in messages:
-        role = m.get("role", "user")
-        content = m.get("content", "")
-        lines.append(f"{role.upper()}: {content}")
-    lines.append("ASSISTANT:")
-    return "\n".join(lines)
-
-
 def call_model_with_messages(client: InferenceClient, messages: List[Dict[str, str]],
                              max_new_tokens: int = 512, temperature: float = 0.7, top_p: float = 0.95) -> Any:
-    """Tries several ways of calling the HF inference client to obtain a chat response.
-
-    Strategy (in order):
-    1) client.completions.create(messages=...)
-    2) client.chat.create / client.chat(...) / client.chat_completion.create / client.chat_completion(...)
-    3) client.text_generation(prompt=...)
-    4) directly call any discovered methods whose names contain 'create'/'generate'/'complet'
-
-    Returns the raw object returned by the library, or raises RuntimeError with debug info.
-    """
-    # 1) try completions.create
    try:
-        comps = getattr(client, "completions", None)
-        if comps is not None and hasattr(comps, "create"):
-            logger.info("Chamando client.completions.create(messages=...)")
-            return comps.create(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
-        # in case completions is itself callable
-        if hasattr(client, "completions") and callable(client.completions):
-            logger.info("Chamando client.completions(...) diretamente")
-            return client.completions(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
-    except Exception as e:
-        logger.debug("completions.create falhou: %s", e)
 
-    # 2) try the chat / chat_completion namespaces (present in the runtime inspected)
-    # supports: client.chat.create, client.chat(...), client.chat_completion.create, client.chat_completion(...)
-    for chat_ns in ("chat", "chat_completion", "chat_completions"):
-        try:
-            ns = getattr(client, chat_ns, None)
-            if ns is None:
-                continue
-            # ns may be an object with .create, or directly callable
-            if hasattr(ns, "create"):
-                logger.info(f"Chamando {chat_ns}.create(messages=...)")
-                return ns.create(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
-            if callable(ns):
-                logger.info(f"Chamando {chat_ns}(messages=...)")
-                return ns(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
-        except Exception as e:
-            logger.debug("%s falhou: %s", chat_ns, e)
-
-    # 3) try client.chat directly (per the debug output it may exist as an attribute with inner methods)
    try:
-        if hasattr(client, "chat"):
-            chat_obj = getattr(client, "chat")
-            # if chat_obj has create
-            if hasattr(chat_obj, "create"):
-                logger.info("Chamando client.chat.create(messages=...)")
-                return chat_obj.create(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
-            # if chat_obj has chat_completion
-            if hasattr(chat_obj, "chat_completion") and hasattr(chat_obj.chat_completion, "create"):
-                logger.info("Chamando client.chat.chat_completion.create(messages=...)")
-                return chat_obj.chat_completion.create(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
-            # if chat_obj is callable
-            if callable(chat_obj):
-                logger.info("Chamando client.chat(messages=...)")
-                return chat_obj(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
-    except Exception as e:
-        logger.debug("client.chat path falhou: %s", e)
 
-    # 4) build a concatenated prompt and use text_generation
     prompt = _messages_to_prompt(messages)
     try:
         if hasattr(client, "text_generation"):
-            logger.info("Chamando client.text_generation(prompt=...)")
-            return client.text_generation(prompt=prompt, max_new_tokens=max_new_tokens, temperature=temperature)
-        # some versions use .generate
         if hasattr(client, "generate") and callable(client.generate):
-            logger.info("Chamando client.generate(prompt=...)")
-            return client.generate(prompt=prompt, max_new_tokens=max_new_tokens)
-    except Exception as e:
-        logger.debug("text_generation/generate falhou: %s", e)
 
-    # 5) last attempt: look for usefully named methods
     candidate_methods = [m for m in dir(client) if any(k in m for k in ("create", "generate", "complete", "run"))]
     for name in candidate_methods:
         try:
             method = getattr(client, name)
             if callable(method):
-                try:
-                    # prefer the named arg messages
-                    return method(messages=messages)
-                except TypeError:
-                    try:
-                        return method(prompt)
-                    except Exception:
-                        try:
-                            return method(messages)
-                        except Exception:
-                            pass
         except Exception:
-            pass
 
-    # if we got here, everything failed
-    debug = {
-        "available_attrs": dir(client),
-        "messages_sample": messages[:3]
-    }
     raise RuntimeError(f"Não foi possível chamar o cliente HF com as assinaturas testadas. Debug: {debug}")
 
-
-# --------------------------
-# Main respond function
-# --------------------------
-
-def respond(message: str, history: List[Dict[str, str]], system_message: str,
-            max_tokens: int, temperature: float, top_p: float):
-    # prepare the messages in stateless format
-    messages: List[Dict[str, str]] = []
-    messages.append({"role": "system", "content": system_message or ""})
-    for h in history:
-        # history holds dicts with 'role' and 'content' (Gradio state)
-        messages.append({"role": h.get("role", "user"), "content": h.get("content", "")})
-    messages.append({"role": "user", "content": message})
-
     try:
-        # call Llama (client_main)
-        response_main_obj = call_model_with_messages(client_main, messages,
-                                                     max_new_tokens=max_tokens, temperature=temperature, top_p=top_p)
         response_main = _extract_text_from_response(response_main_obj)
 
-        # Aux 1: FLAN-T5 - rephrasing
         prompt_aux1 = f"Reformule este texto de forma clara e concisa:\n{response_main}"
         try:
-            # try text_generation with client_aux1
-            if hasattr(client_aux1, "text_generation"):
                 res_a1 = client_aux1.text_generation(prompt=prompt_aux1, max_new_tokens=max(128, max_tokens // 4))
-            elif hasattr(client_aux1, "completions") and hasattr(client_aux1.completions, "create"):
                 res_a1 = client_aux1.completions.create(prompt=prompt_aux1, max_new_tokens=max(128, max_tokens // 4))
             else:
-                # simple fallback
-                res_a1 = client_aux1.text_generation(prompt=prompt_aux1, max_new_tokens=max(128, max_tokens // 4))
-            response_aux1 = _extract_text_from_response(res_a1)
-        except Exception as e:
-            logger.exception("Erro no passo Aux1 (FLAN-T5): %s", e)
             response_aux1 = response_main
 
-        # Aux 2: BART - 3-sentence summary
         prompt_aux2 = f"Resuma este texto em 3 frases:\n{response_aux1}"
         try:
-            if hasattr(client_aux2, "text_generation"):
                 res_a2 = client_aux2.text_generation(prompt=prompt_aux2, max_new_tokens=150)
-            elif hasattr(client_aux2, "completions") and hasattr(client_aux2.completions, "create"):
                 res_a2 = client_aux2.completions.create(prompt=prompt_aux2, max_new_tokens=150)
             else:
-                res_a2 = client_aux2.text_generation(prompt=prompt_aux2, max_new_tokens=150)
-            response_aux2 = _extract_text_from_response(res_a2)
-        except Exception as e:
-            logger.exception("Erro no passo Aux2 (BART): %s", e)
             response_aux2 = response_aux1
 
     except Exception as e:
         tb = traceback.format_exc(limit=5)
-        logger.exception("Erro ao gerar resposta principal: %s", e)
         response_aux2 = f"Erro ao gerar resposta: {e}\n\nTraceback (curto):\n{tb}"
-
-    # update the history in the Gradio Chatbot format (user + assistant)
-    history.append({"role": "user", "content": message})
-    history.append({"role": "assistant", "content": response_aux2})
-
-    # gradio expects (chatbot, history) to be returned; in this app we use the history itself as the chatbot
-    return history, history
-
-
-# --------------------------
-# Gradio interface
-# --------------------------
-
-with gr.Blocks() as demo:
-    gr.Markdown("## 🤖 Chatbot em Cascata (Llama 3.1 + FLAN-T5 + BART) - Versão Corrigida")
-
-    system_message = gr.Textbox(
-        value="Você é um chatbot amigável e prestativo.",
-        label="System Message"
-    )
-
-    chatbot = gr.Chatbot()
-    msg = gr.Textbox(label="Digite sua mensagem")
-    max_tokens = gr.Slider(50, 2048, 512, step=50, label="Max Tokens")
-    temperature = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
-    top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)")
-
-    history = gr.State([])
-
-    def handle_submit(message, history, system_message, max_tokens, temperature, top_p):
-        return respond(message, history, system_message, max_tokens, temperature, top_p)
-
-    msg.submit(
-        handle_submit,
-        inputs=[msg, history, system_message, max_tokens, temperature, top_p],
-        outputs=[chatbot, history]
-    )
 
 if __name__ == "__main__":
     demo.launch()
 
+# app.py
+# Cascade chatbot for a Hugging Face Space / local run
+# - Llama 3.1 (input)
+# - FLAN-T5 (rephrasing)
+# - BART (3-sentence summary)
+#
+# Requirements (on the Space): set HF_TOKEN in the Secrets.
+# Optional variables for swapping models:
+# - LLAMA_MODEL (default: meta-llama/Llama-3.1-8B-Instruct)
+# - AUX1_MODEL (default: google/flan-t5-large)
+# - AUX2_MODEL (default: facebook/bart-large-cnn)
+#
+# Usage: python app.py
+# Recommended: a requirements.txt with gradio, huggingface-hub, transformers, accelerate, etc.
+
 import os
 import traceback
 import logging
+from typing import List, Dict, Any, Tuple
 
 import gradio as gr
 from huggingface_hub import InferenceClient
 
+# -------------------------
+# Config / Logging
+# -------------------------
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("cascade_chatbot")
+
 HF_TOKEN = os.environ.get("HF_TOKEN")
 DEFAULT_LLAMA_MODEL = os.environ.get("LLAMA_MODEL", "meta-llama/Llama-3.1-8B-Instruct")
 DEFAULT_AUX1 = os.environ.get("AUX1_MODEL", "google/flan-t5-large")
 DEFAULT_AUX2 = os.environ.get("AUX2_MODEL", "facebook/bart-large-cnn")
 
 if not HF_TOKEN:
+    logger.warning("HF_TOKEN não encontrado nas variáveis de ambiente. Configure nos Secrets do Space ou no ambiente local.")
 
+# -------------------------
 # Initialize HF clients
+# -------------------------
+# A separate client per model keeps their configuration independent
+try:
+    client_main = InferenceClient(token=HF_TOKEN, model=DEFAULT_LLAMA_MODEL)
+    client_aux1 = InferenceClient(token=HF_TOKEN, model=DEFAULT_AUX1)
+    client_aux2 = InferenceClient(token=HF_TOKEN, model=DEFAULT_AUX2)
+except Exception:
+    # client initialization failed (invalid token, etc.)
+    logger.exception("Falha ao inicializar InferenceClient(s). Verifique HF_TOKEN e nomes dos modelos.")
+    # Set them to None to avoid an immediate crash; errors will surface on use
+    client_main = None
+    client_aux1 = None
+    client_aux2 = None
+
+# -------------------------
 # Helpers
+# -------------------------
+def _messages_to_prompt(messages: List[Dict[str, str]]) -> str:
+    lines = []
+    for m in messages:
+        role = m.get("role", "user")
+        content = m.get("content", "")
+        lines.append(f"{role.upper()}: {content}")
+    lines.append("ASSISTANT:")
+    return "\n".join(lines)
 
 def _extract_text_from_response(obj: Any) -> str:
     if obj is None:
         return ""
+    # Common attributes
     for attr in ("content", "text", "generated_text", "generation_text"):
         if hasattr(obj, attr):
             try:
+                v = getattr(obj, attr)
+                if isinstance(v, str):
+                    return v
+                return str(v)
             except Exception:
                 pass
+    # choices style
     try:
         choices = None
         if hasattr(obj, "choices"):
             choices = obj.choices
         elif isinstance(obj, dict) and "choices" in obj:
             choices = obj["choices"]
         if choices:
             first = choices[0]
             if isinstance(first, dict):
                 if "message" in first and isinstance(first["message"], dict) and "content" in first["message"]:
                     return first["message"]["content"]
                 if "text" in first:
                     return first["text"]
                 if "content" in first:
                     return first["content"]
             if hasattr(first, "message"):
                 msg = first.message
                 if isinstance(msg, dict) and "content" in msg:
[... unchanged lines 98-99 collapsed ...]
                 return first.text
     except Exception:
         pass
+    # generations
     try:
         if hasattr(obj, "generations") and len(obj.generations) > 0:
             g = obj.generations[0]
[... unchanged lines 107-109 collapsed ...]
             return g.text
     except Exception:
         pass
+    # dict fallback
     try:
         if isinstance(obj, dict):
             for k in ("text", "content", "generated_text"):
                 if k in obj and isinstance(obj[k], str):
                     return obj[k]
     except Exception:
         pass
+    # last resort
     try:
         return str(obj)
     except Exception:
         return ""
 
+# -------------------------
+# Robust calls to the InferenceClient
+# -------------------------
 def call_model_with_messages(client: InferenceClient, messages: List[Dict[str, str]],
                              max_new_tokens: int = 512, temperature: float = 0.7, top_p: float = 0.95) -> Any:
+    """
+    Tries multiple signatures (chat_completion, client.chat, text_generation, etc.).
+    Logs full exceptions for diagnosis.
+    """
 
+    def try_call(method, /, *pos_args, **kw_args):
+        try:
+            # Do not log the whole messages list (it can be large); summarize it
+            safe_kw = {k: ("[MESSAGES]" if k == "messages" else v) for k, v in kw_args.items()}
+            logger.info("Tentando %s pos=%s kwargs=%s", getattr(method, "__name__", str(method)), pos_args, safe_kw)
+            return method(*pos_args, **kw_args)
+        except Exception:
+            logger.exception("Falha ao chamar %s", getattr(method, "__name__", str(method)))
+            return None
 
+    # Try to resolve the model name (fallback)
+    model_name = getattr(client, "model", None) or DEFAULT_LLAMA_MODEL
+
+    # 1) chat_completion (most common method)
     try:
+        cc = getattr(client, "chat_completion", None)
+        if cc:
+            # a) cc(model=..., messages=...)
+            res = try_call(cc, model=model_name, messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
+            if res is not None:
+                return res
+            # b) cc(messages=..., model=...)
+            res = try_call(cc, messages=messages, model=model_name, max_new_tokens=max_new_tokens, temperature=temperature)
+            if res is not None:
+                return res
+            # c) cc.create(...)
+            if hasattr(cc, "create"):
+                res = try_call(cc.create, model=model_name, messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
+                if res is not None:
+                    return res
+            # d) positional
+            res = try_call(cc, messages)
+            if res is not None:
+                return res
+    except Exception:
+        logger.exception("Erro no bloco chat_completion")
 
+    # 2) client.chat namespace
     try:
+        chat_ns = getattr(client, "chat", None)
+        if chat_ns:
+            if hasattr(chat_ns, "create"):
+                res = try_call(chat_ns.create, model=model_name, messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
+                if res is not None:
+                    return res
+            if hasattr(chat_ns, "chat_completion") and hasattr(chat_ns.chat_completion, "create"):
+                res = try_call(chat_ns.chat_completion.create, model=model_name, messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
+                if res is not None:
+                    return res
+            res = try_call(chat_ns, model_name, messages)
+            if res is not None:
+                return res
+    except Exception:
+        logger.exception("Erro no bloco chat namespace")
 
+    # 3) text_generation (fallback)
     prompt = _messages_to_prompt(messages)
     try:
         if hasattr(client, "text_generation"):
+            res = try_call(client.text_generation, prompt=prompt, max_new_tokens=max_new_tokens, temperature=temperature)
+            if res is not None:
+                return res
         if hasattr(client, "generate") and callable(client.generate):
+            res = try_call(client.generate, prompt=prompt, max_new_tokens=max_new_tokens)
+            if res is not None:
+                return res
+    except Exception:
+        logger.exception("Erro no bloco text_generation/generate")
 
+    # 4) last attempt: probe candidate methods
     candidate_methods = [m for m in dir(client) if any(k in m for k in ("create", "generate", "complete", "run"))]
     for name in candidate_methods:
         try:
             method = getattr(client, name)
             if callable(method):
+                res = try_call(method, messages=messages)
+                if res is not None:
+                    return res
+                res = try_call(method, prompt)
+                if res is not None:
+                    return res
+                res = try_call(method, messages)
+                if res is not None:
+                    return res
         except Exception:
+            logger.exception("Erro testando candidato %s", name)
 
+    # all attempts failed
+    debug = {"available_attrs": dir(client), "messages_sample": messages[:3]}
+    logger.error("Todas as tentativas falharam. Debug: %s", debug)
     raise RuntimeError(f"Não foi possível chamar o cliente HF com as assinaturas testadas. Debug: {debug}")
 
+# -------------------------
+# Pipeline: Llama -> FLAN -> BART
+# -------------------------
+def pipeline_cascade(user_message: str, system_message: str,
+                     max_tokens: int, temperature: float, top_p: float) -> Tuple[str, List[str]]:
+    """
+    Runs the cascade: Llama (client_main) -> FLAN (client_aux1) -> BART (client_aux2).
+    Returns the final text and a log of the steps.
+    """
+    logs = []
+    # Build the messages
+    messages = [{"role": "system", "content": system_message or ""}, {"role": "user", "content": user_message}]
     try:
+        logs.append("1) Chamando Llama (entrada)")
+        response_main_obj = call_model_with_messages(client_main, messages, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p)
         response_main = _extract_text_from_response(response_main_obj)
+        logs.append(f"-> Llama respondeu (resumo): {response_main[:300]}")
 
+        # Aux1: FLAN-T5 - rephrase
+        logs.append("2) Chamando FLAN-T5 (reformular)")
         prompt_aux1 = f"Reformule este texto de forma clara e concisa:\n{response_main}"
         try:
+            if client_aux1 and hasattr(client_aux1, "text_generation"):
                 res_a1 = client_aux1.text_generation(prompt=prompt_aux1, max_new_tokens=max(128, max_tokens // 4))
+            elif client_aux1 and hasattr(client_aux1, "completions") and hasattr(client_aux1.completions, "create"):
                 res_a1 = client_aux1.completions.create(prompt=prompt_aux1, max_new_tokens=max(128, max_tokens // 4))
             else:
+                res_a1 = None
+            response_aux1 = _extract_text_from_response(res_a1) if res_a1 is not None else response_main
+            logs.append(f"-> FLAN-T5 respondeu (resumo): {response_aux1[:300]}")
+        except Exception:
+            logs.append("FLAN-T5 falhou; usando resposta do Llama")
             response_aux1 = response_main
 
+        # Aux2: BART - 3-sentence summary
+        logs.append("3) Chamando BART (resumo em 3 frases)")
         prompt_aux2 = f"Resuma este texto em 3 frases:\n{response_aux1}"
         try:
+            if client_aux2 and hasattr(client_aux2, "text_generation"):
                 res_a2 = client_aux2.text_generation(prompt=prompt_aux2, max_new_tokens=150)
+            elif client_aux2 and hasattr(client_aux2, "completions") and hasattr(client_aux2.completions, "create"):
                 res_a2 = client_aux2.completions.create(prompt=prompt_aux2, max_new_tokens=150)
             else:
+                res_a2 = None
+            response_aux2 = _extract_text_from_response(res_a2) if res_a2 is not None else response_aux1
+            logs.append(f"-> BART respondeu (resumo): {response_aux2[:300]}")
+        except Exception:
+            logs.append("BART falhou; usando resposta do passo anterior")
             response_aux2 = response_aux1
 
     except Exception as e:
         tb = traceback.format_exc(limit=5)
+        logger.exception("Erro pipeline principal: %s", e)
         response_aux2 = f"Erro ao gerar resposta: {e}\n\nTraceback (curto):\n{tb}"
+        logs.append("Erro no pipeline: " + str(e))
+
+    return response_aux2, logs
+
+# -------------------------
+# Gradio App
+# -------------------------
+with gr.Blocks(title="Chatbot em Cascata - Llama + FLAN + BART") as demo:
+    gr.Markdown("## 🤖 Chatbot em Cascata\n"
+                "Fluxo: **Llama (entrada)** → **FLAN-T5 (reformulação)** → **BART (resumo em 3 frases)**\n\n"
+                "Antes de rodar, confirme que `HF_TOKEN` está definido nos Secrets do Space.")
+
+    with gr.Row():
+        with gr.Column(scale=2):
+            system_message = gr.Textbox(value="Você é um chatbot amigável e prestativo.",
+                                        label="System Message", lines=2)
+            chatbot = gr.Chatbot(label="Chat")
+            user_input = gr.Textbox(label="Digite sua mensagem", placeholder="Digite aqui...")
+            max_tokens = gr.Slider(50, 2048, value=512, step=50, label="Max Tokens")
+            temperature = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Temperature")
+            top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
+
+            history = gr.State([])
+
+            def submit_handler(msg, history, system_message, max_tokens, temperature, top_p):
+                # run the pipeline and update the history
+                out_text, logs = pipeline_cascade(msg, system_message, int(max_tokens), float(temperature), float(top_p))
+                history.append({"role": "user", "content": msg})
+                history.append({"role": "assistant", "content": out_text})
+                # also emit the step logs to the console (useful)
+                logger.info("Pipeline logs:\n%s", "\n".join(logs))
+                return history, history
+
+            user_input.submit(submit_handler,
+                              inputs=[user_input, history, system_message, max_tokens, temperature, top_p],
+                              outputs=[chatbot, history])
+
+            btn_send = gr.Button("Enviar")
+            btn_send.click(submit_handler,
+                           inputs=[user_input, history, system_message, max_tokens, temperature, top_p],
+                           outputs=[chatbot, history])
+
+        with gr.Column(scale=1):
+            gr.Markdown("### Model Info & Config (dentro do app)\n"
+                        "Este painel documenta os modelos usados e as configurações (exigência do trabalho).")
+
+            model_info_md = f"""
+**Modelos usados (mínimo 3):**
+
+- Llama (input): `{DEFAULT_LLAMA_MODEL}`
+- Aux 1 (reformulação): `{DEFAULT_AUX1}`
+- Aux 2 (resumo): `{DEFAULT_AUX2}`
+
+**Como foram configurados:**
+
+- Cada modelo é instanciado via `InferenceClient(token=HF_TOKEN, model=<model_name>)`.
+- Chamadas preferenciais:
+  - Para chat: `client.chat_completion(messages=..., model=...)` (quando disponível)
+  - Fallback: `client.text_generation(prompt=...)`
+- Ajustes de inferência controlados pelo usuário: `max_tokens`, `temperature`, `top_p`.
+- Logs de diagnóstico são gravados (úteis se houver erros de assinatura/permissão).
+"""
+            gr.Markdown(model_info_md)
+
+            # Self-test: runs predefined messages through the cascade and shows the result
+            test_output = gr.Textbox(label="Resultado do Self-Test", lines=12, interactive=False)
+
+            def run_self_test(system_message, max_tokens, temperature, top_p):
+                msgs = [
+                    "Explique resumidamente o que é a técnica de regressão linear.",
+                    "Resuma em 1 frase as vantagens de usar validação cruzada.",
+                    "Como posso autenticar usuários em uma aplicação web?"
+                ]
+                accumulated = []
+                for m in msgs:
+                    out, logs = pipeline_cascade(m, system_message, int(max_tokens), float(temperature), float(top_p))
+                    accumulated.append("INPUT: " + m)
+                    accumulated.append("OUTPUT: " + out)
+                    accumulated.append("LOGS: " + " | ".join(logs))
+                    accumulated.append("-" * 40)
+                return "\n".join(accumulated)
+
+            btn_test = gr.Button("Run self-test")
+            btn_test.click(run_self_test, inputs=[system_message, max_tokens, temperature, top_p], outputs=[test_output])
+
+            gr.Markdown("### Dicas de deploy\n"
+                        "- Defina HF_TOKEN nos Secrets do Space.\n"
+                        "- Use um runtime com GPU se disponível (modelos grandes exigem mais recursos).\n"
+                        "- Verifique permissões do modelo (alguns modelos exigem permissões específicas).")
 
 if __name__ == "__main__":
     demo.launch()
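
Review notes:

The fallback chain in call_model_with_messages prefers chat_completion and only then drops to text_generation. A minimal sketch of the two call shapes against huggingface_hub.InferenceClient (token and model name are placeholders; note that recent huggingface_hub releases name the chat parameter max_tokens, not max_new_tokens, so the try_call wrapper above would log those attempts as failures and fall through):

    from huggingface_hub import InferenceClient

    client = InferenceClient(token="hf_...", model="meta-llama/Llama-3.1-8B-Instruct")

    # Chat-style call; the text lives at choices[0].message.content.
    out = client.chat_completion(
        messages=[{"role": "user", "content": "Olá!"}],
        max_tokens=128,
        temperature=0.7,
    )
    print(out.choices[0].message.content)

    # Text-generation fallback over a flattened prompt, in the shape
    # _messages_to_prompt produces.
    print(client.text_generation("USER: Olá!\nASSISTANT:", max_new_tokens=128))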
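For reference, _messages_to_prompt flattens the chat into the ROLE:-prefixed transcript that fallback consumes, always ending with ASSISTANT: so the model continues from there; with the module imported:

    from app import _messages_to_prompt

    msgs = [
        {"role": "system", "content": "Você é um chatbot amigável."},
        {"role": "user", "content": "Oi!"},
    ]
    print(_messages_to_prompt(msgs))
    # SYSTEM: Você é um chatbot amigável.
    # USER: Oi!
    # ASSISTANT: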
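The cascade can also be exercised without the UI, mirroring what the in-app self-test button does; a quick smoke test (requires a valid HF_TOKEN in the environment):

    from app import pipeline_cascade

    final_text, steps = pipeline_cascade(
        "Explique o que é regressão linear.",
        "Você é um chatbot amigável e prestativo.",
        max_tokens=256, temperature=0.7, top_p=0.95,
    )
    print(final_text)
    print("\n".join(steps))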
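One point worth verifying before merging: the handlers append {"role": ..., "content": ...} dicts to the history, while gr.Chatbot() has historically defaulted to pairs of strings. On Gradio versions that support it (an assumption to check against the Space's pinned version), the component should be told to expect the OpenAI-style format:

    # Hypothetical tweak, assuming a Gradio release that accepts type="messages".
    chatbot = gr.Chatbot(label="Chat", type="messages")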