kawre committed on
Commit 0a7cdb1 · verified · 1 Parent(s): d0cfe92

Update app.py

Files changed (1)
  1. app.py +118 -50
app.py CHANGED
@@ -1,6 +1,7 @@
 import os
 import gradio as gr
 from huggingface_hub import InferenceClient
+import traceback
 
 # Get the Hugging Face token from the Secrets
 HF_TOKEN = os.environ.get("HF_TOKEN")
@@ -10,87 +11,153 @@ client_main = InferenceClient(token=HF_TOKEN, model="meta-llama/Llama-3.1-8B-Ins
 client_aux1 = InferenceClient(token=HF_TOKEN, model="google/flan-t5-large")
 client_aux2 = InferenceClient(token=HF_TOKEN, model="facebook/bart-large-cnn")
 
-# Utility function to extract generated text from varied response objects
+# Extract text from varied response objects
 def _extract_text_from_response(obj):
-    # try a few common response shapes, depending on the SDK/backend version
     if obj is None:
         return ""
-    # case: object with a 'content' attribute
+    # common attributes
     if hasattr(obj, "content"):
         try:
             return obj.content
         except Exception:
             pass
-    # case: object with a 'generated_text' attribute
+    if hasattr(obj, "text"):
+        try:
+            return obj.text
+        except Exception:
+            pass
     if hasattr(obj, "generated_text"):
         try:
             return obj.generated_text
         except Exception:
             pass
-    # case: choices-style response -> choices[0].message["content"]
+    # choices style
     try:
         if hasattr(obj, "choices") and len(obj.choices) > 0:
             choice = obj.choices[0]
-            # if it is dict-like
-            if isinstance(choice, dict) and "message" in choice and "content" in choice["message"]:
+            # dict-like
+            if isinstance(choice, dict) and "message" in choice and isinstance(choice["message"], dict) and "content" in choice["message"]:
                 return choice["message"]["content"]
-            # if choice has a 'message' attribute
-            if hasattr(choice, "message") and isinstance(choice.message, dict) and "content" in choice.message:
-                return choice.message["content"]
+            # object-like
+            if hasattr(choice, "message"):
+                msg = choice.message
+                if isinstance(msg, dict) and "content" in msg:
+                    return msg["content"]
+                if hasattr(msg, "get") and msg.get("content"):
+                    return msg.get("content")
     except Exception:
         pass
-    # fallback: str()
-    return str(obj)
+    # last resort
+    try:
+        return str(obj)
+    except Exception:
+        return ""
+
+# Helper that tries several ways of using the Llama chat
+def _call_llama_chat_stateless(client_chat_proxy, messages, max_new_tokens, temperature, top_p):
+    """
+    Tries, in order:
+    1) client_chat_proxy(messages=..., max_new_tokens=..., ...)
+    2) client_chat_proxy.chat_completion(messages=..., ...) or client_chat_proxy.create(...)
+    3) adding messages via add_message() + client_chat_proxy.generate()/send() (if supported)
+    4) probing for alternative methods (chat_completion, create, generate, run) and trying them
+    Returns the response object or raises an Exception with debug info.
+    """
+    # 1) direct call if the proxy is callable
+    if callable(client_chat_proxy):
+        try:
+            return client_chat_proxy(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
+        except TypeError:
+            # maybe it accepts positional args
+            try:
+                return client_chat_proxy(messages, max_new_tokens, temperature, top_p)
+            except Exception:
+                pass
+        except Exception:
+            pass
+
+    # 2) try known method names
+    for method_name in ("send_message", "send", "create", "generate", "run", "complete", "chat_completion", "chat_complete"):
+        if hasattr(client_chat_proxy, method_name):
+            method = getattr(client_chat_proxy, method_name)
+            try:
+                # many variants: prefer passing messages=...
+                return method(messages=messages, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
+            except TypeError:
+                # maybe it only accepts positional args
+                try:
+                    return method(messages, max_new_tokens, temperature, top_p)
+                except Exception:
+                    # try without arguments (some impls use add_message + send)
+                    pass
+            except Exception:
+                # if it fails, try the next option
+                pass
+
+    # 3) try sequential add_message() and then generate
+    if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "send_message"):
+        try:
+            # add the messages one by one
+            for m in messages:
+                client_chat_proxy.add_message(m["role"], m["content"])
+            return client_chat_proxy.send_message(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
+        except Exception:
+            pass
+
+    if hasattr(client_chat_proxy, "add_message") and hasattr(client_chat_proxy, "generate"):
+        try:
+            for m in messages:
+                client_chat_proxy.add_message(m["role"], m["content"])
+            return client_chat_proxy.generate(max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
+        except Exception:
+            pass
+
+    # 4) last attempt: look for similar method names and try calling them with only the messages
+    candidate_methods = [m for m in dir(client_chat_proxy) if any(k in m for k in ("send", "create", "generate", "run", "complete"))]
+    for name in candidate_methods:
+        try:
+            method = getattr(client_chat_proxy, name)
+            if callable(method):
+                try:
+                    return method(messages=messages)
+                except TypeError:
+                    try:
+                        return method(messages)
+                    except Exception:
+                        pass
+        except Exception:
+            pass
+
+    # If we get here, everything failed: raise an error with debug info
+    debug = {
+        "available_attrs": dir(client_chat_proxy),
+        "messages_sample": messages[:3]
+    }
+    raise RuntimeError(f"Não foi possível chamar o chat proxy do Llama com as assinaturas testadas. Debug: {debug}")
 
 # Main response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
     try:
-        # --- Step 1: Llama 3.1 via a stateless call with a list of messages ---
-        # Build the message list (system + history + current user turn)
+        # Build the messages list (stateless)
         messages = []
         messages.append({"role": "system", "content": system_message or ""})
         for h in history:
-            # expects history items in the format {"role": "user"/"assistant", "content": "..."}
-            role = h.get("role", "user")
-            content = h.get("content", "")
-            messages.append({"role": role, "content": content})
+            messages.append({"role": h.get("role", "user"), "content": h.get("content", "")})
         messages.append({"role": "user", "content": message})
 
-        # Send the messages to the Llama chat (stateless)
-        # Note: some backends accept send_message(messages=...), others accept send_message() after add_message.
-        # Here we try to send the list directly.
+        # Call the Llama chat, trying several signatures
         chat_proxy = client_main.chat
-        try:
-            # main attempt: send the messages directly
-            response_main_obj = chat_proxy.send_message(
-                messages=messages,
-                max_new_tokens=max_tokens,
-                temperature=temperature,
-                top_p=top_p
-            )
-        except TypeError:
-            # if the signature does not accept messages=..., we try to add the messages manually
-            # (not every ProxyClientChat exposes clean chat creation; so we add and then generate)
-            # This block tries to use add_message() sequentially.
-            # Note: if add_message fails, it falls through to the general except below.
-            for msg_item in messages:
-                chat_proxy.add_message(msg_item["role"], msg_item["content"])
-            response_main_obj = chat_proxy.send_message(
-                max_new_tokens=max_tokens,
-                temperature=temperature,
-                top_p=top_p
-            )
-
+        response_main_obj = _call_llama_chat_stateless(chat_proxy, messages, max_tokens, temperature, top_p)
         response_main = _extract_text_from_response(response_main_obj)
 
-        # --- Step 2: FLAN-T5 (rewrite) ---
+        # Step 2: FLAN-T5 (rewrite)
         result_aux1 = client_aux1.text_generation(
             prompt=f"Reformule este texto de forma clara e concisa:\n{response_main}",
             max_new_tokens=max_tokens
         )
         response_aux1 = _extract_text_from_response(result_aux1)
 
-        # --- Step 3: BART (3-sentence summary) ---
+        # Step 3: BART (3-sentence summary)
         result_aux2 = client_aux2.text_generation(
             prompt=f"Resuma este texto em 3 frases:\n{response_aux1}",
             max_new_tokens=150
@@ -98,8 +165,9 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
         response_aux2 = _extract_text_from_response(result_aux2)
 
     except Exception as e:
-        # Friendly error message for the user (keep the traceback short)
-        response_aux2 = f"Erro ao gerar resposta: {e}"
+        # include a short traceback: useful debugging info for the next fix
+        tb = traceback.format_exc(limit=5)
+        response_aux2 = f"Erro ao gerar resposta: {e}\n\nTraceback (curto):\n{tb}"
 
     # Update the history in the Gradio Chatbot format
     history.append({"role": "user", "content": message})
@@ -112,7 +180,7 @@ with gr.Blocks() as demo:
     gr.Markdown("## 🤖 Chatbot em Cascata (Llama 3.1 + FLAN-T5 + BART)")
 
     system_message = gr.Textbox(
-        value="Você é um chatbot amigável e prestativo.",
+        value="Você é um chatbot amigável e prestativo.",
        label="System Message"
     )
 
@@ -121,15 +189,15 @@
     max_tokens = gr.Slider(50, 2048, 512, step=50, label="Max Tokens")
     temperature = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
     top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)")
-
+
     history = gr.State([])
 
     def handle_submit(message, history, system_message, max_tokens, temperature, top_p):
         return respond(message, history, system_message, max_tokens, temperature, top_p)
 
     msg.submit(
-        handle_submit,
-        inputs=[msg, history, system_message, max_tokens, temperature, top_p],
+        handle_submit,
+        inputs=[msg, history, system_message, max_tokens, temperature, top_p],
         outputs=[chatbot, history]
     )
 
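
For reference, the long fallback chain in _call_llama_chat_stateless is duck-typing its way toward what recent huggingface_hub releases expose directly as InferenceClient.chat_completion. A minimal sketch of the same stateless call, assuming a client version (roughly 0.22 or later) that has this method, and assuming the model id truncated in the hunk header is meta-llama/Llama-3.1-8B-Instruct:

    import os
    from huggingface_hub import InferenceClient

    # assumed full model id; the hunk header above truncates it
    client = InferenceClient(token=os.environ.get("HF_TOKEN"),
                             model="meta-llama/Llama-3.1-8B-Instruct")

    messages = [
        {"role": "system", "content": "Você é um chatbot amigável e prestativo."},
        {"role": "user", "content": "Olá!"},
    ]

    # chat_completion takes max_tokens (not max_new_tokens) and returns an
    # OpenAI-style object, so no duck-typed extraction is needed:
    response = client.chat_completion(messages=messages, max_tokens=512,
                                      temperature=0.7, top_p=0.95)
    print(response.choices[0].message.content)

If the installed client predates chat_completion, client_main.chat will not behave this way, and the signature probing plus _extract_text_from_response above remains the fallback path.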
203