kawre committed
Commit e48436e · verified · 1 Parent(s): e66ba96

Update app.py

Files changed (1): app.py +49 -24
app.py CHANGED
@@ -1,41 +1,66 @@
 
  import gradio as gr
  from huggingface_hub import InferenceClient

- def respond(message, history, system_message, max_tokens, temperature, top_p, hf_token):
-     # Initialize the 3 clients
-     client_main = InferenceClient(token=hf_token, model="meta-llama/Llama-3.1-8B-Instruct")
-     client_aux1 = InferenceClient(token=hf_token, model="google/flan-t5-large")
-     client_aux2 = InferenceClient(token=hf_token, model="facebook/bart-large-cnn")

-     # History and system message
      messages = [{"role": "system", "content": system_message}]
      messages.extend(history)
      messages.append({"role": "user", "content": message})

-     # Step 1: Llama 3.1
-     response_main = client_main.text_generation(inputs=message, max_tokens=max_tokens)

-     # Step 2: Aux1
-     response_aux1 = client_aux1.text_generation(inputs=response_main, max_new_tokens=max_tokens)

-     # Step 3: Aux2
-     response_aux2 = client_aux2.text_generation(inputs=response_aux1, max_new_tokens=max_tokens)

-     return response_aux2

- # Gradio interface
- chatbot = gr.ChatInterface(
-     fn=respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(1, 2048, 512, label="Max new tokens"),
-         gr.Slider(0.1, 4.0, 0.7, label="Temperature"),
-         gr.Slider(0.1, 1.0, 0.95, label="Top-p (nucleus sampling)"),
-     ],
- )

  with gr.Blocks() as demo:
-     chatbot.render()

  if __name__ == "__main__":
      demo.launch()
 
+ import os
  import gradio as gr
  from huggingface_hub import InferenceClient

+ # Read the Hugging Face token from the Space's Secrets
+ HF_TOKEN = os.environ.get("HF_TOKEN")

+ # Initialize the model clients
+ client_main = InferenceClient(token=HF_TOKEN, model="meta-llama/Llama-3.1-8B-Instruct")
+ client_aux1 = InferenceClient(token=HF_TOKEN, model="google/flan-t5-large")
+ client_aux2 = InferenceClient(token=HF_TOKEN, model="facebook/bart-large-cnn")
+
+ # Main response function
+ def respond(message, history, system_message, max_tokens, temperature, top_p):
+     # Build the full conversation, including the system message
      messages = [{"role": "system", "content": system_message}]
      messages.extend(history)
      messages.append({"role": "user", "content": message})

+     full_prompt = "\n".join([m["content"] for m in messages])

+     # Step 1: Llama 3.1 - main generation (the prompt is passed positionally,
+     # the parameter is max_new_tokens, and the call returns a plain string)
+     response_main = client_main.text_generation(
+         full_prompt,
+         max_new_tokens=max_tokens,
+         temperature=temperature,
+         top_p=top_p
+     )

+     # Step 2: FLAN-T5 - rewording
+     response_aux1 = client_aux1.text_generation(
+         f"Rewrite this text clearly and objectively:\n{response_main}",
+         max_new_tokens=max_tokens
+     )

+     # Step 3: BART - summary
+     response_aux2 = client_aux2.text_generation(
+         f"Summarize this text in 3 sentences:\n{response_aux1}",
+         max_new_tokens=150
+     )

+     # Update the chat history; return it for both the Chatbot and the State
+     history.append({"role": "user", "content": message})
+     history.append({"role": "assistant", "content": response_aux2})
+
+     return history, history

+ # Gradio interface
  with gr.Blocks() as demo:
+     gr.Markdown("## 🤖 Cascading Chatbot (Llama 3.1 + FLAN-T5 + BART)")
+     system_message = gr.Textbox(value="You are a friendly and helpful chatbot.", label="System Message")
+     chatbot = gr.Chatbot(type="messages")
+     msg = gr.Textbox(label="Type your message")
+     max_tokens = gr.Slider(50, 2048, 512, step=50, label="Max Tokens")
+     temperature = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
+     top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)")
+
+     history = gr.State([])
+
+     def handle_submit(message, history, system_message, max_tokens, temperature, top_p):
+         return respond(message, history, system_message, max_tokens, temperature, top_p)
+
+     msg.submit(handle_submit, inputs=[msg, history, system_message, max_tokens, temperature, top_p], outputs=[chatbot, history])

  if __name__ == "__main__":
      demo.launch()
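
One caveat on the new pipeline, offered as an assumption rather than something this commit verifies: on the serverless Inference API, google/flan-t5-large and facebook/bart-large-cnn are served as text2text-generation and summarization models, so the raw text_generation calls in steps 2 and 3 may be rejected with an unsupported-task error. Below is a minimal sketch of a task-specific fallback for step 3, using the summarization method that InferenceClient does expose (HF_TOKEN and response_aux1 are the names from app.py above; the sample input is a stand-in):

import os
from huggingface_hub import InferenceClient

# Hypothetical fallback for step 3: call the summarization task directly.
# Assumes HF_TOKEN is set in the environment, as in app.py.
client_aux2 = InferenceClient(token=os.environ.get("HF_TOKEN"), model="facebook/bart-large-cnn")

response_aux1 = "Some intermediate text produced by step 2."  # stand-in input
# On recent huggingface_hub versions, summarization() returns a
# SummarizationOutput whose summary_text field holds the summary.
response_aux2 = client_aux2.summarization(response_aux1).summary_text
print(response_aux2)

If the text_generation route does work for these two models, nothing needs to change; the fallback only matters if the endpoint rejects the task.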