import gradio as gr
from huggingface_hub import InferenceClient
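
# Pipeline: Llama-3.1 drafts a reply, Flan-T5 rewrites it, and BART-CNN
# condenses the result before it is returned to the user.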

def respond(message, history, system_message, max_tokens, temperature, top_p, hf_token: gr.OAuthToken):
    # Initialize the 3 clients (hf_token is injected by Gradio's OAuth login)
    client_main = InferenceClient(token=hf_token.token, model="meta-llama/Llama-3.1-8B-Instruct")
    client_aux1 = InferenceClient(token=hf_token.token, model="google/flan-t5-large")
    client_aux2 = InferenceClient(token=hf_token.token, model="facebook/bart-large-cnn")

    # Chat history and system message
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    # Step 1: Llama 3.1 answers the full conversation
    response_main = client_main.chat_completion(
        messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p
    ).choices[0].message.content

    # Step 2: Aux1 (Flan-T5) rewrites the draft answer
    response_aux1 = client_aux1.text_generation(response_main, max_new_tokens=max_tokens)

    # Step 3: Aux2 (BART-CNN) condenses the rewritten text
    response_aux2 = client_aux2.text_generation(response_aux1, max_new_tokens=max_tokens)

    return response_aux2

# Gradio interface
chatbot = gr.ChatInterface(
    fn=respond,
    type="messages",
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

with gr.Blocks() as demo:
    gr.LoginButton()  # provides the OAuth token consumed by respond()
    chatbot.render()

if __name__ == "__main__":
    demo.launch()