import gradio as gr
from huggingface_hub import InferenceClient


def respond(message, history, system_message, max_tokens, temperature, top_p, hf_token):
    # Initialize the three clients
    client_main = InferenceClient(token=hf_token, model="meta-llama/Llama-3.1-8B-Instruct")
    client_aux1 = InferenceClient(token=hf_token, model="google/flan-t5-large")
    client_aux2 = InferenceClient(token=hf_token, model="facebook/bart-large-cnn")

    # Build the conversation: system message, prior history, then the new user turn
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    # Step 1: Llama 3.1 answers the full conversation. It is a chat model, so use
    # chat_completion (which also accepts the sampling parameters) instead of
    # text_generation on the bare message.
    response_main = client_main.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    ).choices[0].message.content

    # Step 2: Flan-T5 rewrites the answer. The prompt is the first positional
    # argument of text_generation and the kwarg is max_new_tokens (there is no
    # `inputs=` parameter). Flan-T5 is a text2text checkpoint, so this assumes
    # the inference endpoint serves it for text generation.
    response_aux1 = client_aux1.text_generation(response_main, max_new_tokens=max_tokens)

    # Step 3: BART condenses the result. bart-large-cnn is a summarization
    # checkpoint, so call the summarization task rather than text_generation.
    response_aux2 = client_aux2.summarization(response_aux1).summary_text

    return response_aux2


# Gradio interface; type="messages" makes `history` arrive as role/content dicts,
# matching how respond() extends the messages list above
chatbot = gr.ChatInterface(
    fn=respond,
    type="messages",
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(1, 2048, 512, step=1, label="Max new tokens"),
        gr.Slider(0.1, 4.0, 0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p (nucleus sampling)"),
        # respond() expects hf_token as its last parameter, so it needs a
        # matching input component here (it was missing from the original list)
        gr.Textbox(label="HF API token", type="password"),
    ],
)

with gr.Blocks() as demo:
    chatbot.render()

if __name__ == "__main__":
    demo.launch()
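# Usage note (a sketch of the assumed setup, not part of the original script):
# run `pip install gradio huggingface_hub`, then `python app.py`, and paste a
# token from https://huggingface.co/settings/tokens into the "HF API token"
# field. Serverless Inference API access to all three models is assumed;
# meta-llama/Llama-3.1-8B-Instruct is gated and requires accepting its license.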