Spaces:

LiKenun
/

ai-building-blocks

Running on Zero

App Files Files Community

LiKenun committed on Nov 3

Commit

4c71b8b

1 Parent(s): bb6107f

Add AI-generated chat sample

Browse files

Files changed (4) hide show

app.py +28 -0
chatbot.py +66 -0
image_to_text.py +4 -1
text_to_speech.py +6 -2

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ from functools import partial
 import gradio as gr
 from huggingface_hub import InferenceClient
 from automatic_speech_recognition import automatic_speech_recognition
 from image_classification import image_classification
 from image_to_text import image_to_text
 from text_to_image import text_to_image
@@ -91,6 +92,33 @@ class App:
                         inputs=audio_transcription_audio_input,
                         outputs=audio_transcription_output
                     )
             demo.launch()

 import gradio as gr
 from huggingface_hub import InferenceClient
 from automatic_speech_recognition import automatic_speech_recognition
+from chatbot import chat
 from image_classification import image_classification
 from image_to_text import image_to_text
 from text_to_image import text_to_image
                         inputs=audio_transcription_audio_input,
                         outputs=audio_transcription_output
                     )
+                with gr.Tab("Chat"):
+                    gr.Markdown("Have a conversation with an AI chatbot.")
+                    chatbot_history = gr.State(value=None) # Store the conversation history.
+                    chatbot_output = gr.Chatbot(label="Conversation")
+                    chatbot_input = gr.Textbox(label="Your message")
+                    chatbot_send_button = gr.Button("Send")
+                    def chat_interface(message: str, history: list | None, conversation_state: list[dict] | None):
+                        """Handle chatbot interaction with Gradio chat format."""
+                        if not message.strip():
+                            return history, conversation_state, ""
+                        response, updated_conversation = chat(message, conversation_state) # Get response from chatbot.
+                        if history is None: # Update Gradio chat history format: list of [user_message, bot_message] pairs.
+                            history = []
+                        history.append([message, response])
+                        return history, updated_conversation, "" # Clear input field for the next message from the user.
+                    chatbot_send_button.click(
+                        fn=chat_interface,
+                        inputs=[chatbot_input, chatbot_output, chatbot_history],
+                        outputs=[chatbot_output, chatbot_history, chatbot_input]
+                    )
+                    chatbot_input.submit(
+                        fn=chat_interface,
+                        inputs=[chatbot_input, chatbot_output, chatbot_history],
+                        outputs=[chatbot_output, chatbot_history, chatbot_input]
+                    )
             demo.launch()

chatbot.py ADDED Viewed

	@@ -0,0 +1,66 @@

+from os import getenv
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+from utils import get_pytorch_device, spaces_gpu
+# Global chatbot instance (initialized once)
+_chatbot = None
+_tokenizer = None
+def get_chatbot():
+    global _chatbot, _tokenizer
+    if _chatbot is None:
+        model_id = getenv("CHAT_MODEL")
+        device = get_pytorch_device()
+        _tokenizer = AutoTokenizer.from_pretrained(model_id)
+        _chatbot = AutoModelForSeq2SeqLM.from_pretrained(
+            model_id,
+            use_safetensors=True  # Use safetensors to avoid torch.load restriction
+        ).to(device)
+    return _chatbot, _tokenizer
+@spaces_gpu
+def chat(message: str, conversation_history: list[dict] | None) -> tuple[str, list[dict]]:
+    model, tokenizer = get_chatbot()
+    # Initialize conversation history if this is the first message
+    if conversation_history is None:
+        conversation_history = []
+    # Add the user's message
+    conversation_history.append({"role": "user", "content": message})
+    # For BlenderBot models, format conversation as dialogue history
+    # Build the full conversation context as a string
+    dialogue_text = ""
+    for msg in conversation_history:
+        if msg["role"] == "user":
+            dialogue_text += f"User: {msg['content']}\n"
+        elif msg["role"] == "assistant":
+            dialogue_text += f"Assistant: {msg['content']}\n"
+    # Tokenize the input
+    inputs = tokenizer([dialogue_text], return_tensors="pt", truncation=True, max_length=512)
+    device = get_pytorch_device()
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+    # Generate response
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=128,
+        do_sample=True,
+        temperature=0.7,
+        pad_token_id=tokenizer.eos_token_id
+    )
+    # Decode the generated tokens - for seq2seq models, this should be just the assistant's response
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Clean up the response - remove any "Assistant:" prefix if present
+    response = response.strip()
+    if response.startswith("Assistant:"):
+        response = response[len("Assistant:"):].strip()
+    # Add the assistant's response to history
+    conversation_history.append({"role": "assistant", "content": response})
+    return response, conversation_history

image_to_text.py CHANGED Viewed

@@ -10,7 +10,10 @@ def image_to_text(image: Image) -> list[str]:
     image_to_text_model_id = getenv("IMAGE_TO_TEXT_MODEL")
     pytorch_device = get_pytorch_device()
     processor = AutoProcessor.from_pretrained(image_to_text_model_id)
-    model = BlipForConditionalGeneration.from_pretrained(image_to_text_model_id).to(pytorch_device)
     inputs = processor(images=image, return_tensors="pt").to(pytorch_device)
     generated_ids = model.generate(pixel_values=inputs.pixel_values, num_beams=3, max_length=20, min_length=5)
     results = processor.batch_decode(generated_ids, skip_special_tokens=True)

     image_to_text_model_id = getenv("IMAGE_TO_TEXT_MODEL")
     pytorch_device = get_pytorch_device()
     processor = AutoProcessor.from_pretrained(image_to_text_model_id)
+    model = BlipForConditionalGeneration.from_pretrained(
+        image_to_text_model_id,
+        use_safetensors=True # Use safetensors to avoid torch.load restriction.
+    ).to(pytorch_device)
     inputs = processor(images=image, return_tensors="pt").to(pytorch_device)
     generated_ids = model.generate(pixel_values=inputs.pixel_values, num_beams=3, max_length=20, min_length=5)
     results = processor.batch_decode(generated_ids, skip_special_tokens=True)

text_to_speech.py CHANGED Viewed

@@ -6,8 +6,12 @@ from utils import spaces_gpu
 @spaces_gpu
 def text_to_speech(text: str) -> tuple[int, bytes]:
-    narrator = pipeline("text-to-speech", getenv("TEXT_TO_SPEECH_MODEL"))
     del narrator
     gc.collect()
-    result = narrator(text)
     return (result["sampling_rate"], result["audio"][0])

 @spaces_gpu
 def text_to_speech(text: str) -> tuple[int, bytes]:
+    narrator = pipeline(
+        "text-to-speech",
+        getenv("TEXT_TO_SPEECH_MODEL"),
+        model_kwargs={"use_safetensors": True} # Use safetensors to avoid torch.load restriction.
+    )
+    result = narrator(text)
     del narrator
     gc.collect()
     return (result["sampling_rate"], result["audio"][0])