Spaces: Running on Zero
| from dotenv import load_dotenv | |
| from os import getenv | |
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| from automatic_speech_recognition import create_asr_tab | |
| from chatbot import create_chatbot_tab | |
| from image_classification import create_image_classification_tab | |
| from image_to_text import create_image_to_text_tab | |
| from text_to_image import create_text_to_image_tab | |
| from text_to_speech import create_text_to_speech_tab | |
| from translation import create_translation_tab | |
class App:
    """Top-level application for the AI Building Blocks Gradio interface.

    Holds the shared InferenceClient and the model ID for each building
    block, and assembles all tabs into a single tabbed Blocks UI.
    """

    def __init__(
        self,
        client: InferenceClient,
        text_to_image_model: str,
        image_to_text_model: str,
        image_classification_model: str,
        text_to_speech_model: str,
        audio_transcription_model: str,
        chat_model: str,
        fallback_translation_model: str
    ):
        """Store the inference client and per-tab model identifiers.

        Args:
            client: Hugging Face InferenceClient used for API calls
                (consumed by the text-to-image and ASR tabs).
            text_to_image_model: Model ID for text-to-image generation.
            image_to_text_model: Model ID for image captioning.
            image_classification_model: Model ID for image classification.
            text_to_speech_model: Model ID for text-to-speech.
            audio_transcription_model: Model ID for automatic speech
                recognition.
            chat_model: Model ID for the chatbot.
            fallback_translation_model: Translation model ID used for
                languages without a dedicated translation model.
        """
        self.client = client
        self.text_to_image_model = text_to_image_model
        self.image_to_text_model = image_to_text_model
        self.image_classification_model = image_classification_model
        self.text_to_speech_model = text_to_speech_model
        self.audio_transcription_model = audio_transcription_model
        self.chat_model = chat_model
        self.fallback_translation_model = fallback_translation_model

    def run(self):
        """Build the tabbed Gradio UI and launch it.

        Blocks until the interface is closed. Tab order is defined by
        the spec list below.
        """
        # (tab title, zero-arg builder) pairs; each builder renders one
        # building block inside its tab. Lambdas are invoked immediately
        # in the loop, so late binding is not a concern here.
        tab_specs = [
            ("Text-to-image Generation",
             lambda: create_text_to_image_tab(self.client, self.text_to_image_model)),
            ("Image-to-text or Image Captioning",
             lambda: create_image_to_text_tab(self.image_to_text_model)),
            ("Image Classification",
             lambda: create_image_classification_tab(self.image_classification_model)),
            ("Text-to-speech (TTS)",
             lambda: create_text_to_speech_tab(self.text_to_speech_model)),
            ("Automatic Speech Recognition (ASR)",
             lambda: create_asr_tab(self.client, self.audio_transcription_model)),
            ("Chat",
             lambda: create_chatbot_tab(self.chat_model)),
            ("Translation to English",
             lambda: create_translation_tab(self.fallback_translation_model)),
        ]

        with gr.Blocks(title="AI Building Blocks") as demo:
            gr.Markdown("# AI Building Blocks")
            gr.Markdown("A gallery of building blocks for building AI applications")
            with gr.Tabs():
                for title, build in tab_specs:
                    with gr.Tab(title):
                        build()

        demo.launch()
if __name__ == "__main__":
    # Load configuration from a local .env file, if present.
    load_dotenv()

    def _require_env(name: str) -> str:
        """Return the value of environment variable *name*.

        Raises:
            RuntimeError: If the variable is unset or empty, so that a
                missing model ID fails fast at startup instead of
                surfacing as an obscure error inside a tab at request
                time (``getenv`` would otherwise silently return None).
        """
        value = getenv(name)
        if not value:
            raise RuntimeError(
                f"Required environment variable {name} is not set. "
                "Define it in the environment or in a .env file."
            )
        return value

    app = App(
        client=InferenceClient(),
        text_to_image_model=_require_env("TEXT_TO_IMAGE_MODEL"),
        image_to_text_model=_require_env("IMAGE_TO_TEXT_MODEL"),
        image_classification_model=_require_env("IMAGE_CLASSIFICATION_MODEL"),
        text_to_speech_model=_require_env("TEXT_TO_SPEECH_MODEL"),
        audio_transcription_model=_require_env("AUDIO_TRANSCRIPTION_MODEL"),
        chat_model=_require_env("CHAT_MODEL"),
        fallback_translation_model=_require_env("FALLBACK_TRANSLATION_MODEL")
    )
    app.run()