# Commit b71a3ad (LiKenun): Switch text-to-image and automatic speech
# recognition (ASR) back to using the Hugging Face inference client;
# Zero GPU cannot accommodate the time it takes for those tasks.
from dotenv import load_dotenv
from os import getenv
import gradio as gr
from huggingface_hub import InferenceClient
from automatic_speech_recognition import create_asr_tab
from chatbot import create_chatbot_tab
from image_classification import create_image_classification_tab
from image_to_text import create_image_to_text_tab
from text_to_image import create_text_to_image_tab
from text_to_speech import create_text_to_speech_tab
from translation import create_translation_tab
class App:
    """Top-level Gradio application for the AI Building Blocks demo.

    Wires each AI building block into its own tab of a single Gradio
    Blocks interface and launches the whole thing.
    """
    def __init__(
        self,
        client: InferenceClient,
        text_to_image_model: str,
        image_to_text_model: str,
        image_classification_model: str,
        text_to_speech_model: str,
        audio_transcription_model: str,
        chat_model: str,
        fallback_translation_model: str
    ):
        """Store the inference client and the model ID for each tab.

        Args:
            client: Hugging Face InferenceClient for remote inference
                calls (used by the text-to-image and ASR tabs).
            text_to_image_model: Model ID for text-to-image generation.
            image_to_text_model: Model ID for image captioning.
            image_classification_model: Model ID for image classification.
            text_to_speech_model: Model ID for text-to-speech.
            audio_transcription_model: Model ID for automatic speech
                recognition.
            chat_model: Model ID for the chatbot.
            fallback_translation_model: Translation model ID used as a
                fallback for languages without a dedicated model.
        """
        self.client = client
        self.text_to_image_model = text_to_image_model
        self.image_to_text_model = image_to_text_model
        self.image_classification_model = image_classification_model
        self.text_to_speech_model = text_to_speech_model
        self.audio_transcription_model = audio_transcription_model
        self.chat_model = chat_model
        self.fallback_translation_model = fallback_translation_model
    def run(self):
        """Build the tabbed UI and launch it.

        Blocks until the Gradio interface is closed.
        """
        # One entry per tab: (tab label, zero-arg builder that renders
        # the tab's components into the current Gradio context).
        tab_builders = [
            ("Text-to-image Generation",
             lambda: create_text_to_image_tab(self.client, self.text_to_image_model)),
            ("Image-to-text or Image Captioning",
             lambda: create_image_to_text_tab(self.image_to_text_model)),
            ("Image Classification",
             lambda: create_image_classification_tab(self.image_classification_model)),
            ("Text-to-speech (TTS)",
             lambda: create_text_to_speech_tab(self.text_to_speech_model)),
            ("Automatic Speech Recognition (ASR)",
             lambda: create_asr_tab(self.client, self.audio_transcription_model)),
            ("Chat",
             lambda: create_chatbot_tab(self.chat_model)),
            ("Translation to English",
             lambda: create_translation_tab(self.fallback_translation_model)),
        ]
        with gr.Blocks(title="AI Building Blocks") as demo:
            gr.Markdown("# AI Building Blocks")
            gr.Markdown("A gallery of building blocks for building AI applications")
            with gr.Tabs():
                for label, build in tab_builders:
                    with gr.Tab(label):
                        build()
        demo.launch()
def main() -> None:
    """Configure and launch the AI Building Blocks application.

    Loads environment variables (optionally from a local ``.env`` file
    via ``load_dotenv``) and constructs the :class:`App` with the model
    IDs they supply. Blocks until the Gradio interface is closed.

    Note: ``getenv`` returns ``None`` for any unset variable, so a
    missing model ID only surfaces when its tab is actually used.
    """
    load_dotenv()
    app = App(
        client=InferenceClient(),
        text_to_image_model=getenv("TEXT_TO_IMAGE_MODEL"),
        image_to_text_model=getenv("IMAGE_TO_TEXT_MODEL"),
        image_classification_model=getenv("IMAGE_CLASSIFICATION_MODEL"),
        text_to_speech_model=getenv("TEXT_TO_SPEECH_MODEL"),
        audio_transcription_model=getenv("AUDIO_TRANSCRIPTION_MODEL"),
        chat_model=getenv("CHAT_MODEL"),
        fallback_translation_model=getenv("FALLBACK_TRANSLATION_MODEL")
    )
    app.run()


if __name__ == "__main__":
    main()