from dotenv import load_dotenv
from functools import partial

import gradio as gr
from huggingface_hub import InferenceClient

from image_classification import image_classification
from image_to_text import image_to_text
from text_to_image import text_to_image
from text_to_speech import text_to_speech
from utils import request_image


class App:
    """Gradio demo exposing a gallery of AI building blocks as tabs.

    Each tab wires UI widgets to an inference function. Functions that
    need the shared Hugging Face client receive it via ``functools.partial``;
    the others (``image_to_text``, ``text_to_speech``) are called as-is —
    presumably they manage their own client (NOTE(review): confirm).
    """

    def __init__(self, client: InferenceClient):
        # Shared inference client, passed to the tab callbacks that take one.
        self.client = client

    def _build_text_to_image_tab(self) -> None:
        """Build the text-to-image generation tab."""
        with gr.Tab("Text-to-image Generation"):
            gr.Markdown("Generate an image from a text prompt.")
            prompt = gr.Textbox(label="Prompt")
            generate_button = gr.Button("Generate")
            output = gr.Image(label="Image", type="pil")
            generate_button.click(
                fn=partial(text_to_image, self.client),
                inputs=prompt,
                outputs=output,
            )

    def _build_image_to_text_tab(self) -> None:
        """Build the image-captioning tab (URL fetch + caption)."""
        with gr.Tab("Image-to-text or Image Captioning"):
            gr.Markdown("Generate a text description of an image.")
            url_input = gr.Textbox(label="Image URL")
            fetch_button = gr.Button("Get Image")
            image_input = gr.Image(label="Image", type="pil")
            # First click fetches the image from the URL into the Image widget...
            fetch_button.click(
                fn=request_image,
                inputs=url_input,
                outputs=image_input,
            )
            captions_output = gr.List(label="Captions", headers=["Caption"])
            caption_button = gr.Button("Caption")
            # ...second click runs captioning on the fetched image.
            caption_button.click(
                fn=image_to_text,
                inputs=image_input,
                outputs=captions_output,
            )

    def _build_image_classification_tab(self) -> None:
        """Build the recyclable-item classification tab (Trash-Net)."""
        with gr.Tab("Image Classification"):
            gr.Markdown("Classify a recyclable item as one of: cardboard, glass, metal, paper, plastic, or other using [Trash-Net](https://huggingface.co/prithivMLmods/Trash-Net).")
            url_input = gr.Textbox(label="Image URL")
            fetch_button = gr.Button("Get Image")
            image_input = gr.Image(label="Image", type="pil")
            fetch_button.click(
                fn=request_image,
                inputs=url_input,
                outputs=image_input,
            )
            classify_button = gr.Button("Classify")
            classification_output = gr.Dataframe(
                label="Classification",
                headers=["Label", "Probability"],
                interactive=False,
            )
            classify_button.click(
                fn=partial(image_classification, self.client),
                inputs=image_input,
                outputs=classification_output,
            )

    def _build_text_to_speech_tab(self) -> None:
        """Build the text-to-speech tab."""
        with gr.Tab("Text-to-speech (TTS)"):
            gr.Markdown("Generate speech from a text.")
            text_input = gr.Textbox(label="Text")
            generate_button = gr.Button("Generate")
            speech_output = gr.Audio(label="Speech")
            generate_button.click(
                fn=text_to_speech,
                inputs=text_input,
                outputs=speech_output,
            )

    def run(self) -> None:
        """Assemble the tabbed UI and launch the Gradio server (blocking)."""
        with gr.Blocks(title="AI Building Blocks") as demo:
            gr.Markdown("# AI Building Blocks")
            gr.Markdown("A gallery of building blocks for building AI applications")
            with gr.Tabs():
                self._build_text_to_image_tab()
                self._build_image_to_text_tab()
                self._build_image_classification_tab()
                self._build_text_to_speech_tab()
            demo.launch()


if __name__ == "__main__":
    # Load credentials (e.g. HF token) from .env before creating the client.
    load_dotenv()
    app = App(InferenceClient())
    app.run()