from dotenv import load_dotenv
from functools import partial

import gradio as gr
from huggingface_hub import InferenceClient

from image_classification import image_classification
from image_to_text import image_to_text
from text_to_image import text_to_image
from text_to_speech import text_to_speech
from utils import request_image


class App:
    """Gradio demo exposing a gallery of AI building blocks as tabs.

    Each tab wires UI widgets to an inference function. Functions that
    need the shared Hugging Face client receive it via ``functools.partial``;
    the others (``image_to_text``, ``text_to_speech``) are called as-is —
    presumably they manage their own client (NOTE(review): confirm).
    """

    def __init__(self, client: InferenceClient):
        # Shared inference client, passed to the tab callbacks that take one.
        self.client = client

    def _build_text_to_image_tab(self) -> None:
        """Build the text-to-image generation tab."""
        with gr.Tab("Text-to-image Generation"):
            gr.Markdown("Generate an image from a text prompt.")
            prompt = gr.Textbox(label="Prompt")
            generate_button = gr.Button("Generate")
            output = gr.Image(label="Image", type="pil")
            generate_button.click(
                fn=partial(text_to_image, self.client),
                inputs=prompt,
                outputs=output,
            )

    def _build_image_to_text_tab(self) -> None:
        """Build the image-captioning tab (URL fetch + caption)."""
        with gr.Tab("Image-to-text or Image Captioning"):
            gr.Markdown("Generate a text description of an image.")
            url_input = gr.Textbox(label="Image URL")
            fetch_button = gr.Button("Get Image")
            image_input = gr.Image(label="Image", type="pil")
            # First click fetches the image from the URL into the Image widget...
            fetch_button.click(
                fn=request_image,
                inputs=url_input,
                outputs=image_input,
            )
            captions_output = gr.List(label="Captions", headers=["Caption"])
            caption_button = gr.Button("Caption")
            # ...second click runs captioning on the fetched image.
            caption_button.click(
                fn=image_to_text,
                inputs=image_input,
                outputs=captions_output,
            )

    def _build_image_classification_tab(self) -> None:
        """Build the recyclable-item classification tab (Trash-Net)."""
        with gr.Tab("Image Classification"):
            gr.Markdown("Classify a recyclable item as one of: cardboard, glass, metal, paper, plastic, or other using [Trash-Net](https://huggingface.co/prithivMLmods/Trash-Net).")
            url_input = gr.Textbox(label="Image URL")
            fetch_button = gr.Button("Get Image")
            image_input = gr.Image(label="Image", type="pil")
            fetch_button.click(
                fn=request_image,
                inputs=url_input,
                outputs=image_input,
            )
            classify_button = gr.Button("Classify")
            classification_output = gr.Dataframe(
                label="Classification",
                headers=["Label", "Probability"],
                interactive=False,
            )
            classify_button.click(
                fn=partial(image_classification, self.client),
                inputs=image_input,
                outputs=classification_output,
            )

    def _build_text_to_speech_tab(self) -> None:
        """Build the text-to-speech tab."""
        with gr.Tab("Text-to-speech (TTS)"):
            gr.Markdown("Generate speech from a text.")
            text_input = gr.Textbox(label="Text")
            generate_button = gr.Button("Generate")
            speech_output = gr.Audio(label="Speech")
            generate_button.click(
                fn=text_to_speech,
                inputs=text_input,
                outputs=speech_output,
            )

    def run(self) -> None:
        """Assemble the tabbed UI and launch the Gradio server (blocking)."""
        with gr.Blocks(title="AI Building Blocks") as demo:
            gr.Markdown("# AI Building Blocks")
            gr.Markdown("A gallery of building blocks for building AI applications")
            with gr.Tabs():
                self._build_text_to_image_tab()
                self._build_image_to_text_tab()
                self._build_image_classification_tab()
                self._build_text_to_speech_tab()
            demo.launch()


if __name__ == "__main__":
    # Load credentials (e.g. HF token) from .env before creating the client.
    load_dotenv()
    app = App(InferenceClient())
    app.run()