LiKenun committed
Commit 39d9406 · 1 Parent(s): 33ec230

Reorganize structure for even less code clutter; `app.py` is greatly slimmed down
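In short, each feature module now exposes a create_*_tab() factory that wires up its own widgets, so app.py only composes tabs. A minimal sketch of the pattern; `echo` and `create_echo_tab` below are hypothetical stand-ins for illustration, not code from this commit:

import gradio as gr


def echo(text: str) -> str:
    return text  # Hypothetical model call, standing in for real inference.


def create_echo_tab() -> None:
    """Build one feature's widgets inside the caller's open gr.Tab context."""
    echo_input = gr.Textbox(label="Text")
    echo_button = gr.Button("Run")
    echo_output = gr.Textbox(label="Result")
    echo_button.click(fn=echo, inputs=echo_input, outputs=echo_output)


with gr.Blocks() as demo:
    with gr.Tab("Echo"):
        create_echo_tab()  # app.py shrinks to one call like this per tab.

demo.launch()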

app.py CHANGED
@@ -1,14 +1,12 @@
 from dotenv import load_dotenv
-from functools import partial
 import gradio as gr
 from huggingface_hub import InferenceClient
-from automatic_speech_recognition import automatic_speech_recognition
-from chatbot import chat
-from image_classification import image_classification
-from image_to_text import image_to_text
-from text_to_image import text_to_image
-from text_to_speech import text_to_speech
-from utils import request_image, request_audio
+from automatic_speech_recognition import create_asr_tab
+from chatbot import create_chatbot_tab
+from image_classification import create_image_classification_tab
+from image_to_text import create_image_to_text_tab
+from text_to_image import create_text_to_image_tab
+from text_to_speech import create_text_to_speech_tab


 class App:
@@ -22,103 +20,17 @@ class App:
             gr.Markdown("A gallery of building blocks for building AI applications")
             with gr.Tabs():
                 with gr.Tab("Text-to-image Generation"):
-                    gr.Markdown("Generate an image from a text prompt.")
-                    text_to_image_prompt = gr.Textbox(label="Prompt")
-                    text_to_image_generate_button = gr.Button("Generate")
-                    text_to_image_output = gr.Image(label="Image", type="pil")
-                    text_to_image_generate_button.click(
-                        fn=partial(text_to_image, self.client),
-                        inputs=text_to_image_prompt,
-                        outputs=text_to_image_output
-                    )
+                    create_text_to_image_tab(self.client)
                 with gr.Tab("Image-to-text or Image Captioning"):
-                    gr.Markdown("Generate a text description of an image.")
-                    image_to_text_url_input = gr.Textbox(label="Image URL")
-                    image_to_text_image_request_button = gr.Button("Get Image")
-                    image_to_text_image_input = gr.Image(label="Image", type="pil")
-                    image_to_text_image_request_button.click(
-                        fn=request_image,
-                        inputs=image_to_text_url_input,
-                        outputs=image_to_text_image_input
-                    )
-                    image_to_text_button = gr.Button("Caption")
-                    image_to_text_output = gr.List(label="Captions", headers=["Caption"])
-                    image_to_text_button.click(
-                        fn=image_to_text,
-                        inputs=image_to_text_image_input,
-                        outputs=image_to_text_output
-                    )
+                    create_image_to_text_tab()
                 with gr.Tab("Image Classification"):
-                    gr.Markdown("Classify a recyclable item as one of: cardboard, glass, metal, paper, plastic, or other using [Trash-Net](https://huggingface.co/prithivMLmods/Trash-Net).")
-                    image_classification_url_input = gr.Textbox(label="Image URL")
-                    image_classification_image_request_button = gr.Button("Get Image")
-                    image_classification_image_input = gr.Image(label="Image", type="pil")
-                    image_classification_image_request_button.click(
-                        fn=request_image,
-                        inputs=image_classification_url_input,
-                        outputs=image_classification_image_input
-                    )
-                    image_classification_button = gr.Button("Classify")
-                    image_classification_output = gr.Dataframe(label="Classification", headers=["Label", "Probability"], interactive=False)
-                    image_classification_button.click(
-                        fn=partial(image_classification, self.client),
-                        inputs=image_classification_image_input,
-                        outputs=image_classification_output
-                    )
+                    create_image_classification_tab(self.client)
                 with gr.Tab("Text-to-speech (TTS)"):
-                    gr.Markdown("Generate speech from text.")
-                    text_to_speech_text = gr.Textbox(label="Text")
-                    text_to_speech_generate_button = gr.Button("Generate")
-                    text_to_speech_output = gr.Audio(label="Speech")
-                    text_to_speech_generate_button.click(
-                        fn=text_to_speech,
-                        inputs=text_to_speech_text,
-                        outputs=text_to_speech_output
-                    )
+                    create_text_to_speech_tab()
                 with gr.Tab("Automatic Speech Recognition (ASR)"):
-                    gr.Markdown("Transcribe audio to text.")
-                    audio_transcription_url_input = gr.Textbox(label="Audio URL")
-                    audio_transcription_audio_request_button = gr.Button("Get Audio")
-                    audio_transcription_audio_input = gr.Audio(label="Audio")
-                    audio_transcription_audio_request_button.click(
-                        fn=request_audio,
-                        inputs=audio_transcription_url_input,
-                        outputs=audio_transcription_audio_input
-                    )
-                    audio_transcription_generate_button = gr.Button("Transcribe")
-                    audio_transcription_output = gr.Textbox(label="Text")
-                    audio_transcription_generate_button.click(
-                        fn=partial(automatic_speech_recognition, self.client),
-                        inputs=audio_transcription_audio_input,
-                        outputs=audio_transcription_output
-                    )
+                    create_asr_tab(self.client)
                 with gr.Tab("Chat"):
-                    gr.Markdown("Have a conversation with an AI chatbot.")
-                    chatbot_history = gr.State(value=None) # Store the conversation history.
-                    chatbot_output = gr.Chatbot(label="Conversation")
-                    chatbot_input = gr.Textbox(label="Your message")
-                    chatbot_send_button = gr.Button("Send")
-
-                    def chat_interface(message: str, history: list | None, conversation_state: list[dict] | None):
-                        """Handle chatbot interaction with Gradio chat format."""
-                        if not message.strip():
-                            return history, conversation_state, ""
-                        response, updated_conversation = chat(message, conversation_state) # Get response from chatbot.
-                        if history is None: # Update Gradio chat history format: list of [user_message, bot_message] pairs.
-                            history = []
-                        history.append([message, response])
-                        return history, updated_conversation, "" # Clear input field for the next message from the user.
-
-                    chatbot_send_button.click(
-                        fn=chat_interface,
-                        inputs=[chatbot_input, chatbot_output, chatbot_history],
-                        outputs=[chatbot_output, chatbot_history, chatbot_input]
-                    )
-                    chatbot_input.submit(
-                        fn=chat_interface,
-                        inputs=[chatbot_input, chatbot_output, chatbot_history],
-                        outputs=[chatbot_output, chatbot_history, chatbot_input]
-                    )
+                    create_chatbot_tab()

         demo.launch()
 
automatic_speech_recognition.py CHANGED
@@ -1,6 +1,8 @@
+from functools import partial
 from huggingface_hub import InferenceClient
 from os import getenv, path, unlink
-from utils import save_audio_to_temp_file, get_model_sample_rate
+import gradio as gr
+from utils import save_audio_to_temp_file, get_model_sample_rate, request_audio

 def automatic_speech_recognition(client: InferenceClient, audio: tuple[int, bytes]) -> str:
     temp_file_path = None
@@ -16,3 +18,23 @@ def automatic_speech_recognition(client: InferenceClient, audio: tuple[int, byte
             unlink(temp_file_path)
         except Exception:
             pass # Ignore clean-up errors.
+
+
+def create_asr_tab(client: InferenceClient):
+    """Create the automatic speech recognition tab."""
+    gr.Markdown("Transcribe audio to text.")
+    audio_transcription_url_input = gr.Textbox(label="Audio URL")
+    audio_transcription_audio_request_button = gr.Button("Get Audio")
+    audio_transcription_audio_input = gr.Audio(label="Audio")
+    audio_transcription_audio_request_button.click(
+        fn=request_audio,
+        inputs=audio_transcription_url_input,
+        outputs=audio_transcription_audio_input
+    )
+    audio_transcription_generate_button = gr.Button("Transcribe")
+    audio_transcription_output = gr.Textbox(label="Text")
+    audio_transcription_generate_button.click(
+        fn=partial(automatic_speech_recognition, client),
+        inputs=audio_transcription_audio_input,
+        outputs=audio_transcription_output
+    )
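A note on the recurring `partial(fn, client)` pattern above: `functools.partial` pre-binds the shared `InferenceClient`, so the callback Gradio invokes takes only the UI inputs. A minimal sketch with a hypothetical two-argument handler:

from functools import partial


def generate(client: str, prompt: str) -> str:
    return f"{client}: {prompt}"  # Hypothetical handler; a real one would call the API.


bound = partial(generate, "client-0")  # Bind the first argument up front.
assert bound("a red bicycle") == "client-0: a red bicycle"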
chatbot.py CHANGED
@@ -1,4 +1,5 @@
 from os import getenv
+import gradio as gr
 from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
 from utils import get_pytorch_device, spaces_gpu

@@ -125,3 +126,33 @@ def chat(message: str, conversation_history: list[dict] | None) -> tuple[str, li
     conversation_history.append({"role": "assistant", "content": response})

     return response, conversation_history
+
+
+def create_chatbot_tab():
+    """Create the chatbot tab."""
+    gr.Markdown("Have a conversation with an AI chatbot.")
+    chatbot_history = gr.State(value=None) # Store the conversation history.
+    chatbot_output = gr.Chatbot(label="Conversation")
+    chatbot_input = gr.Textbox(label="Your message")
+    chatbot_send_button = gr.Button("Send")
+
+    def chat_interface(message: str, history: list | None, conversation_state: list[dict] | None):
+        """Handle chatbot interaction with Gradio chat format."""
+        if not message.strip():
+            return history, conversation_state, ""
+        response, updated_conversation = chat(message, conversation_state) # Get response from chatbot.
+        if history is None: # Update Gradio chat history format: list of [user_message, bot_message] pairs.
+            history = []
+        history.append([message, response])
+        return history, updated_conversation, "" # Clear input field for the next message from the user.
+
+    chatbot_send_button.click(
+        fn=chat_interface,
+        inputs=[chatbot_input, chatbot_output, chatbot_history],
+        outputs=[chatbot_output, chatbot_history, chatbot_input]
+    )
+    chatbot_input.submit(
+        fn=chat_interface,
+        inputs=[chatbot_input, chatbot_output, chatbot_history],
+        outputs=[chatbot_output, chatbot_history, chatbot_input]
+    )
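Note that `create_chatbot_tab` keeps two parallel histories: `gr.State` holds the model-facing list of role/content dicts, while `gr.Chatbot` renders [user_message, bot_message] pairs, and `chat_interface` keeps them in sync. A minimal sketch of the correspondence, with hypothetical data and no Gradio dependency:

# Model-facing history: alternating role/content dicts (hypothetical turns).
conversation_state = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
]
# Display history in gr.Chatbot's pair format, rebuilt from the same turns.
history = [
    [user["content"], bot["content"]]
    for user, bot in zip(conversation_state[::2], conversation_state[1::2])
]
assert history == [["Hi", "Hello!"]]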
image_classification.py CHANGED
@@ -1,9 +1,11 @@
+from functools import partial
 from huggingface_hub import InferenceClient
 from os import path, unlink, getenv
+import gradio as gr
 from PIL.Image import Image
 import pandas as pd
 from pandas import DataFrame
-from utils import save_image_to_temp_file
+from utils import save_image_to_temp_file, request_image


 def image_classification(client: InferenceClient, image: Image) -> DataFrame:
@@ -22,3 +24,23 @@ def image_classification(client: InferenceClient, image: Image) -> DataFrame:
             unlink(temp_file_path)
         except Exception:
             pass # Ignore clean-up errors.
+
+
+def create_image_classification_tab(client: InferenceClient):
+    """Create the image classification tab."""
+    gr.Markdown("Classify a recyclable item as one of: cardboard, glass, metal, paper, plastic, or other using [Trash-Net](https://huggingface.co/prithivMLmods/Trash-Net).")
+    image_classification_url_input = gr.Textbox(label="Image URL")
+    image_classification_image_request_button = gr.Button("Get Image")
+    image_classification_image_input = gr.Image(label="Image", type="pil")
+    image_classification_image_request_button.click(
+        fn=request_image,
+        inputs=image_classification_url_input,
+        outputs=image_classification_image_input
+    )
+    image_classification_button = gr.Button("Classify")
+    image_classification_output = gr.Dataframe(label="Classification", headers=["Label", "Probability"], interactive=False)
+    image_classification_button.click(
+        fn=partial(image_classification, client),
+        inputs=image_classification_image_input,
+        outputs=image_classification_output
+    )
image_to_text.py CHANGED
@@ -1,8 +1,9 @@
 import gc
 from os import getenv
+import gradio as gr
 from PIL.Image import Image
 from transformers import AutoProcessor, BlipForConditionalGeneration
-from utils import get_pytorch_device, spaces_gpu
+from utils import get_pytorch_device, spaces_gpu, request_image


 @spaces_gpu
@@ -20,3 +21,23 @@ def image_to_text(image: Image) -> list[str]:
     del model, inputs
     gc.collect()
     return results
+
+
+def create_image_to_text_tab():
+    """Create the image-to-text captioning tab."""
+    gr.Markdown("Generate a text description of an image.")
+    image_to_text_url_input = gr.Textbox(label="Image URL")
+    image_to_text_image_request_button = gr.Button("Get Image")
+    image_to_text_image_input = gr.Image(label="Image", type="pil")
+    image_to_text_image_request_button.click(
+        fn=request_image,
+        inputs=image_to_text_url_input,
+        outputs=image_to_text_image_input
+    )
+    image_to_text_button = gr.Button("Caption")
+    image_to_text_output = gr.List(label="Captions", headers=["Caption"])
+    image_to_text_button.click(
+        fn=image_to_text,
+        inputs=image_to_text_image_input,
+        outputs=image_to_text_output
+    )
text_to_image.py CHANGED
@@ -1,7 +1,22 @@
+from functools import partial
 from os import getenv
+import gradio as gr
 from PIL.Image import Image
 from huggingface_hub import InferenceClient


 def text_to_image(client: InferenceClient, prompt: str) -> Image:
     return client.text_to_image(prompt, model=getenv("TEXT_TO_IMAGE_MODEL"))
+
+
+def create_text_to_image_tab(client: InferenceClient):
+    """Create the text-to-image generation tab."""
+    gr.Markdown("Generate an image from a text prompt.")
+    text_to_image_prompt = gr.Textbox(label="Prompt")
+    text_to_image_generate_button = gr.Button("Generate")
+    text_to_image_output = gr.Image(label="Image", type="pil")
+    text_to_image_generate_button.click(
+        fn=partial(text_to_image, client),
+        inputs=text_to_image_prompt,
+        outputs=text_to_image_output
+    )
text_to_speech.py CHANGED
@@ -1,5 +1,6 @@
 import gc
 from os import getenv
+import gradio as gr
 from transformers import pipeline
 from utils import spaces_gpu

@@ -15,3 +16,16 @@ def text_to_speech(text: str) -> tuple[int, bytes]:
     del narrator
     gc.collect()
     return (result["sampling_rate"], result["audio"][0])
+
+
+def create_text_to_speech_tab():
+    """Create the text-to-speech tab."""
+    gr.Markdown("Generate speech from text.")
+    text_to_speech_text = gr.Textbox(label="Text")
+    text_to_speech_generate_button = gr.Button("Generate")
+    text_to_speech_output = gr.Audio(label="Speech")
+    text_to_speech_generate_button.click(
+        fn=text_to_speech,
+        inputs=text_to_speech_text,
+        outputs=text_to_speech_output
+    )