# ai-building-blocks / text_to_image.py
# NOTE: Switched text-to-image and automatic speech recognition (ASR) back to
# using the Hugging Face inference client; Zero GPU cannot accommodate the time
# it takes for those tasks. (commit b71a3ad, LiKenun)
from functools import partial
import gradio as gr
from PIL.Image import Image
from huggingface_hub import InferenceClient
def text_to_image(client: InferenceClient, model: str, prompt: str) -> Image:
    """Generate an image for *prompt* with the Hugging Face Inference API.

    Inference runs on Hugging Face's hosted infrastructure rather than
    locally, which suits environments with tight GPU memory or runtime
    limits (e.g. Hugging Face Spaces with Zero GPU).

    Args:
        client: Configured ``InferenceClient`` used to reach the API.
        model: Hugging Face model ID for the text-to-image task.
        prompt: Natural-language description of the image to generate.

    Returns:
        The generated image as a PIL ``Image``.
    """
    generated = client.text_to_image(prompt, model=model)
    return generated
def create_text_to_image_tab(client: InferenceClient, model: str):
    """Build the Gradio UI for the text-to-image tab.

    Lays out a prompt textbox, a generate button, and a PIL image output,
    then wires the button's click event to :func:`text_to_image` with the
    client and model pre-bound.

    Args:
        client: ``InferenceClient`` forwarded to :func:`text_to_image`.
        model: Hugging Face model ID forwarded to :func:`text_to_image`.
    """
    gr.Markdown("Generate an image from a text prompt.")
    prompt_box = gr.Textbox(label="Prompt")
    generate_button = gr.Button("Generate")
    image_output = gr.Image(label="Image", type="pil")

    # Pre-bind client and model so the event handler only receives the prompt.
    generate_button.click(
        fn=partial(text_to_image, client, model),
        inputs=prompt_box,
        outputs=image_output
    )