Spaces:

Abrahamau
/

gradiotest

Sleeping

App Files Files Community

gradiotest / app.py

Abrahamau

Update app.py

2abedae verified 12 months ago

raw

history blame

3.57 kB

	import torch
	import os
	import random
	import gradio as gr

	from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan, pipeline
	import base64
	from datasets import load_dataset
	from diffusers import DiffusionPipeline
	from huggingface_hub import login
	import numpy as np
	import spaces

	@spaces.GPU
	def guessanImage(model, image):
	    """Classify ``image`` with the image-classification model ``model``.

	    Args:
	        model: Model identifier handed to ``transformers.pipeline``.
	        image: Image to classify, or ``None`` when nothing was supplied.

	    Returns:
	        Whatever the image-classification pipeline returns for ``image``,
	        or ``None`` when ``image`` is ``None``.
	    """
	    # Guard first: without an image there is nothing to classify, so do not
	    # pay for loading the model and never reach the return with an unbound
	    # result.
	    if image is None:
	        return None

	    imgclassifier = pipeline("image-classification", model=model)
	    return imgclassifier(image)

	@spaces.GPU
	def guessanAge(model, image):
	    """Run the age-classification model ``model`` on ``image``.

	    Args:
	        model: Model identifier handed to ``transformers.pipeline``.
	        image: Image to classify, or ``None`` when nothing was supplied.

	    Returns:
	        Whatever the image-classification pipeline returns for ``image``,
	        or ``None`` when ``image`` is ``None``.
	    """
	    # Guard first: skip the model load entirely when no image was given and
	    # avoid returning a name that was never assigned.
	    if image is None:
	        return None

	    imgclassifier = pipeline("image-classification", model=model)
	    return imgclassifier(image)

	@spaces.GPU(duration=120)
	def text2speech(model, text, voice):
	    """Synthesise ``text`` as speech using ``model`` and a speaker x-vector.

	    Args:
	        model: Model identifier handed to the "text-to-speech" pipeline.
	        text: Text to speak. Empty or ``None`` yields no audio.
	        voice: Row index into the validation split of the
	            "Matthijs/cmu-arctic-xvectors" dataset; that row's "xvector"
	            is used as the speaker embedding.

	    Returns:
	        A ``(sampling_rate, samples)`` tuple where ``samples`` is a 1-D
	        ``int16`` NumPy array, or ``None`` when ``text`` is empty.
	    """
	    print(voice)
	    # `not text` also covers None, which `len(text)` would crash on.
	    if not text:
	        return None

	    synthesiser = pipeline("text-to-speech", model=model)

	    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
	    # unsqueeze(0) adds a leading batch dimension to the embedding.
	    speaker_embedding = torch.tensor(embeddings_dataset[voice]["xvector"]).unsqueeze(0)

	    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})

	    # Convert by value rather than reinterpreting raw bytes: np.frombuffer
	    # would silently produce garbage for any dtype other than float32 and
	    # fails on non-contiguous arrays. ravel() keeps the flat 1-D layout.
	    audio_data = np.asarray(speech["audio"], dtype=np.float32).ravel()
	    # Clip before scaling so samples outside [-1, 1] saturate instead of
	    # wrapping around when cast to int16.
	    audio_data_16bit = (np.clip(audio_data, -1.0, 1.0) * 32767).astype(np.int16)
	    return speech["sampling_rate"], audio_data_16bit

	@spaces.GPU
	def ImageGenFromText(text, model):
	    """Generate a 512x512 image from the prompt ``text`` using ``model``.

	    Args:
	        text: Prompt for the diffusion pipeline. Empty or ``None`` yields
	            no image.
	        model: Model identifier handed to
	            ``DiffusionPipeline.from_pretrained``.

	    Returns:
	        The first image produced by the pipeline, or ``None`` when ``text``
	        is empty.
	    """
	    # Validate input before any login or model work; `not text` also covers
	    # None, which `len(text)` would crash on.
	    if not text:
	        return None

	    # Only log in when a token is actually configured: calling
	    # login(token=None) falls back to an interactive prompt, which cannot
	    # be answered in a hosted app.
	    api_key = os.getenv("fluxauth")
	    if api_key:
	        login(token=api_key)

	    dtype = torch.bfloat16
	    device = "cuda" if torch.cuda.is_available() else "cpu"

	    # Fresh random seed per request, bounded to the int32 range.
	    max_seed = np.iinfo(np.int32).max
	    seed = random.randint(0, max_seed)

	    pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=dtype).to(device)
	    generator = torch.Generator().manual_seed(seed)
	    image = pipe(
	        prompt=text,
	        width=512,
	        height=512,
	        num_inference_steps=4,
	        generator=generator,
	        guidance_scale=0.0,
	    ).images[0]
	    print(image)
	    return image


	# --- Tab 1: general image classification ---------------------------------
	radio1 = gr.Radio(
	    ["microsoft/resnet-50", "google/vit-base-patch16-224", "apple/mobilevit-small"],
	    value="microsoft/resnet-50",
	    label="Select a Classifier",
	    info="Image Classifier",
	)
	tab1 = gr.Interface(
	    fn=guessanImage,
	    inputs=[radio1, gr.Image(type="pil")],
	    outputs=["text"],
	)

	# --- Tab 2: age classification --------------------------------------------
	radio2 = gr.Radio(
	    ["nateraw/vit-age-classifier"],
	    value="nateraw/vit-age-classifier",
	    label="Select an Age Classifier",
	    info="Age Classifier",
	)
	tab2 = gr.Interface(
	    fn=guessanAge,
	    inputs=[radio2, gr.Image(type="pil")],
	    outputs=["text"],
	)

	# --- Tab 3: text to speech ------------------------------------------------
	textbox = gr.Textbox(value="good morning pineapple! looking very good very nice!")
	# Label/info previously carried text copy-pasted from the age-classifier tab.
	radio3 = gr.Radio(
	    ["microsoft/speecht5_tts"],
	    value="microsoft/speecht5_tts",
	    label="Select a TTS Model",
	    info="Text-to-Speech Model",
	)
	# Each choice is (display name, value); the value is passed to text2speech
	# as `voice`, where it indexes the speaker-embedding dataset.
	radio3_1 = gr.Radio(
	    [
	        ("Scottish male (awb)", 0),
	        ("US male (bdl)", 1138),
	        ("US female (clb)", 2271),
	        ("Canadian male (jmk)", 3403),
	        ("Indian male (ksp)", 4535),
	        ("US male (rms)", 5667),
	        ("US female (slt)", 6799),
	    ],
	    value=4535,
	)
	tab3 = gr.Interface(
	    fn=text2speech,
	    inputs=[radio3, textbox, radio3_1],
	    outputs=["audio"],
	)

	# --- Tab 4: text to image -------------------------------------------------
	radio4 = gr.Radio(
	    ["black-forest-labs/FLUX.1-schnell"],
	    value="black-forest-labs/FLUX.1-schnell",
	    label="Select",
	    info="text to image",
	)
	tab4 = gr.Interface(
	    fn=ImageGenFromText,
	    inputs=["text", radio4],
	    outputs=["image"],
	)

	# Assemble the four tabs into one app and start serving it.
	demo = gr.TabbedInterface(
	    [tab1, tab2, tab3, tab4],
	    ["Describe", "Estimate Age", "Speak", "Generate Image"],
	)
	demo.launch()