import gradio as gr
import os
from vyvotts.audio_tokenizer import process_dataset
import spaces
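
# ZeroGPU note: the decorator below requests a GPU for each call and releases it after
# at most `duration` seconds (120 s here), so processing must finish within that window.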
@spaces.GPU(duration=120)
def process_dataset_ui(
    original_dataset,
    output_dataset,
    model_type,
    text_field,
    hf_token
):
    """
    Process a dataset from the Gradio UI using ZeroGPU.

    Args:
        original_dataset: HuggingFace dataset path to process
        output_dataset: Output dataset path on HuggingFace Hub
        model_type: Model type - either "qwen3" or "lfm2"
        text_field: Name of the text field in the dataset
        hf_token: HuggingFace token for authentication

    Returns:
        Status message
    """
    try:
        # Set HuggingFace token
        os.environ["HF_TOKEN"] = hf_token

        # Validate inputs
        if not original_dataset or not output_dataset:
            return "❌ Error: Please provide both original and output dataset names"
        if not hf_token:
            return "❌ Error: Please provide a HuggingFace token"
        if model_type not in ["qwen3", "lfm2"]:
            return "❌ Error: Model type must be either 'qwen3' or 'lfm2'"

        # Process dataset
        process_dataset(
            original_dataset=original_dataset,
            output_dataset=output_dataset,
            model_type=model_type,
            text_field=text_field
        )

        return f"✅ Dataset processed successfully and uploaded to: {output_dataset}"

    except Exception as e:
        return f"❌ Error: {str(e)}"
# Create Gradio interface
with gr.Blocks(title="VyvoTTS Dataset Tokenizer") as demo:
    gr.Markdown("""
    # 🎙️ VyvoTTS Dataset Tokenizer

    Process audio datasets for VyvoTTS training by tokenizing both audio and text.

    ## Instructions:
    1. Enter your HuggingFace token (required for downloading and uploading datasets)
    2. Provide the original dataset path from HuggingFace Hub
    3. Specify the output dataset path where processed data will be uploaded
    4. Select the model type (Qwen3 or LFM2)
    5. Specify the text field name in your dataset
    6. Click "Process Dataset" to start

    **Note:** This process requires a GPU and may take several minutes depending on dataset size.
    """)
    with gr.Row():
        with gr.Column():
            hf_token = gr.Textbox(
                label="HuggingFace Token",
                placeholder="hf_...",
                type="password",
                info="Your HuggingFace token for authentication"
            )
            original_dataset = gr.Textbox(
                label="Original Dataset",
                placeholder="MrDragonFox/Elise",
                value="MrDragonFox/Elise",
                info="HuggingFace dataset path to process"
            )
            output_dataset = gr.Textbox(
                label="Output Dataset",
                placeholder="username/dataset-name",
                info="Output dataset path on HuggingFace Hub"
            )
            model_type = gr.Radio(
                choices=["qwen3", "lfm2"],
                value="qwen3",
                label="Model Type",
                info="Select the model type for tokenization"
            )
            text_field = gr.Textbox(
                label="Text Field Name",
                placeholder="text",
                value="text",
                info="Name of the text field in your dataset (e.g., 'text', 'text_scribe')"
            )
            process_btn = gr.Button("Process Dataset", variant="primary")

        with gr.Column():
            output = gr.Textbox(
                label="Status",
                placeholder="Status will appear here...",
                lines=10
            )
    process_btn.click(
        fn=process_dataset_ui,
        inputs=[original_dataset, output_dataset, model_type, text_field, hf_token],
        outputs=output
    )
    gr.Markdown("""
    ## 📝 Example Values:

    ### For Qwen3:
    - **Original Dataset:** `MrDragonFox/Elise`
    - **Output Dataset:** `username/elise-qwen3-processed`
    - **Model Type:** `qwen3`
    - **Text Field:** `text`

    ### For LFM2:
    - **Original Dataset:** `MrDragonFox/Elise`
    - **Output Dataset:** `username/elise-lfm2-processed`
    - **Model Type:** `lfm2`
    - **Text Field:** `text`

    ## ⚠️ Requirements:
    - GPU with CUDA support
    - HuggingFace account with write access
    - Valid HuggingFace token
    """)
if __name__ == "__main__":
    demo.launch()