"""Gradio demo: enroll a speaker's voiceprint and verify later recordings
against it with a cosine-similarity threshold."""

import os
import time

import gradio as gr
import torch
import torch.nn.functional as F
import torchaudio

from model import FullModel

# Directories for persisted audio samples (exist_ok makes this idempotent
# and race-free, replacing the exists()/makedirs() pairs).
os.makedirs("user_data/enrollments", exist_ok=True)
os.makedirs("user_data/verifications", exist_ok=True)

print("Loading model...")
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL = FullModel().to(DEVICE)
# Weights are deserialized onto CPU first, then copied into the (possibly GPU)
# model's parameters by load_state_dict.
MODEL.load_state_dict(torch.load("speaker_verification_model.pth", map_location=torch.device('cpu')))
MODEL.eval()
# Cosine-similarity decision threshold; scores above it count as a match.
# NOTE(review): presumably tuned offline on a validation set — confirm.
THRESHOLD = 0.5216
print("Model loaded successfully.")

# In-memory registry: username -> {"embedding": Tensor, "file": saved wav path}.
# NOTE(review): not persisted — all enrollments are lost on process restart.
ENROLLED_USERS = {}


def get_embedding(waveform):
    """Return the speaker embedding for a waveform tensor.

    Multi-channel audio is downmixed to mono by averaging channels.
    Inference runs under no_grad on DEVICE; the model returns
    (embedding, aux) and only the embedding is kept.
    """
    if waveform.shape[0] > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    with torch.no_grad():
        embedding, _ = MODEL(waveform.to(DEVICE))
    return embedding


def enroll_speaker(audio_filepath, username):
    """Enroll a user's voiceprint from a recorded audio file.

    Saves the raw audio under user_data/enrollments/ and caches the
    embedding in ENROLLED_USERS. Returns (status message, update for the
    verify_username textbox) matching the Gradio outputs binding.
    """
    if audio_filepath is None:
        return "Error: No audio recorded. Please record your voice first.", gr.update()
    if not username:
        # BUGFIX: was `None`, which clears the bound verify_username textbox;
        # gr.update() is a no-op, consistent with the branch above.
        return "Please enter a username.", gr.update()

    waveform, sr = torchaudio.load(audio_filepath)
    timestamp = int(time.time())
    filename = f"user_data/enrollments/{username}_{timestamp}.wav"
    torchaudio.save(filename, waveform, sr)

    embedding = get_embedding(waveform)
    ENROLLED_USERS[username] = {"embedding": embedding, "file": filename}
    # BUGFIX: log the actual saved path (the original printed a literal
    # "(unknown)" placeholder instead of the filename).
    print(f"Enrolled user '{username}' with file {filename}")
    # Pre-fill and lock the verification username field for convenience.
    return f"✅ User '{username}' successfully enrolled!", gr.update(value=username, interactive=False)


def verify_speaker(audio_filepath, username_to_verify):
    """Verify a recorded phrase against an enrolled user's embedding.

    Saves the attempt under user_data/verifications/, scores it with
    cosine similarity against the enrolled embedding, and returns
    (score message, decision message) for the Gradio outputs.
    """
    if audio_filepath is None:
        return "Error: No audio recorded. Please record your verification phrase.", None
    if not username_to_verify:
        return "Please enter the username to verify against.", None
    if username_to_verify not in ENROLLED_USERS:
        # BUGFIX: this message was split across physical lines in the source
        # (an unterminated string literal); restored to a single line.
        return f"User '{username_to_verify}' is not enrolled. Please enroll first.", None

    waveform, sr = torchaudio.load(audio_filepath)
    timestamp = int(time.time())
    filename = f"user_data/verifications/{username_to_verify}_attempt_{timestamp}.wav"
    torchaudio.save(filename, waveform, sr)

    enrolled_embedding = ENROLLED_USERS[username_to_verify]["embedding"]
    verification_embedding = get_embedding(waveform)
    score = F.cosine_similarity(enrolled_embedding, verification_embedding).item()
    decision = "✅ Match Verified" if score > THRESHOLD else "❌ No Match (Imposter)"
    print(f"Verification for '{username_to_verify}': Score={score:.4f}, Decision='{decision}'")
    return f"Similarity Score: {score:.4f}", decision


with gr.Blocks() as demo:
    gr.Markdown("# Voice Authentication System")
    gr.Markdown("❗ **Note:** For the best experience, please use **Google Chrome** or **Mozilla Firefox**. Audio recording may not work correctly on other browsers like Brave.")
    gr.Markdown("Enroll your voiceprint and then verify yourself.")
    with gr.Tabs():
        with gr.TabItem("Enrollment"):
            enroll_username = gr.Textbox(label="Enter a unique Username")
            enroll_audio = gr.Audio(sources=["microphone"], type="filepath", label="Record your enrollment phrase (3-5 seconds)")
            enroll_button = gr.Button("Enroll Voiceprint")
            enroll_output = gr.Textbox(label="Enrollment Status")
        with gr.TabItem("Verification"):
            verify_username = gr.Textbox(label="Enter your Username to verify")
            verify_audio = gr.Audio(sources=["microphone"], type="filepath", label="Record your verification phrase (must be different!)")
            verify_button = gr.Button("Verify My Voice")
            verify_score = gr.Textbox(label="Result Score")
            verify_decision = gr.Textbox(label="Final Decision")

    # Enrollment also writes into the Verification tab's username textbox.
    enroll_button.click(fn=enroll_speaker, inputs=[enroll_audio, enroll_username], outputs=[enroll_output, verify_username])
    verify_button.click(fn=verify_speaker, inputs=[verify_audio, verify_username], outputs=[verify_score, verify_decision])

if __name__ == "__main__":
    demo.queue().launch(share=True)