import gradio as gr
import evaluate

# Load the CLIP evaluation metric module from the Hugging Face Hub
clip_metric = evaluate.load("d-matrix/clip_eval")
print("Successfully loaded CLIP evaluation metric")
# Checkpoints and retrieval datasets offered in the UI
AVAILABLE_MODELS = [
    "openai/clip-vit-base-patch32",
    "openai/clip-vit-large-patch14",
    "openai/clip-vit-base-patch16",
]
AVAILABLE_DATASETS = ["mscoco", "flickr"]
with gr.Blocks(title="CLIP Evaluation") as demo:
    gr.Markdown("# CLIP Model Evaluation")
    gr.Markdown(
        """
        This tool evaluates CLIP models on image-text retrieval tasks using standard datasets.
        """
    )

    with gr.Row():
        # Left column: evaluation settings
        with gr.Column():
            model_input = gr.Dropdown(
                choices=AVAILABLE_MODELS, value=AVAILABLE_MODELS[0], label="CLIP Model"
            )
            dataset_input = gr.Dropdown(
                choices=AVAILABLE_DATASETS, value="mscoco", label="Dataset"
            )
            samples_input = gr.Slider(
                minimum=1, maximum=10, value=1, step=1, label="Number of samples"
            )
            evaluate_button = gr.Button("Evaluate Model")
        # Right column: rendered results
        with gr.Column():
            results_output = gr.Markdown("Results will appear here")
    def evaluate_clip(model_name, dataset, num_samples, progress=gr.Progress()):
        """Run the metric for one model/dataset pair and format a Markdown report."""
        progress(0, desc="Evaluating CLIP model...")
        # The metric accepts lists so that several model/dataset/sample-count
        # combinations can be scored in one call; here we pass one of each.
        results = clip_metric.compute(
            model_name=[model_name],
            dataset_names=[dataset],
            n_examples=[int(num_samples)],
        )

        output = "## CLIP Evaluation Results\n\n"
        output += f"**Model:** {model_name}\n"
        output += f"**Dataset:** {dataset}\n"
        output += f"**Samples:** {int(num_samples)}\n\n"

        output += "**Image Retrieval (Text→Image):**\n"
        for k in [1, 5, 10]:
            metric_name = f"{dataset}:image_recall@{k}"
            if metric_name in results:
                output += f"* Recall@{k}: {results[metric_name]:.4f}\n"

        output += "\n**Text Retrieval (Image→Text):**\n"
        for k in [1, 5, 10]:
            metric_name = f"{dataset}:text_recall@{k}"
            if metric_name in results:
                output += f"* Recall@{k}: {results[metric_name]:.4f}\n"
        return output
    # Wire the button to the evaluation function
    evaluate_button.click(
        fn=evaluate_clip,
        inputs=[model_input, dataset_input, samples_input],
        outputs=results_output,
    )
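
# --- Headless smoke test (sketch) ------------------------------------------
# A minimal way to exercise the metric without the UI. It reuses the
# compute() keyword arguments from evaluate_clip above and assumes the metric
# returns a flat dict keyed like "mscoco:image_recall@1" (the same keys
# evaluate_clip parses). Uncomment to run a quick check:
#
# results = clip_metric.compute(
#     model_name=["openai/clip-vit-base-patch32"],
#     dataset_names=["mscoco"],
#     n_examples=[1],
# )
# print(results.get("mscoco:image_recall@1"))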
if __name__ == "__main__":
    demo.launch()
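    # demo.launch() serves locally by default; passing share=True (a standard
    # Gradio option) would additionally create a temporary public URL.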