from typing import Tuple

import gradio as gr
import numpy as np
import supervision as sv
import torch
from PIL import Image
from transformers import SamModel, SamProcessor

from utils.efficient_sam import load, inference_with_box

MARKDOWN = """
# EfficientSAM vs. SAM

This is a demo for comparing the performance of
[EfficientSAM](https://arxiv.org/abs/2312.00863) and
[SAM](https://arxiv.org/abs/2304.02643).
"""

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SAM_MODEL = SamModel.from_pretrained("facebook/sam-vit-huge").to(DEVICE)
SAM_PROCESSOR = SamProcessor.from_pretrained("facebook/sam-vit-huge")
EFFICIENT_SAM_MODEL = load(device=DEVICE)

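# annotators used to draw the predicted mask and its bounding box in red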
MASK_ANNOTATOR = sv.MaskAnnotator(
    color=sv.Color.red(),
    color_lookup=sv.ColorLookup.INDEX)
BOX_ANNOTATOR = sv.BoundingBoxAnnotator(
    color=sv.Color.red(),
    color_lookup=sv.ColorLookup.INDEX)


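# the input image arrives from Gradio as RGB; it is flipped to BGR for the
# OpenCV-based annotators and flipped back to RGB before being returned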
def annotate_image(image: np.ndarray, detections: sv.Detections) -> np.ndarray:
    bgr_image = image[:, :, ::-1]
    annotated_bgr_image = MASK_ANNOTATOR.annotate(
        scene=bgr_image, detections=detections)
    annotated_bgr_image = BOX_ANNOTATOR.annotate(
        scene=annotated_bgr_image, detections=detections)
    return annotated_bgr_image[:, :, ::-1]


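# EfficientSAM takes the box prompt as two corner points ((x_min, y_min) and
# (x_max, y_max)); the returned mask is wrapped in a Detections object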
def efficient_sam_inference(
    image: np.ndarray,
    x_min: int,
    y_min: int,
    x_max: int,
    y_max: int
) -> np.ndarray:
    box = np.array([[x_min, y_min], [x_max, y_max]])
    mask = inference_with_box(image, box, EFFICIENT_SAM_MODEL, DEVICE)
    mask = mask[np.newaxis, ...]
    detections = sv.Detections(xyxy=sv.mask_to_xyxy(masks=mask), mask=mask)
    return annotate_image(image=image, detections=detections)


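# SAM takes the box prompt in xyxy format; post_process_masks rescales the
# prediction back to the original image size and the first candidate mask is kept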
def sam_inference(
    image: np.ndarray,
    x_min: int,
    y_min: int,
    x_max: int,
    y_max: int
) -> np.ndarray:
    input_boxes = [[[x_min, y_min, x_max, y_max]]]
    inputs = SAM_PROCESSOR(
        Image.fromarray(image),
        input_boxes=[input_boxes],
        return_tensors="pt"
    ).to(DEVICE)
    with torch.no_grad():
        outputs = SAM_MODEL(**inputs)
    mask = SAM_PROCESSOR.image_processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"].cpu(),
        inputs["reshaped_input_sizes"].cpu()
    )[0][0][0].numpy()
    mask = mask[np.newaxis, ...]
    detections = sv.Detections(xyxy=sv.mask_to_xyxy(masks=mask), mask=mask)
    return annotate_image(image=image, detections=detections)


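# run both models on the same prompt so their masks can be compared side by side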
def inference(
    image: np.ndarray,
    x_min: int,
    y_min: int,
    x_max: int,
    y_max: int
) -> Tuple[np.ndarray, np.ndarray]:
    return (
        efficient_sam_inference(image, x_min, y_min, x_max, y_max),
        sam_inference(image, x_min, y_min, x_max, y_max)
    )


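# clear both output panels whenever a new input image is uploaded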
def clear(image: np.ndarray) -> Tuple[None, None]:
    return (None, None)


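# Gradio UI: image input with box-coordinate fields, EfficientSAM and SAM
# output panels, a submit button, and ready-made examples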
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Tab(label="Box prompt"):
        with gr.Row():
            with gr.Column():
                input_image = gr.Image()
                with gr.Accordion(label="Box", open=False):
                    with gr.Row():
                        x_min_number = gr.Number(label="x_min")
                        y_min_number = gr.Number(label="y_min")
                        x_max_number = gr.Number(label="x_max")
                        y_max_number = gr.Number(label="y_max")
            efficient_sam_output_image = gr.Image(label="EfficientSAM")
            sam_output_image = gr.Image(label="SAM")
        with gr.Row():
            submit_button = gr.Button("Submit")
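        # example images hosted by Roboflow, each with a pre-filled box prompt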
        gr.Examples(
            fn=inference,
            examples=[
                [
                    'https://media.roboflow.com/efficient-sam/beagle.jpeg',
                    69,
                    26,
                    625,
                    704
                ],
                [
                    'https://media.roboflow.com/efficient-sam/corgi.jpg',
                    801,
                    510,
                    1782,
                    993
                ],
                [
                    'https://media.roboflow.com/efficient-sam/horses.jpg',
                    814,
                    696,
                    1523,
                    1183
                ],
                [
                    'https://media.roboflow.com/efficient-sam/bears.jpg',
                    653,
                    874,
                    1173,
                    1229
                ]
            ],
            inputs=[input_image, x_min_number, y_min_number, x_max_number, y_max_number],
            outputs=[efficient_sam_output_image, sam_output_image],
        )
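
    # submitting runs both models on the same prompt; changing the input image
    # clears the previous results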
    submit_button.click(
        efficient_sam_inference,
        inputs=[input_image, x_min_number, y_min_number, x_max_number, y_max_number],
        outputs=efficient_sam_output_image
    )
    submit_button.click(
        sam_inference,
        inputs=[input_image, x_min_number, y_min_number, x_max_number, y_max_number],
        outputs=sam_output_image
    )
    input_image.change(
        clear,
        inputs=input_image,
        outputs=[efficient_sam_output_image, sam_output_image]
    )

demo.launch(debug=False, show_error=True)