Update app.py

app.py (changed):
```diff
--- a/app.py
+++ b/app.py
@@ -12,7 +12,7 @@ Features:
 - Automatic dependency checking & installation for SAM-2
 
 Usage:
-    python medical_ai_app.py
+    HF_TOKEN=<your_token> python medical_ai_app.py # if private models require auth
 Requires:
     torch, transformers, PIL, gradio, ultralytics, requests, opencv-python, pyyaml
 """
@@ -25,6 +25,9 @@ import warnings
 from threading import Thread
 from pathlib import Path
 
+# Hugging Face token (for private models)
+HF_TOKEN = os.getenv("HF_TOKEN")
+
 # Environment setup
 os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
 warnings.filterwarnings("ignore", message=r".*upsample_bicubic2d.*")
@@ -37,11 +40,11 @@ from PIL import Image, ImageDraw
 import gradio as gr
 from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+import importlib
 
 # =============================================================================
 # SAM-2 Alias Patch & Installer
 # =============================================================================
-import importlib
 try:
     import sam_2
     sys.modules['sam2'] = sam_2
@@ -94,18 +97,29 @@ _qwen_model = None
 _qwen_processor = None
 _qwen_device = None
 
-def load_qwen_model_and_processor(
+def load_qwen_model_and_processor():
     global _qwen_model, _qwen_processor, _qwen_device
     if _qwen_model is None:
         _qwen_device = get_device()
-        auth = {"use_auth_token":
+        auth = {"use_auth_token": HF_TOKEN} if HF_TOKEN else {}
+        print(f"[Qwen] Loading model with auth={'yes' if HF_TOKEN else 'no'} on {_qwen_device}")
+        try:
+            _qwen_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+                "Qwen/Qwen2.5-VL-3B-Instruct",
+                trust_remote_code=True,
+                torch_dtype=torch.float32,
+                low_cpu_mem_usage=True,
+                **auth
+            ).to(_qwen_device)
+            _qwen_processor = AutoProcessor.from_pretrained(
+                "Qwen/Qwen2.5-VL-3B-Instruct",
+                trust_remote_code=True,
+                **auth
+            )
+        except Exception as e:
+            print(f"[Qwen] Model load failed: {e}")
+            _qwen_model = None
+            _qwen_processor = None
     return _qwen_model, _qwen_processor, _qwen_device
 
 class MedicalVLMAgent:
@@ -116,8 +130,8 @@ class MedicalVLMAgent:
         "Disclaimer: I am not a licensed medical professional."
     )
     def run(self, text, image=None):
-        if self.model
-            return "Qwen-VLM
+        if not self.model or not self.processor:
+            return "Qwen-VLM is not available"
         msgs = [{"role":"system","content":[{"type":"text","text":self.sys_prompt}]}]
         user_cont = []
         if image:
@@ -125,8 +139,12 @@ class MedicalVLMAgent:
             user_cont.append({"type":"image","image":tmp})
         user_cont.append({"type":"text","text": text or ""})
         msgs.append({"role":"user","content":user_cont})
-        prompt = self.processor.apply_chat_template(
+        prompt = self.processor.apply_chat_template(
+            msgs, tokenize=False, add_generation_prompt=True
+        )
+        inputs = self.processor(
+            text=[prompt], images=[], videos=[], padding=True, return_tensors='pt'
+        ).to(self.device)
         out = self.model.generate(**inputs, max_new_tokens=128)
         resp = out[0][inputs.input_ids.shape[1]:]
         return self.processor.decode(resp, skip_special_tokens=True).strip()
@@ -137,58 +155,50 @@ class MedicalVLMAgent:
 _sam2_model, _mask_generator = (None, None)
 if SAM2_AVAILABLE:
     try:
-        CKPT="checkpoints/sam2.1_hiera_large.pt"
+        CKPT="checkpoints/sam2.1_hiera_large.pt"
+        CFG="configs/sam2.1/sam2.1_hiera_l.yaml"
         os.chdir("segment-anything-2/sam2/sam2")
-        _sam2_model = build_sam2(
+        _sam2_model = build_sam2(
+            CFG, CKPT, device=get_device(), apply_postprocessing=False
+        )
         _mask_generator = SAM2AutomaticMaskGenerator(_sam2_model)
     except Exception as e:
-        print(f"SAM-2
+        print(f"[SAM-2] Initialization error: {e}")
         _mask_generator = None
 
-def segmentation_interface(image):
-    if image is None: return None, "Upload an image"
-    if not _mask_generator: return None, "SAM-2 unavailable"
-    arr = np.array(image.convert('RGB'))
-    anns = _mask_generator.generate(arr)
-    overlay = arr.copy()
-    for ann in sorted(anns, key=lambda x: x['area'], reverse=True):
-        m = ann['segmentation']; color=np.random.randint(0,255,3)
-        overlay[m] = (overlay[m]*0.5 + color*0.5).astype(np.uint8)
-    return Image.fromarray(overlay), f"{len(anns)} masks found"
-
-# =============================================================================
-# Fallback segmentation
-# =============================================================================
-def fallback_segmentation(image):
-    if image is None: return None, "Upload an image"
-    arr = np.array(image.convert('RGB'))
-    gray=cv2.cvtColor(arr,cv2.COLOR_RGB2GRAY)
-    _,th=cv2.threshold(gray,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
-    overlay=arr.copy(); overlay[th>0]=[255,0,0]
-    blended=cv2.addWeighted(arr,0.7,overlay,0.3,0)
-    return Image.fromarray(blended), "Fallback applied"
-
 # =============================================================================
 # CheXagent: structured report & grounding
 # =============================================================================
 try:
+    print(f"[CheXagent] Loading with auth={'yes' if HF_TOKEN else 'no'}")
+    chex_tok = AutoTokenizer.from_pretrained(
+        "StanfordAIMI/CheXagent-2-3b", trust_remote_code=True,
+        use_auth_token=HF_TOKEN
+    )
+    chex_model = AutoModelForCausalLM.from_pretrained(
+        "StanfordAIMI/CheXagent-2-3b", device_map='auto', trust_remote_code=True,
+        use_auth_token=HF_TOKEN
+    )
     if torch.cuda.is_available(): chex_model = chex_model.half()
-    chex_model.eval()
+    chex_model.eval()
+    CHEX_AVAILABLE = True
+except Exception as e:
+    print(f"[CheXagent] Load failed: {e}")
+    CHEX_AVAILABLE = False
 
 @torch.no_grad()
 def report_generation(im1, im2):
-    if not CHEX_AVAILABLE:
+    if not CHEX_AVAILABLE:
+        yield "CheXagent unavailable"
+        return
     streamer = TextIteratorStreamer(chex_tok, skip_prompt=True)
-    yield "
+    yield "Streaming report... (not fully implemented)"
 
 @torch.no_grad()
 def phrase_grounding(image, prompt):
-    if not CHEX_AVAILABLE:
+    if not CHEX_AVAILABLE:
+        return "CheXagent unavailable", None
+    w,h = image.size; draw = ImageDraw.Draw(image)
     draw.rectangle([(w*0.25,h*0.25),(w*0.75,h*0.75)], outline='red', width=3)
     return prompt, image
@@ -196,47 +206,46 @@ def phrase_grounding(image, prompt):
 # Gradio UI
 # =============================================================================
 def create_ui():
-        med = MedicalVLMAgent(m,p,d)
-        qwen_ok = True
-    except Exception:
-        med = None
-        qwen_ok = False
+    m, p, d = load_qwen_model_and_processor()
+    qwen_ok = bool(m and p)
+    med = MedicalVLMAgent(m, p, d) if qwen_ok else None
 
     with gr.Blocks() as demo:
         gr.Markdown("# Medical AI Assistant")
-        gr.Markdown(
+        gr.Markdown(
+            f"- Qwen: {'✅' if qwen_ok else '❌'} "
+            f"- SAM-2: {'✅' if _mask_generator else '❌'} "
+            f"- CheXagent: {'✅' if CHEX_AVAILABLE else '❌'}"
+        )
         with gr.Tab("Medical Q&A"):
-            if qwen_ok
+            if qwen_ok:
                 txt = gr.Textbox(label="Question / description", lines=3)
                 img = gr.Image(label="Optional image", type='pil')
                 out = gr.Textbox(label="Answer")
-                gr.Button("Ask").click(med.run,
+                gr.Button("Ask").click(med.run, [txt, img], out)
             else:
-                gr.Markdown("❌ Medical Q&A
+                gr.Markdown("❌ Medical Q&A not available. Check HF_TOKEN and connectivity.")
        with gr.Tab("Segmentation"):
             seg = gr.Image(label="Upload image", type='pil')
             so = gr.Image(label="Result")
             ss = gr.Textbox(label="Status", interactive=False)
             fn = segmentation_interface if _mask_generator else fallback_segmentation
-            gr.Button("Segment").click(fn,
+            gr.Button("Segment").click(fn, [seg], [so, ss])
         with gr.Tab("CheXagent Report"):
             c1 = gr.Image(type='pil', label="Image 1")
             c2 = gr.Image(type='pil', label="Image 2")
             rout = gr.Markdown()
             if CHEX_AVAILABLE:
-                gr.Interface(
+                gr.Interface(report_generation, [c1, c2], rout, live=True).render()
             else:
-                gr.Markdown("❌ CheXagent report not available.")
+                gr.Markdown("❌ CheXagent report not available. Check HF_TOKEN and connectivity.")
         with gr.Tab("CheXagent Grounding"):
             gi = gr.Image(type='pil', label="Image")
             gp = gr.Textbox(label="Prompt")
             gout = gr.Textbox(label="Response")
             goimg = gr.Image(label="Output Image")
             if CHEX_AVAILABLE:
-                gr.Interface(
+                gr.Interface(phrase_grounding, [gi, gp], [gout, goimg]).render()
             else:
                 gr.Markdown("❌ CheXagent grounding not available.")
     return demo
@@ -244,3 +253,4 @@ def create_ui():
 if __name__ == "__main__":
     ui = create_ui()
     ui.launch(server_name='0.0.0.0', server_port=7860, share=True)
+
```
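The diff threads `HF_TOKEN` into every `from_pretrained` call through the `use_auth_token` keyword. Recent `transformers` releases deprecate that spelling in favor of `token=`; a minimal sketch of the newer form, reusing the model ID from the diff (passing `None` simply falls back to anonymous access):

```python
import os
from transformers import AutoTokenizer

HF_TOKEN = os.getenv("HF_TOKEN")

# `token=` is the current spelling; `use_auth_token=` still works on older
# transformers versions but emits a deprecation warning on newer ones.
chex_tok = AutoTokenizer.from_pretrained(
    "StanfordAIMI/CheXagent-2-3b",
    trust_remote_code=True,
    token=HF_TOKEN,
)
```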
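The `sys.modules['sam2'] = sam_2` line is a module alias: the package is installed under one name but downstream code imports it under another, and registering it in `sys.modules` makes both names resolve to the same module object. A self-contained sketch of the pattern (the `alias_module` helper is illustrative, not part of the diff):

```python
import importlib
import sys

def alias_module(installed_name: str, expected_name: str) -> bool:
    """Expose an installed module under the name downstream code imports."""
    try:
        sys.modules[expected_name] = importlib.import_module(installed_name)
        return True
    except ImportError:
        return False  # caller can degrade gracefully, as app.py does

SAM2_AVAILABLE = alias_module("sam_2", "sam2")
```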
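One behavior worth flagging in `MedicalVLMAgent.run`: the chat template records an image entry, but the processor is then called with `images=[]`, so an attached image never reaches the model. A hedged sketch of passing the PIL image through instead; exact processor kwargs vary across `transformers` versions, so treat this as an assumption to verify:

```python
import torch

def answer(model, processor, device, prompt, image=None, max_new_tokens=128):
    """`prompt` is the chat-templated string; `image` is a PIL image or None."""
    inputs = processor(
        text=[prompt],
        images=[image] if image is not None else None,
        padding=True,
        return_tensors="pt",
    ).to(device)
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=max_new_tokens)
    resp = out[0][inputs.input_ids.shape[1]:]  # strip the prompt tokens
    return processor.decode(resp, skip_special_tokens=True).strip()
```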
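The SAM-2 setup calls `os.chdir` so the relative config and checkpoint paths resolve, which silently changes the working directory for the whole process. A sketch of scoping that side effect with a context manager; `build_sam2` and `get_device` are assumed to be the same objects `app.py` already imports:

```python
import os
from contextlib import contextmanager

@contextmanager
def pushd(path):
    """Temporarily switch the working directory, restoring it afterwards."""
    prev = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(prev)

# Same layout and arguments as the diff, but the chdir no longer leaks.
with pushd("segment-anything-2/sam2/sam2"):
    _sam2_model = build_sam2(
        "configs/sam2.1/sam2.1_hiera_l.yaml",
        "checkpoints/sam2.1_hiera_large.pt",
        device=get_device(),
        apply_postprocessing=False,
    )
```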
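`create_ui` mixes two Gradio wiring styles: explicit `Button.click(fn, inputs, outputs)` events and prebuilt `gr.Interface(...).render()` calls embedded in the surrounding `Blocks`. A minimal runnable sketch of both, with a toy `echo` handler standing in for the real functions:

```python
import gradio as gr

def echo(text):
    return text

with gr.Blocks() as demo:
    with gr.Tab("Button wiring"):
        inp = gr.Textbox(label="In")
        outp = gr.Textbox(label="Out")
        gr.Button("Run").click(echo, [inp], outp)  # fn, inputs, outputs
    with gr.Tab("Interface render"):
        # .render() places a prebuilt Interface inside the current Blocks
        gr.Interface(echo, gr.Textbox(), gr.Textbox()).render()

# demo.launch(server_name="0.0.0.0", server_port=7860)  # as in app.py
```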