Spaces:

Xiaomeng1130
/

Stoma-clip-api

Runtime error

App Files Files Community

Xiaomeng1130 committed on Oct 16

Commit

75059a8

verified ·

1 Parent(s): 7046c4a

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -27

app.py CHANGED Viewed

@@ -3,18 +3,21 @@ import torch
 import gradio as gr
 from PIL import Image
 import numpy as np
-# ========== 1. Import project modules ==========
 try:
-    # 尝试导入 stoma_clip 模块（通过 requirements.txt 中的 -e . 安装）
     from stoma_clip import pmc_clip
     from stoma_clip.pmc_clip.factory import _rescan_model_configs
     from stoma_clip.training.fusion_method import convert_model_to_cls
     from stoma_clip.training.dataset.utils import encode_mlm
     print("Stoma-CLIP modules imported successfully.")
 except ImportError as e:
-    # 导入失败的日志，以供调试
-    print(f"Error importing Stoma-CLIP modules: {e}")
 # ========== 2. Model Configuration and Loading ==========
 LABEL_MAP = {
@@ -28,16 +31,14 @@ NUM_CLASSES = len(LABEL_MAP)
 class Args:
     def __init__(self):
         self.model = "RN50_fusion4"
-        # 假设 stoma_clip.pt 文件位于应用的根目录（/app），或被您的内部库识别。
-        # 确保这个文件是正确的文件名。
         self.pretrained = "stoma_clip.pt"
         self.num_classes = NUM_CLASSES
         self.mlm = True
         self.crop_scale = 0.9
         self.context_length = 77
-        # 自动检测并使用 CUDA/GPU
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         print(f"Using device: {self.device}")
 args = Args()
 MODEL = None
@@ -45,34 +46,46 @@ PREPROCESS = None
 TOKENIZER = None
 def load_model():
-    """Load model once when Gradio starts, implementing the singleton pattern."""
     global MODEL, PREPROCESS, TOKENIZER
     if MODEL is not None:
-        print("Model already loaded. Returning cached objects.")
         return MODEL, PREPROCESS, TOKENIZER
-    print("--- Starting Model Load Process ---")
     try:
         # Step 1: Create model and transforms
-        print("1. Rescanning model configs...")
         _rescan_model_configs()
         model, _, preprocess = pmc_clip.create_model_and_transforms(args)
         model = convert_model_to_cls(model, num_classes=args.num_classes, fusion_method='cross_attention')
         print("2. Model architecture created. Moving to device...")
         # Move model architecture to GPU/CPU
         model.to(args.device).eval()
-        # Step 2: Load weights - 使用 map_location 确保加载到正确的设备
         print(f"3. Loading weights from {args.pretrained} to {args.device}...")
-        # 这里的 torch.load 必须依赖于 Dockerfile 预下载或 COPY 进来的文件
         state_dict = torch.load(args.pretrained, map_location=args.device)
         print("4. Weights file loaded. Cleaning state dict...")
         state_dict_clean = {k.replace("module.", "", 1): v for k, v in state_dict['state_dict'].items()}
         # Step 3: Apply weights
         print("5. Loading state dict into model architecture...")
         model.load_state_dict(state_dict_clean)
         # Step 4: Final setup
@@ -81,32 +94,40 @@ def load_model():
         PREPROCESS = preprocess
         TOKENIZER = tokenizer
-        print("✨ Stoma-CLIP Model loaded successfully!")
         return MODEL, PREPROCESS, TOKENIZER
     except Exception as e:
         print(f"🔥 Error during model loading: {e}")
-        MODEL = None
-        # 抛出异常，让 Gradio 知道启动失败
         raise RuntimeError(f"Failed to load Stoma-CLIP model: {e}")
 # ========== 3. Inference Function ==========
 def predict_stoma_clip(image: Image.Image, caption: str):
-    # 确保在推理时调用加载模型
     try:
-        model, preprocess, tokenizer = load_model()
     except RuntimeError:
         return "Model Loading Failed (See Logs)", {}
     image = image.convert("RGB")
     device = args.device
     # 将输入数据移动到 GPU
     image_tensor = preprocess(image).unsqueeze(0).to(device)
     mask_token, pad_token = '[MASK]', '[PAD]'
     vocab = [v for v in tokenizer.get_vocab().keys() if v not in tokenizer.all_special_tokens]
     bert_input, bert_label = encode_mlm(
         caption=caption,
         vocab=vocab,
@@ -116,14 +137,14 @@ def predict_stoma_clip(image: Image.Image, caption: str):
         tokenizer=tokenizer,
         args=args,
     )
     with torch.no_grad():
         inputs = {"images": image_tensor, "bert_input": bert_input, "bert_label": bert_label}
         outputs = model(inputs)
         # 将结果移回 CPU 进行 numpy 转换
         probs = torch.softmax(outputs, dim=1).cpu().numpy()[0]
         predicted_class_idx = torch.argmax(outputs, dim=1).item()
     predicted_class_name = REVERSE_LABEL_MAP.get(predicted_class_idx, "Unknown")
     probability_distribution = {REVERSE_LABEL_MAP[i]: float(p) for i, p in enumerate(probs)}
     return predicted_class_name, probability_distribution
@@ -159,8 +180,14 @@ iface = gr.Interface(
 )
 if __name__ == "__main__":
-    # 在应用启动时尝试加载模型，如果失败，launch 会抛出异常
-    # load_model() # 在 iface.launch() 内部通常会自动触发模型加载，但显式调用可以捕获启动错误
-    # T4 / Docker 环境下使用 0.0.0.0 和默认端口
     iface.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
 from PIL import Image
 import numpy as np
+import sys
+import time
+# ========== 1. Import project modules and Model Configuration ==========
 try:
     from stoma_clip import pmc_clip
     from stoma_clip.pmc_clip.factory import _rescan_model_configs
     from stoma_clip.training.fusion_method import convert_model_to_cls
     from stoma_clip.training.dataset.utils import encode_mlm
     print("Stoma-CLIP modules imported successfully.")
+    sys.stdout.flush() # 强制刷新输出
 except ImportError as e:
+    print(f"FATAL: Error importing Stoma-CLIP modules: {e}")
+    sys.stdout.flush()
+    sys.exit(1)
 # ========== 2. Model Configuration and Loading ==========
 LABEL_MAP = {
 class Args:
     def __init__(self):
         self.model = "RN50_fusion4"
         self.pretrained = "stoma_clip.pt"
         self.num_classes = NUM_CLASSES
         self.mlm = True
         self.crop_scale = 0.9
         self.context_length = 77
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         print(f"Using device: {self.device}")
+        sys.stdout.flush()
 args = Args()
 MODEL = None
 TOKENIZER = None
 def load_model():
+    """Load model once in the main thread during application initialization."""
     global MODEL, PREPROCESS, TOKENIZER
+    start_time = time.time()
     if MODEL is not None:
         return MODEL, PREPROCESS, TOKENIZER
+    print(f"--- Starting Model Load Process at {time.strftime('%H:%M:%S')} ---")
+    sys.stdout.flush() # 诊断点 1
     try:
         # Step 1: Create model and transforms
+        print("1. Rescanning model configs and creating architecture...")
+        sys.stdout.flush() # 诊断点 2
         _rescan_model_configs()
         model, _, preprocess = pmc_clip.create_model_and_transforms(args)
         model = convert_model_to_cls(model, num_classes=args.num_classes, fusion_method='cross_attention')
         print("2. Model architecture created. Moving to device...")
+        sys.stdout.flush() # 诊断点 3
         # Move model architecture to GPU/CPU
         model.to(args.device).eval()
+        # Step 2: Load weights - 必须确保 stoma_clip.pt 文件大小合理或复制完整
         print(f"3. Loading weights from {args.pretrained} to {args.device}...")
+        sys.stdout.flush() # 诊断点 4 - 关键点：在执行耗时 I/O 前确保日志已输出
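+        # Load the checkpoint directly onto the target device. NOTE: the originally intended "load as Float32, then convert to half precision to speed up transfer" step is NOT implemented by the call below.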
         state_dict = torch.load(args.pretrained, map_location=args.device)
         print("4. Weights file loaded. Cleaning state dict...")
+        sys.stdout.flush() # 诊断点 5
         state_dict_clean = {k.replace("module.", "", 1): v for k, v in state_dict['state_dict'].items()}
         # Step 3: Apply weights
         print("5. Loading state dict into model architecture...")
+        sys.stdout.flush() # 诊断点 6
         model.load_state_dict(state_dict_clean)
         # Step 4: Final setup
         PREPROCESS = preprocess
         TOKENIZER = tokenizer
+        end_time = time.time()
+        print(f"✨ Stoma-CLIP Model loaded successfully! Total time: {end_time - start_time:.2f} seconds.")
+        sys.stdout.flush() # 诊断点 7
         return MODEL, PREPROCESS, TOKENIZER
     except Exception as e:
         print(f"🔥 Error during model loading: {e}")
+        sys.stdout.flush()
         raise RuntimeError(f"Failed to load Stoma-CLIP model: {e}")
 # ========== 3. Inference Function ==========
 def predict_stoma_clip(image: Image.Image, caption: str):
+    # 确保在推理时调用加载模型（仅作为后备/懒加载）
     try:
+        # 如果启动时加载失败，这里会再次尝试，但依赖于全局 MODEL 变量
+        if MODEL is None:
+             model, preprocess, tokenizer = load_model()
+        else:
+             model, preprocess, tokenizer = MODEL, PREPROCESS, TOKENIZER
     except RuntimeError:
         return "Model Loading Failed (See Logs)", {}
+    # ... 原来的推理逻辑保持不变 ...
     image = image.convert("RGB")
     device = args.device
     # 将输入数据移动到 GPU
     image_tensor = preprocess(image).unsqueeze(0).to(device)
     mask_token, pad_token = '[MASK]', '[PAD]'
     vocab = [v for v in tokenizer.get_vocab().keys() if v not in tokenizer.all_special_tokens]
     bert_input, bert_label = encode_mlm(
         caption=caption,
         vocab=vocab,
         tokenizer=tokenizer,
         args=args,
     )
     with torch.no_grad():
         inputs = {"images": image_tensor, "bert_input": bert_input, "bert_label": bert_label}
         outputs = model(inputs)
         # 将结果移回 CPU 进行 numpy 转换
         probs = torch.softmax(outputs, dim=1).cpu().numpy()[0]
         predicted_class_idx = torch.argmax(outputs, dim=1).item()
     predicted_class_name = REVERSE_LABEL_MAP.get(predicted_class_idx, "Unknown")
     probability_distribution = {REVERSE_LABEL_MAP[i]: float(p) for i, p in enumerate(probs)}
     return predicted_class_name, probability_distribution
 )
 if __name__ == "__main__":
+    # --- 关键修复：强制在 Gradio launch 之前加载模型，将 I/O 阻塞移到启动阶段 ---
+    print("Pre-loading model before Gradio launch to prevent runtime timeout...")
+    sys.stdout.flush()
+    load_model()
+    print("Model loaded. Launching Gradio interface...")
+    sys.stdout.flush()
+    # 启动 Gradio
     iface.launch(server_name="0.0.0.0", server_port=7860)