Spaces:

breadlicker45
/

PaliGemma2

Sleeping

breadlicker45 committed on Dec 15, 2024

Commit

c580f5e

verified ·

1 Parent(s): 33262af

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,5 +1,8 @@
 import gradio as gr
-from transformers import AutoProcessor, AutoModelForImageTextToText
 from PIL import Image
 import torch
 import os
@@ -17,16 +20,12 @@ def load_model():
         )
     # Load the processor and model using the correct identifier
-    processor = AutoProcessor.from_pretrained(
-        "google/paligemma2-28b-pt-896", use_auth_token=token
-    )
-    model = AutoModelForImageTextToText.from_pretrained(
-        "google/paligemma2-28b-pt-896", use_auth_token=token, torch_dtype=torch.bfloat16
-    )
-    # Move model to GPU if available
-    if torch.cuda.is_available():
-        model = model.to("cuda")
     return processor, model
@@ -35,10 +34,10 @@ def load_model():
 def process_image_and_text(image, text_input):
     """Extract text from image using PaliGemma2."""
     processor, model = load_model()
     # Preprocess the image and text
     inputs = processor(text=text_input, images=image, return_tensors="pt").to(
-        "cuda" if torch.cuda.is_available() else "cpu", dtype=torch.bfloat16
     )
     # Generate predictions

 import gradio as gr
+from transformers import (
+    PaliGemmaProcessor,
+    PaliGemmaForConditionalGeneration,
+)
 from PIL import Image
 import torch
 import os
         )
     # Load the processor and model using the correct identifier
+    model_id = "google/paligemma2-28b-pt-896"
+    processor = PaliGemmaProcessor.from_pretrained(model_id, use_auth_token=token)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model = PaliGemmaForConditionalGeneration.from_pretrained(
+        model_id, use_auth_token=token, torch_dtype=torch.bfloat16
+    ).to(device)
     return processor, model
 def process_image_and_text(image, text_input):
     """Extract text from image using PaliGemma2."""
     processor, model = load_model()
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     # Preprocess the image and text
     inputs = processor(text=text_input, images=image, return_tensors="pt").to(
+        device, dtype=torch.bfloat16
     )
     # Generate predictions