Spaces:

alinasdkey
/

alina-unslothPret

Paused

App Files Community

alinasdkey committed on Jun 16

Commit

c4a118e

verified ·

1 Parent(s): 7382fb8

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -62

app.py CHANGED Viewed

@@ -6,91 +6,57 @@ from transformers import AutoProcessor
 from PIL import Image
 import gradio as gr
-# Base LLaMA model (vision)
 model_name = "unsloth/Llama-3.2-11B-Vision-Instruct"
-# LoRA adapter repo
 lora_repo = "alinasdkey/unsloth-pret-lora"
-#  Load base model
 model, processor = FastVisionModel.from_pretrained(
-    model_name = model_name,
-    device_map = "auto",                 # or "cuda" if you're sure you're on GPU
-    load_in_4bit = False,                # Use 8bit if you're on CPU
-    load_in_8bit = True,                 # Set this to True
-    # remove torch_dtype entirely
 )
-#  Applying LoRA adapter from the root of the repo)
-model = PeftModel.from_pretrained(
-    model,
-    model_id = lora_repo,
-)
-#Switching to inference mode
 FastVisionModel.for_inference(model)
-#Inference function
 def describe_image(image, instruction):
-    try:
-        # Step 1: Prepare prompt
-        prompt = instruction.strip() if instruction else "Describe this image."
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image"},
-                    {"type": "text", "text": prompt}
-                ]
-            }
-        ]
-        prompt_text = processor.apply_chat_template(messages, add_generation_prompt=True)
-        # Step 2: Tokenize the prompt text
-        input_ids = processor.tokenizer(prompt_text, return_tensors="pt").input_ids.to(model.device)
-        # Step 3: Process image to get pixel values + aspect ratio IDs
-        image_inputs = processor(image, return_tensors="pt").to(model.device)
-        pixel_values = image_inputs["pixel_values"]
-        aspect_ratio_ids = image_inputs["aspect_ratio_ids"]
-        # Step 4: Generate
-        outputs = model.generate(
-            input_ids=input_ids,
-            pixel_values=pixel_values,
-            aspect_ratio_ids=aspect_ratio_ids,
-            max_new_tokens=256,
-            do_sample=False,
-            temperature=0.2,
-            top_p=0.95
-        )
-        # Step 5: Decode
-        return processor.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
-    except Exception as e:
-        import traceback
-        return traceback.format_exc()
-    # Tokenize + image encode
-    image_inputs = processor(images=image, return_tensors="pt").to(model.device)
     input_ids = processor.tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
-    # Generate prediction
     outputs = model.generate(
         input_ids=input_ids,
-        images=image_inputs["pixel_values"],
         max_new_tokens=256,
         do_sample=False,
         temperature=0.2,
         top_p=0.95,
     )
     return processor.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
-#Gradio Interface
 gr.Interface(
     fn=describe_image,
     inputs=[
@@ -98,6 +64,7 @@ gr.Interface(
         gr.Textbox(label="Instruction (e.g. Summarize this graph)")
     ],
     outputs="text",
-    title=" Welcome to the Graph Insight Engine",
     description="Upload a graph and get insightful analysis!"
 ).launch()

 from PIL import Image
 import gradio as gr
+# Identifiers for the base Llama 3.2 vision model and the LoRA adapter repo (nothing is loaded on these two lines)
 model_name = "unsloth/Llama-3.2-11B-Vision-Instruct"
 lora_repo = "alinasdkey/unsloth-pret-lora"
+# Load the base model and its processor; 8-bit loading is requested (load_in_8bit=True) and 4-bit is explicitly disabled
 model, processor = FastVisionModel.from_pretrained(
+    model_name=model_name,
+    device_map="auto",
+    load_in_4bit=False,
+    load_in_8bit=True,
 )
+# Wrap the base model with the LoRA adapter identified by lora_repo; `model` is rebound to the wrapped model
+model = PeftModel.from_pretrained(model, model_id=lora_repo)
+# Switch the adapter-wrapped model to inference mode before any request is served
 FastVisionModel.for_inference(model)
+# Inference function
 def describe_image(image, instruction):
     """Generate a text answer about an uploaded graph image.

     Args:
         image: The uploaded picture. It must support ``.convert("RGB")``
             (presumably a PIL image delivered by the Gradio image input --
             confirm against the interface definition). ``None`` means the
             user submitted without uploading anything.
         instruction: Free-text instruction from the user. ``None``, empty,
             or whitespace-only values fall back to "Describe this graph.".

     Returns:
         The model's reply as a stripped string, without the echoed prompt,
         or a short hint asking for an image when none was provided.
     """
     # Without this guard a submit with no upload fails on `.convert`.
     if image is None:
         return "Please upload an image first."
     image = image.convert("RGB")
     # Treat whitespace-only input like an empty instruction.
     prompt = (instruction or "").strip() or "Describe this graph."
     # The chat template inserts the image placeholder and the role markers
     # the instruct model expects; a bare text prompt carries neither.
     messages = [
         {
             "role": "user",
             "content": [
                 {"type": "image"},
                 {"type": "text", "text": prompt},
             ],
         }
     ]
     prompt_text = processor.apply_chat_template(messages, add_generation_prompt=True)
     # Encode image and text in ONE processor call so the token ids, pixel
     # values, aspect-ratio tensors and cross-attention mask agree with each
     # other. The template already adds the special tokens, hence
     # add_special_tokens=False.
     inputs = processor(
         images=image,
         text=prompt_text,
         add_special_tokens=False,
         return_tensors="pt",
     ).to(model.device)
     # Greedy decoding: temperature/top_p are omitted because they are
     # ignored (and only trigger warnings) when do_sample=False.
     outputs = model.generate(
         **inputs,
         max_new_tokens=256,
         do_sample=False,
     )
     # generate() returns prompt + continuation; keep only the continuation
     # so the reply does not repeat the user's prompt.
     prompt_length = inputs["input_ids"].shape[-1]
     new_tokens = outputs[0][prompt_length:]
     return processor.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
+# Gradio Interface
 gr.Interface(
     fn=describe_image,
     inputs=[
         gr.Textbox(label="Instruction (e.g. Summarize this graph)")
     ],
     outputs="text",
+    title="Welcome to the Graph Description AI: Pret",
     description="Upload a graph and get insightful analysis!"
 ).launch()