File size: 2,034 Bytes
e291546 6664cab 112d29e c4a118e e291546 2361b06 6664cab c4a118e e291546 c4a118e 8157279 6664cab c4a118e 6664cab c4a118e e291546 6664cab c4a118e e291546 c4a118e 38932cd c4a118e 38932cd c4a118e cb872a6 68bcd32 c4a118e cb872a6 c4a118e cb872a6 c4a118e cb872a6 6664cab c4a118e e291546 c4a118e e291546 c4a118e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import os
import torch
from unsloth import FastVisionModel
from peft import PeftModel
from transformers import AutoProcessor
from PIL import Image
import gradio as gr
# Load base LLaMA vision model
# Base checkpoint (Unsloth build of Llama 3.2 11B Vision Instruct) and the
# LoRA adapter repo on the Hugging Face Hub that will be layered on top.
model_name = "unsloth/Llama-3.2-11B-Vision-Instruct"
lora_repo = "alinasdkey/unsloth-pret-lora"
# Load base model and processor
# 8-bit quantized weights (load_in_8bit=True, 4-bit explicitly off) sharded
# across available devices by device_map="auto".
model, processor = FastVisionModel.from_pretrained(
    model_name=model_name,
    device_map="auto",
    load_in_4bit=False,
    load_in_8bit=True,
)
# Apply LoRA adapter
model = PeftModel.from_pretrained(model, model_id=lora_repo)
# Set to inference mode
# Unsloth helper that switches the wrapped model into generation mode.
FastVisionModel.for_inference(model)
# Inference function
def describe_image(image, instruction):
    """Generate a text description of an uploaded graph image.

    Args:
        image: PIL image from the Gradio widget; may be ``None`` when the
            user submits without uploading anything.
        instruction: Optional text prompt; falls back to a default when
            empty.

    Returns:
        The model's newly generated text as a stripped string.
    """
    if image is None:
        # Gradio passes None when no image was uploaded; fail gracefully
        # instead of raising AttributeError on .convert().
        return "Please upload an image first."
    prompt = instruction if instruction else "Describe this graph."
    # Mllama-style vision models require the special <|image|> token in the
    # text so image embeddings are cross-attended at the right position;
    # processing text and image in a single processor call keeps the token
    # and feature alignment consistent.
    inputs = processor(
        images=image.convert("RGB"),
        text=f"<|image|>{prompt}",
        return_tensors="pt",
    ).to(model.device)
    # Greedy decoding: with do_sample=False, temperature/top_p are ignored
    # by transformers (and emit a warning) — omit them entirely.
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=False,
    )
    # outputs[0] contains prompt tokens followed by generated tokens;
    # slice off the prompt so only the new text is returned.
    prompt_len = inputs["input_ids"].shape[-1]
    return processor.tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()
# Gradio Interface
# Build the web UI, then serve it.
demo = gr.Interface(
    fn=describe_image,
    inputs=[
        gr.Image(type="pil", label="Upload a Graph Image"),
        gr.Textbox(label="Instruction (e.g. Summarize this graph)"),
    ],
    outputs="text",
    title="Welcome to the Graph Description AI: Pret",
    description="Upload a graph and get insightful analysis!",
)
demo.launch()
|