alinasdkey committed
Commit c4a118e · verified · 1 Parent(s): 7382fb8

Update app.py

Files changed (1)
  1. app.py +29 -62
app.py CHANGED
@@ -6,91 +6,57 @@ from transformers import AutoProcessor
 from PIL import Image
 import gradio as gr
 
-# Base LLaMA model (vision)
+# Load base LLaMA vision model
 model_name = "unsloth/Llama-3.2-11B-Vision-Instruct"
-
-# LoRA adapter repo
 lora_repo = "alinasdkey/unsloth-pret-lora"
 
-# Load base model
+# Load base model and processor
 model, processor = FastVisionModel.from_pretrained(
-    model_name = model_name,
-    device_map = "auto",  # or "cuda" if you're sure you're on GPU
-    load_in_4bit = False,  # Use 8bit if you're on CPU
-    load_in_8bit = True,  # Set this to True
-    # remove torch_dtype entirely
+    model_name=model_name,
+    device_map="auto",
+    load_in_4bit=False,
+    load_in_8bit=True,
 )
 
-# Applying LoRA adapter from the root of the repo)
-model = PeftModel.from_pretrained(
-    model,
-    model_id = lora_repo,
-)
+# Apply LoRA adapter
+model = PeftModel.from_pretrained(model, model_id=lora_repo)
 
-#Switching to inference mode
+# Set to inference mode
 FastVisionModel.for_inference(model)
 
-#Inference function
+# Inference function
 def describe_image(image, instruction):
-    try:
-        # Step 1: Prepare prompt
-        prompt = instruction.strip() if instruction else "Describe this image."
-
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image"},
-                    {"type": "text", "text": prompt}
-                ]
-            }
-        ]
-
-        prompt_text = processor.apply_chat_template(messages, add_generation_prompt=True)
-
-        # Step 2: Tokenize the prompt text
-        input_ids = processor.tokenizer(prompt_text, return_tensors="pt").input_ids.to(model.device)
+    # Load and preprocess image
+    image = image.convert("RGB")
+    inputs = processor(images=image, return_tensors="pt").to(model.device)
 
-        # Step 3: Process image to get pixel values + aspect ratio IDs
-        image_inputs = processor(image, return_tensors="pt").to(model.device)
-        pixel_values = image_inputs["pixel_values"]
-        aspect_ratio_ids = image_inputs["aspect_ratio_ids"]
+    # Create input prompt with instruction
+    prompt = instruction if instruction else "Describe this graph."
 
-        # Step 4: Generate
-        outputs = model.generate(
-            input_ids=input_ids,
-            pixel_values=pixel_values,
-            aspect_ratio_ids=aspect_ratio_ids,
-            max_new_tokens=256,
-            do_sample=False,
-            temperature=0.2,
-            top_p=0.95
-        )
-
-        # Step 5: Decode
-        return processor.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
-
-    except Exception as e:
-        import traceback
-        return traceback.format_exc()
-
-    # Tokenize + image encode
-    image_inputs = processor(images=image, return_tensors="pt").to(model.device)
+    # Tokenize text prompt
     input_ids = processor.tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
 
-    # Generate prediction
+    # Extract necessary vision inputs
+    pixel_values = inputs["pixel_values"]
+    aspect_ratio_ids = inputs.get("aspect_ratio_ids")
+    aspect_ratio_mask = inputs.get("aspect_ratio_mask")
+
+    # Generate model output
     outputs = model.generate(
         input_ids=input_ids,
-        images=image_inputs["pixel_values"],
+        pixel_values=pixel_values,
+        aspect_ratio_ids=aspect_ratio_ids,
+        aspect_ratio_mask=aspect_ratio_mask,
         max_new_tokens=256,
         do_sample=False,
        temperature=0.2,
         top_p=0.95,
     )
 
+    # Decode and return result
     return processor.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
 
-#Gradio Interface
+# Gradio Interface
 gr.Interface(
     fn=describe_image,
     inputs=[
@@ -98,6 +64,7 @@ gr.Interface(
         gr.Textbox(label="Instruction (e.g. Summarize this graph)")
     ],
     outputs="text",
-    title=" Welcome to the Graph Insight Engine",
+    title="Welcome to the Graph Description AI: Pret",
     description="Upload a graph and get insightful analysis!"
 ).launch()
+
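For reviewers, here is a minimal smoke test of the rewritten describe_image(). This is a sketch, not part of the commit: it assumes it is pasted into app.py just above the gr.Interface(...).launch() call, so that model, processor, and describe_image are already defined, and example_chart.png is a hypothetical stand-in for any local chart image.

    from PIL import Image

    # Hypothetical test image; any chart screenshot will do.
    image = Image.open("example_chart.png")

    # Exercises the new path: RGB convert -> processor -> generate -> decode.
    print(describe_image(image, "Summarize this graph."))

One caveat in the committed code: model.generate() is called with do_sample=False, which means greedy decoding, so temperature=0.2 and top_p=0.95 have no effect (recent transformers releases warn about this combination). Dropping the two sampling arguments, or setting do_sample=True, would make the intent explicit.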