Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -50,7 +50,7 @@ layout_prompt = """Please output the layout information from the image, includin
|
|
| 50 |
ocr_prompt = "Perform precise OCR on the image. Extract all text content, maintaining the original structure, paragraphs, and tables as formatted markdown."
|
| 51 |
|
| 52 |
# --- Model Loading ---
|
| 53 |
-
MODEL_ID_M = "prithivMLmods/Camel-Doc-OCR-
|
| 54 |
processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
|
| 55 |
model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 56 |
MODEL_ID_M, trust_remote_code=True, torch_dtype=torch.float16
|
|
@@ -77,6 +77,12 @@ model_g = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
|
| 77 |
MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER, torch_dtype=torch.float16
|
| 78 |
).to(device).eval()
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
# --- Utility Functions ---
|
| 81 |
def layoutjson2md(layout_data: List[Dict]) -> str:
|
| 82 |
"""Converts the structured JSON from Layout Analysis into formatted Markdown."""
|
|
@@ -122,10 +128,11 @@ def process_document_stream(model_name: str, task_choice: str, image: Image.Imag
|
|
| 122 |
text_prompt = ocr_prompt if task_choice == "Content Extraction" else layout_prompt
|
| 123 |
|
| 124 |
# 2. Select model and processor
|
| 125 |
-
if model_name == "Camel-Doc-OCR-
|
| 126 |
elif model_name == "Megalodon-OCR-Sync-0713": processor, model = processor_t, model_t
|
| 127 |
elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
|
| 128 |
elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
|
|
|
|
| 129 |
else:
|
| 130 |
yield "Invalid model selected.", "Invalid model selected.", None
|
| 131 |
return
|
|
@@ -190,10 +197,11 @@ def create_gradio_interface():
|
|
| 190 |
# Left Column (Inputs)
|
| 191 |
with gr.Column(scale=1):
|
| 192 |
model_choice = gr.Dropdown(
|
| 193 |
-
choices=["Camel-Doc-OCR-
|
| 194 |
"MonkeyOCR-Recognition",
|
| 195 |
"Nanonets-OCR-s",
|
| 196 |
-
"Megalodon-OCR-Sync-0713"],
|
|
|
|
| 197 |
label="Select Model", value="Nanonets-OCR-s"
|
| 198 |
)
|
| 199 |
task_choice = gr.Dropdown(
|
|
|
|
| 50 |
ocr_prompt = "Perform precise OCR on the image. Extract all text content, maintaining the original structure, paragraphs, and tables as formatted markdown."
|
| 51 |
|
| 52 |
# --- Model Loading ---
|
| 53 |
+
MODEL_ID_M = "prithivMLmods/Camel-Doc-OCR-080125"
|
| 54 |
processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
|
| 55 |
model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 56 |
MODEL_ID_M, trust_remote_code=True, torch_dtype=torch.float16
|
|
|
|
| 77 |
MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER, torch_dtype=torch.float16
|
| 78 |
).to(device).eval()
|
| 79 |
|
| 80 |
+
MODEL_ID_I = "ChatDOC/OCRFlux-3B"
|
| 81 |
+
processor_i = AutoProcessor.from_pretrained(MODEL_ID_I, trust_remote_code=True)
|
| 82 |
+
model_i = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 83 |
+
MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16
|
| 84 |
+
).to(device).eval()
|
| 85 |
+
|
| 86 |
# --- Utility Functions ---
|
| 87 |
def layoutjson2md(layout_data: List[Dict]) -> str:
|
| 88 |
"""Converts the structured JSON from Layout Analysis into formatted Markdown."""
|
|
|
|
| 128 |
text_prompt = ocr_prompt if task_choice == "Content Extraction" else layout_prompt
|
| 129 |
|
| 130 |
# 2. Select model and processor
|
| 131 |
+
if model_name == "Camel-Doc-OCR-080125": processor, model = processor_m, model_m
|
| 132 |
elif model_name == "Megalodon-OCR-Sync-0713": processor, model = processor_t, model_t
|
| 133 |
elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
|
| 134 |
elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
|
| 135 |
+
elif model_name == "OCRFlux-3B": processor, model = processor_i, model_i
|
| 136 |
else:
|
| 137 |
yield "Invalid model selected.", "Invalid model selected.", None
|
| 138 |
return
|
|
|
|
| 197 |
# Left Column (Inputs)
|
| 198 |
with gr.Column(scale=1):
|
| 199 |
model_choice = gr.Dropdown(
|
| 200 |
+
choices=["Camel-Doc-OCR-080125",
|
| 201 |
"MonkeyOCR-Recognition",
|
| 202 |
"Nanonets-OCR-s",
|
| 203 |
+
"Megalodon-OCR-Sync-0713",
|
| 204 |
+
"OCRFlux-3B"],
|
| 205 |
label="Select Model", value="Nanonets-OCR-s"
|
| 206 |
)
|
| 207 |
task_choice = gr.Dropdown(
|