Update app.py
app.py
CHANGED
@@ -1,15 +1,13 @@
 import gradio as gr
 from transformers import AutoProcessor, AutoModelForCausalLM
 import spaces
+
 import io
-import base64  # Adding the base64 library for decoding
 from PIL import Image
+import base64  # For decoding Base64 images
 import subprocess
-
-# Install the flash-attn dependency if needed
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-
-# Load the model and processor
+#
 model_id = 'J-LAB/Florence-vl3'
 model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to("cuda").eval()
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
@@ -35,35 +33,28 @@ def run_example(task_prompt, image):
     )
     return parsed_answer

-# Function to process images, now with Base64 support
 def process_image(image, task_prompt):
-
-
-
-
-
-
-
-        image = Image.open(io.BytesIO(image))
-    else:
-        image = Image.fromarray(image)  # Convert a NumPy array to a PIL image, if applicable
-
-    # Map the task prompts
+    if isinstance(image, str):
+        if image.startswith('data:image/png;base64,'):
+            # Decode the Base64 image
+            image_data = base64.b64decode(image.split(',')[1])
+            image = Image.open(io.BytesIO(image_data))
+    else:
+        image = Image.fromarray(image)  # Convert NumPy array to PIL Image
     if task_prompt == 'Product Caption':
         task_prompt = '<MORE_DETAILED_CAPTION>'
     elif task_prompt == 'OCR':
         task_prompt = '<OCR>'

-    # Call run_example with the processed image and the task prompt
     results = run_example(task_prompt, image)

-    #
+    # Remove the key and get the text value
     if results and task_prompt in results:
         output_text = results[task_prompt]
     else:
         output_text = ""

-    #
+    # Convert newline characters to HTML line breaks
     output_text = output_text.replace("\n\n", "<br><br>").replace("\n", "<br>")

     return output_text
@@ -92,47 +83,41 @@ document.querySelector('button').addEventListener('click', function() {
 });
 """

-single_task_list =
+single_task_list =[
+    'Product Caption', 'OCR'
+]

 with gr.Blocks(css=css) as demo:
     gr.Markdown(DESCRIPTION)
     with gr.Tab(label="Product Image Select"):
         with gr.Row():
             with gr.Column():
-                input_img = gr.Image(label="Input Picture"
+                input_img = gr.Image(label="Input Picture")
                 task_prompt = gr.Dropdown(choices=single_task_list, label="Task Prompt", value="Product Caption")
                 submit_btn = gr.Button(value="Submit")
             with gr.Column():
                 output_text = gr.HTML(label="Output Text", elem_id="output")

-    gr.Markdown("""
+    gr.Markdown("""
     ## How to use via API
     To use this model via API, you can follow the example code below:

-    ```python
-    import base64
-    from PIL import Image
-    import io
-    import requests
-
-    # Convert the image to base64
-    image_path = 'path_to_image.png'
-    with open(image_path, 'rb') as image_file:
-        image_base64 = base64.b64encode(image_file.read()).decode('utf-8')

-
-
-
-
-
-
-
-
+    ```python
+    !pip install gradio_client
+    from gradio_client import Client, handle_file
+
+    client = Client("J-LAB/Fluxi-IA")
+    result = client.predict(
+        image=handle_file('https://raw.githubusercontent.com/gradio-app/gradio/main/test/test_files/bus.png'),
+        api_name="/process_image"
+    )
+    print(result)
+    ```
     """)

     submit_btn.click(process_image, [input_img, task_prompt], [output_text])

     demo.load(lambda: None, inputs=None, outputs=None, js=js)

-demo.launch(debug=True)
+demo.launch(debug=True)
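
For reference, a minimal standalone sketch of the Base64 round trip this commit enables: it encodes a local PNG into the `data:image/png;base64,` form that the new `process_image` branch checks for, then decodes it back to a PIL image the same way the Space does. The `path_to_image.png` path is a placeholder taken from the removed API example.

```python
import base64
import io

from PIL import Image

# Encode a local PNG as a data URI (placeholder path).
with open('path_to_image.png', 'rb') as image_file:
    data_uri = 'data:image/png;base64,' + base64.b64encode(image_file.read()).decode('utf-8')

# Decode it the way the new process_image branch does:
# strip the "data:image/png;base64," prefix, then base64-decode the rest.
image_data = base64.b64decode(data_uri.split(',')[1])
image = Image.open(io.BytesIO(image_data))
print(image.size)
```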