ai-building-blocks / image_to_text.py
LiKenun's picture
Add image captioning sample
d56b9d9
raw
history blame
833 Bytes
import gc
from os import getenv
from PIL.Image import Image
from transformers import AutoProcessor, BlipForConditionalGeneration
from utils import get_pytorch_device, spaces_gpu
@spaces_gpu
def image_to_text(image: Image) -> list[str]:
    """Generate caption text for an image using a BLIP captioning model.

    The model ID is read from the ``IMAGE_TO_TEXT_MODEL`` environment
    variable. The processor and model are loaded on every call, and the
    references to the model and its inputs are dropped before the function
    exits, whether it returns normally or raises.

    Args:
        image: The PIL image to caption.

    Returns:
        The strings produced by decoding the generated token IDs, with
        special tokens skipped.

    Raises:
        ValueError: If ``IMAGE_TO_TEXT_MODEL`` is unset or empty.
    """
    image_to_text_model_id = getenv("IMAGE_TO_TEXT_MODEL")
    if not image_to_text_model_id:
        # Fail early with an actionable message instead of handing None to
        # from_pretrained(), whose error would not mention the env variable.
        raise ValueError(
            "The IMAGE_TO_TEXT_MODEL environment variable must be set to a model ID."
        )

    pytorch_device = get_pytorch_device()

    # Pre-bind so the `del` in the finally clause is valid even when loading
    # fails before either name has been assigned.
    model = None
    inputs = None
    try:
        processor = AutoProcessor.from_pretrained(image_to_text_model_id)
        model = BlipForConditionalGeneration.from_pretrained(image_to_text_model_id).to(pytorch_device)
        inputs = processor(images=image, return_tensors="pt").to(pytorch_device)
        generated_ids = model.generate(
            pixel_values=inputs.pixel_values,
            num_beams=3,
            max_length=20,
            min_length=5,
        )
        return processor.batch_decode(generated_ids, skip_special_tokens=True)
    finally:
        # Drop the large objects on the error path too; a propagating
        # traceback keeps this frame (and its locals) alive otherwise.
        del model, inputs
        gc.collect()