Spaces:

LiKenun
/

ai-building-blocks

Running on Zero

ai-building-blocks / image_to_text.py

Add image captioning sample

d56b9d9 about 1 month ago

833 Bytes

	import gc
	from os import getenv
	from PIL.Image import Image
	from transformers import AutoProcessor, BlipForConditionalGeneration
	from utils import get_pytorch_device, spaces_gpu


	@spaces_gpu
	def image_to_text(image: Image) -> list[str]:
	    """Generate caption text for an image with a BLIP captioning model.

	    The processor and model are loaded on every call from the checkpoint
	    named by the ``IMAGE_TO_TEXT_MODEL`` environment variable, and the
	    references to the model and its tensors are dropped again before the
	    function exits, whether it succeeds or raises.

	    NOTE(review): ``spaces_gpu`` is imported from ``utils``; presumably it
	    requests a GPU when running on Hugging Face Spaces -- confirm there.

	    Args:
	        image: The PIL image to caption.

	    Returns:
	        The strings produced by the processor's ``batch_decode`` for the
	        generated token ids, with special tokens skipped.

	    Raises:
	        RuntimeError: If ``IMAGE_TO_TEXT_MODEL`` is unset or empty.
	    """
	    image_to_text_model_id = getenv("IMAGE_TO_TEXT_MODEL")
	    if not image_to_text_model_id:
	        # Fail early with a clear message instead of handing None to
	        # from_pretrained and getting an opaque library error.
	        raise RuntimeError("The IMAGE_TO_TEXT_MODEL environment variable is not set.")
	    pytorch_device = get_pytorch_device()
	    processor = AutoProcessor.from_pretrained(image_to_text_model_id)
	    model = BlipForConditionalGeneration.from_pretrained(image_to_text_model_id).to(pytorch_device)
	    # Pre-bind so the cleanup in ``finally`` is valid even if an early step fails.
	    inputs = None
	    generated_ids = None
	    try:
	        inputs = processor(images=image, return_tensors="pt").to(pytorch_device)
	        # Generation settings are unchanged: 3 beams, length limits 5 and 20.
	        generated_ids = model.generate(
	            pixel_values=inputs.pixel_values,
	            num_beams=3,
	            max_length=20,
	            min_length=5,
	        )
	        return processor.batch_decode(generated_ids, skip_special_tokens=True)
	    finally:
	        # Drop the references on both the success and the error path, then
	        # force a collection so the objects can be reclaimed promptly.
	        del model, inputs, generated_ids
	        gc.collect()