| import numpy as np | |
| import torch | |
| import torchvision.transforms as T | |
| from decord import VideoReader, cpu | |
| from PIL import Image | |
| from torchvision.transforms.functional import InterpolationMode | |
| from transformers import AutoModel, AutoTokenizer | |
# Per-channel (R, G, B) mean used by the normalization step of the
# preprocessing pipeline.
IMAGENET_MEAN = (
    0.485,  # red
    0.456,  # green
    0.406,  # blue
)

# Per-channel (R, G, B) standard deviation paired with IMAGENET_MEAN.
IMAGENET_STD = (
    0.229,  # red
    0.224,  # green
    0.225,  # blue
)
def build_transform(input_size):
    """Build the preprocessing pipeline applied to a PIL image.

    The pipeline converts the image to RGB when it is in another mode,
    resizes it to a square of ``input_size`` x ``input_size`` pixels with
    bicubic interpolation, converts it to a tensor, and normalizes it with
    ``IMAGENET_MEAN`` / ``IMAGENET_STD``.

    Args:
        input_size: Side length, in pixels, of the square output image.

    Returns:
        A ``torchvision.transforms.Compose`` holding the four steps above.
    """

    def _ensure_rgb(img):
        # Leave images that are already RGB untouched; convert the rest.
        if img.mode == 'RGB':
            return img
        return img.convert('RGB')

    target_size = (input_size, input_size)
    steps = [
        T.Lambda(_ensure_rgb),
        T.Resize(target_size, interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
    return T.Compose(steps)
def load_image(image_file, input_size=800, max_num=12):
    """Load a single image and return it as a normalized, batched tensor.

    Args:
        image_file: Path or file object accepted by ``PIL.Image.open``.
        input_size: Side length, in pixels, that the image is resized to
            (passed straight to ``build_transform``).
        max_num: Currently unused by this function. It is kept in the
            signature so existing callers that pass it keep working.

    Returns:
        A ``torch.Tensor`` produced by stacking the one transformed image,
        i.e. a batch of size 1 (expected shape
        ``(1, 3, input_size, input_size)``).
    """
    # NOTE(review): confirm that the target model's vision encoder accepts
    # a single ``input_size`` x ``input_size`` image (default 800).

    # ``convert`` forces the pixel data to be read and returns a new image,
    # so the underlying file can be closed as soon as the ``with`` exits.
    with Image.open(image_file) as raw_image:
        image = raw_image.convert('RGB')

    transform = build_transform(input_size=input_size)

    # The previous code iterated over an undefined name ``images`` and
    # raised NameError on every call; there is exactly one image to process.
    pixel_values = torch.stack([transform(image)])
    return pixel_values
# Prompt sent to the model by ``main``. It lists every key to extract, how to
# report absent/empty fields, and the JSON layout expected in the answer.
# Compared with the previous inline literal, three defects are fixed: the
# missing comma after the "berlaku_sampai" entry of the JSON template, the
# unterminated ```json fence, and the "titel" typo in key 1.
_EXTRACTION_PROMPT = """<image>\n**Instruction:**
Analyze the image to extract values for the specified keys. Use the detailed descriptions below to determine the correct value for each key. Handle missing or ambiguous data as instructed.
---
### Keys and Descriptions
1. **`surat_tanda_nomor_kendaraan_bermotor`**
- **Extract**: The value of the field labeled as "Surat Tanda Nomor Kendaraan Bermotor" and this is the title.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
2. **`tempat_tanggal`**
- **Extract**: The location and date from the top right corner of the document.
- **Note**: This field does not have a title such as "Tempat - Tanggal."
- **Format**: `"CITY, DD MMM YYYY"` (e.g., `"JAKARTA, 07 DES 2018"`).
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
3. **`no`**
- **Extract**: The value in the "NO" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
4. **`nomor_registrasi`**
- **Extract**: The "NOMOR REGISTRASI" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
5. **`nama_pemilik`**
- **Extract**: The "NAMA PEMILIK" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
6. **`alamat`**
- **Extract**: The "ALAMAT" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
7. **`merk`**
- **Extract**: The "MERK" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
8. **`type`**
- **Extract**: The "TYPE" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
9. **`jenis`**
- **Extract**: The "JENIS" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
10. **`model`**
- **Extract**: The "MODEL" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
11. **`tahun_pembuatan`**
- **Extract**: The "TAHUN PEMBUATAN" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
12. **`isi_silinder_daya_listrik`**
- **Extract**: The "ISI SILINDER / DAYA LISTRIK" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
13. **`nomor_rangka`**
- **Extract**: The "NOMOR RANGKA" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
14. **`nomor_mesin`**
- **Extract**: The "NOMOR MESIN" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
15. **`nik_tdp_nie_kitas_kitap`**
- **Extract**: The "NIK/TDP/NIE/KITAS/KITAP" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
16. **`warna`**
- **Extract**: The "WARNA" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
17. **`bahan_bakar`**
- **Extract**: The "BAHAN BAKAR" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
18. **`warna_tnkb`**
- **Extract**: The "WARNA TNKB" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
19. **`tahun_registrasi`**
- **Extract**: The "TAHUN REGISTRASI" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
20. **`nomor_bpkb`**
- **Extract**: The "NOMOR BPKB" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
21. **`kode_lokasi`**
- **Extract**: The "KODE LOKASI" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
22. **`no_urut_pendaftaran`**
- **Extract**: The "NO URUT PENDAFTARAN" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
23. **`berlaku_sampai`**
- **Extract**: The "BERLAKU SAMPAI" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
24. **`qr_code`**
- **Extract**: The value encoded in the QR code, if present.
- **If No QR Code is Found**: `"null"`
- **If a QR Code is Present but Contains No Data**: `"empty"`
---
### Output Format
```json
{
"surat_tanda_nomor_kendaraan_bermotor": "<value> OR empty OR null",
"tempat_tanggal": "<value> OR empty OR null",
"no": "<value> OR empty OR null",
"nomor_registrasi": "<value> OR empty OR null",
"nama_pemilik": "<value> OR empty OR null",
"alamat": "<value> OR empty OR null",
"merk": "<value> OR empty OR null",
"type": "<value> OR empty OR null",
"jenis": "<value> OR empty OR null",
"model": "<value> OR empty OR null",
"tahun_pembuatan": "<value> OR empty OR null",
"isi_silinder_daya_listrik": "<value> OR empty OR null",
"nomor_rangka": "<value> OR empty OR null",
"nomor_mesin": "<value> OR empty OR null",
"nik_tdp_nie_kitas_kitap": "<value> OR empty OR null",
"warna": "<value> OR empty OR null",
"bahan_bakar": "<value> OR empty OR null",
"warna_tnkb": "<value> OR empty OR null",
"tahun_registrasi": "<value> OR empty OR null",
"nomor_bpkb": "<value> OR empty OR null",
"kode_lokasi": "<value> OR empty OR null",
"no_urut_pendaftaran": "<value> OR empty OR null",
"berlaku_sampai": "<value> OR empty OR null",
"qr_code" : "<value> OR empty OR null"
}
```
### Reasoning Process
For each key, explain your reasoning:
Indicate whether the field was present.
Justify the extracted value or the use of "null" or "empty" as per the conditions.
Return Output:
Generate a JSON object:
{
"reasoning": "reasoning for each key",
"output JSON": "key-value pairs"
}
---
"""


def main(image_path) -> str:
    """Run the extraction prompt against one image and return the transcript.

    Loads the ``OpenGVLab/InternVL2_5-4B`` model and tokenizer, preprocesses
    the image with ``load_image``, asks the model the extraction question and
    formats the exchange as text.

    Args:
        image_path: Location of the image to analyze; forwarded unchanged to
            ``load_image``.

    Returns:
        A string of the form ``"User: <question>\\nAssistant: <response>"``.
    """
    path = "OpenGVLab/InternVL2_5-4B"

    # trust_remote_code=True runs the modeling code shipped in the model
    # repository; only use it with repositories you trust.
    model = AutoModel.from_pretrained(
        path,
        torch_dtype=torch.bfloat16,
        load_in_8bit=True,
        low_cpu_mem_usage=True,
        use_flash_attn=True,
        trust_remote_code=True).eval()
    tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

    # Cast the image batch to bfloat16 (the dtype requested for the model)
    # and move it to the GPU.
    pixel_values = load_image(image_path, max_num=12).to(torch.bfloat16).cuda()

    # Sampling is enabled, so repeated runs on the same image may differ.
    generation_config = dict(max_new_tokens=1024, do_sample=True)

    question = _EXTRACTION_PROMPT
    response = model.chat(tokenizer, pixel_values, question, generation_config)
    return f'User: {question}\nAssistant: {response}'