# app.py
import os
import io
import json
import tempfile
import base64
import requests
from PIL import Image, ImageChops, ImageOps, ExifTags
import numpy as np
import streamlit as st
import cv2
import easyocr
import imagehash

st.set_page_config(page_title="DocVerify - Prototype", layout="wide")

# --- Config / Env ---
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")  # REQUIRED
GROQ_API_BASE = os.environ.get("GROQ_API_BASE", "https://api.groq.com/openai/v1")  # default pattern (OpenAI-compatible)
GROQ_MODEL = os.environ.get("GROQ_MODEL", "gpt-4o-mini")  # placeholder default; set GROQ_MODEL to a model your endpoint actually serves

if not GROQ_API_KEY:
    st.warning("Set the GROQ_API_KEY environment variable before running (see README).")

# Initialize OCR
@st.cache_resource
def get_ocr_reader(lang_list=("en", "ur")):
    # easyocr supports many languages; using English + Urdu as default
    # (tuple default avoids a mutable default argument and keeps the cached argument hashable)
    try:
        reader = easyocr.Reader(list(lang_list), gpu=False)
    except Exception:
        # fallback to English only
        reader = easyocr.Reader(["en"], gpu=False)
    return reader

reader = get_ocr_reader()

# ---------- Utility functions ----------
def load_image(file):
    image = Image.open(file).convert("RGB")
    return image

def pdf_to_images(file_bytes):
    # lightweight: use pdf2image if available, else ask user to upload images
    try:
        from pdf2image import convert_from_bytes
        images = convert_from_bytes(file_bytes)
        # convert to RGB PIL images
        return [img.convert("RGB") for img in images]
    except Exception:
        return []

def image_to_cv2(img_pil):
    return cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)

def compute_ela(img_pil, quality=90):
    """
    Error Level Analysis: save at lower quality and compute difference.
    Returns an image (PIL) and a scalar anomaly score (mean difference).
    """
    temp = io.BytesIO()
    img_pil.save(temp, format="JPEG", quality=quality)
    temp.seek(0)
    compressed = Image.open(temp).convert("RGB")
    diff = ImageChops.difference(img_pil, compressed)
    # numeric anomaly score from the raw (un-amplified) difference
    diff_np = np.array(diff).astype(np.float32)
    score = float(diff_np.mean())
    # amplify for visibility: stretch the largest per-band difference up to 255
    extrema = diff.getextrema()
    max_diff = max(ex[1] for ex in extrema) or 1
    scale = 255.0 / max_diff
    diff = diff.point(lambda p: min(255, int(p * scale)))
    # return difference image and score
    return diff, score

def read_exif_info(img_pil):
    # use the public getexif() API; _getexif() only exists on freshly opened
    # JPEG files and is gone after convert("RGB") in load_image()
    try:
        exif = img_pil.getexif()
        if not exif:
            return {}
        human = {}
        for tag, val in exif.items():
            decoded = ExifTags.TAGS.get(tag, tag)
            human[decoded] = val
        return human
    except Exception:
        return {}

def ocr_image(img_pil):
    # returns list of results: [(bbox, text, confidence), ...]
    try:
        res = reader.readtext(np.array(img_pil))
    except Exception:
        # fallback: empty
        res = []
    extracted_text = "\n".join([r[1] for r in res])
    return res, extracted_text

def signature_similarity(img_sig_pil, img_ref_pil):
    # compute perceptual hash difference (average_hash)
    try:
        h1 = imagehash.average_hash(img_sig_pil.convert("L").resize((300, 100)))
        h2 = imagehash.average_hash(img_ref_pil.convert("L").resize((300, 100)))
        dist = h1 - h2
        # transform to similarity score in [0, 1]
        score = max(0.0, 1.0 - (dist / 20.0))
        return float(score), int(dist)
    except Exception:
        return None, None

def call_groq_llm(prompt_text: str, model=GROQ_MODEL, base_url=GROQ_API_BASE, api_key=GROQ_API_KEY):
    """
    Calls a Groq OpenAI-compatible endpoint.
    Payload is minimal: model + input. Response parsing is tolerant of a few shapes.
    """
    if not api_key:
        raise ValueError("GROQ_API_KEY not provided")
    url = base_url.rstrip("/") + "/responses"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {"model": model, "input": prompt_text, "max_output_tokens": 512}
    # If the Groq endpoint you run differs, adjust base_url/model.
    r = requests.post(url, headers=headers, data=json.dumps(payload), timeout=60)
    r.raise_for_status()
    j = r.json()
    # Try a few common return shapes
    if "output_text" in j:
        return j["output_text"]
    # newer responses API: look into output -> [ { "content": [{"type": "output_text", "text": "..."}] } ]
    try:
        out = j.get("output", [])
        if out and isinstance(out, list):
            c = out[0].get("content", [])
            for item in c:
                if item.get("type") == "output_text" and "text" in item:
                    return item["text"]
            # fallback: string-join text fields
            texts = []
            for item in c:
                if "text" in item:
                    texts.append(item["text"])
            if texts:
                return "\n".join(texts)
    except Exception:
        pass
    # final fallback: return pretty json
    return json.dumps(j, indent=2)

# ---------- Streamlit UI ----------
st.title("DocVerify — Prototype (OCR + ELA + Groq LLM)")

with st.sidebar:
    st.header("Upload options")
    uploaded = st.file_uploader("Upload document (image or PDF)", type=["png", "jpg", "jpeg", "pdf"], accept_multiple_files=False)
    ref_sig = st.file_uploader("(Optional) Reference signature image for comparison", type=["png", "jpg", "jpeg"])
    st.markdown("---")
    st.write("Settings:")
    st.slider("ELA quality (lower -> more difference shown)", 50, 98, 90, key="ela_q")
    st.checkbox("Show raw OCR result", value=True, key="show_ocr")
    st.checkbox("Run Groq LLM analysis (requires GROQ_API_KEY)", value=True, key="use_groq")
    st.markdown("---")
    st.info("This is a prototype. Do not rely on it as legal evidence. See README for details.")

if not uploaded:
    st.info("Upload a document image or PDF to begin.")
    st.stop()

# handle uploaded file
file_bytes = uploaded.read()
file_type = uploaded.type

images = []
if uploaded.type == "application/pdf" or uploaded.name.lower().endswith(".pdf"):
    imgs = pdf_to_images(file_bytes)
    if not imgs:
        st.error("PDF processing requires pdf2image; if unavailable, upload images instead.")
        st.stop()
    images = imgs
else:
    images = [load_image(io.BytesIO(file_bytes))]

# show first page
page_idx = st.number_input("Page index", min_value=0, max_value=len(images) - 1, value=0, step=1)
img = images[page_idx]
st.subheader("Document preview (page %d)" % page_idx)
st.image(img, use_column_width=True)

# EXIF
exif = read_exif_info(img)
if exif:
    st.write("Detected metadata (EXIF):", exif)
else:
    st.write("No EXIF metadata detected.")

# OCR
with st.spinner("Running OCR..."):
    ocr_results, extracted_text = ocr_image(img)

if st.session_state.show_ocr:
    st.subheader("OCR extracted text")
    st.text_area("Extracted text (raw)", value=extracted_text, height=200)

# ELA
with st.spinner("Running ELA..."):
    ela_img, ela_score = compute_ela(img, quality=st.session_state.ela_q)

st.subheader("Error Level Analysis (ELA)")
st.write(f"ELA mean diff score: {ela_score:.3f} (higher usually => more manipulated)")
buf = io.BytesIO()
ela_img.save(buf, format="PNG")
st.image(buf.getvalue(), caption="ELA difference image — bright regions may indicate changes", use_column_width=True)

# Signature similarity (if user provided)
sig_score = None
sig_dist = None
if ref_sig:
    ref_img = load_image(ref_sig)
    # attempt to auto-crop signature region by heuristics: find largest dark connected component near bottom-right
    # For prototype, allow user to crop manually by simple resize
    st.subheader("Signature comparison (user-supplied reference)")
    st.write("Reference signature (uploaded):")
    st.image(ref_img, width=200)
    # let user optionally crop region from document for comparison
    st.write("Crop the signature region from the document preview for comparison.")
    col1, col2 = st.columns(2)
    with col1:
        st.write("Manual signature crop (enter bounding box in pixels):")
        x = st.number_input("x", min_value=0, max_value=img.width - 1, value=int(img.width * 0.6))
        y = st.number_input("y", min_value=0, max_value=img.height - 1, value=int(img.height * 0.7))
        w = st.number_input("w", min_value=10, max_value=img.width, value=int(img.width * 0.35))
        h = st.number_input("h", min_value=10, max_value=img.height, value=int(img.height * 0.15))
    with col2:
        crop_btn = st.button("Crop & Compare")
    if crop_btn:
        x2 = min(img.width, x + w)
        y2 = min(img.height, y + h)
        doc_sig = img.crop((x, y, x2, y2))
        st.image(doc_sig, caption="Cropped signature from document", width=300)
        sig_score, sig_dist = signature_similarity(doc_sig, ref_img)
        if sig_score is not None:
            st.write(f"Signature similarity score: {sig_score:.3f} (higher = more similar). Hash distance: {sig_dist}")
        else:
            st.write("Could not compute signature similarity.")

# Simple heuristics summary
heuristics = []
heuristics.append({"name": "ela_score", "value": ela_score, "interpretation": "higher may indicate manipulated areas"})
if exif:
    heuristics.append({"name": "has_exif", "value": True})
else:
    heuristics.append({"name": "has_exif", "value": False})
if sig_score is not None:
    heuristics.append({"name": "signature_similarity", "value": sig_score})

st.subheader("Heuristic summary")
st.json(heuristics)

# Build evidence package
evidence = {
    "file_name": uploaded.name,
    "page_index": page_idx,
    "ocr_text_snippet": extracted_text[:2000],
    "ocr_full_text": extracted_text,
    "ela_score": ela_score,
    "exif": exif,
    "signature_similarity": sig_score,
    "notes": []
}

# Add basic field extractions from OCR (naive searching for CNIC pattern)
import re
cnic_match = re.search(r"\d{5}-\d{7}-\d", extracted_text)
if cnic_match:
    evidence["detected_cnic"] = cnic_match.group(0)
    evidence["notes"].append("Found CNIC-like pattern")
else:
    evidence["notes"].append("No CNIC-like pattern found")

# Prepare prompt for LLM (default=str so non-JSON-serializable EXIF values don't break the dump)
prompt = f"""
You are a document verification assistant. I will give you a JSON 'evidence' object with results from OCR, ELA, EXIF, signature comparison, and heuristics.
Produce:
1) Short verdict (one sentence) with confidence (low/medium/high).
2) Bullet list of concrete findings (2-6 bullets).
3) Suggested next steps for verification (3-5 actionable things).
4) Caution / legal note to show the user.

Evidence JSON:
{json.dumps(evidence, indent=2, default=str)}
"""

st.subheader("LLM Analysis / Report")
if st.session_state.use_groq:
    try:
        with st.spinner("Calling Groq LLM for analysis..."):
            llm_out = call_groq_llm(prompt)
        st.text_area("LLM report", value=llm_out, height=320)
    except Exception as e:
        st.error(f"Error calling Groq LLM: {e}\nMake sure GROQ_API_KEY and GROQ_API_BASE are set and endpoint is reachable.")
else:
    st.info("Groq LLM analysis disabled. Enable 'Run Groq LLM analysis' in sidebar to call the model.")

# Audit / download
st.subheader("Export evidence")
if st.button("Download evidence JSON"):
    b = io.BytesIO()
    b.write(json.dumps(evidence, indent=2, default=str).encode("utf-8"))
    b.seek(0)
    b64 = base64.b64encode(b.read()).decode()
    href = f'<a href="data:application/json;base64,{b64}" download="evidence.json">Download evidence JSON</a>'
    st.markdown(href, unsafe_allow_html=True)

st.markdown("---")
st.markdown("**Notes:** This prototype provides *indications* — not legally certified results. For high-stakes verification, involve certified forensic/document examiners and official government APIs.")
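
# --- Running the prototype (illustrative note) ---
# A minimal local run, assuming the imports above map to these pip packages:
# streamlit, easyocr, opencv-python, numpy, pillow, imagehash, requests
# (pdf2image plus a poppler install only if you need PDF uploads):
#
#   export GROQ_API_KEY="..."   # required for the LLM report section
#   streamlit run app.py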