sunbal7 committed on
Commit d6f9447 · verified · 1 Parent(s): adbb6e6

Delete app.py

Files changed (1)
1. app.py +0 -309
app.py DELETED
@@ -1,309 +0,0 @@
# app.py
import os
import io
import re
import json
import base64
import requests
from PIL import Image, ImageChops, ExifTags
import numpy as np
import streamlit as st
import cv2
import easyocr
import imagehash

st.set_page_config(page_title="DocVerify - Prototype", layout="wide")

# --- Config / Env ---
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")  # REQUIRED
GROQ_API_BASE = os.environ.get("GROQ_API_BASE", "https://api.groq.com/openai/v1")  # OpenAI-compatible base URL
GROQ_MODEL = os.environ.get("GROQ_MODEL", "llama-3.1-8b-instant")  # default Groq-hosted model; change if yours differs

if not GROQ_API_KEY:
    st.warning("Set the GROQ_API_KEY environment variable before running (see README).")

# Initialize OCR (easyocr downloads model weights on first run)
@st.cache_resource
def get_ocr_reader(lang_list=("en", "ur")):
    # easyocr supports many languages; English + Urdu by default
    try:
        reader = easyocr.Reader(list(lang_list), gpu=False)
    except Exception:
        # fall back to English only
        reader = easyocr.Reader(["en"], gpu=False)
    return reader

reader = get_ocr_reader()

# ---------- Utility functions ----------
def load_image(file):
    image = Image.open(file).convert("RGB")
    return image

def pdf_to_images(file_bytes):
    # lightweight: use pdf2image if available (it needs the external poppler tools), else ask the user to upload images
    try:
        from pdf2image import convert_from_bytes
        images = convert_from_bytes(file_bytes)
        # convert to RGB PIL images
        return [img.convert("RGB") for img in images]
    except Exception:
        return []

def image_to_cv2(img_pil):
    return cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)

def compute_ela(img_pil, quality=90):
    """
    Error Level Analysis: re-save at lower JPEG quality and compute the difference.
    Returns the amplified difference image (PIL) and a scalar anomaly score (mean difference).
    """
    temp = io.BytesIO()
    img_pil.save(temp, format="JPEG", quality=quality)
    temp.seek(0)
    compressed = Image.open(temp).convert("RGB")
    diff = ImageChops.difference(img_pil, compressed)
    # numeric anomaly score from the raw difference
    diff_np = np.array(diff).astype(np.float32)
    score = float(diff_np.mean())
    # amplify for visibility: scale so the brightest difference maps to 255
    extrema = diff.getextrema()
    max_diff = max(ex[1] for ex in extrema) or 1
    diff = diff.point(lambda p: min(255, int(p * 255.0 / max_diff)))
    return diff, score
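
# A follow-on sketch (not wired into the UI below): threshold the amplified
# ELA image into a binary "suspicious region" mask. The cutoff of 60 is an
# illustrative assumption, not a calibrated value.
def ela_suspicion_mask(ela_img_pil, threshold=60):
    gray = np.array(ela_img_pil.convert("L"))
    mask = (gray >= threshold).astype(np.uint8) * 255  # white = suspicious
    return Image.fromarray(mask)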

def read_exif_info(img_pil):
    try:
        exif = img_pil.getexif()  # public API; survives convert(), unlike _getexif()
        if not exif:
            return {}
        human = {}
        for tag, val in exif.items():
            decoded = ExifTags.TAGS.get(tag, tag)
            human[decoded] = val
        return human
    except Exception:
        return {}
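
# Typical decoded keys when EXIF is present include "DateTime", "Make",
# "Model", and "Software". Their absence is common for scans, screenshots,
# and re-saved images, so it is only a weak signal on its own.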

def ocr_image(img_pil):
    # returns (raw easyocr results [(bbox, text, confidence), ...], joined text)
    try:
        res = reader.readtext(np.array(img_pil))
    except Exception:
        # fallback: no detections
        res = []
    extracted_text = "\n".join([r[1] for r in res])
    return res, extracted_text

def signature_similarity(img_sig_pil, img_ref_pil):
    # compare perceptual hashes (average_hash) of the two signature images
    try:
        h1 = imagehash.average_hash(img_sig_pil.convert("L").resize((300, 100)))
        h2 = imagehash.average_hash(img_ref_pil.convert("L").resize((300, 100)))
        dist = h1 - h2  # Hamming distance, 0..64 for the default 8x8 hash
        # map distance to a similarity score in [0, 1]; /20 is a heuristic scale
        score = max(0.0, 1.0 - (dist / 20.0))
        return float(score), int(dist)
    except Exception:
        return None, None
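
# Quick sanity check of the mapping above (illustrative; not run by the app):
#   blank = Image.new("L", (300, 100), 255)
#   signature_similarity(blank, blank)  # -> (1.0, 0): distance 0, identical
# A hash distance of 10 maps to score 0.5; any distance >= 20 floors at 0.0.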

def call_groq_llm(prompt_text: str, model=GROQ_MODEL, base_url=GROQ_API_BASE, api_key=GROQ_API_KEY):
    """
    Calls a Groq OpenAI-compatible endpoint. Payload is minimal: model + input.
    Response parsing is tolerant of a few shapes.
    """
    if not api_key:
        raise ValueError("GROQ_API_KEY not provided")
    url = base_url.rstrip("/") + "/responses"
    headers = {"Authorization": f"Bearer {api_key}"}
    payload = {"model": model, "input": prompt_text, "max_output_tokens": 512}
    # If the Groq endpoint you run differs, adjust base_url/model.
    r = requests.post(url, headers=headers, json=payload, timeout=60)
    r.raise_for_status()
    j = r.json()
    # Try a few common return shapes
    if "output_text" in j:
        return j["output_text"]
    # newer Responses API: output -> [{"content": [{"type": "output_text", "text": "..."}]}]
    try:
        out = j.get("output", [])
        if out and isinstance(out, list):
            c = out[0].get("content", [])
            for item in c:
                if item.get("type") == "output_text" and "text" in item:
                    return item["text"]
            # fallback: join any text fields
            texts = [item["text"] for item in c if "text" in item]
            if texts:
                return "\n".join(texts)
    except Exception:
        pass
    # final fallback: return pretty-printed JSON
    return json.dumps(j, indent=2)
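
# Minimal alternative sketch, assuming the OpenAI-compatible /chat/completions
# route (which Groq also serves). call_groq_chat is a hypothetical helper and
# is not used by the UI below.
def call_groq_chat(prompt_text: str, model=GROQ_MODEL, base_url=GROQ_API_BASE, api_key=GROQ_API_KEY):
    url = base_url.rstrip("/") + "/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}"}
    payload = {"model": model, "messages": [{"role": "user", "content": prompt_text}]}
    r = requests.post(url, headers=headers, json=payload, timeout=60)
    r.raise_for_status()
    return r.json()["choices"][0]["message"]["content"]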

# ---------- Streamlit UI ----------
st.title("DocVerify — Prototype (OCR + ELA + Groq LLM)")

with st.sidebar:
    st.header("Upload options")
    uploaded = st.file_uploader("Upload document (image or PDF)", type=["png", "jpg", "jpeg", "pdf"], accept_multiple_files=False)
    ref_sig = st.file_uploader("(Optional) Reference signature image for comparison", type=["png", "jpg", "jpeg"])
    st.markdown("---")
    st.write("Settings:")
    st.slider("ELA quality (lower -> more difference shown)", 50, 98, 90, key="ela_q")
    st.checkbox("Show raw OCR result", value=True, key="show_ocr")
    st.checkbox("Run Groq LLM analysis (requires GROQ_API_KEY)", value=True, key="use_groq")
    st.markdown("---")
    st.info("This is a prototype. Do not rely on it as legal evidence. See README for details.")

if not uploaded:
    st.info("Upload a document image or PDF to begin.")
    st.stop()

# handle the uploaded file
file_bytes = uploaded.read()
images = []

if uploaded.type == "application/pdf" or uploaded.name.lower().endswith(".pdf"):
    imgs = pdf_to_images(file_bytes)
    if not imgs:
        st.error("PDF processing requires pdf2image; if it is unavailable, upload images instead.")
        st.stop()
    images = imgs
else:
    images = [load_image(io.BytesIO(file_bytes))]

# page selection and preview
page_idx = st.number_input("Page index", min_value=0, max_value=len(images) - 1, value=0, step=1)
img = images[page_idx]
st.subheader(f"Document preview (page {page_idx})")
st.image(img, use_column_width=True)

# EXIF
exif = read_exif_info(img)
if exif:
    st.write("Detected metadata (EXIF):", exif)
else:
    st.write("No EXIF metadata detected.")

# OCR
with st.spinner("Running OCR..."):
    ocr_results, extracted_text = ocr_image(img)
if st.session_state.show_ocr:
    st.subheader("OCR extracted text")
    st.text_area("Extracted text (raw)", value=extracted_text, height=200)

# ELA
with st.spinner("Running ELA..."):
    ela_img, ela_score = compute_ela(img, quality=st.session_state.ela_q)
st.subheader("Error Level Analysis (ELA)")
st.write(f"ELA mean diff score: {ela_score:.3f} (higher usually means more manipulation)")
buf = io.BytesIO()
ela_img.save(buf, format="PNG")
st.image(buf.getvalue(), caption="ELA difference image — bright regions may indicate changes", use_column_width=True)

# Signature similarity (if the user provided a reference)
sig_score = None
sig_dist = None
if ref_sig:
    ref_img = load_image(ref_sig)
    # Auto-cropping the signature region (e.g. the largest dark component near
    # the bottom-right) is a possible improvement; in this prototype the user
    # crops manually.
    st.subheader("Signature comparison (user-supplied reference)")
    st.write("Reference signature (uploaded):")
    st.image(ref_img, width=200)
    st.write("Crop the signature region from the document preview for comparison.")
    col1, col2 = st.columns(2)
    with col1:
        st.write("Manual signature crop (enter bounding box in pixels):")
        x = st.number_input("x", min_value=0, max_value=img.width - 1, value=int(img.width * 0.6))
        y = st.number_input("y", min_value=0, max_value=img.height - 1, value=int(img.height * 0.7))
        w = st.number_input("w", min_value=10, max_value=img.width, value=int(img.width * 0.35))
        h = st.number_input("h", min_value=10, max_value=img.height, value=int(img.height * 0.15))
    with col2:
        crop_btn = st.button("Crop & Compare")
    if crop_btn:
        x2 = min(img.width, x + w)
        y2 = min(img.height, y + h)
        doc_sig = img.crop((x, y, x2, y2))
        st.image(doc_sig, caption="Cropped signature from document", width=300)
        sig_score, sig_dist = signature_similarity(doc_sig, ref_img)
        if sig_score is not None:
            st.write(f"Signature similarity score: {sig_score:.3f} (higher = more similar). Hash distance: {sig_dist}")
        else:
            st.write("Could not compute signature similarity.")

# Simple heuristics summary
heuristics = []
heuristics.append({"name": "ela_score", "value": ela_score, "interpretation": "higher may indicate manipulated areas"})
heuristics.append({"name": "has_exif", "value": bool(exif)})
if sig_score is not None:
    heuristics.append({"name": "signature_similarity", "value": sig_score})

st.subheader("Heuristic summary")
st.json(heuristics)

# Build evidence package
evidence = {
    "file_name": uploaded.name,
    "page_index": page_idx,
    "ocr_text_snippet": extracted_text[:2000],
    "ocr_full_text": extracted_text,
    "ela_score": ela_score,
    "exif": exif,
    "signature_similarity": sig_score,
    "notes": []
}

# Basic field extraction from OCR: a naive search for a CNIC-like pattern
cnic_match = re.search(r"\d{5}-\d{7}-\d", extracted_text)
if cnic_match:
    evidence["detected_cnic"] = cnic_match.group(0)
    evidence["notes"].append("Found CNIC-like pattern")
else:
    evidence["notes"].append("No CNIC-like pattern found")

# Prepare prompt for LLM (default=str keeps exotic EXIF values JSON-serializable)
prompt = f"""
You are a document verification assistant. I will give you a JSON 'evidence' object with results from OCR, ELA, EXIF, signature comparison, and heuristics.
Produce:
1) A short verdict (one sentence) with confidence (low/medium/high).
2) A bullet list of concrete findings (2-6 bullets).
3) Suggested next steps for verification (3-5 actionable items).
4) A caution / legal note to show the user.

Evidence JSON:
{json.dumps(evidence, indent=2, default=str)}
"""

st.subheader("LLM Analysis / Report")
if st.session_state.use_groq:
    try:
        with st.spinner("Calling Groq LLM for analysis..."):
            llm_out = call_groq_llm(prompt)
        st.text_area("LLM report", value=llm_out, height=320)
    except Exception as e:
        st.error(f"Error calling Groq LLM: {e}\nMake sure GROQ_API_KEY and GROQ_API_BASE are set and the endpoint is reachable.")
else:
    st.info("Groq LLM analysis disabled. Enable 'Run Groq LLM analysis' in the sidebar to call the model.")

# Audit / download
st.subheader("Export evidence")
if st.button("Download evidence JSON"):
    payload_bytes = json.dumps(evidence, indent=2, default=str).encode("utf-8")
    b64 = base64.b64encode(payload_bytes).decode()
    href = f'<a href="data:application/json;base64,{b64}" download="evidence_{uploaded.name}.json">Download evidence JSON</a>'
    st.markdown(href, unsafe_allow_html=True)
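
# Alternative sketch using Streamlit's built-in widget instead of a data-URI link:
# st.download_button("Download evidence JSON",
#                    data=json.dumps(evidence, indent=2, default=str),
#                    file_name=f"evidence_{uploaded.name}.json",
#                    mime="application/json")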

st.markdown("---")
st.markdown("**Notes:** This prototype provides *indications* — not legally certified results. For high-stakes verification, involve certified forensic/document examiners and official government APIs.")