Spaces:

Sp2503
/

BERT_2.0

Sleeping

App Files Files Community

Sai809701 committed on Oct 29, 2025

Commit

5d68747

1 Parent(s): ad25530

first commit

Browse files

Files changed (9) hide show

BERT_Model_10000/config.json +130 -0
BERT_Model_10000/special_tokens_map.json +7 -0
BERT_Model_10000/tokenizer.json +0 -0
BERT_Model_10000/tokenizer_config.json +56 -0
BERT_Model_10000/vocab.txt +0 -0
Dockerfile +24 -0
app.py +110 -0
requirements.txt +5 -0
womens_legal_rights_india_10000.json +0 -0

BERT_Model_10000/config.json ADDED Viewed

	@@ -0,0 +1,130 @@

+{
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "dtype": "float32",
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "Adoption",
+    "1": "Alimony",
+    "2": "Child Custody",
+    "3": "Child Support",
+    "4": "Consumer Rights",
+    "5": "Contracts & Consent",
+    "6": "Court Orders",
+    "7": "Criminal Law",
+    "8": "Cyber Harassment",
+    "9": "Domestic Violence",
+    "10": "Dowry Harassment",
+    "11": "Education Rights",
+    "12": "Employment Contracts",
+    "13": "Equal Pay",
+    "14": "Harassment Online",
+    "15": "Healthcare Rights",
+    "16": "Housing Rights",
+    "17": "Human Trafficking",
+    "18": "Immigrant Rights",
+    "19": "Insurance",
+    "20": "Legal Aid",
+    "21": "Legal Procedures",
+    "22": "Maintenance",
+    "23": "Marriage Rights",
+    "24": "Maternity Leave",
+    "25": "Medical Negligence",
+    "26": "Mental Health",
+    "27": "Mobile Privacy",
+    "28": "NGO Support",
+    "29": "POCSO",
+    "30": "POSH",
+    "31": "Passport Issues",
+    "32": "Pension Rights",
+    "33": "Police Complaint",
+    "34": "Property Rights",
+    "35": "Registration of Marriage",
+    "36": "Reproductive Rights",
+    "37": "Reservation",
+    "38": "Right to Information",
+    "39": "Sexual Assault",
+    "40": "Sexual and Reproductive Health",
+    "41": "Special Schemes",
+    "42": "Stalking",
+    "43": "Succession Law",
+    "44": "Surrogacy",
+    "45": "Trafficking",
+    "46": "Travel Consent",
+    "47": "Witness Protection",
+    "48": "Workplace Benefits",
+    "49": "Workplace Harassment"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "Adoption": 0,
+    "Alimony": 1,
+    "Child Custody": 2,
+    "Child Support": 3,
+    "Consumer Rights": 4,
+    "Contracts & Consent": 5,
+    "Court Orders": 6,
+    "Criminal Law": 7,
+    "Cyber Harassment": 8,
+    "Domestic Violence": 9,
+    "Dowry Harassment": 10,
+    "Education Rights": 11,
+    "Employment Contracts": 12,
+    "Equal Pay": 13,
+    "Harassment Online": 14,
+    "Healthcare Rights": 15,
+    "Housing Rights": 16,
+    "Human Trafficking": 17,
+    "Immigrant Rights": 18,
+    "Insurance": 19,
+    "Legal Aid": 20,
+    "Legal Procedures": 21,
+    "Maintenance": 22,
+    "Marriage Rights": 23,
+    "Maternity Leave": 24,
+    "Medical Negligence": 25,
+    "Mental Health": 26,
+    "Mobile Privacy": 27,
+    "NGO Support": 28,
+    "POCSO": 29,
+    "POSH": 30,
+    "Passport Issues": 31,
+    "Pension Rights": 32,
+    "Police Complaint": 33,
+    "Property Rights": 34,
+    "Registration of Marriage": 35,
+    "Reproductive Rights": 36,
+    "Reservation": 37,
+    "Right to Information": 38,
+    "Sexual Assault": 39,
+    "Sexual and Reproductive Health": 40,
+    "Special Schemes": 41,
+    "Stalking": 42,
+    "Succession Law": 43,
+    "Surrogacy": 44,
+    "Trafficking": 45,
+    "Travel Consent": 46,
+    "Witness Protection": 47,
+    "Workplace Benefits": 48,
+    "Workplace Harassment": 49
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "transformers_version": "4.57.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

BERT_Model_10000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

BERT_Model_10000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

BERT_Model_10000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

BERT_Model_10000/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

Dockerfile ADDED Viewed

	@@ -0,0 +1,24 @@

+# Start from a slim Python 3.10 base image
+FROM python:3.10-slim
+# Set the working directory inside the container
+WORKDIR /app
+# Copy the requirements file first to leverage Docker cache
+COPY requirements.txt .
+# Install the Python dependencies
+# --no-cache-dir saves space
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the application files
+# This includes app.py, the model folder (BERT_Model_10000) and the knowledge-base JSON
+COPY . .
+# Hugging Face Docker Spaces route traffic to port 7860 by default (configurable via app_port in the Space README)
+EXPOSE 7860
+# Command to run the FastAPI server using uvicorn
+# 0.0.0.0 makes it accessible outside the container
+# app:app refers to the 'app' object in the 'app.py' file
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import torch
+import json
+from fastapi import FastAPI
+from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
+# --- 1. Load Model and Tokenizer ---
+# Define the path to your trained model
+MODEL_PATH = "./BERT_Model_10000"
+# Define the path to your knowledge base
+KNOWLEDGE_BASE_PATH = "womens_legal_rights_india_10000.json"
+print("Loading tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
+print("Loading classification model...")
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
+# Set device to GPU (cuda:0) if available, otherwise CPU
+device = 0 if torch.cuda.is_available() else -1
+print(f"Creating classification pipeline on device: {'cuda' if device == 0 else 'cpu'}...")
+# Create the text-classification pipeline
+classifier = pipeline(
+    "text-classification",
+    model=model,
+    tokenizer=tokenizer,
+    device=device
+)
+print("Classification pipeline loaded successfully.")
+# --- 2. Load Knowledge Base (Answers) ---
+intent_to_answer_map = {}
+print(f"Loading knowledge base from: {KNOWLEDGE_BASE_PATH}")
+try:
+    with open(KNOWLEDGE_BASE_PATH, 'r', encoding='utf-8') as f:
+        knowledge_base_data = json.load(f)
+    # Create a simple lookup map: Intent -> Answer
+    # Entries sharing an intent collapse to a single answer (the last one in the file wins), so this assumes every question of an intent has the same answer.
+    intent_to_answer_map = {
+        item['intent']: item['answer'] for item in knowledge_base_data
+    }
+    print(f"Knowledge base loaded with {len(intent_to_answer_map)} intent-to-answer mappings.")
+except FileNotFoundError:
+    print(f"CRITICAL ERROR: Knowledge base file not found at {KNOWLEDGE_BASE_PATH}")
+    # In a real app, you might want to exit if the KB can't be loaded
+except Exception as e:
+    print(f"Error loading knowledge base: {e}")
+# --- 3. Initialize FastAPI App ---
+app = FastAPI(
+    title="Legal Intent & Answer API",
+    description="API to predict the intent of a legal question and provide a suitable answer.",
+    version="1.1.0"
+)
+# --- 4. Define Request and Response Models ---
+# This is what the user must send in their POST request
+class Query(BaseModel):
+    text: str
+# This is what the API will return
+class PredictionResponse(BaseModel):
+    query: str
+    predicted_intent: str
+    confidence_score: float
+    answer: str
+# --- 5. Define API Endpoints ---
+@app.get("/")
+def read_root():
+    """
+    Root endpoint for health check.
+    """
+    return {"status": "API is running",
+            "message": "Post to /predict with a 'text' field to get an intent and answer."}
+@app.post("/predict", response_model=PredictionResponse)
+def predict_intent(query: Query):
+    """
+    Predicts the intent of a given legal question and provides a suitable answer.
+    """
+    print(f"Received query: {query.text}")
+    # 1. Get prediction from model
+    model_result = classifier(query.text)[0]
+    predicted_intent = model_result['label']
+    confidence_score = model_result['score']
+    # 2. Retrieve answer from our knowledge base
+    fallback_answer = "Could not find a specific answer for this intent. Please rephrase your question or contact a legal professional for advice."
+    answer = intent_to_answer_map.get(predicted_intent, fallback_answer)
+    # 3. Return the combined response
+    return {
+        "query": query.text,
+        "predicted_intent": predicted_intent,
+        "confidence_score": confidence_score,
+        "answer": answer
+    }

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+fastapi
+uvicorn[standard]
+torch
+transformers
+pydantic

womens_legal_rights_india_10000.json ADDED Viewed

The diff for this file is too large to render. See raw diff