Pulastya0 committed on
Commit
84135ff
·
1 Parent(s): ebc813e

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +52 -67
main.py CHANGED
@@ -1,14 +1,15 @@
1
  import os
2
  import json
3
  import chromadb
4
- from fastapi import FastAPI, HTTPException, Depends
5
  from pydantic import BaseModel, Field
6
  from typing import List
7
  import firebase_admin
8
  from firebase_admin import credentials, firestore
9
- from llm_handler import initialize_llm, get_rag_response
10
- import llm_handler
11
  from encoder import SentenceEncoder
 
 
12
 
13
  # --- Pydantic Models ---
14
  class UserProfile(BaseModel):
@@ -19,8 +20,6 @@ class UserProfile(BaseModel):
19
  class SearchQuery(BaseModel):
20
  query: str = Field(..., example="marketing internship in mumbai")
21
 
22
- # --- SCHEMA CHANGED HERE ---
23
- # Reverted to use 'id' and 'skills'
24
  class InternshipData(BaseModel):
25
  id: str = Field(..., example="int_021")
26
  title: str
@@ -43,35 +42,40 @@ class ChatMessage(BaseModel):
43
  class ChatResponse(BaseModel):
44
  response: str
45
 
46
- # --- FastAPI App & Firebase Initialization ---
47
  app = FastAPI(
48
- title="Internship Recommendation API",
49
- description="An API using Firestore for metadata, and ChromaDB for vector search.",
50
- version="2.1.0"
51
  )
52
 
53
- # Initialize Firebase ONCE at startup
 
54
  try:
55
- if 'FIREBASE_CREDS_JSON' in os.environ:
56
- creds_dict = json.loads(os.environ.get('FIREBASE_CREDS_JSON'))
 
57
  cred = credentials.Certificate(creds_dict)
 
 
 
 
58
  else:
 
59
  cred = credentials.Certificate('serviceAccountKey.json')
60
-
61
- firebase_admin.initialize_app(cred)
62
- db = firestore.client()
63
- print("✅ Firebase connection initialized.")
64
  except Exception as e:
65
  print(f"❌ Could not initialize Firebase. Error: {e}")
66
- db = None
67
 
68
- # Dependency to provide the db client
69
  def get_db():
70
  if db is None:
71
  raise HTTPException(status_code=503, detail="Firestore connection not available.")
72
  return db
73
 
74
- # --- Global Variables for Model and ChromaDB ---
75
  encoder = None
76
  chroma_collection = None
77
 
@@ -82,13 +86,21 @@ def load_model_and_data():
82
  print("🚀 Loading sentence encoder model...")
83
  encoder = SentenceEncoder()
84
 
85
- client = chromadb.PersistentClient(path="/content/chroma_db")
 
 
 
 
86
  chroma_collection = client.get_or_create_collection(name="internships")
87
 
88
  print("✅ ChromaDB client initialized and collection is ready.")
89
  print(f" - Internships in DB: {chroma_collection.count()}")
 
 
90
  llm_handler.encoder = encoder
91
  llm_handler.chroma_collection = chroma_collection
 
 
92
  initialize_llm()
93
 
94
  # --- API Endpoints ---
@@ -96,90 +108,63 @@ def load_model_and_data():
96
  def read_root():
97
  return {"message": "Welcome to the Internship Recommendation API!"}
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  @app.post("/add-internship", response_model=StatusResponse)
100
  def add_internship(internship: InternshipData, db_client: firestore.Client = Depends(get_db)):
101
  if chroma_collection is None or encoder is None:
102
  raise HTTPException(status_code=503, detail="Server is not ready.")
103
-
104
- # --- SCHEMA CHANGED HERE ---
105
- # Using internship.id
106
  doc_ref = db_client.collection('internships').document(internship.id)
107
  if doc_ref.get().exists:
108
  raise HTTPException(status_code=400, detail="Internship ID already exists.")
109
-
110
- # Save to Firestore
111
  doc_ref.set(internship.dict())
112
-
113
- # --- SCHEMA CHANGED HERE ---
114
- # Using internship.skills
115
  text_to_encode = f"{internship.title}. {internship.description}. Skills: {', '.join(internship.skills)}"
116
  embedding = encoder.encode([text_to_encode])[0].tolist()
117
-
118
- # --- CRITICAL FIX RE-APPLIED HERE ---
119
- # Prepare metadata for ChromaDB, converting skills list to a JSON string
120
  metadata_for_chroma = internship.dict()
121
  metadata_for_chroma['skills'] = json.dumps(metadata_for_chroma['skills'])
122
-
123
- chroma_collection.add(
124
- # --- SCHEMA CHANGED HERE ---
125
- # Using internship.id
126
- ids=[internship.id],
127
- embeddings=[embedding],
128
- metadatas=[metadata_for_chroma]
129
- )
130
-
131
  print(f"✅ Added internship to Firestore and ChromaDB: {internship.id}")
132
- # --- SCHEMA CHANGED HERE ---
133
  return {"status": "success", "internship_id": internship.id}
134
 
135
  @app.post("/profile-recommendations", response_model=RecommendationResponse)
136
  def get_profile_recommendations(profile: UserProfile):
137
  if chroma_collection is None or encoder is None:
138
  raise HTTPException(status_code=503, detail="Server is not ready.")
139
-
140
  query_text = f"Skills: {', '.join(profile.skills)}. Interests: {', '.join(profile.interests)}"
141
  query_embedding = encoder.encode([query_text])[0].tolist()
142
-
143
- results = chroma_collection.query(
144
- query_embeddings=[query_embedding],
145
- n_results=3
146
- )
147
-
148
  recommendations = []
149
  ids = results.get('ids', [[]])[0]
150
  distances = results.get('distances', [[]])[0]
151
-
152
  for i, internship_id in enumerate(ids):
153
- recommendations.append({
154
- "internship_id": internship_id,
155
- "score": 1 - distances[i]
156
- })
157
-
158
  return {"recommendations": recommendations}
159
 
160
  @app.post("/search", response_model=RecommendationResponse)
161
  def search_internships(search: SearchQuery):
162
  if chroma_collection is None or encoder is None:
163
  raise HTTPException(status_code=503, detail="Server is not ready.")
164
-
165
  query_embedding = encoder.encode([search.query])[0].tolist()
166
-
167
- results = chroma_collection.query(
168
- query_embeddings=[query_embedding],
169
- n_results=3
170
- )
171
-
172
  recommendations = []
173
  ids = results.get('ids', [[]])[0]
174
  distances = results.get('distances', [[]])[0]
175
-
176
  for i, internship_id in enumerate(ids):
177
- recommendations.append({
178
- "internship_id": internship_id,
179
- "score": 1 - distances[i]
180
- })
181
-
182
  return {"recommendations": recommendations}
 
183
  @app.post("/chat", response_model=ChatResponse)
184
  def chat_with_bot(message: ChatMessage):
185
  response = get_rag_response(message.query)
 
1
  import os
2
  import json
3
  import chromadb
4
+ from fastapi import FastAPI, HTTPException, Depends, Query
5
  from pydantic import BaseModel, Field
6
  from typing import List
7
  import firebase_admin
8
  from firebase_admin import credentials, firestore
9
+
 
10
  from encoder import SentenceEncoder
11
+ from llm_handler import initialize_llm, get_rag_response, llm_handler # Make sure llm_handler is imported
12
+ from populate_chroma import populate_vector_db # For the setup endpoint
13
 
14
  # --- Pydantic Models ---
15
  class UserProfile(BaseModel):
 
20
  class SearchQuery(BaseModel):
21
  query: str = Field(..., example="marketing internship in mumbai")
22
 
 
 
23
  class InternshipData(BaseModel):
24
  id: str = Field(..., example="int_021")
25
  title: str
 
42
  class ChatResponse(BaseModel):
43
  response: str
44
 
45
+ # --- FastAPI App Initialization ---
46
  app = FastAPI(
47
+ title="Internship Recommendation & Chatbot API",
48
+ description="An API using Firestore for metadata, ChromaDB for vector search, and an LLM for chat.",
49
+ version="3.0.0"
50
  )
51
 
52
+ # --- Firebase Initialization ---
53
+ db = None
54
  try:
55
+ firebase_creds = os.getenv("FIREBASE_CREDS_JSON")
56
+ if firebase_creds:
57
+ creds_dict = json.loads(firebase_creds)
58
  cred = credentials.Certificate(creds_dict)
59
+ if not firebase_admin._apps:
60
+ firebase_admin.initialize_app(cred)
61
+ db = firestore.client()
62
+ print("✅ Firebase initialized with Hugging Face secret.")
63
  else:
64
+ # Fallback for local development if the secret isn't set
65
  cred = credentials.Certificate('serviceAccountKey.json')
66
+ if not firebase_admin._apps:
67
+ firebase_admin.initialize_app(cred)
68
+ db = firestore.client()
69
+ print("✅ Firebase initialized with local key file.")
70
  except Exception as e:
71
  print(f"❌ Could not initialize Firebase. Error: {e}")
 
72
 
 
73
  def get_db():
74
  if db is None:
75
  raise HTTPException(status_code=503, detail="Firestore connection not available.")
76
  return db
77
 
78
+ # --- Global Variables ---
79
  encoder = None
80
  chroma_collection = None
81
 
 
86
  print("🚀 Loading sentence encoder model...")
87
  encoder = SentenceEncoder()
88
 
89
+ # --- THIS IS THE FIX ---
90
+ # Point ChromaDB to the correct writable persistent storage path on Hugging Face
91
+ chroma_db_path = "/data/chroma_db"
92
+
93
+ client = chromadb.PersistentClient(path=chroma_db_path)
94
  chroma_collection = client.get_or_create_collection(name="internships")
95
 
96
  print("✅ ChromaDB client initialized and collection is ready.")
97
  print(f" - Internships in DB: {chroma_collection.count()}")
98
+
99
+ # Pass the loaded models to the llm_handler module
100
  llm_handler.encoder = encoder
101
  llm_handler.chroma_collection = chroma_collection
102
+
103
+ # Initialize the LLM
104
  initialize_llm()
105
 
106
  # --- API Endpoints ---
 
108
  def read_root():
109
  return {"message": "Welcome to the Internship Recommendation API!"}
110
 
111
+ @app.post("/setup")
112
+ def run_initial_setup(secret_key: str = Query(...)):
113
+ correct_key = os.getenv("SETUP_SECRET_KEY")
114
+ if not correct_key or secret_key != correct_key:
115
+ raise HTTPException(status_code=403, detail="Invalid secret key.")
116
+ try:
117
+ print("--- RUNNING DATABASE POPULATION SCRIPT ---")
118
+ populate_vector_db()
119
+ print("--- SETUP COMPLETE ---")
120
+ return {"status": "Setup completed successfully."}
121
+ except Exception as e:
122
+ raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
123
+
124
+
125
  @app.post("/add-internship", response_model=StatusResponse)
126
  def add_internship(internship: InternshipData, db_client: firestore.Client = Depends(get_db)):
127
  if chroma_collection is None or encoder is None:
128
  raise HTTPException(status_code=503, detail="Server is not ready.")
 
 
 
129
  doc_ref = db_client.collection('internships').document(internship.id)
130
  if doc_ref.get().exists:
131
  raise HTTPException(status_code=400, detail="Internship ID already exists.")
 
 
132
  doc_ref.set(internship.dict())
 
 
 
133
  text_to_encode = f"{internship.title}. {internship.description}. Skills: {', '.join(internship.skills)}"
134
  embedding = encoder.encode([text_to_encode])[0].tolist()
 
 
 
135
  metadata_for_chroma = internship.dict()
136
  metadata_for_chroma['skills'] = json.dumps(metadata_for_chroma['skills'])
137
+ chroma_collection.add(ids=[internship.id], embeddings=[embedding], metadatas=[metadata_for_chroma])
 
 
 
 
 
 
 
 
138
  print(f"✅ Added internship to Firestore and ChromaDB: {internship.id}")
 
139
  return {"status": "success", "internship_id": internship.id}
140
 
141
  @app.post("/profile-recommendations", response_model=RecommendationResponse)
142
  def get_profile_recommendations(profile: UserProfile):
143
  if chroma_collection is None or encoder is None:
144
  raise HTTPException(status_code=503, detail="Server is not ready.")
 
145
  query_text = f"Skills: {', '.join(profile.skills)}. Interests: {', '.join(profile.interests)}"
146
  query_embedding = encoder.encode([query_text])[0].tolist()
147
+ results = chroma_collection.query(query_embeddings=[query_embedding], n_results=3)
 
 
 
 
 
148
  recommendations = []
149
  ids = results.get('ids', [[]])[0]
150
  distances = results.get('distances', [[]])[0]
 
151
  for i, internship_id in enumerate(ids):
152
+ recommendations.append({"internship_id": internship_id, "score": 1 - distances[i]})
 
 
 
 
153
  return {"recommendations": recommendations}
154
 
155
  @app.post("/search", response_model=RecommendationResponse)
156
  def search_internships(search: SearchQuery):
157
  if chroma_collection is None or encoder is None:
158
  raise HTTPException(status_code=503, detail="Server is not ready.")
 
159
  query_embedding = encoder.encode([search.query])[0].tolist()
160
+ results = chroma_collection.query(query_embeddings=[query_embedding], n_results=3)
 
 
 
 
 
161
  recommendations = []
162
  ids = results.get('ids', [[]])[0]
163
  distances = results.get('distances', [[]])[0]
 
164
  for i, internship_id in enumerate(ids):
165
+ recommendations.append({"internship_id": internship_id, "score": 1 - distances[i]})
 
 
 
 
166
  return {"recommendations": recommendations}
167
+
168
  @app.post("/chat", response_model=ChatResponse)
169
  def chat_with_bot(message: ChatMessage):
170
  response = get_rag_response(message.query)