Pulastya0 committed on
Commit
0c83855
·
1 Parent(s): 2bf46dd

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +70 -53
main.py CHANGED
@@ -8,15 +8,27 @@ import firebase_admin
8
  from firebase_admin import credentials, firestore
9
 
10
  from encoder import SentenceEncoder
11
- from llm_handler import initialize_llm, get_rag_response
12
- import llm_handler
13
- from populate_chroma import populate_vector_db # For the setup endpoint
14
-
15
- # --- Pydantic Models ---
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  class UserProfile(BaseModel):
17
- user_id: str
18
  skills: List[str] = Field(..., example=["python", "data analysis"])
19
- interests: List[str] = Field(..., example=["machine learning", "web development"])
20
 
21
  class SearchQuery(BaseModel):
22
  query: str = Field(..., example="marketing internship in mumbai")
@@ -37,20 +49,22 @@ class StatusResponse(BaseModel):
37
  status: str
38
  internship_id: str
39
 
40
- class ChatMessage(BaseModel):
41
- query: str
42
-
43
- class ChatResponse(BaseModel):
44
- response: str
45
-
46
- # --- FastAPI App Initialization ---
47
  app = FastAPI(
48
- title="Internship Recommendation & Chatbot API",
49
- description="An API using Firestore for metadata, ChromaDB for vector search, and an LLM for chat.",
50
- version="3.0.0"
 
 
 
 
51
  )
52
 
53
- # --- Firebase Initialization ---
 
 
54
  db = None
55
  try:
56
  firebase_creds = os.getenv("FIREBASE_CREDS_JSON")
@@ -62,66 +76,67 @@ try:
62
  db = firestore.client()
63
  print("✅ Firebase initialized with Hugging Face secret.")
64
  else:
65
- # Fallback for local development if the secret isn't set
66
- cred = credentials.Certificate('serviceAccountKey.json')
67
- if not firebase_admin._apps:
68
- firebase_admin.initialize_app(cred)
69
- db = firestore.client()
70
- print("✅ Firebase initialized with local key file.")
71
  except Exception as e:
72
- print(f"❌ Could not initialize Firebase. Error: {e}")
73
 
74
  def get_db():
75
  if db is None:
76
  raise HTTPException(status_code=503, detail="Firestore connection not available.")
77
  return db
78
 
79
- # --- Global Variables ---
 
 
80
  encoder = None
81
  chroma_collection = None
82
 
83
  @app.on_event("startup")
84
  def load_model_and_data():
85
  global encoder, chroma_collection
86
-
87
  print("🚀 Loading sentence encoder model...")
88
  encoder = SentenceEncoder()
89
 
90
- # --- THIS IS THE FIX ---
91
- # Point ChromaDB to the correct writable persistent storage path on Hugging Face
92
  chroma_db_path = "/data/chroma_db"
93
 
94
- client = chromadb.PersistentClient(path=chroma_db_path)
95
- chroma_collection = client.get_or_create_collection(name="internships")
96
-
97
- print("✅ ChromaDB client initialized and collection is ready.")
98
- print(f" - Internships in DB: {chroma_collection.count()}")
99
-
100
- # Pass the loaded models to the llm_handler module
101
- llm_handler.encoder = encoder
102
- llm_handler.chroma_collection = chroma_collection
103
-
104
- # Initialize the LLM
105
- initialize_llm()
106
 
107
- # --- API Endpoints ---
 
 
108
  @app.get("/")
109
  def read_root():
110
  return {"message": "Welcome to the Internship Recommendation API!"}
111
 
 
 
 
 
112
  @app.post("/setup")
113
- def run_initial_setup(secret_key: str = Query(...)):
 
 
 
 
114
  correct_key = os.getenv("SETUP_SECRET_KEY")
115
  if not correct_key or secret_key != correct_key:
116
  raise HTTPException(status_code=403, detail="Invalid secret key.")
 
117
  try:
118
  print("--- RUNNING DATABASE POPULATION SCRIPT ---")
119
  populate_vector_db()
120
  print("--- SETUP COMPLETE ---")
121
  return {"status": "Setup completed successfully."}
122
  except Exception as e:
123
- raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
124
-
125
 
126
  @app.post("/add-internship", response_model=StatusResponse)
127
  def add_internship(internship: InternshipData, db_client: firestore.Client = Depends(get_db)):
@@ -143,14 +158,21 @@ def add_internship(internship: InternshipData, db_client: firestore.Client = Dep
143
  def get_profile_recommendations(profile: UserProfile):
144
  if chroma_collection is None or encoder is None:
145
  raise HTTPException(status_code=503, detail="Server is not ready.")
146
- query_text = f"Skills: {', '.join(profile.skills)}. Interests: {', '.join(profile.interests)}"
 
147
  query_embedding = encoder.encode([query_text])[0].tolist()
148
  results = chroma_collection.query(query_embeddings=[query_embedding], n_results=3)
 
149
  recommendations = []
150
  ids = results.get('ids', [[]])[0]
151
  distances = results.get('distances', [[]])[0]
 
152
  for i, internship_id in enumerate(ids):
153
- recommendations.append({"internship_id": internship_id, "score": 1 - distances[i]})
 
 
 
 
154
  return {"recommendations": recommendations}
155
 
156
  @app.post("/search", response_model=RecommendationResponse)
@@ -164,9 +186,4 @@ def search_internships(search: SearchQuery):
164
  distances = results.get('distances', [[]])[0]
165
  for i, internship_id in enumerate(ids):
166
  recommendations.append({"internship_id": internship_id, "score": 1 - distances[i]})
167
- return {"recommendations": recommendations}
168
-
169
- @app.post("/chat", response_model=ChatResponse)
170
- def chat_with_bot(message: ChatMessage):
171
- response = get_rag_response(message.query)
172
- return {"response": response}
 
8
  from firebase_admin import credentials, firestore
9
 
10
  from encoder import SentenceEncoder
11
+ from populate_chroma import populate_vector_db
12
+
13
+ # --------------------------------------------------------------------
14
+ # Cache setup (store HF models in /data for persistence on Hugging Face)
15
+ # --------------------------------------------------------------------
16
+ os.environ["HF_HOME"] = "/data/cache"
17
+ os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/data/cache"
18
+
19
+ # --------------------------------------------------------------------
20
+ # Determine root_path dynamically
21
+ # Locally: root_path = ""
22
+ # On Hugging Face Spaces: root_path = "/username/space-name"
23
+ # --------------------------------------------------------------------
24
+ root_path = os.getenv("HF_SPACE_ROOT_PATH", "")
25
+
26
+ # --------------------------------------------------------------------
27
+ # Pydantic Models
28
+ # --------------------------------------------------------------------
29
  class UserProfile(BaseModel):
 
30
  skills: List[str] = Field(..., example=["python", "data analysis"])
31
+ sectors: List[str] = Field(..., example=["machine learning", "web development"])
32
 
33
  class SearchQuery(BaseModel):
34
  query: str = Field(..., example="marketing internship in mumbai")
 
49
  status: str
50
  internship_id: str
51
 
52
+ # --------------------------------------------------------------------
53
+ # FastAPI App
54
+ # --------------------------------------------------------------------
 
 
 
 
55
  app = FastAPI(
56
+ title="Internship Recommendation API",
57
+ description="An API using Firestore for metadata, and ChromaDB for vector search.",
58
+ version="2.2.0",
59
+ docs_url="/docs", # Swagger UI
60
+ redoc_url="/redoc", # ReDoc
61
+ openapi_url="/openapi.json", # OpenAPI schema
62
+ root_path=root_path # ✅ Fix for Hugging Face Spaces subpath issue
63
  )
64
 
65
+ # --------------------------------------------------------------------
66
+ # Firebase Initialization
67
+ # --------------------------------------------------------------------
68
  db = None
69
  try:
70
  firebase_creds = os.getenv("FIREBASE_CREDS_JSON")
 
76
  db = firestore.client()
77
  print("✅ Firebase initialized with Hugging Face secret.")
78
  else:
79
+ raise Exception("FIREBASE_CREDS_JSON not found")
 
 
 
 
 
80
  except Exception as e:
81
+ print(f"❌ Could not initialize Firebase: {e}")
82
 
83
  def get_db():
84
  if db is None:
85
  raise HTTPException(status_code=503, detail="Firestore connection not available.")
86
  return db
87
 
88
+ # --------------------------------------------------------------------
89
+ # Global Variables (encoder + chroma)
90
+ # --------------------------------------------------------------------
91
  encoder = None
92
  chroma_collection = None
93
 
94
  @app.on_event("startup")
95
  def load_model_and_data():
96
  global encoder, chroma_collection
 
97
  print("🚀 Loading sentence encoder model...")
98
  encoder = SentenceEncoder()
99
 
100
+ # Point ChromaDB to the persistent /data storage path
 
101
  chroma_db_path = "/data/chroma_db"
102
 
103
+ try:
104
+ client = chromadb.PersistentClient(path=chroma_db_path)
105
+ chroma_collection = client.get_or_create_collection(name="internships")
106
+ print("✅ ChromaDB client initialized and collection is ready.")
107
+ print(f" - Internships in DB: {chroma_collection.count()}")
108
+ except Exception as e:
109
+ print(f"❌ Error initializing ChromaDB: {e}")
110
+ raise
 
 
 
 
111
 
112
+ # --------------------------------------------------------------------
113
+ # Endpoints
114
+ # --------------------------------------------------------------------
115
  @app.get("/")
116
  def read_root():
117
  return {"message": "Welcome to the Internship Recommendation API!"}
118
 
119
+ @app.get("/healthz")
120
+ def health_check():
121
+ return {"status": "ok"}
122
+
123
  @app.post("/setup")
124
+ def run_initial_setup(secret_key: str = Query(..., example="your_secret_password")):
125
+ """
126
+ A secret endpoint to run the initial database setup.
127
+ This should only be run once after deployment.
128
+ """
129
  correct_key = os.getenv("SETUP_SECRET_KEY")
130
  if not correct_key or secret_key != correct_key:
131
  raise HTTPException(status_code=403, detail="Invalid secret key.")
132
+
133
  try:
134
  print("--- RUNNING DATABASE POPULATION SCRIPT ---")
135
  populate_vector_db()
136
  print("--- SETUP COMPLETE ---")
137
  return {"status": "Setup completed successfully."}
138
  except Exception as e:
139
+ raise HTTPException(status_code=500, detail=f"An error occurred during setup: {str(e)}")
 
140
 
141
  @app.post("/add-internship", response_model=StatusResponse)
142
  def add_internship(internship: InternshipData, db_client: firestore.Client = Depends(get_db)):
 
158
  def get_profile_recommendations(profile: UserProfile):
159
  if chroma_collection is None or encoder is None:
160
  raise HTTPException(status_code=503, detail="Server is not ready.")
161
+
162
+ query_text = f"Skills: {', '.join(profile.skills)}. Sectors: {', '.join(profile.sectors)}"
163
  query_embedding = encoder.encode([query_text])[0].tolist()
164
  results = chroma_collection.query(query_embeddings=[query_embedding], n_results=3)
165
+
166
  recommendations = []
167
  ids = results.get('ids', [[]])[0]
168
  distances = results.get('distances', [[]])[0]
169
+
170
  for i, internship_id in enumerate(ids):
171
+ recommendations.append({
172
+ "internship_id": internship_id,
173
+ "score": 1 - distances[i]
174
+ })
175
+
176
  return {"recommendations": recommendations}
177
 
178
  @app.post("/search", response_model=RecommendationResponse)
 
186
  distances = results.get('distances', [[]])[0]
187
  for i, internship_id in enumerate(ids):
188
  recommendations.append({"internship_id": internship_id, "score": 1 - distances[i]})
189
+ return {"recommendations": recommendations}