crazyforprogramming committed on
Commit
750c180
·
verified ·
1 Parent(s): ff035d7

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +29 -49
app.py CHANGED
@@ -147,66 +147,46 @@ If the user is engaging in discussion, try to steer them towards getting in touc
147
  return system_prompt
148
 
149
  def chat(self, message, history):
150
- messages = (
151
- [{"role": "system", "content": self.system_prompt()}]
152
- + history
153
- + [{"role": "user", "content": message}]
154
- )
155
 
156
- # Clients
157
- gemini_client = OpenAI(
158
- api_key=os.getenv("GOOGLE_API_KEY"),
159
- base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
160
- )
161
-
162
- openai_client = OpenAI(
163
- api_key=os.getenv("OPENAI_API_KEY")
164
- )
165
-
166
- def call_llm(client, model):
167
  return client.chat.completions.create(
168
  model=model,
169
  messages=messages,
170
- tools=tools
 
171
  )
172
 
173
- done = False
174
- use_openai = False
175
-
176
- while not done:
177
- try:
178
- # πŸ”Ή Try Gemini first
179
- if not use_openai:
180
- response = call_llm(gemini_client, "gemini-2.0-flash")
181
- else:
182
- response = call_llm(openai_client, "gpt-4o-mini")
183
-
184
- except Exception as e:
185
- error_msg = str(e).lower()
186
-
187
- # πŸ”₯ Detect quota exceeded
188
- if "quota" in error_msg or "resource_exhausted" in error_msg:
189
- print("⚠️ Gemini quota exceeded. Falling back to OpenAI...")
190
- use_openai = True
191
- continue
192
 
193
- # ❌ Unknown error
194
- print("❌ LLM error:", e)
195
- return "Sorry, something went wrong while processing your request."
 
 
 
 
196
 
197
- # πŸ” Tool calling flow
198
- choice = response.choices[0]
199
 
200
- if choice.finish_reason == "tool_calls":
201
- assistant_message = choice.message
202
- tool_calls = assistant_message.tool_calls
203
 
204
- tool_results = self.handle_tool_call(tool_calls)
 
205
 
206
- messages.append(assistant_message)
207
- messages.extend(tool_results)
208
- else:
209
- done = True
210
 
211
  return response.choices[0].message.content
212
 
 
147
  return system_prompt
148
 
149
  def chat(self, message, history):
150
+ messages = [
151
+ {"role": "system", "content": self.system_prompt()},
152
+ *history[-6:], # πŸ”₯ limit history to last N turns
153
+ {"role": "user", "content": message}
154
+ ]
155
 
156
+ def run(client, model):
 
 
 
 
 
 
 
 
 
 
157
  return client.chat.completions.create(
158
  model=model,
159
  messages=messages,
160
+ tools=tools,
161
+ max_tokens=512 # πŸ”’ hard cap
162
  )
163
 
164
+ # Clients
165
+ gemini = OpenAI(
166
+ api_key=os.getenv("GOOGLE_API_KEY"),
167
+ base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
168
+ )
169
+ openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
+ try:
172
+ response = run(gemini, "gemini-2.0-flash")
173
+ except Exception as e:
174
+ if "quota" in str(e).lower() or "resource_exhausted" in str(e).lower():
175
+ response = run(openai_client, "gpt-4o-mini")
176
+ else:
177
+ raise
178
 
179
+ choice = response.choices[0]
 
180
 
181
+ # πŸ” Handle tools ONCE (no loops)
182
+ if choice.finish_reason == "tool_calls":
183
+ tool_results = self.handle_tool_call(choice.message.tool_calls)
184
 
185
+ messages.append(choice.message)
186
+ messages.extend(tool_results)
187
 
188
+ # Final answer (NO tools this time)
189
+ response = run(gemini, "gemini-2.0-flash")
 
 
190
 
191
  return response.choices[0].message.content
192