Upload folder using huggingface_hub
Browse files
app.py
CHANGED
|
@@ -147,66 +147,46 @@ If the user is engaging in discussion, try to steer them towards getting in touc
|
|
| 147 |
return system_prompt
|
| 148 |
|
| 149 |
def chat(self, message, history):
|
| 150 |
-
messages =
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
|
| 156 |
-
|
| 157 |
-
gemini_client = OpenAI(
|
| 158 |
-
api_key=os.getenv("GOOGLE_API_KEY"),
|
| 159 |
-
base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
|
| 160 |
-
)
|
| 161 |
-
|
| 162 |
-
openai_client = OpenAI(
|
| 163 |
-
api_key=os.getenv("OPENAI_API_KEY")
|
| 164 |
-
)
|
| 165 |
-
|
| 166 |
-
def call_llm(client, model):
|
| 167 |
return client.chat.completions.create(
|
| 168 |
model=model,
|
| 169 |
messages=messages,
|
| 170 |
-
tools=tools
|
|
|
|
| 171 |
)
|
| 172 |
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
if not use_openai:
|
| 180 |
-
response = call_llm(gemini_client, "gemini-2.0-flash")
|
| 181 |
-
else:
|
| 182 |
-
response = call_llm(openai_client, "gpt-4o-mini")
|
| 183 |
-
|
| 184 |
-
except Exception as e:
|
| 185 |
-
error_msg = str(e).lower()
|
| 186 |
-
|
| 187 |
-
# π₯ Detect quota exceeded
|
| 188 |
-
if "quota" in error_msg or "resource_exhausted" in error_msg:
|
| 189 |
-
print("β οΈ Gemini quota exceeded. Falling back to OpenAI...")
|
| 190 |
-
use_openai = True
|
| 191 |
-
continue
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
-
|
| 198 |
-
choice = response.choices[0]
|
| 199 |
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
|
| 204 |
-
|
|
|
|
| 205 |
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
else:
|
| 209 |
-
done = True
|
| 210 |
|
| 211 |
return response.choices[0].message.content
|
| 212 |
|
|
|
|
| 147 |
return system_prompt
|
| 148 |
|
| 149 |
def chat(self, message, history):
    """Run one chat turn: build the prompt, call the LLM, resolve tool calls.

    Prefers Gemini (via its OpenAI-compatible endpoint) and falls back to
    OpenAI only when Gemini reports quota exhaustion. Tool calls are handled
    exactly once — no retry loop — then a final, tool-free completion is
    requested.

    Args:
        message: The latest user message (plain string).
        history: Prior conversation as a list of OpenAI-style message dicts;
            only the last 6 entries are forwarded to bound token usage.

    Returns:
        The assistant's reply text (``str``), or ``None`` if the provider
        returned an empty message content.

    Raises:
        Exception: Any provider error that is not a quota/resource-exhausted
            condition is re-raised unchanged.
    """
    messages = [
        {"role": "system", "content": self.system_prompt()},
        *history[-6:],  # limit history to last N turns to cap prompt size
        {"role": "user", "content": message},
    ]

    def run(client, model, use_tools=True):
        # Single completion call. `use_tools=False` lets the post-tool
        # "final answer" call actually omit the tools parameter.
        kwargs = {
            "model": model,
            "messages": messages,
            "max_tokens": 512,  # hard cap on response size
        }
        if use_tools:
            kwargs["tools"] = tools
        return client.chat.completions.create(**kwargs)

    # Clients — Gemini is reached through its OpenAI-compatible endpoint.
    gemini = OpenAI(
        api_key=os.getenv("GOOGLE_API_KEY"),
        base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
    )
    openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    # Track the provider that actually answers, so every follow-up call in
    # this turn stays on the same client/model.
    active_client, active_model = gemini, "gemini-2.0-flash"
    try:
        response = run(active_client, active_model)
    except Exception as e:
        error_msg = str(e).lower()
        # Quota exceeded on Gemini -> fall back to OpenAI for this turn.
        if "quota" in error_msg or "resource_exhausted" in error_msg:
            # BUGFIX: remember the fallback pair; previously the post-tool
            # final call silently reverted to the exhausted Gemini client.
            active_client, active_model = openai_client, "gpt-4o-mini"
            response = run(active_client, active_model)
        else:
            raise

    choice = response.choices[0]

    # Handle tools ONCE (no loops).
    if choice.finish_reason == "tool_calls":
        tool_results = self.handle_tool_call(choice.message.tool_calls)

        messages.append(choice.message)
        messages.extend(tool_results)

        # Final answer — BUGFIX: genuinely disable tools here (the old code's
        # comment promised "NO tools" but `run` still sent them, letting the
        # model request tools that would never be executed).
        response = run(active_client, active_model, use_tools=False)

    return response.choices[0].message.content
|
| 192 |
|