import json

from fastapi import APIRouter, HTTPException

from core.llm import LLMClient
from core.memory import save_user_state, load_user_state

router = APIRouter()
llm_client = LLMClient(provider="ollama")  # Default to the Ollama provider


@router.post("/chat")
async def chat(user_id: str, message: str):
    if not message:
        raise HTTPException(status_code=400, detail="Message is required")

    # Load user state from Redis; fall back to an empty history for new users
    user_state = load_user_state(user_id)
    conversation_history = (
        json.loads(user_state.get("conversation", "[]")) if user_state else []
    )

    # Add the user's message to the history
    conversation_history.append({"role": "user", "content": message})

    # Generate the AI response from the latest message
    try:
        full_response = ""
        response_stream = llm_client.generate(prompt=message, stream=True)

        # Collect the streamed chunks into a single response string
        for chunk in response_stream:
            full_response += chunk

        # Append the assistant reply and persist the updated conversation
        conversation_history.append({"role": "assistant", "content": full_response})
        save_user_state(user_id, {"conversation": json.dumps(conversation_history)})

        return {"response": full_response}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"LLM generation failed: {e}")
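
# --- Usage sketch (assumption: this module is mounted into a top-level FastAPI
# --- app; the module path "routes.chat" and app module name are hypothetical) ---
#
#   from fastapi import FastAPI
#   from routes.chat import router as chat_router
#
#   app = FastAPI()
#   app.include_router(chat_router)
#
# Because user_id and message are plain scalar parameters (no Pydantic body
# model), FastAPI exposes them as query parameters, so the endpoint can be
# exercised with, for example:
#
#   curl -X POST "http://localhost:8000/chat?user_id=u1&message=hello"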