"""Main FastAPI application entry point."""
import logging
import threading
from typing import Dict
from fastapi import FastAPI, status
from fastapi.responses import JSONResponse
from app import __version__
from app.config import settings
from app.logfire_config import configure_logfire
from app.middleware import api_key_guard
from app.middleware.rate_limit import rate_limit_middleware
from app.routers import openai_api
# Configure logging with level from settings
try:
log_level = getattr(logging, settings.log_level.upper())
except AttributeError:
print(f"Warning: Invalid log level '{settings.log_level}'. Falling back to INFO.")
log_level = logging.INFO
logging.basicConfig(level=log_level)
logger = logging.getLogger(__name__)
# Initialize Logfire before creating the app
try:
    configure_logfire()
    logger.info("Logfire configured successfully")
except Exception as e:
    logger.warning(f"Failed to configure Logfire: {e}. Continuing without Logfire.")
app = FastAPI(
title="LLM Pro Finance API (Transformers)",
description="OpenAI-compatible API for financial LLM inference",
version=__version__
)
# Instrument FastAPI with Logfire
try:
import logfire
logfire.instrument_fastapi(app)
logger.info("FastAPI instrumented with Logfire")
except Exception as e:
logger.warning(f"Failed to instrument FastAPI with Logfire: {e}")
# Mount routers
app.include_router(openai_api.router, prefix="/v1")
# Rate limiting middleware (registered first; Starlette executes the most
# recently added HTTP middleware first, so the API key guard below runs before
# rate limiting on each request)
app.middleware("http")(rate_limit_middleware)
# Optional API key middleware
app.middleware("http")(api_key_guard)
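# Startup hook (newer FastAPI versions prefer lifespan handlers over the
# on_event decorator, though on_event still works)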
@app.on_event("startup")
async def startup_event() -> None:
"""Startup event - initialize model in background thread.
Loads the model asynchronously to avoid blocking the API startup.
Model loading happens in a daemon thread so it doesn't prevent shutdown.
"""
logger.info("Starting LLM Pro Finance API...")
force_reload = settings.force_model_reload
if force_reload:
logger.info("Force model reload enabled (FORCE_MODEL_RELOAD=true)")
logger.info("Initializing model in background thread...")
def load_model() -> None:
"""Load the model in a background thread."""
from app.providers.transformers_provider import initialize_model
initialize_model(force_reload=force_reload)
# Start model loading in background thread
thread = threading.Thread(target=load_model, daemon=True)
thread.start()
logger.info("Model initialization started in background")
@app.get("/")
async def root() -> Dict[str, str]:
"""Root endpoint returning API status and information.
Returns:
Dictionary containing API status, service name, version, model, and backend.
"""
return {
"status": "ok",
"service": "Qwen Open Finance R 8B Inference",
"version": __version__,
"model": settings.model,
"backend": "Transformers"
}
@app.get("/health")
async def health() -> Dict[str, str]:
"""Liveness check endpoint for monitoring and load balancers.
Returns:
Dictionary indicating the service is alive.
"""
return {"status": "service alive", "service": "LLM Pro Finance API"}
@app.get("/ready")
async def ready() -> JSONResponse:
"""Readiness check endpoint for orchestrators and load balancers.
Checks if the model is loaded and ready to handle requests.
Returns 503 Service Unavailable if the model is not ready.
Returns:
JSONResponse with ready/model_loaded fields and appropriate status code.
"""
from app.providers.transformers_provider import is_model_ready
    model_loaded = is_model_ready()
    response_data = {
        "ready": model_loaded,
        "model_loaded": model_loaded,
        "service": "LLM Pro Finance API"
    }
    if model_loaded:
return JSONResponse(content=response_data, status_code=status.HTTP_200_OK)
else:
return JSONResponse(
content=response_data,
status_code=status.HTTP_503_SERVICE_UNAVAILABLE
)
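

# Minimal local-run entry point - a sketch, assuming uvicorn is installed and
# this module is importable as "app.main"; container/production deployments
# typically launch the ASGI server directly instead.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run("app.main:app", host="0.0.0.0", port=8000)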