"""Main FastAPI application entry point."""
import logging
import threading
from typing import Dict
from fastapi import FastAPI, status
from fastapi.responses import JSONResponse
from app import __version__
from app.config import settings
from app.logfire_config import configure_logfire
from app.middleware import api_key_guard
from app.middleware.rate_limit import rate_limit_middleware
from app.routers import openai_api
# Configure logging with level from settings
try:
log_level = getattr(logging, settings.log_level.upper())
except AttributeError:
print(f"Warning: Invalid log level '{settings.log_level}'. Falling back to INFO.")
log_level = logging.INFO
logging.basicConfig(level=log_level)
logger = logging.getLogger(__name__)
# Initialize Logfire before creating the app
try:
    configure_logfire()
    logger.info("Logfire configured successfully")
except Exception as e:
    logger.warning(f"Failed to configure Logfire: {e}. Continuing without Logfire.")
app = FastAPI(
title="LLM Pro Finance API (Transformers)",
description="OpenAI-compatible API for financial LLM inference",
version=__version__
)
# Instrument FastAPI with Logfire
try:
import logfire
logfire.instrument_fastapi(app)
logger.info("FastAPI instrumented with Logfire")
except Exception as e:
logger.warning(f"Failed to instrument FastAPI with Logfire: {e}")
# Mount routers
app.include_router(openai_api.router, prefix="/v1")
# Rate limiting middleware (registered first; Starlette executes the most
# recently added HTTP middleware first, so the API key guard below runs before
# rate limiting on each request)
app.middleware("http")(rate_limit_middleware)
# Optional API key middleware
app.middleware("http")(api_key_guard)
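# Startup hook (newer FastAPI versions prefer lifespan handlers over the
# on_event decorator, though on_event still works)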
@app.on_event("startup")
async def startup_event() -> None:
"""Startup event - initialize model in background thread.
Loads the model asynchronously to avoid blocking the API startup.
Model loading happens in a daemon thread so it doesn't prevent shutdown.
"""
logger.info("Starting LLM Pro Finance API...")
force_reload = settings.force_model_reload
if force_reload:
logger.info("Force model reload enabled (FORCE_MODEL_RELOAD=true)")
logger.info("Initializing model in background thread...")
def load_model() -> None:
"""Load the model in a background thread."""
from app.providers.transformers_provider import initialize_model
initialize_model(force_reload=force_reload)
# Start model loading in background thread
thread = threading.Thread(target=load_model, daemon=True)
thread.start()
logger.info("Model initialization started in background")
@app.get("/")
async def root() -> Dict[str, str]:
"""Root endpoint returning API status and information.
Returns:
Dictionary containing API status, service name, version, model, and backend.
"""
return {
"status": "ok",
"service": "Qwen Open Finance R 8B Inference",
"version": __version__,
"model": settings.model,
"backend": "Transformers"
}
@app.get("/health")
async def health() -> Dict[str, str]:
"""Liveness check endpoint for monitoring and load balancers.
Returns:
Dictionary indicating the service is alive.
"""
return {"status": "service alive", "service": "LLM Pro Finance API"}
@app.get("/ready")
async def ready() -> JSONResponse:
"""Readiness check endpoint for orchestrators and load balancers.
Checks if the model is loaded and ready to handle requests.
Returns 503 Service Unavailable if the model is not ready.
Returns:
JSONResponse with ready/model_loaded fields and appropriate status code.
"""
from app.providers.transformers_provider import is_model_ready
    model_loaded = is_model_ready()
    response_data = {
        "ready": model_loaded,
        "model_loaded": model_loaded,
        "service": "LLM Pro Finance API"
    }
    if model_loaded:
return JSONResponse(content=response_data, status_code=status.HTTP_200_OK)
else:
return JSONResponse(
content=response_data,
status_code=status.HTTP_503_SERVICE_UNAVAILABLE
)
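

# Minimal local-run entry point - a sketch, assuming uvicorn is installed and
# this module is importable as "app.main"; container/production deployments
# typically launch the ASGI server directly instead.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run("app.main:app", host="0.0.0.0", port=8000)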