import gradio as gr
import requests
import fitz  # PyMuPDF
from pleias_rag_interface import RAGWithCitations

# Load the Pleias RAG model once at import time; generate_answer reuses this instance
_MODEL_ID = "PleIAs/Pleias-RAG-350M"
rag = RAGWithCitations(model_path_or_name=_MODEL_ID)

def extract_text_from_pdf_url(url, timeout=30):
    """Download the PDF at *url* and return its text content.

    Args:
        url: Address of the PDF document to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block forever on an unresponsive
            host, which would hang the calling UI handler.

    Returns:
        The concatenated text of every page, with leading and trailing
        whitespace stripped. On any failure (network error, bad HTTP
        status, unparsable PDF) a string of the form
        ``"[Error loading PDF: <reason>]"`` is returned instead of raising.
    """
    # NOTE(review): the URL is fetched as given; if this function receives
    # untrusted input, consider restricting which hosts may be requested.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        # The context manager closes the document even if a page fails to parse.
        with fitz.open(stream=response.content, filetype="pdf") as doc:
            text = "".join(page.get_text() for page in doc)
        return text.strip()
    except Exception as e:
        # Deliberately broad: failure is reported through the returned
        # marker string rather than by raising.
        return f"[Error loading PDF: {e}]"

def generate_answer(query, pdf_urls_str):
    """Answer *query* using the text of the PDFs listed in *pdf_urls_str*.

    Args:
        query: The user's question, passed unchanged to the RAG model.
        pdf_urls_str: PDF URLs separated by line breaks. Blank lines and
            surrounding whitespace are ignored; ``None`` is treated as empty.

    Returns:
        A Markdown string containing the query, the model's
        ``raw_response`` and the ``backend_used`` value reported by the
        model, or a fixed fallback message when no usable PDF text could
        be collected.
    """
    # Marker emitted by extract_text_from_pdf_url on failure. Matching the
    # full marker (not just "[Error") avoids discarding a real document
    # whose text happens to start with "[Error".
    error_prefix = "[Error loading PDF:"

    # `or ""` keeps a None value from crashing the handler.
    pdf_urls = [
        line.strip()
        for line in (pdf_urls_str or "").splitlines()
        if line.strip()
    ]

    sources = []
    for url in pdf_urls:
        text = extract_text_from_pdf_url(url)
        # Skip failed downloads and PDFs that yielded no extractable text,
        # so the model is never handed an empty source.
        if not text or text.startswith(error_prefix):
            continue
        sources.append({
            "text": text,
            "metadata": {"source": url},
        })

    if not sources:
        return "No valid PDFs found or unable to extract text."

    response = rag.generate(query, sources)
    return f"### Query:\n{query}\n\n### Answer:\n{response['raw_response']}\n\n### Source Info:\nBackend used: {response['backend_used']}"

# Gradio UI: build the two input widgets first, then wire them to generate_answer
_question_box = gr.Textbox(
    label="Your Question",
    placeholder="What is this document about?",
)
_pdf_urls_box = gr.Textbox(
    lines=5,
    label="PDF URLs (one per line)",
    placeholder="https://example.com/doc1.pdf\nhttps://example.com/doc2.pdf",
)

iface = gr.Interface(
    fn=generate_answer,
    inputs=[_question_box, _pdf_urls_box],
    outputs=gr.Markdown(label="Model Response"),
    title="Pleias RAG PDF QA",
    description="Ask a question and get answers grounded in the content of the uploaded PDF URLs using the Pleias RAG model.",
)

# Launch the interface only when this file is run as a script
if __name__ == "__main__":
    iface.launch()