"""Gradio app that answers questions over PDFs fetched from user-supplied URLs.

Each URL is downloaded, its text is extracted with PyMuPDF, and the collected
texts are handed to the Pleias RAG model together with the user's question.
"""

import gradio as gr
import requests
import fitz  # PyMuPDF
from pleias_rag_interface import RAGWithCitations

# Initialize the Pleias RAG model
rag = RAGWithCitations(model_path_or_name="PleIAs/Pleias-RAG-350M")

# Upper bound (seconds) on each PDF download so that an unresponsive server
# cannot block a request forever; requests has no timeout by default.
PDF_REQUEST_TIMEOUT_SECONDS = 30


def _fetch_pdf_text(url):
    """Download the PDF at *url* and return its stripped, concatenated page text.

    Raises whatever ``requests`` or PyMuPDF raise on failure (network error,
    non-2xx status, unparsable document), so callers can tell a failure apart
    from a document whose text merely looks like an error message.
    """
    response = requests.get(url, timeout=PDF_REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(stream=response.content, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc).strip()


def extract_text_from_pdf_url(url):
    """Return the text of the PDF at *url*, or an error marker string.

    Args:
        url: Address of the PDF to download.

    Returns:
        The stripped text of all pages. On any failure the function does not
        raise; it returns a string of the form ``"[Error loading PDF: ...]"``.
    """
    try:
        return _fetch_pdf_text(url)
    except Exception as e:
        return f"[Error loading PDF: {e}]"


def generate_answer(query, pdf_urls_str):
    """Answer *query* using the PDFs listed in *pdf_urls_str*.

    Args:
        query: The user's question.
        pdf_urls_str: Newline-separated PDF URLs; blank lines are ignored.

    Returns:
        A Markdown string with the query, the model's raw response and the
        backend reported by the model, or a short message when no PDF
        produced any usable text.
    """
    # NOTE(review): URLs come straight from the user and are fetched
    # server-side; consider restricting allowed hosts if this is deployed
    # on a network with internal services.
    pdf_urls = [
        line.strip()
        for line in (pdf_urls_str or "").splitlines()
        if line.strip()
    ]

    sources = []
    for url in pdf_urls:
        try:
            text = _fetch_pdf_text(url)
        except Exception:
            # Best effort: one unreachable or broken PDF must not prevent
            # answering from the remaining ones.
            continue
        # A PDF without a text layer yields "", which gives the model
        # nothing to ground an answer on, so it is skipped as well.
        if text:
            sources.append({"text": text, "metadata": {"source": url}})

    if not sources:
        return "No valid PDFs found or unable to extract text."

    response = rag.generate(query, sources)
    return (
        f"### Query:\n{query}\n\n"
        f"### Answer:\n{response['raw_response']}\n\n"
        f"### Source Info:\nBackend used: {response['backend_used']}"
    )


# Gradio UI
iface = gr.Interface(
    fn=generate_answer,
    inputs=[
        gr.Textbox(label="Your Question", placeholder="What is this document about?"),
        gr.Textbox(
            lines=5,
            label="PDF URLs (one per line)",
            placeholder="https://example.com/doc1.pdf\nhttps://example.com/doc2.pdf",
        ),
    ],
    outputs=gr.Markdown(label="Model Response"),
    title="Pleias RAG PDF QA",
    description="Ask a question and get answers grounded in the content of the uploaded PDF URLs using the Pleias RAG model.",
)

if __name__ == "__main__":
    iface.launch()