Spaces:

edouardlgp
/

Rag_with_Pleias

Runtime error

App Files Files Community

edouardlgp committed on May 10

Commit

355d8e8

verified ·

1 Parent(s): d7345e4

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -9

app.py CHANGED Viewed

@@ -1,10 +1,23 @@
 import gradio as gr
 import requests
 import fitz  # PyMuPDF
 from pleias_rag_interface import RAGWithCitations
-# Initialize the Pleias RAG model
-rag = RAGWithCitations(model_path_or_name="PleIAs/Pleias-RAG-350M")
 def extract_text_from_pdf_url(url):
     try:
@@ -20,8 +33,9 @@ def extract_text_from_pdf_url(url):
 def generate_answer(query, pdf_urls_str):
     pdf_urls = [url.strip() for url in pdf_urls_str.strip().split("\n") if url.strip()]
     sources = []
     for url in pdf_urls:
         text = extract_text_from_pdf_url(url)
         if not text.startswith("[Error"):
@@ -29,19 +43,21 @@ def generate_answer(query, pdf_urls_str):
                 "text": text,
                 "metadata": {"source": url}
             })
     if not sources:
-        return "No valid PDFs found or unable to extract text."
     response = rag.generate(query, sources)
-    return f"### Query:\n{query}\n\n### Answer:\n{response['raw_response']}\n\n### Source Info:\nBackend used: {response['backend_used']}"
-# Gradio UI
 iface = gr.Interface(
     fn=generate_answer,
     inputs=[
         gr.Textbox(label="Your Question", placeholder="What is this document about?"),
-        gr.Textbox(lines=5, label="PDF URLs (one per line)", placeholder="https://example.com/doc1.pdf\nhttps://example.com/doc2.pdf")
     ],
     outputs=gr.Markdown(label="Model Response"),
     title="Pleias RAG PDF QA",
@@ -49,4 +65,4 @@ iface = gr.Interface(
 )
 if __name__ == "__main__":
-    iface.launch()

 import gradio as gr
 import requests
 import fitz  # PyMuPDF
+import os
+from huggingface_hub import snapshot_download
 from pleias_rag_interface import RAGWithCitations
+# Download the model when this module is loaded, unless the local directory already exists
+MODEL_REPO = "PleIAs/Pleias-RAG-350M"
+MODEL_CACHE_DIR = "./pleias_model"
+if not os.path.exists(MODEL_CACHE_DIR):
+    snapshot_download(repo_id=MODEL_REPO, local_dir=MODEL_CACHE_DIR, local_dir_use_symlinks=False)
+# Initialize the Pleias RAG model, then patch the pad token: reuse the EOS token on the tokenizer and in the model config
+rag = RAGWithCitations(model_path_or_name=MODEL_CACHE_DIR)
+if hasattr(rag, "tokenizer"):
+    rag.tokenizer.pad_token = rag.tokenizer.eos_token
+if hasattr(rag, "model"):
+    rag.model.config.pad_token_id = rag.tokenizer.eos_token_id
 def extract_text_from_pdf_url(url):
     try:
 def generate_answer(query, pdf_urls_str):
     pdf_urls = [url.strip() for url in pdf_urls_str.strip().split("\n") if url.strip()]
     sources = []
+    feedback = "### Loaded PDFs:\n"
     for url in pdf_urls:
         text = extract_text_from_pdf_url(url)
         if not text.startswith("[Error"):
                 "text": text,
                 "metadata": {"source": url}
             })
+            feedback += f"- ✅ {url[:80]}\n"
+        else:
+            feedback += f"- ❌ {url[:80]} (failed to load)\n"
     if not sources:
+        return "❌ No valid PDFs were loaded or parsed."
     response = rag.generate(query, sources)
+    return feedback + f"\n\n### Answer:\n{response['raw_response']}\n\n_Backend used: {response['backend_used']}_"
 iface = gr.Interface(
     fn=generate_answer,
     inputs=[
         gr.Textbox(label="Your Question", placeholder="What is this document about?"),
+        gr.Textbox(lines=5, label="PDF URLs (one per line)", placeholder="https://documents.un.org/doc/undoc/gen/n23/179/72/pdf/n2317972.pdf")
     ],
     outputs=gr.Markdown(label="Model Response"),
     title="Pleias RAG PDF QA",
 )
 if __name__ == "__main__":
+    iface.launch(share=True, ssr_mode=False)