Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,10 +1,23 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import requests
|
| 3 |
import fitz # PyMuPDF
|
|
|
|
|
|
|
| 4 |
from pleias_rag_interface import RAGWithCitations
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
def extract_text_from_pdf_url(url):
|
| 10 |
try:
|
|
@@ -20,8 +33,9 @@ def extract_text_from_pdf_url(url):
|
|
| 20 |
|
| 21 |
def generate_answer(query, pdf_urls_str):
|
| 22 |
pdf_urls = [url.strip() for url in pdf_urls_str.strip().split("\n") if url.strip()]
|
| 23 |
-
|
| 24 |
sources = []
|
|
|
|
|
|
|
| 25 |
for url in pdf_urls:
|
| 26 |
text = extract_text_from_pdf_url(url)
|
| 27 |
if not text.startswith("[Error"):
|
|
@@ -29,19 +43,21 @@ def generate_answer(query, pdf_urls_str):
|
|
| 29 |
"text": text,
|
| 30 |
"metadata": {"source": url}
|
| 31 |
})
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
| 33 |
if not sources:
|
| 34 |
-
return "No valid PDFs
|
| 35 |
|
| 36 |
response = rag.generate(query, sources)
|
| 37 |
-
return f"
|
| 38 |
|
| 39 |
-
# Gradio UI
|
| 40 |
iface = gr.Interface(
|
| 41 |
fn=generate_answer,
|
| 42 |
inputs=[
|
| 43 |
gr.Textbox(label="Your Question", placeholder="What is this document about?"),
|
| 44 |
-
gr.Textbox(lines=5, label="PDF URLs (one per line)", placeholder="https://
|
| 45 |
],
|
| 46 |
outputs=gr.Markdown(label="Model Response"),
|
| 47 |
title="Pleias RAG PDF QA",
|
|
@@ -49,4 +65,4 @@ iface = gr.Interface(
|
|
| 49 |
)
|
| 50 |
|
| 51 |
if __name__ == "__main__":
|
| 52 |
-
iface.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import requests
|
| 3 |
import fitz # PyMuPDF
|
| 4 |
+
import os
|
| 5 |
+
from huggingface_hub import snapshot_download
|
| 6 |
from pleias_rag_interface import RAGWithCitations
|
| 7 |
|
| 8 |
+
# Fetch the model snapshot when this module is imported (i.e. at application
# start-up) so the RAG wrapper can load it from a local directory.
MODEL_REPO = "PleIAs/Pleias-RAG-350M"
MODEL_CACHE_DIR = "./pleias_model"

# The mere existence of the directory is treated as "already downloaded".
# NOTE(review): presumably an interrupted download would leave the directory
# in place and never be retried -- confirm whether that matters here.
if not os.path.exists(MODEL_CACHE_DIR):
    snapshot_download(repo_id=MODEL_REPO, local_dir=MODEL_CACHE_DIR, local_dir_use_symlinks=False)

# Initialize the Pleias RAG model from the local snapshot, then apply the
# pad-token patch: the end-of-sequence token is reused as the padding token.
rag = RAGWithCitations(model_path_or_name=MODEL_CACHE_DIR)
if hasattr(rag, "tokenizer"):
    rag.tokenizer.pad_token = rag.tokenizer.eos_token
    # The model-config patch reads from the tokenizer, so it is only applied
    # when the tokenizer attribute exists; otherwise it would raise
    # AttributeError for a wrapper exposing `model` without `tokenizer`.
    if hasattr(rag, "model"):
        rag.model.config.pad_token_id = rag.tokenizer.eos_token_id
|
| 21 |
|
| 22 |
def extract_text_from_pdf_url(url):
|
| 23 |
try:
|
|
|
|
| 33 |
|
| 34 |
def generate_answer(query, pdf_urls_str):
|
| 35 |
pdf_urls = [url.strip() for url in pdf_urls_str.strip().split("\n") if url.strip()]
|
|
|
|
| 36 |
sources = []
|
| 37 |
+
feedback = "### Loaded PDFs:\n"
|
| 38 |
+
|
| 39 |
for url in pdf_urls:
|
| 40 |
text = extract_text_from_pdf_url(url)
|
| 41 |
if not text.startswith("[Error"):
|
|
|
|
| 43 |
"text": text,
|
| 44 |
"metadata": {"source": url}
|
| 45 |
})
|
| 46 |
+
feedback += f"- ✅ {url[:80]}\n"
|
| 47 |
+
else:
|
| 48 |
+
feedback += f"- ❌ {url[:80]} (failed to load)\n"
|
| 49 |
+
|
| 50 |
if not sources:
|
| 51 |
+
return "❌ No valid PDFs were loaded or parsed."
|
| 52 |
|
| 53 |
response = rag.generate(query, sources)
|
| 54 |
+
return feedback + f"\n\n### Answer:\n{response['raw_response']}\n\n_Backend used: {response['backend_used']}_"
|
| 55 |
|
|
|
|
| 56 |
iface = gr.Interface(
|
| 57 |
fn=generate_answer,
|
| 58 |
inputs=[
|
| 59 |
gr.Textbox(label="Your Question", placeholder="What is this document about?"),
|
| 60 |
+
gr.Textbox(lines=5, label="PDF URLs (one per line)", placeholder="https://documents.un.org/doc/undoc/gen/n23/179/72/pdf/n2317972.pdf")
|
| 61 |
],
|
| 62 |
outputs=gr.Markdown(label="Model Response"),
|
| 63 |
title="Pleias RAG PDF QA",
|
|
|
|
| 65 |
)
|
| 66 |
|
| 67 |
# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    launch_options = {"share": True, "ssr_mode": False}
    iface.launch(**launch_options)
|