edouardlgp committed on
Commit
355d8e8
·
verified ·
1 Parent(s): d7345e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -9
app.py CHANGED
@@ -1,10 +1,23 @@
1
  import gradio as gr
2
  import requests
3
  import fitz # PyMuPDF
 
 
4
  from pleias_rag_interface import RAGWithCitations
5
 
6
- # Initialize the Pleias RAG model
7
- rag = RAGWithCitations(model_path_or_name="PleIAs/Pleias-RAG-350M")
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  def extract_text_from_pdf_url(url):
10
  try:
@@ -20,8 +33,9 @@ def extract_text_from_pdf_url(url):
20
 
21
  def generate_answer(query, pdf_urls_str):
22
  pdf_urls = [url.strip() for url in pdf_urls_str.strip().split("\n") if url.strip()]
23
-
24
  sources = []
 
 
25
  for url in pdf_urls:
26
  text = extract_text_from_pdf_url(url)
27
  if not text.startswith("[Error"):
@@ -29,19 +43,21 @@ def generate_answer(query, pdf_urls_str):
29
  "text": text,
30
  "metadata": {"source": url}
31
  })
32
-
 
 
 
33
  if not sources:
34
- return "No valid PDFs found or unable to extract text."
35
 
36
  response = rag.generate(query, sources)
37
- return f"### Query:\n{query}\n\n### Answer:\n{response['raw_response']}\n\n### Source Info:\nBackend used: {response['backend_used']}"
38
 
39
- # Gradio UI
40
  iface = gr.Interface(
41
  fn=generate_answer,
42
  inputs=[
43
  gr.Textbox(label="Your Question", placeholder="What is this document about?"),
44
- gr.Textbox(lines=5, label="PDF URLs (one per line)", placeholder="https://example.com/doc1.pdf\nhttps://example.com/doc2.pdf")
45
  ],
46
  outputs=gr.Markdown(label="Model Response"),
47
  title="Pleias RAG PDF QA",
@@ -49,4 +65,4 @@ iface = gr.Interface(
49
  )
50
 
51
  if __name__ == "__main__":
52
- iface.launch()
 
1
  import gradio as gr
2
  import requests
3
  import fitz # PyMuPDF
4
+ import os
5
+ from huggingface_hub import snapshot_download
6
  from pleias_rag_interface import RAGWithCitations
7
 
8
+ # Download the model at app startup if it is not already cached locally
9
+ MODEL_REPO = "PleIAs/Pleias-RAG-350M"
10
+ MODEL_CACHE_DIR = "./pleias_model"
11
+
12
+ if not os.path.exists(MODEL_CACHE_DIR):
13
+ snapshot_download(repo_id=MODEL_REPO, local_dir=MODEL_CACHE_DIR, local_dir_use_symlinks=False)
14
+
15
+ # Initialize the Pleias RAG model with pad token config patch
16
+ rag = RAGWithCitations(model_path_or_name=MODEL_CACHE_DIR)
17
+ if hasattr(rag, "tokenizer"):
18
+ rag.tokenizer.pad_token = rag.tokenizer.eos_token
19
+ if hasattr(rag, "model"):
20
+ rag.model.config.pad_token_id = rag.tokenizer.eos_token_id
21
 
22
  def extract_text_from_pdf_url(url):
23
  try:
 
33
 
34
  def generate_answer(query, pdf_urls_str):
35
  pdf_urls = [url.strip() for url in pdf_urls_str.strip().split("\n") if url.strip()]
 
36
  sources = []
37
+ feedback = "### Loaded PDFs:\n"
38
+
39
  for url in pdf_urls:
40
  text = extract_text_from_pdf_url(url)
41
  if not text.startswith("[Error"):
 
43
  "text": text,
44
  "metadata": {"source": url}
45
  })
46
+ feedback += f"- ✅ {url[:80]}\n"
47
+ else:
48
+ feedback += f"- ❌ {url[:80]} (failed to load)\n"
49
+
50
  if not sources:
51
+ return "No valid PDFs were loaded or parsed."
52
 
53
  response = rag.generate(query, sources)
54
+ return feedback + f"\n\n### Answer:\n{response['raw_response']}\n\n_Backend used: {response['backend_used']}_"
55
 
 
56
  iface = gr.Interface(
57
  fn=generate_answer,
58
  inputs=[
59
  gr.Textbox(label="Your Question", placeholder="What is this document about?"),
60
+ gr.Textbox(lines=5, label="PDF URLs (one per line)", placeholder="https://documents.un.org/doc/undoc/gen/n23/179/72/pdf/n2317972.pdf")
61
  ],
62
  outputs=gr.Markdown(label="Model Response"),
63
  title="Pleias RAG PDF QA",
 
65
  )
66
 
67
  if __name__ == "__main__":
68
+ iface.launch(share=True, ssr_mode=False)