# Spaces: Running (page-header residue captured together with the source)
import html
import os
import pickle

import gdown
import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, util
# --------- Google Drive links ----------
# Direct-download URLs for the two spreadsheets, keyed by dataset name.
DRIVE_LINKS = dict(
    books="https://drive.google.com/uc?export=download&id=1FElHiASfiVLeuHWYaqd2Q5foxWRlJT-O",
    theses="https://drive.google.com/uc?export=download&id=1K2Mtze6ZdvfKUsFMCOWlRBjDq-ZnJNrv",
)

# Local filenames the downloaded spreadsheets are stored under.
BOOKS_FILE, THESES_FILE = "book.xlsx", "theses.xlsx"
# --------- Download the files if they are not already present ----------
def download_from_drive(link, output):
    """Download ``link`` to ``output`` unless ``output`` already exists.

    Args:
        link: URL handed to ``gdown.download``.
        output: Local path the file is saved under.

    Raises:
        FileNotFoundError: If ``output`` is still missing after the download
            attempt.
    """
    if os.path.exists(output):
        return  # keep the existing copy; nothing to fetch

    gdown.download(link, output, quiet=False)

    # Check the filesystem rather than trusting the call to signal failure,
    # so a failed download is reported here instead of surfacing later as an
    # unrelated read error.
    if not os.path.exists(output):
        raise FileNotFoundError(f"Download failed: {link} -> {output}")
# Fetch both spreadsheets (books first, then theses) before anything reads them.
for _dataset, _target in (("books", BOOKS_FILE), ("theses", THESES_FILE)):
    download_from_drive(DRIVE_LINKS[_dataset], _target)
# --------- Load the data ----------
def load_data(file):
    """Read an Excel sheet and guarantee a string-typed ``Title`` column.

    Missing cells are replaced with the placeholder ``"غير متوافر"``. The
    title is taken from the first available of: an existing ``Title`` column,
    an ``العنوان`` column, or the first column of the sheet.

    Args:
        file: Whatever ``pd.read_excel`` accepts as its first argument.

    Returns:
        The loaded DataFrame, with ``Title`` present and converted to ``str``.

    Raises:
        IndexError: If the sheet has no columns at all (raised by ``iloc``).
    """
    df = pd.read_excel(file).fillna("غير متوافر")

    if "Title" in df.columns:
        titles = df["Title"]
    elif "العنوان" in df.columns:
        titles = df["العنوان"]
    else:
        titles = df.iloc[:, 0]

    # Coerce in every branch, including a pre-existing "Title" column: the
    # text search uses the pandas .str accessor, which raises on a column
    # whose dtype is numeric.
    df["Title"] = titles.astype(str)
    return df
# Load both catalogues once at import time; books is read before theses.
books_df, theses_df = map(load_data, (BOOKS_FILE, THESES_FILE))
# --------- Semantic model ----------
# NOTE(review): this checkpoint is presumably English-centric; if the titles
# are mostly Arabic a multilingual checkpoint may rank better -- confirm.
# Swapping it would also require discarding any cached *_embeddings.pkl files.
MODEL_NAME = "all-MiniLM-L6-v2"

# Single shared encoder, used both for the title embeddings and for queries.
model = SentenceTransformer(model_name_or_path=MODEL_NAME)
# --------- Build the embeddings once ----------
def build_or_load_embeddings(df, name):
    """Return title embeddings for ``df``, cached on disk per dataset.

    The cache file is ``{name}_embeddings.pkl`` in the current working
    directory. It is reused only when it unpickles cleanly and holds exactly
    one entry per row of ``df``; otherwise the titles are re-encoded with the
    module-level ``model`` and the cache file is rewritten.

    Args:
        df: DataFrame with a ``Title`` column.
        name: Dataset tag used to build the cache filename.

    Returns:
        The cached object, or the result of
        ``model.encode(titles, convert_to_numpy=True)``.
    """
    path = f"{name}_embeddings.pkl"

    # NOTE(review): unpickling can execute arbitrary code. This is only
    # acceptable while the cache file is written exclusively by this function.
    # NOTE(review): validity is judged by row count alone, so edited titles
    # with an unchanged count (or a changed model) will reuse a stale cache.
    if os.path.exists(path):
        try:
            with open(path, "rb") as f:
                cached = pickle.load(f)
            if len(cached) == len(df):
                return cached
        except (
            pickle.UnpicklingError,
            EOFError,
            AttributeError,
            ImportError,
            IndexError,
            TypeError,
            ValueError,
        ):
            # Truncated, corrupt or incompatible cache: deliberately fall
            # through and rebuild instead of crashing at startup.
            pass

    texts = df["Title"].astype(str).tolist()
    emb = model.encode(texts, convert_to_numpy=True, show_progress_bar=True)
    with open(path, "wb") as f:
        pickle.dump(emb, f)
    return emb
# Compute (or load from cache) the title embeddings: books first, then theses.
books_embeddings, theses_embeddings = (
    build_or_load_embeddings(frame, tag)
    for frame, tag in ((books_df, "books"), (theses_df, "theses"))
)
# --------- Search function ----------
def search(query, category, mode):
    """Search the selected catalogue and render the hits as an HTML table.

    Args:
        query: Free-text search string; ``None`` or blank yields a prompt
            message instead of a search.
        category: ``"Books"`` selects the books data; any other value selects
            the theses data.
        mode: ``"نصي"`` performs a case-insensitive literal substring match
            on ``Title``; any other value ranks rows by cosine similarity
            between the query embedding and the title embeddings.

    Returns:
        An HTML ``<table>`` string, or a short user-facing message when the
        query is blank or nothing matches.
    """
    if not query or not query.strip():
        return "⚠️ اكتب كلمة أو جملة للبحث"

    use_books = category == "Books"
    df = books_df if use_books else theses_df

    if mode == "نصي":
        # regex=False: the query is user text, not a pattern. Without it,
        # input such as "c++" or "(" raises re.error.
        hits = df["Title"].str.contains(query, case=False, na=False, regex=False)
        results = df[hits]
    else:
        emb = books_embeddings if use_books else theses_embeddings
        q_emb = model.encode([query], convert_to_numpy=True)
        scores = util.cos_sim(q_emb, emb)[0].cpu().numpy()
        # NOTE(review): every row is returned, best match first. Consider a
        # top-k cut-off if the rendered table becomes too large.
        results = df.iloc[np.argsort(-scores)]

    if results.empty:
        return "❌ لم يتم العثور على نتائج"

    def cells(tag, values):
        # Escape each value so spreadsheet text containing <, > or & is shown
        # literally instead of being interpreted as markup.
        return "".join(f"<{tag}>{html.escape(str(v))}</{tag}>" for v in values)

    # Collect the rows and join once rather than growing a string with +=.
    # itertuples(name=None) yields plain tuples and keeps each column's type.
    rows = [f"<tr>{cells('th', results.columns)}</tr>"]
    rows.extend(
        f"<tr>{cells('td', record)}</tr>"
        for record in results.itertuples(index=False, name=None)
    )
    return (
        "<table border=1 style='border-collapse:collapse;width:100%;'>"
        + "".join(rows)
        + "</table>"
    )
# --------- Gradio interface ----------
# The three inputs map positionally onto search(query, category, mode); the
# returned string is rendered as HTML.
_search_inputs = [
    gr.Textbox(label="اكتب كلمة البحث"),
    gr.Dropdown(["Books", "Theses"], label="الفئة"),
    gr.Radio(["نصي", "دلالي"], label="نوع البحث"),
]

iface = gr.Interface(
    title="البحث في المكتبة الرقمية",
    fn=search,
    inputs=_search_inputs,
    outputs="html",
)

# Start the web server as soon as the module is executed.
iface.launch()