"""Gradio app for searching a digital library of books and theses.

On start-up the two spreadsheets are downloaded from Google Drive (if not
already present), loaded into DataFrames, and their titles are embedded with
a sentence-transformers model. The UI offers a plain substring search and a
semantic (cosine-similarity) search over the ``Title`` column.
"""
import html
import os
import pickle

import gdown
import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, util

# --------- Google Drive links ----------
DRIVE_LINKS = {
    "books": "https://drive.google.com/uc?export=download&id=1FElHiASfiVLeuHWYaqd2Q5foxWRlJT-O",
    "theses": "https://drive.google.com/uc?export=download&id=1K2Mtze6ZdvfKUsFMCOWlRBjDq-ZnJNrv",
}

BOOKS_FILE = "book.xlsx"
THESES_FILE = "theses.xlsx"


# --------- Download the files if they are missing ----------
def download_from_drive(link, output):
    """Download *link* to the local path *output* unless it already exists.

    Raises:
        FileNotFoundError: if the file is still missing after the download
            attempt, so the failure is reported here rather than later when
            the spreadsheet is read.
    """
    if os.path.exists(output):
        return
    gdown.download(link, output, quiet=False)
    if not os.path.exists(output):
        raise FileNotFoundError(f"Failed to download {output!r} from {link}")


download_from_drive(DRIVE_LINKS["books"], BOOKS_FILE)
download_from_drive(DRIVE_LINKS["theses"], THESES_FILE)


# --------- Read the data ----------
def load_data(file):
    """Read an Excel file into a DataFrame that has a string ``Title`` column.

    Missing cells are replaced with the placeholder "غير متوافر". When there
    is no ``Title`` column it is created from the "العنوان" column if that
    exists, otherwise from the first column.
    """
    df = pd.read_excel(file).fillna("غير متوافر")
    if "Title" not in df.columns:
        df["Title"] = df["العنوان"] if "العنوان" in df.columns else df.iloc[:, 0]
    # Always normalise to str: an existing numeric "Title" column would
    # otherwise break the `.str` accessor used by the text search.
    df["Title"] = df["Title"].astype(str)
    return df


books_df = load_data(BOOKS_FILE)
theses_df = load_data(THESES_FILE)

# --------- Semantic model ----------
MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)


# --------- Build the embeddings once ----------
def _load_cached_embeddings(path, expected_rows):
    """Return the embeddings pickled at *path*, or None if unusable.

    The cache is rejected when the file is missing, cannot be unpickled, or
    its length differs from *expected_rows*.
    """
    if not os.path.exists(path):
        return None
    try:
        # NOTE(security): pickle executes arbitrary code on load. This file is
        # a cache written by this script; never point it at untrusted data.
        with open(path, "rb") as f:
            emb = pickle.load(f)
    except (pickle.UnpicklingError, EOFError, AttributeError, ImportError, IndexError):
        return None
    return emb if len(emb) == expected_rows else None


def build_or_load_embeddings(df, name):
    """Return title embeddings for *df*, cached in ``{name}_embeddings.pkl``.

    A cached file is reused only when it has one entry per row of *df*;
    otherwise the titles are re-encoded and the cache is rewritten.
    NOTE(review): the row-count check cannot detect edited titles when the
    number of rows is unchanged — delete the cache file after such edits.
    """
    path = f"{name}_embeddings.pkl"
    cached = _load_cached_embeddings(path, len(df))
    if cached is not None:
        return cached

    texts = df["Title"].astype(str).tolist()
    emb = model.encode(texts, convert_to_numpy=True, show_progress_bar=True)
    with open(path, "wb") as f:
        pickle.dump(emb, f)
    return emb


books_embeddings = build_or_load_embeddings(books_df, "books")
theses_embeddings = build_or_load_embeddings(theses_df, "theses")


# --------- Search function ----------
def _render_table(results):
    """Render the DataFrame *results* as an HTML table with escaped cells."""
    header = "".join(f"<th>{html.escape(str(col))}</th>" for col in results.columns)
    parts = [
        '<table border="1" style="border-collapse:collapse;width:100%">',
        f"<tr>{header}</tr>",
    ]
    for row in results.itertuples(index=False, name=None):
        # Escape every value: the cells come straight from the spreadsheets
        # and must not be interpreted as markup by the browser.
        cells = "".join(f"<td>{html.escape(str(val))}</td>" for val in row)
        parts.append(f"<tr>{cells}</tr>")
    parts.append("</table>")
    return "".join(parts)


def search(query, category, mode):
    """Search the selected collection and return the matches as HTML.

    Args:
        query: text typed by the user.
        category: "Books" selects the books collection; any other value
            selects the theses collection.
        mode: "نصي" performs a case-insensitive literal substring match on
            ``Title``; any other value ranks every row by cosine similarity
            between the query embedding and the title embeddings.

    Returns:
        An HTML table of results, or a short message when the query is empty
        or nothing matches.
    """
    query = (query or "").strip()
    if not query:
        return "⚠️ اكتب كلمة أو جملة للبحث"

    is_books = category == "Books"
    df = books_df if is_books else theses_df
    emb = books_embeddings if is_books else theses_embeddings

    if mode == "نصي":
        # regex=False: treat the query literally so input such as "C++" or
        # "(" does not raise a regex compilation error.
        matches = df["Title"].str.contains(query, case=False, na=False, regex=False)
        results = df[matches]
    else:
        q_emb = model.encode([query], convert_to_numpy=True)
        scores = util.cos_sim(q_emb, emb)[0].cpu().numpy()
        # Negate so argsort yields the highest similarity first.
        results = df.iloc[np.argsort(-scores)]

    if results.empty:
        return "❌ لم يتم العثور على نتائج"

    return _render_table(results)


# --------- Gradio interface ----------
iface = gr.Interface(
    fn=search,
    inputs=[
        gr.Textbox(label="اكتب كلمة البحث"),
        gr.Dropdown(["Books", "Theses"], label="الفئة"),
        gr.Radio(["نصي", "دلالي"], label="نوع البحث"),
    ],
    outputs="html",
    title="البحث في المكتبة الرقمية",
)

iface.launch()