aelsaeed's picture
Upload app.py
f0d9fe9 verified
raw
history blame
3.16 kB
import os
import pandas as pd
import numpy as np
import requests
import gradio as gr
import gdown
import pickle
# Local filenames the downloaded spreadsheets are cached under.
BOOKS_FILE = "book.xlsx"
THESES_FILE = "theses.xlsx"
# Google Drive file IDs of the data files (used by download_from_drive).
DRIVE_LINKS = {
"books": "1FElHiASfiVLeuHWYaqd2Q5foxWRlJT-O",
"theses": "1K2Mtze6ZdvfKUsFMCOWlRBjDq-ZnJNrv"
}
def download_from_drive(file_id, output):
    """Download a publicly shared Google Drive file to *output* using gdown.

    `file_id` is the Drive file ID (not a full URL); download is silent.
    """
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    gdown.download(download_url, output, quiet=True)
# Fetch the spreadsheets from Google Drive on first run (skip if already present).
for _name, _path in (("books", BOOKS_FILE), ("theses", THESES_FILE)):
    if not os.path.exists(_path):
        download_from_drive(DRIVE_LINKS[_name], _path)
# Data loading.
def load_data(file):
    """Read an Excel sheet into a DataFrame, guaranteeing a "Title" column.

    Missing cells are filled with the Arabic placeholder "غير متوافر".
    If no "Title" column exists, it is derived from the Arabic "العنوان"
    column when present, otherwise from the first column.
    """
    frame = pd.read_excel(file).fillna("غير متوافر")
    if "Title" in frame.columns:
        return frame
    # Fall back to the Arabic title column, then to the first column.
    source = frame["العنوان"] if "العنوان" in frame.columns else frame.iloc[:, 0]
    frame["Title"] = source.astype(str)
    return frame
# Load both datasets once at startup.
books_df, theses_df = (load_data(path) for path in (BOOKS_FILE, THESES_FILE))
# Hugging Face Inference API configuration.
API_TOKEN = os.environ.get("HF_TOKEN")  # None if the HF_TOKEN secret is unset
API_URL = "https://api-inference.huggingface.co/models/aelsaeed/all-MiniLM-L6-v2-api"
# BUG FIX: the original read the undefined name HF_TOKEN here (the variable
# defined above is API_TOKEN), which raised NameError at import time.
HEADERS = {"Authorization": f"Bearer {API_TOKEN}"}
def get_embedding(text):
    """Embed *text* via the HF Inference API and return it as a 1-D numpy array.

    Improvements over the original: a request timeout (the call could hang
    indefinitely) and an explicit HTTP status check (a non-200 response used
    to surface only as a confusing JSON/indexing error).
    """
    response = requests.post(
        API_URL, headers=HEADERS, json={"inputs": [text]}, timeout=30
    )
    response.raise_for_status()  # fail loudly on API errors (e.g. 401/503)
    return np.array(response.json()[0])
def _render_table(results):
    """Render a DataFrame as an HTML table, escaping every header and cell.

    SECURITY FIX: the original interpolated spreadsheet values into HTML
    unescaped, allowing markup/script injection from the data files.
    """
    from html import escape

    header_cells = "".join(f"<th>{escape(str(col))}</th>" for col in results.columns)
    body_rows = []
    for _, row in results.iterrows():
        cells = "".join(f"<td>{escape(str(val))}</td>" for val in row.values)
        body_rows.append(f"<tr>{cells}</tr>")
    return (
        "<table border=1 style='border-collapse:collapse;width:100%;'>"
        f"<tr>{header_cells}</tr>" + "".join(body_rows) + "</table>"
    )

def search(query, category, mode):
    """Search the selected dataset and return the matches as an HTML table.

    query    -- the user's search text; blank input returns a warning message.
    category -- "Books" selects books_df, anything else selects theses_df.
    mode     -- "نصي" (textual) does a case-insensitive substring match on
                Title; otherwise a semantic search ranks all rows by cosine
                similarity between API embeddings.
    """
    if not query.strip():
        return "⚠️ اكتب كلمة أو جملة للبحث"
    df = books_df if category == "Books" else theses_df
    if mode == "نصي":
        results = df[df["Title"].str.contains(query, case=False, na=False)]
    else:
        emb_cache_file = f"{category}_embeddings.pkl"
        if os.path.exists(emb_cache_file):
            # NOTE(review): pickle.load is only safe because this cache file is
            # written by this app itself; never point it at untrusted input.
            with open(emb_cache_file, "rb") as f:
                embeddings = pickle.load(f)
        else:
            # One API call per title; cache the result so this happens once.
            embeddings = np.array([get_embedding(t) for t in df["Title"].tolist()])
            with open(emb_cache_file, "wb") as f:
                pickle.dump(embeddings, f)
        query_emb = get_embedding(query)
        # Cosine similarity of every row embedding against the query embedding.
        scores = np.dot(embeddings, query_emb) / (
            np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query_emb)
        )
        results = df.iloc[np.argsort(-scores)]  # best match first
    if results.empty:
        return "❌ لم يتم العثور على نتائج"
    return _render_table(results)
# Assemble the Gradio UI and start serving.
query_box = gr.Textbox(label="اكتب كلمة البحث")
category_choice = gr.Dropdown(["Books", "Theses"], label="الفئة")
mode_choice = gr.Radio(["نصي", "دلالي"], label="نوع البحث")

iface = gr.Interface(
    fn=search,
    inputs=[query_box, category_choice, mode_choice],
    outputs="html",
    title="البحث في المكتبة الرقمية",
)
iface.launch()