Muril-Model / precompute_embeddings.py
Sai809701
added all files
ac59d2f
raw
history blame contribute delete
568 Bytes
import torch
import pandas as pd
from sentence_transformers import SentenceTransformer
# Precompute sentence embeddings for every answer in the dataset and store
# them in a single file, so they do not have to be re-encoded later.

# Inputs: the SentenceTransformer model directory and the Q/A dataset CSV.
MODEL_PATH = './muril_combined_multilingual_model'
CSV_PATH = './muril_multilingual_dataset.csv'
# Output: file that receives the encoded answers via torch.save.
EMB_PATH = './answer_embeddings.pt'

# The status glyphs below were mis-decoded (UTF-8 bytes read through a legacy
# single-byte code page); the intended characters are restored here.
print("🔄 Precomputing embeddings...")

model = SentenceTransformer(MODEL_PATH)

# Rows missing either 'question' or 'answer' are dropped before encoding.
# NOTE(review): a consumer that pairs these embeddings with CSV rows by
# position presumably needs to apply the same dropna filter - confirm.
df = pd.read_csv(CSV_PATH).dropna(subset=['question', 'answer'])
answers = df['answer'].tolist()

# Only the answers are encoded; convert_to_tensor=True asks encode() for a
# torch tensor, which torch.save serializes directly.
answer_embeddings = model.encode(answers, convert_to_tensor=True)
torch.save(answer_embeddings, EMB_PATH)

print(f"✅ Saved {len(answers)} embeddings to {EMB_PATH}")