# Page header captured with this file (not part of the script):
#   Spaces: Sleeping
#   File size: 568 Bytes
#   ac59d2f
import torch
import pandas as pd
from sentence_transformers import SentenceTransformer
# Precompute a sentence embedding for every answer in the dataset and cache
# the result on disk as a single tensor.

# Path handed to SentenceTransformer to load the model.
MODEL_PATH = './muril_combined_multilingual_model'
# CSV that must provide the 'question' and 'answer' columns.
CSV_PATH = './muril_multilingual_dataset.csv'
# Destination file for the tensor of answer embeddings.
EMB_PATH = './answer_embeddings.pt'

# NOTE(review): the leading "π" looks like a mis-decoded emoji; it is kept
# as-is because the original character cannot be recovered from this file.
print("π Precomputing embeddings...")

model = SentenceTransformer(MODEL_PATH)

# Rows missing a question or an answer are dropped before encoding; the list
# handed to encode() follows the row order of this filtered frame.
df = pd.read_csv(CSV_PATH).dropna(subset=['question', 'answer'])
answers = df['answer'].tolist()

# convert_to_tensor=True is passed so the result can go straight to
# torch.save().
answer_embeddings = model.encode(answers, convert_to_tensor=True)
torch.save(answer_embeddings, EMB_PATH)

# The status message has to be one single-line literal: it was previously
# split across two lines, leaving an unterminated f-string (SyntaxError).
# NOTE(review): "β" followed by a line break matches a mis-decoded "✅"
# (UTF-8 E2 9C 85); restored here - confirm against the upstream file.
print(f"✅ Saved {len(answers)} embeddings to {EMB_PATH}")