File size: 568 Bytes
ac59d2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import torch
import pandas as pd
from sentence_transformers import SentenceTransformer

# Input/output locations. All three are relative paths, so they resolve
# against the directory the script is launched from.
MODEL_PATH = './muril_combined_multilingual_model'  # SentenceTransformer model directory
CSV_PATH = './muril_multilingual_dataset.csv'       # must contain 'question' and 'answer' columns
EMB_PATH = './answer_embeddings.pt'                 # destination for the saved tensor

# Status messages use the real emoji (U+1F504, U+2705); the previous literals
# held the mis-decoded byte sequences of those characters.
print("🔄 Precomputing embeddings...")
model = SentenceTransformer(MODEL_PATH)

# Rows missing either field are discarded, so every remaining answer comes
# from a complete question/answer row.
df = pd.read_csv(CSV_PATH).dropna(subset=['question', 'answer'])

answers = df['answer'].tolist()

# convert_to_tensor=True makes encode() return a single torch tensor rather
# than a list/array, which is what torch.save() serialises below.
answer_embeddings = model.encode(answers, convert_to_tensor=True)

# NOTE(review): the tensor is saved on whatever device encode() produced it
# on. If that is a GPU, loading on a CPU-only machine presumably needs
# torch.load(..., map_location='cpu') -- confirm against the consumer.
torch.save(answer_embeddings, EMB_PATH)

print(f"✅ Saved {len(answers)} embeddings to {EMB_PATH}")