Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -1,6 +1,6 @@
 import gradio as gr
 import torch
-from transformers import AutoTokenizer, AlbertForSequenceClassification
+from transformers import AutoTokenizer, AlbertForSequenceClassification, AlbertTokenizer
 import numpy as np
 import os
 import gdown
@@ -10,31 +10,49 @@ import logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-# Define Google Drive …
-…
-    "sentiment": …
-…
-…
-…
+# Define Google Drive file IDs for each model's config and safetensors
+model_file_ids = {
+    "sentiment": {
+        "config": "11jwMJmQMGkiVZWBRQ5BLFyot1520FYIQ",
+        "model": "115N5yiu9lfw4uJE5YxHNoHauHeYSSusu"
+    },
+    "emotion": {
+        "config": "1dSxK10jbZyRpMDCm6MCRf9Jy0weOzLP9",
+        "model": "1Y3rTtPfo4zu28OhsRybdJF6czZN46I0Y"
+    },
+    "hate_speech": {
+        "config": "1QTejES8BZQs3qnxom9ymiZkLRUAZ91NP",
+        "model": "1ol2xO4XbdHwP_HHCYsnX8iVutA6javy_"
+    },
+    "sarcasm": {
+        "config": "1ypl0j1Yp_-0szR4-P1-0CMyDYBwUn5Wz",
+        "model": "1pbByLvTIHO_sT9HMeypvXbsdHsLVzTdk"
+    }
 }
 
 # Define local directory to store downloaded models
 save_dir = "./saved_models"
 os.makedirs(save_dir, exist_ok=True)
 
-# Download …
-for task, …
+# Download individual model files
+for task, files in model_file_ids.items():
     output_dir = os.path.join(save_dir, task)
-…
-…
-…
-…
-…
-…
-…
-    )
+    os.makedirs(output_dir, exist_ok=True)
+
+    config_path = os.path.join(output_dir, "config.json")
+    model_path = os.path.join(output_dir, "model.safetensors")
+
+    if not os.path.exists(config_path):
+        logger.info(f"Downloading {task} config.json from Google Drive...")
+        gdown.download(f"https://drive.google.com/uc?id={files['config']}", config_path, quiet=False)
+    else:
+        logger.info(f"Config for {task} already exists, skipping download.")
+
+    if not os.path.exists(model_path):
+        logger.info(f"Downloading {task} model.safetensors from Google Drive...")
+        gdown.download(f"https://drive.google.com/uc?id={files['model']}", model_path, quiet=False)
     else:
-        logger.info(f"Model …
+        logger.info(f"Model for {task} already exists, skipping download.")
 
 # Define model paths
 tasks = ["sentiment", "emotion", "hate_speech", "sarcasm"]
@@ -48,10 +66,11 @@ label_mappings = {
     "sarcasm": ["no", "yes"]
 }
 
-# Load tokenizer
+# Load tokenizer
 logger.info("Loading tokenizer...")
 try:
-    …
+    # Explicitly use AlbertTokenizer with SentencePiece
+    tokenizer = AlbertTokenizer.from_pretrained("ai4bharat/indic-bert", use_fast=False)
 except Exception as e:
     logger.error(f"Failed to load tokenizer: {str(e)}")
     raise
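
The hunks above stop before the model-loading and Gradio wiring, but the commit already sets up everything that step needs: per-task directories under ./saved_models, the tasks list, label_mappings, and the AlbertForSequenceClassification import. A minimal sketch of how the downloaded config.json/model.safetensors pairs would typically be consumed; the models dict and the classify helper are illustrative names that do not appear in the diff:

# Sketch (not part of the commit): load each fine-tuned ALBERT head from
# its downloaded directory and map logits back through label_mappings.
models = {}
for task in tasks:
    model_dir = os.path.join(save_dir, task)  # contains config.json + model.safetensors
    models[task] = AlbertForSequenceClassification.from_pretrained(model_dir)
    models[task].eval()

def classify(text, task):
    # Illustrative helper; tokenizer and label_mappings come from the script above.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = models[task](**inputs).logits
    return label_mappings[task][int(logits.argmax(dim=-1))]

Pre-fetching the files with gdown and pointing from_pretrained at the local directory keeps loading fully offline, which also avoids re-downloading the weights on every Space restart.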