rupakrpk93 committed on
Commit
72f1ae8
·
verified ·
1 Parent(s): 43e9ef6

Updated README with complete working model loading instructions

Browse files
Files changed (1) hide show
  1. README.md +64 -26
README.md CHANGED
@@ -66,52 +66,90 @@ config_path = hf_hub_download(
66
  filename="config.json"
67
  )
68
 
69
- # Step 3: Load the model (you need the model class definition)
70
- # Note: You'll need to define the GPT model architecture
71
- # The model architecture code is available in the repository
 
 
 
72
 
73
- # Step 4: Generate text
74
- def generate_odia_text(prompt, max_length=100):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  # Encode the prompt
76
  input_ids = tokenizer.encode_as_ids(prompt)
77
- input_tensor = torch.tensor(input_ids).unsqueeze(0)
78
 
79
- # Generate (assuming model is loaded)
80
- # output = model.generate(input_tensor, max_length)
 
81
 
82
  # Decode the output
83
- # generated_text = tokenizer.decode(output.squeeze().tolist())
84
- # return generated_text
85
- pass
86
  ```
87
 
88
  ### Example Usage
89
 
90
  ```python
91
  # Example 1: Simple text generation
92
- prompt = "ବର୍ଷା"
93
- # generated_text = generate_odia_text(prompt, max_length=200)
94
- # print(generated_text)
 
95
 
96
  # Example 2: Encode and decode text
97
  text = "ଓଡିଆ ଭାଷା ଏକ ସୁନ୍ଦର ଭାଷା"
98
  encoded = tokenizer.encode_as_ids(text)
 
99
  print(f"Encoded: {encoded}")
100
 
101
  decoded = tokenizer.decode(encoded)
102
  print(f"Decoded: {decoded}")
103
  ```
104
 
105
- ### Full Implementation Example
106
-
107
- For a complete working example with the model architecture:
108
-
109
- ```python
110
- # The full model architecture and implementation
111
- # is available in the repository files.
112
- # Please refer to the model implementation for complete code.
113
- ```
114
-
115
  ## Training Details
116
 
117
  ### Training Hyperparameters
@@ -127,15 +165,15 @@ For a complete working example with the model architecture:
127
  The model was trained on a combination of:
128
  1. **OdiaGenAIdata/fine_web2_odia_pt** - High-quality Odia web text
129
  2. **bigscience-data/roots_indic-or_indic_nlp_corpus** - Odia corpus from Indic NLP
 
130
 
131
- Total training samples: ~3.8M texts
132
 
133
  ## Limitations
134
 
135
  - Maximum context length is 256 tokens
136
  - Trained specifically on Odia text, may not perform well on other languages
137
  - May generate repetitive text for very long sequences
138
- - The model requires the custom GPT architecture code to run
139
 
140
  ## Intended Use
141
 
 
66
  filename="config.json"
67
  )
68
 
69
+ # Step 3: Load the model architecture and weights
70
+ # First, download the model architecture file
71
+ architecture_path = hf_hub_download(
72
+ repo_id="rupakrpk93/odia_tokenizers_test",
73
+ filename="model_architecture.py"
74
+ )
75
 
76
+ # Import the model classes
77
+ import sys
78
+ import importlib.util
79
+ spec = importlib.util.spec_from_file_location("model_architecture", architecture_path)
80
+ model_module = importlib.util.module_from_spec(spec)
81
+ sys.modules["model_architecture"] = model_module
82
+ spec.loader.exec_module(model_module)
83
+
84
+ # Import the classes we need
85
+ GPTConfig = model_module.GPTConfig
86
+ GPT = model_module.GPT
87
+
88
+ # Create model configuration
89
+ config = GPTConfig()
90
+
91
+ # Initialize and load the model
92
+ device = "cuda" if torch.cuda.is_available() else "cpu"
93
+ model = GPT(config)
94
+
95
+ # Load the pretrained weights
96
+ checkpoint = torch.load(model_path, map_location=device)
97
+
98
+ # Check if the state_dict is nested and extract it if necessary
99
+ if isinstance(checkpoint, dict) and 'model' in checkpoint:
100
+ state_dict = checkpoint['model']
101
+ else:
102
+ state_dict = checkpoint
103
+
104
+ # Remove the 'model.' prefix from keys if present
105
+ from collections import OrderedDict
106
+ new_state_dict = OrderedDict()
107
+ for k, v in state_dict.items():
108
+ if k.startswith('model.'):
109
+ new_state_dict[k[6:]] = v # Remove 'model.' prefix
110
+ else:
111
+ new_state_dict[k] = v
112
+
113
+ model.load_state_dict(new_state_dict)
114
+
115
+ model = model.to(device)
116
+ model.eval()
117
+ print(f"Model loaded successfully on {device}")
118
+
119
+ # Step 4: Generate text function
120
+ def generate_odia_text(prompt, max_length=100, temperature=0.8):
121
  # Encode the prompt
122
  input_ids = tokenizer.encode_as_ids(prompt)
123
+ input_tensor = torch.tensor(input_ids).unsqueeze(0).to(device)
124
 
125
+ # Generate
126
+ with torch.no_grad():
127
+ output = model.generate(input_tensor, max_length, temperature=temperature)
128
 
129
  # Decode the output
130
+ generated_text = tokenizer.decode(output.squeeze().tolist())
131
+ return generated_text
 
132
  ```
133
 
134
  ### Example Usage
135
 
136
  ```python
137
  # Example 1: Simple text generation
138
+ prompt = "ସେ କାଲି ସ୍କୁଲକୁ"
139
+ generated_text = generate_odia_text(prompt, max_length=200)
140
+ print(f"Prompt: {prompt}")
141
+ print(f"Generated: {generated_text}")
142
 
143
  # Example 2: Encode and decode text
144
  text = "ଓଡିଆ ଭାଷା ଏକ ସୁନ୍ଦର ଭାଷା"
145
  encoded = tokenizer.encode_as_ids(text)
146
+ print(f"Original: {text}")
147
  print(f"Encoded: {encoded}")
148
 
149
  decoded = tokenizer.decode(encoded)
150
  print(f"Decoded: {decoded}")
151
  ```
152
 
 
 
 
 
 
 
 
 
 
 
153
  ## Training Details
154
 
155
  ### Training Hyperparameters
 
165
  The model was trained on a combination of:
166
  1. **OdiaGenAIdata/fine_web2_odia_pt** - High-quality Odia web text
167
  2. **bigscience-data/roots_indic-or_indic_nlp_corpus** - Odia corpus from Indic NLP
168
+ 3. **Custom curated Odia dataset** - Additional hand-curated Odia texts
169
 
170
+ Total training samples: ~4M+ texts
171
 
172
  ## Limitations
173
 
174
  - Maximum context length is 256 tokens
175
  - Trained specifically on Odia text, may not perform well on other languages
176
  - May generate repetitive text for very long sequences
 
177
 
178
  ## Intended Use
179